2
0

MachineTraceMetrics.cpp 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327
  1. //===- lib/CodeGen/MachineTraceMetrics.cpp ----------------------*- C++ -*-===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. #include "llvm/CodeGen/MachineTraceMetrics.h"
  10. #include "llvm/ADT/PostOrderIterator.h"
  11. #include "llvm/ADT/SparseSet.h"
  12. #include "llvm/CodeGen/MachineBasicBlock.h"
  13. #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
  14. #include "llvm/CodeGen/MachineLoopInfo.h"
  15. #include "llvm/CodeGen/MachineRegisterInfo.h"
  16. #include "llvm/CodeGen/Passes.h"
  17. #include "llvm/MC/MCSubtargetInfo.h"
  18. #include "llvm/Support/Debug.h"
  19. #include "llvm/Support/Format.h"
  20. #include "llvm/Support/raw_ostream.h"
  21. #include "llvm/Target/TargetInstrInfo.h"
  22. #include "llvm/Target/TargetRegisterInfo.h"
  23. #include "llvm/Target/TargetSubtargetInfo.h"
  24. using namespace llvm;
  25. #define DEBUG_TYPE "machine-trace-metrics"
  26. char MachineTraceMetrics::ID = 0;
  27. char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID;
  28. INITIALIZE_PASS_BEGIN(MachineTraceMetrics,
  29. "machine-trace-metrics", "Machine Trace Metrics", false, true)
  30. INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
  31. INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
  32. INITIALIZE_PASS_END(MachineTraceMetrics,
  33. "machine-trace-metrics", "Machine Trace Metrics", false, true)
  34. MachineTraceMetrics::MachineTraceMetrics()
  35. : MachineFunctionPass(ID), MF(nullptr), TII(nullptr), TRI(nullptr),
  36. MRI(nullptr), Loops(nullptr) {
  37. std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr);
  38. }
  39. void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const {
  40. AU.setPreservesAll();
  41. AU.addRequired<MachineBranchProbabilityInfo>();
  42. AU.addRequired<MachineLoopInfo>();
  43. MachineFunctionPass::getAnalysisUsage(AU);
  44. }
  45. bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
  46. MF = &Func;
  47. const TargetSubtargetInfo &ST = MF->getSubtarget();
  48. TII = ST.getInstrInfo();
  49. TRI = ST.getRegisterInfo();
  50. MRI = &MF->getRegInfo();
  51. Loops = &getAnalysis<MachineLoopInfo>();
  52. SchedModel.init(ST.getSchedModel(), &ST, TII);
  53. BlockInfo.resize(MF->getNumBlockIDs());
  54. ProcResourceCycles.resize(MF->getNumBlockIDs() *
  55. SchedModel.getNumProcResourceKinds());
  56. return false;
  57. }
  58. void MachineTraceMetrics::releaseMemory() {
  59. MF = nullptr;
  60. BlockInfo.clear();
  61. for (unsigned i = 0; i != TS_NumStrategies; ++i) {
  62. delete Ensembles[i];
  63. Ensembles[i] = nullptr;
  64. }
  65. }
  66. //===----------------------------------------------------------------------===//
  67. // Fixed block information
  68. //===----------------------------------------------------------------------===//
  69. //
  70. // The number of instructions in a basic block and the CPU resources used by
  71. // those instructions don't depend on any given trace strategy.
  72. /// Compute the resource usage in basic block MBB.
  73. const MachineTraceMetrics::FixedBlockInfo*
  74. MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
  75. assert(MBB && "No basic block");
  76. FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
  77. if (FBI->hasResources())
  78. return FBI;
  79. // Compute resource usage in the block.
  80. FBI->HasCalls = false;
  81. unsigned InstrCount = 0;
  82. // Add up per-processor resource cycles as well.
  83. unsigned PRKinds = SchedModel.getNumProcResourceKinds();
  84. SmallVector<unsigned, 32> PRCycles(PRKinds);
  85. for (const auto &MI : *MBB) {
  86. if (MI.isTransient())
  87. continue;
  88. ++InstrCount;
  89. if (MI.isCall())
  90. FBI->HasCalls = true;
  91. // Count processor resources used.
  92. if (!SchedModel.hasInstrSchedModel())
  93. continue;
  94. const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI);
  95. if (!SC->isValid())
  96. continue;
  97. for (TargetSchedModel::ProcResIter
  98. PI = SchedModel.getWriteProcResBegin(SC),
  99. PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
  100. assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
  101. PRCycles[PI->ProcResourceIdx] += PI->Cycles;
  102. }
  103. }
  104. FBI->InstrCount = InstrCount;
  105. // Scale the resource cycles so they are comparable.
  106. unsigned PROffset = MBB->getNumber() * PRKinds;
  107. for (unsigned K = 0; K != PRKinds; ++K)
  108. ProcResourceCycles[PROffset + K] =
  109. PRCycles[K] * SchedModel.getResourceFactor(K);
  110. return FBI;
  111. }
  112. ArrayRef<unsigned>
  113. MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const {
  114. assert(BlockInfo[MBBNum].hasResources() &&
  115. "getResources() must be called before getProcResourceCycles()");
  116. unsigned PRKinds = SchedModel.getNumProcResourceKinds();
  117. assert((MBBNum+1) * PRKinds <= ProcResourceCycles.size());
  118. return makeArrayRef(ProcResourceCycles.data() + MBBNum * PRKinds, PRKinds);
  119. }
  120. //===----------------------------------------------------------------------===//
  121. // Ensemble utility functions
  122. //===----------------------------------------------------------------------===//
  123. MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
  124. : MTM(*ct) {
  125. BlockInfo.resize(MTM.BlockInfo.size());
  126. unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  127. ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
  128. ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
  129. }
  130. // Virtual destructor serves as an anchor.
  131. MachineTraceMetrics::Ensemble::~Ensemble() {}
  132. const MachineLoop*
  133. MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
  134. return MTM.Loops->getLoopFor(MBB);
  135. }
  136. // Update resource-related information in the TraceBlockInfo for MBB.
  137. // Only update resources related to the trace above MBB.
  138. void MachineTraceMetrics::Ensemble::
  139. computeDepthResources(const MachineBasicBlock *MBB) {
  140. TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  141. unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  142. unsigned PROffset = MBB->getNumber() * PRKinds;
  143. // Compute resources from trace above. The top block is simple.
  144. if (!TBI->Pred) {
  145. TBI->InstrDepth = 0;
  146. TBI->Head = MBB->getNumber();
  147. std::fill(ProcResourceDepths.begin() + PROffset,
  148. ProcResourceDepths.begin() + PROffset + PRKinds, 0);
  149. return;
  150. }
  151. // Compute from the block above. A post-order traversal ensures the
  152. // predecessor is always computed first.
  153. unsigned PredNum = TBI->Pred->getNumber();
  154. TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
  155. assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
  156. const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
  157. TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
  158. TBI->Head = PredTBI->Head;
  159. // Compute per-resource depths.
  160. ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
  161. ArrayRef<unsigned> PredPRCycles = MTM.getProcResourceCycles(PredNum);
  162. for (unsigned K = 0; K != PRKinds; ++K)
  163. ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
  164. }
  165. // Update resource-related information in the TraceBlockInfo for MBB.
  166. // Only update resources related to the trace below MBB.
  167. void MachineTraceMetrics::Ensemble::
  168. computeHeightResources(const MachineBasicBlock *MBB) {
  169. TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  170. unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  171. unsigned PROffset = MBB->getNumber() * PRKinds;
  172. // Compute resources for the current block.
  173. TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
  174. ArrayRef<unsigned> PRCycles = MTM.getProcResourceCycles(MBB->getNumber());
  175. // The trace tail is done.
  176. if (!TBI->Succ) {
  177. TBI->Tail = MBB->getNumber();
  178. std::copy(PRCycles.begin(), PRCycles.end(),
  179. ProcResourceHeights.begin() + PROffset);
  180. return;
  181. }
  182. // Compute from the block below. A post-order traversal ensures the
  183. // predecessor is always computed first.
  184. unsigned SuccNum = TBI->Succ->getNumber();
  185. TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
  186. assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
  187. TBI->InstrHeight += SuccTBI->InstrHeight;
  188. TBI->Tail = SuccTBI->Tail;
  189. // Compute per-resource heights.
  190. ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
  191. for (unsigned K = 0; K != PRKinds; ++K)
  192. ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
  193. }
  194. // Check if depth resources for MBB are valid and return the TBI.
  195. // Return NULL if the resources have been invalidated.
  196. const MachineTraceMetrics::TraceBlockInfo*
  197. MachineTraceMetrics::Ensemble::
  198. getDepthResources(const MachineBasicBlock *MBB) const {
  199. const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  200. return TBI->hasValidDepth() ? TBI : nullptr;
  201. }
  202. // Check if height resources for MBB are valid and return the TBI.
  203. // Return NULL if the resources have been invalidated.
  204. const MachineTraceMetrics::TraceBlockInfo*
  205. MachineTraceMetrics::Ensemble::
  206. getHeightResources(const MachineBasicBlock *MBB) const {
  207. const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  208. return TBI->hasValidHeight() ? TBI : nullptr;
  209. }
  210. /// Get an array of processor resource depths for MBB. Indexed by processor
  211. /// resource kind, this array contains the scaled processor resources consumed
  212. /// by all blocks preceding MBB in its trace. It does not include instructions
  213. /// in MBB.
  214. ///
  215. /// Compare TraceBlockInfo::InstrDepth.
  216. ArrayRef<unsigned>
  217. MachineTraceMetrics::Ensemble::
  218. getProcResourceDepths(unsigned MBBNum) const {
  219. unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  220. assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
  221. return makeArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds);
  222. }
  223. /// Get an array of processor resource heights for MBB. Indexed by processor
  224. /// resource kind, this array contains the scaled processor resources consumed
  225. /// by this block and all blocks following it in its trace.
  226. ///
  227. /// Compare TraceBlockInfo::InstrHeight.
  228. ArrayRef<unsigned>
  229. MachineTraceMetrics::Ensemble::
  230. getProcResourceHeights(unsigned MBBNum) const {
  231. unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  232. assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
  233. return makeArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds);
  234. }
  235. //===----------------------------------------------------------------------===//
  236. // Trace Selection Strategies
  237. //===----------------------------------------------------------------------===//
  238. //
  239. // A trace selection strategy is implemented as a sub-class of Ensemble. The
  240. // trace through a block B is computed by two DFS traversals of the CFG
  241. // starting from B. One upwards, and one downwards. During the upwards DFS,
  242. // pickTracePred() is called on the post-ordered blocks. During the downwards
  243. // DFS, pickTraceSucc() is called in a post-order.
  244. //
  245. // We never allow traces that leave loops, but we do allow traces to enter
  246. // nested loops. We also never allow traces to contain back-edges.
  247. //
  248. // This means that a loop header can never appear above the center block of a
  249. // trace, except as the trace head. Below the center block, loop exiting edges
  250. // are banned.
  251. //
  252. // Return true if an edge from the From loop to the To loop is leaving a loop.
  253. // Either of To and From can be null.
  254. static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
  255. return From && !From->contains(To);
  256. }
  257. // MinInstrCountEnsemble - Pick the trace that executes the least number of
  258. // instructions.
  259. namespace {
  260. class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
  261. const char *getName() const override { return "MinInstr"; }
  262. const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override;
  263. const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override;
  264. public:
  265. MinInstrCountEnsemble(MachineTraceMetrics *mtm)
  266. : MachineTraceMetrics::Ensemble(mtm) {}
  267. };
  268. }
  269. // Select the preferred predecessor for MBB.
  270. const MachineBasicBlock*
  271. MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
  272. if (MBB->pred_empty())
  273. return nullptr;
  274. const MachineLoop *CurLoop = getLoopFor(MBB);
  275. // Don't leave loops, and never follow back-edges.
  276. if (CurLoop && MBB == CurLoop->getHeader())
  277. return nullptr;
  278. unsigned CurCount = MTM.getResources(MBB)->InstrCount;
  279. const MachineBasicBlock *Best = nullptr;
  280. unsigned BestDepth = 0;
  281. for (const MachineBasicBlock *Pred : MBB->predecessors()) {
  282. const MachineTraceMetrics::TraceBlockInfo *PredTBI =
  283. getDepthResources(Pred);
  284. // Ignore cycles that aren't natural loops.
  285. if (!PredTBI)
  286. continue;
  287. // Pick the predecessor that would give this block the smallest InstrDepth.
  288. unsigned Depth = PredTBI->InstrDepth + CurCount;
  289. if (!Best || Depth < BestDepth)
  290. Best = Pred, BestDepth = Depth;
  291. }
  292. return Best;
  293. }
  294. // Select the preferred successor for MBB.
  295. const MachineBasicBlock*
  296. MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
  297. if (MBB->pred_empty())
  298. return nullptr;
  299. const MachineLoop *CurLoop = getLoopFor(MBB);
  300. const MachineBasicBlock *Best = nullptr;
  301. unsigned BestHeight = 0;
  302. for (const MachineBasicBlock *Succ : MBB->successors()) {
  303. // Don't consider back-edges.
  304. if (CurLoop && Succ == CurLoop->getHeader())
  305. continue;
  306. // Don't consider successors exiting CurLoop.
  307. if (isExitingLoop(CurLoop, getLoopFor(Succ)))
  308. continue;
  309. const MachineTraceMetrics::TraceBlockInfo *SuccTBI =
  310. getHeightResources(Succ);
  311. // Ignore cycles that aren't natural loops.
  312. if (!SuccTBI)
  313. continue;
  314. // Pick the successor that would give this block the smallest InstrHeight.
  315. unsigned Height = SuccTBI->InstrHeight;
  316. if (!Best || Height < BestHeight)
  317. Best = Succ, BestHeight = Height;
  318. }
  319. return Best;
  320. }
  321. // Get an Ensemble sub-class for the requested trace strategy.
  322. MachineTraceMetrics::Ensemble *
  323. MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) {
  324. assert(strategy < TS_NumStrategies && "Invalid trace strategy enum");
  325. Ensemble *&E = Ensembles[strategy];
  326. if (E)
  327. return E;
  328. // Allocate new Ensemble on demand.
  329. switch (strategy) {
  330. case TS_MinInstrCount: return (E = new MinInstrCountEnsemble(this));
  331. default: llvm_unreachable("Invalid trace strategy enum");
  332. }
  333. }
  334. void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) {
  335. DEBUG(dbgs() << "Invalidate traces through BB#" << MBB->getNumber() << '\n');
  336. BlockInfo[MBB->getNumber()].invalidate();
  337. for (unsigned i = 0; i != TS_NumStrategies; ++i)
  338. if (Ensembles[i])
  339. Ensembles[i]->invalidate(MBB);
  340. }
  341. void MachineTraceMetrics::verifyAnalysis() const {
  342. if (!MF)
  343. return;
  344. #ifndef NDEBUG
  345. assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size");
  346. for (unsigned i = 0; i != TS_NumStrategies; ++i)
  347. if (Ensembles[i])
  348. Ensembles[i]->verify();
  349. #endif
  350. }
  351. //===----------------------------------------------------------------------===//
  352. // Trace building
  353. //===----------------------------------------------------------------------===//
  354. //
  355. // Traces are built by two CFG traversals. To avoid recomputing too much, use a
  356. // set abstraction that confines the search to the current loop, and doesn't
  357. // revisit blocks.
namespace {
/// Bounds for the post-order CFG traversals that build traces: the per-block
/// trace info being filled in, the set of blocks already reached, and the
/// current search direction.
struct LoopBounds {
  MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks;
  SmallPtrSet<const MachineBasicBlock*, 8> Visited;
  const MachineLoopInfo *Loops;
  // False while searching upwards for the trace head, true while searching
  // downwards for the trace tail. Toggled between the two passes of
  // computeTrace().
  bool Downward;
  LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks,
             const MachineLoopInfo *loops)
    : Blocks(blocks), Loops(loops), Downward(false) {}
};
}
// Specialize po_iterator_storage in order to prune the post-order traversal so
// it is limited to the current loop and doesn't traverse the loop back edges.
namespace llvm {
template<>
class po_iterator_storage<LoopBounds, true> {
  LoopBounds &LB;
public:
  po_iterator_storage(LoopBounds &lb) : LB(lb) {}

  // No bookkeeping needed when a block's post-order position is finalized.
  void finishPostorder(const MachineBasicBlock*) {}

  // Return true if the traversal should follow the edge From -> To.
  bool insertEdge(const MachineBasicBlock *From, const MachineBasicBlock *To) {
    // Skip already visited To blocks: a block whose depth (upward pass) or
    // height (downward pass) is already valid was handled by an earlier
    // computeTrace() and need not be revisited.
    MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
    if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
      return false;
    // From is null once when To is the trace center block.
    if (From) {
      if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(From)) {
        // Don't follow backedges, don't leave FromLoop when going upwards.
        // (Downward: an edge into FromLoop's header is a backedge; upward: an
        // edge out of FromLoop's header leaves the loop.)
        if ((LB.Downward ? To : From) == FromLoop->getHeader())
          return false;
        // Don't leave FromLoop.
        if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
          return false;
      }
    }
    // To is a new block. Mark the block as visited in case the CFG has cycles
    // that MachineLoopInfo didn't recognize as a natural loop.
    return LB.Visited.insert(To).second;
  }
};
}
  400. /// Compute the trace through MBB.
  401. void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
  402. DEBUG(dbgs() << "Computing " << getName() << " trace through BB#"
  403. << MBB->getNumber() << '\n');
  404. // Set up loop bounds for the backwards post-order traversal.
  405. LoopBounds Bounds(BlockInfo, MTM.Loops);
  406. // Run an upwards post-order search for the trace start.
  407. Bounds.Downward = false;
  408. Bounds.Visited.clear();
  409. for (auto I : inverse_post_order_ext(MBB, Bounds)) {
  410. DEBUG(dbgs() << " pred for BB#" << I->getNumber() << ": ");
  411. TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
  412. // All the predecessors have been visited, pick the preferred one.
  413. TBI.Pred = pickTracePred(I);
  414. DEBUG({
  415. if (TBI.Pred)
  416. dbgs() << "BB#" << TBI.Pred->getNumber() << '\n';
  417. else
  418. dbgs() << "null\n";
  419. });
  420. // The trace leading to I is now known, compute the depth resources.
  421. computeDepthResources(I);
  422. }
  423. // Run a downwards post-order search for the trace end.
  424. Bounds.Downward = true;
  425. Bounds.Visited.clear();
  426. for (auto I : post_order_ext(MBB, Bounds)) {
  427. DEBUG(dbgs() << " succ for BB#" << I->getNumber() << ": ");
  428. TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
  429. // All the successors have been visited, pick the preferred one.
  430. TBI.Succ = pickTraceSucc(I);
  431. DEBUG({
  432. if (TBI.Succ)
  433. dbgs() << "BB#" << TBI.Succ->getNumber() << '\n';
  434. else
  435. dbgs() << "null\n";
  436. });
  437. // The trace leaving I is now known, compute the height resources.
  438. computeHeightResources(I);
  439. }
  440. }
  441. /// Invalidate traces through BadMBB.
  442. void
  443. MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
  444. SmallVector<const MachineBasicBlock*, 16> WorkList;
  445. TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];
  446. // Invalidate height resources of blocks above MBB.
  447. if (BadTBI.hasValidHeight()) {
  448. BadTBI.invalidateHeight();
  449. WorkList.push_back(BadMBB);
  450. do {
  451. const MachineBasicBlock *MBB = WorkList.pop_back_val();
  452. DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
  453. << " height.\n");
  454. // Find any MBB predecessors that have MBB as their preferred successor.
  455. // They are the only ones that need to be invalidated.
  456. for (const MachineBasicBlock *Pred : MBB->predecessors()) {
  457. TraceBlockInfo &TBI = BlockInfo[Pred->getNumber()];
  458. if (!TBI.hasValidHeight())
  459. continue;
  460. if (TBI.Succ == MBB) {
  461. TBI.invalidateHeight();
  462. WorkList.push_back(Pred);
  463. continue;
  464. }
  465. // Verify that TBI.Succ is actually a *I successor.
  466. assert((!TBI.Succ || Pred->isSuccessor(TBI.Succ)) && "CFG changed");
  467. }
  468. } while (!WorkList.empty());
  469. }
  470. // Invalidate depth resources of blocks below MBB.
  471. if (BadTBI.hasValidDepth()) {
  472. BadTBI.invalidateDepth();
  473. WorkList.push_back(BadMBB);
  474. do {
  475. const MachineBasicBlock *MBB = WorkList.pop_back_val();
  476. DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
  477. << " depth.\n");
  478. // Find any MBB successors that have MBB as their preferred predecessor.
  479. // They are the only ones that need to be invalidated.
  480. for (const MachineBasicBlock *Succ : MBB->successors()) {
  481. TraceBlockInfo &TBI = BlockInfo[Succ->getNumber()];
  482. if (!TBI.hasValidDepth())
  483. continue;
  484. if (TBI.Pred == MBB) {
  485. TBI.invalidateDepth();
  486. WorkList.push_back(Succ);
  487. continue;
  488. }
  489. // Verify that TBI.Pred is actually a *I predecessor.
  490. assert((!TBI.Pred || Succ->isPredecessor(TBI.Pred)) && "CFG changed");
  491. }
  492. } while (!WorkList.empty());
  493. }
  494. // Clear any per-instruction data. We only have to do this for BadMBB itself
  495. // because the instructions in that block may change. Other blocks may be
  496. // invalidated, but their instructions will stay the same, so there is no
  497. // need to erase the Cycle entries. They will be overwritten when we
  498. // recompute.
  499. for (const auto &I : *BadMBB)
  500. Cycles.erase(&I);
  501. }
/// Debug-build consistency check over every block's trace links: preferred
/// predecessors/successors must be real CFG neighbors with valid resources,
/// and no trace may contain a loop backedge.
void MachineTraceMetrics::Ensemble::verify() const {
#ifndef NDEBUG
  assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() &&
         "Outdated BlockInfo size");
  for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) {
    const TraceBlockInfo &TBI = BlockInfo[Num];
    // Upward half: the preferred predecessor link.
    if (TBI.hasValidDepth() && TBI.Pred) {
      const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
      assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace");
      assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() &&
             "Trace is broken, depth should have been invalidated.");
      // A loop header may only appear as the trace head, never mid-trace.
      const MachineLoop *Loop = getLoopFor(MBB);
      assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge");
    }
    // Downward half: the preferred successor link.
    if (TBI.hasValidHeight() && TBI.Succ) {
      const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
      assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace");
      assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() &&
             "Trace is broken, height should have been invalidated.");
      // An edge to the header of the same loop would be a backedge.
      const MachineLoop *Loop = getLoopFor(MBB);
      const MachineLoop *SuccLoop = getLoopFor(TBI.Succ);
      assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) &&
             "Trace contains backedge");
    }
  }
#endif
}
  529. //===----------------------------------------------------------------------===//
  530. // Data Dependencies
  531. //===----------------------------------------------------------------------===//
  532. //
  533. // Compute the depth and height of each instruction based on data dependencies
  534. // and instruction latencies. These cycle numbers assume that the CPU can issue
  535. // an infinite number of instructions per cycle as long as their dependencies
  536. // are ready.
  537. // A data dependency is represented as a defining MI and operand numbers on the
  538. // defining and using MI.
namespace {
/// A data dependency: operand DefOp of instruction DefMI defines the value
/// read by operand UseOp of the using instruction.
struct DataDep {
  const MachineInstr *DefMI;
  unsigned DefOp;
  unsigned UseOp;

  DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp)
    : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {}

  /// Create a DataDep from an SSA form virtual register.
  /// In SSA the register has exactly one def, which this locates.
  DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp)
    : UseOp(UseOp) {
    assert(TargetRegisterInfo::isVirtualRegister(VirtReg));
    MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg);
    assert(!DefI.atEnd() && "Register has no defs");
    DefMI = DefI->getParent();
    DefOp = DefI.getOperandNo();
    // SSA form guarantees a single def.
    assert((++DefI).atEnd() && "Register has multiple defs");
  }
};
}
  558. // Get the input data dependencies that must be ready before UseMI can issue.
  559. // Return true if UseMI has any physreg operands.
  560. static bool getDataDeps(const MachineInstr *UseMI,
  561. SmallVectorImpl<DataDep> &Deps,
  562. const MachineRegisterInfo *MRI) {
  563. // Debug values should not be included in any calculations.
  564. if (UseMI->isDebugValue())
  565. return false;
  566. bool HasPhysRegs = false;
  567. for (MachineInstr::const_mop_iterator I = UseMI->operands_begin(),
  568. E = UseMI->operands_end(); I != E; ++I) {
  569. const MachineOperand &MO = *I;
  570. if (!MO.isReg())
  571. continue;
  572. unsigned Reg = MO.getReg();
  573. if (!Reg)
  574. continue;
  575. if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
  576. HasPhysRegs = true;
  577. continue;
  578. }
  579. // Collect virtual register reads.
  580. if (MO.readsReg())
  581. Deps.push_back(DataDep(MRI, Reg, UseMI->getOperandNo(I)));
  582. }
  583. return HasPhysRegs;
  584. }
  585. // Get the input data dependencies of a PHI instruction, using Pred as the
  586. // preferred predecessor.
  587. // This will add at most one dependency to Deps.
  588. static void getPHIDeps(const MachineInstr *UseMI,
  589. SmallVectorImpl<DataDep> &Deps,
  590. const MachineBasicBlock *Pred,
  591. const MachineRegisterInfo *MRI) {
  592. // No predecessor at the beginning of a trace. Ignore dependencies.
  593. if (!Pred)
  594. return;
  595. assert(UseMI->isPHI() && UseMI->getNumOperands() % 2 && "Bad PHI");
  596. for (unsigned i = 1; i != UseMI->getNumOperands(); i += 2) {
  597. if (UseMI->getOperand(i + 1).getMBB() == Pred) {
  598. unsigned Reg = UseMI->getOperand(i).getReg();
  599. Deps.push_back(DataDep(MRI, Reg, i));
  600. return;
  601. }
  602. }
  603. }
// Keep track of physreg data dependencies by recording each live register unit.
// Associate each regunit with an instruction operand. Depending on the
// direction instructions are scanned, it could be the operand that defined the
// regunit, or the highest operand to read the regunit.
namespace {
struct LiveRegUnit {
  unsigned RegUnit;           // The MC register unit being tracked.
  unsigned Cycle;             // Issue cycle associated with MI (0 until set).
  const MachineInstr *MI;     // Instruction that defined/last read the unit.
  unsigned Op;                // Operand index within MI.

  // SparseSet keys entries by the register unit number.
  unsigned getSparseSetIndex() const { return RegUnit; }

  LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(nullptr), Op(0) {}
};
}
// Identify physreg dependencies for UseMI, and update the live regunit
// tracking set when scanning instructions downwards.
static void updatePhysDepsDownwards(const MachineInstr *UseMI,
                                    SmallVectorImpl<DataDep> &Deps,
                                    SparseSet<LiveRegUnit> &RegUnits,
                                    const TargetRegisterInfo *TRI) {
  SmallVector<unsigned, 8> Kills;       // Physregs killed/dead at UseMI.
  SmallVector<unsigned, 8> LiveDefOps;  // Operand indices of live defs.

  for (MachineInstr::const_mop_iterator MI = UseMI->operands_begin(),
       ME = UseMI->operands_end(); MI != ME; ++MI) {
    const MachineOperand &MO = *MI;
    if (!MO.isReg())
      continue;
    unsigned Reg = MO.getReg();
    if (!TargetRegisterInfo::isPhysicalRegister(Reg))
      continue;
    // Track live defs and kills for updating RegUnits.
    if (MO.isDef()) {
      if (MO.isDead())
        Kills.push_back(Reg);
      else
        LiveDefOps.push_back(UseMI->getOperandNo(MI));
    } else if (MO.isKill())
      Kills.push_back(Reg);
    // Identify dependencies.
    if (!MO.readsReg())
      continue;
    // A read depends on the instruction recorded for any of the register's
    // units; the first live unit found suffices for one dependency.
    for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
      SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
      if (I == RegUnits.end())
        continue;
      Deps.push_back(DataDep(I->MI, I->Op, UseMI->getOperandNo(MI)));
      break;
    }
  }

  // Update RegUnits to reflect live registers after UseMI.
  // First kills. (Kills before defs, so a reg that is both killed and
  // redefined ends up recorded with UseMI's def below.)
  for (unsigned i = 0, e = Kills.size(); i != e; ++i)
    for (MCRegUnitIterator Units(Kills[i], TRI); Units.isValid(); ++Units)
      RegUnits.erase(*Units);

  // Second, live defs.
  for (unsigned i = 0, e = LiveDefOps.size(); i != e; ++i) {
    unsigned DefOp = LiveDefOps[i];
    for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI);
         Units.isValid(); ++Units) {
      LiveRegUnit &LRU = RegUnits[*Units];
      LRU.MI = UseMI;
      LRU.Op = DefOp;
    }
  }
}
  669. /// The length of the critical path through a trace is the maximum of two path
  670. /// lengths:
  671. ///
  672. /// 1. The maximum height+depth over all instructions in the trace center block.
  673. ///
  674. /// 2. The longest cross-block dependency chain. For small blocks, it is
  675. /// possible that the critical path through the trace doesn't include any
  676. /// instructions in the block.
  677. ///
  678. /// This function computes the second number from the live-in list of the
  679. /// center block.
  680. unsigned MachineTraceMetrics::Ensemble::
  681. computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
  682. assert(TBI.HasValidInstrDepths && "Missing depth info");
  683. assert(TBI.HasValidInstrHeights && "Missing height info");
  684. unsigned MaxLen = 0;
  685. for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
  686. const LiveInReg &LIR = TBI.LiveIns[i];
  687. if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg))
  688. continue;
  689. const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
  690. // Ignore dependencies outside the current trace.
  691. const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
  692. if (!DefTBI.isUsefulDominator(TBI))
  693. continue;
  694. unsigned Len = LIR.Height + Cycles[DefMI].Depth;
  695. MaxLen = std::max(MaxLen, Len);
  696. }
  697. return MaxLen;
  698. }
/// Compute instruction depths for all instructions above or in MBB in its
/// trace. This assumes that the trace through MBB has already been computed.
void MachineTraceMetrics::Ensemble::
computeInstrDepths(const MachineBasicBlock *MBB) {
  // The top of the trace may already be computed, and HasValidInstrDepths
  // implies Head->HasValidInstrDepths, so we only need to start from the first
  // block in the trace that needs to be recomputed.
  SmallVector<const MachineBasicBlock*, 8> Stack;
  do {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    assert(TBI.hasValidDepth() && "Incomplete trace");
    if (TBI.HasValidInstrDepths)
      break;
    Stack.push_back(MBB);
    MBB = TBI.Pred;
  } while (MBB);

  // FIXME: If MBB is non-null at this point, it is the last pre-computed block
  // in the trace. We should track any live-out physregs that were defined in
  // the trace. This is quite rare in SSA form, typically created by CSE
  // hoisting a compare.
  SparseSet<LiveRegUnit> RegUnits;
  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());

  // Go through trace blocks in top-down order, stopping after the center block.
  SmallVector<DataDep, 8> Deps;
  while (!Stack.empty()) {
    MBB = Stack.pop_back_val();
    DEBUG(dbgs() << "\nDepths for BB#" << MBB->getNumber() << ":\n");
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    TBI.HasValidInstrDepths = true;
    TBI.CriticalPath = 0;

    // Print out resource depths here as well.
    DEBUG({
      dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
      ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
      for (unsigned K = 0; K != PRDepths.size(); ++K)
        if (PRDepths[K]) {
          unsigned Factor = MTM.SchedModel.getResourceFactor(K);
          dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
                 << MTM.SchedModel.getProcResource(K)->Name << " ("
                 << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
        }
    });

    // Also compute the critical path length through MBB when possible.
    if (TBI.HasValidInstrHeights)
      TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);

    for (const auto &UseMI : *MBB) {
      // Collect all data dependencies.
      Deps.clear();
      if (UseMI.isPHI())
        getPHIDeps(&UseMI, Deps, TBI.Pred, MTM.MRI);
      else if (getDataDeps(&UseMI, Deps, MTM.MRI))
        updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI);

      // Filter and process dependencies, computing the earliest issue cycle.
      unsigned Cycle = 0;
      for (const DataDep &Dep : Deps) {
        const TraceBlockInfo&DepTBI =
          BlockInfo[Dep.DefMI->getParent()->getNumber()];
        // Ignore dependencies from outside the current trace.
        if (!DepTBI.isUsefulDominator(TBI))
          continue;
        assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
        unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
        // Add latency if DefMI is a real instruction. Transients get latency 0.
        if (!Dep.DefMI->isTransient())
          DepCycle += MTM.SchedModel
            .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp);
        Cycle = std::max(Cycle, DepCycle);
      }
      // Remember the instruction depth.
      InstrCycles &MICycles = Cycles[&UseMI];
      MICycles.Depth = Cycle;

      // Without valid heights for this block, the critical path can't be
      // updated yet; just record the depth.
      if (!TBI.HasValidInstrHeights) {
        DEBUG(dbgs() << Cycle << '\t' << UseMI);
        continue;
      }
      // Update critical path length.
      TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
      DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);
    }
  }
}
// Identify physreg dependencies for MI when scanning instructions upwards.
// Return the issue height of MI after considering any live regunits.
// Height is the issue height computed from virtual register dependencies alone.
static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height,
                                      SparseSet<LiveRegUnit> &RegUnits,
                                      const TargetSchedModel &SchedModel,
                                      const TargetInstrInfo *TII,
                                      const TargetRegisterInfo *TRI) {
  SmallVector<unsigned, 8> ReadOps;  // Operand indices of physreg reads.

  for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
       MOE = MI->operands_end(); MOI != MOE; ++MOI) {
    const MachineOperand &MO = *MOI;
    if (!MO.isReg())
      continue;
    unsigned Reg = MO.getReg();
    if (!TargetRegisterInfo::isPhysicalRegister(Reg))
      continue;
    if (MO.readsReg())
      ReadOps.push_back(MI->getOperandNo(MOI));
    if (!MO.isDef())
      continue;
    // This is a def of Reg. Remove corresponding entries from RegUnits, and
    // update MI Height to consider the physreg dependencies.
    for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
      SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
      if (I == RegUnits.end())
        continue;
      unsigned DepHeight = I->Cycle;
      if (!MI->isTransient()) {
        // We may not know the UseMI of this dependency, if it came from the
        // live-in list. SchedModel can handle a NULL UseMI.
        DepHeight += SchedModel
          .computeOperandLatency(MI, MI->getOperandNo(MOI), I->MI, I->Op);
      }
      Height = std::max(Height, DepHeight);
      // This regunit is dead above MI.
      RegUnits.erase(I);
    }
  }

  // Now we know the height of MI. Update any regunits read.
  for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) {
    unsigned Reg = MI->getOperand(ReadOps[i]).getReg();
    for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
      LiveRegUnit &LRU = RegUnits[*Units];
      // Set the height to the highest reader of the unit.
      if (LRU.Cycle <= Height && LRU.MI != MI) {
        LRU.Cycle = Height;
        LRU.MI = MI;
        LRU.Op = ReadOps[i];
      }
    }
  }

  return Height;
}
  834. typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap;
  835. // Push the height of DefMI upwards if required to match UseMI.
  836. // Return true if this is the first time DefMI was seen.
  837. static bool pushDepHeight(const DataDep &Dep,
  838. const MachineInstr *UseMI, unsigned UseHeight,
  839. MIHeightMap &Heights,
  840. const TargetSchedModel &SchedModel,
  841. const TargetInstrInfo *TII) {
  842. // Adjust height by Dep.DefMI latency.
  843. if (!Dep.DefMI->isTransient())
  844. UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
  845. UseMI, Dep.UseOp);
  846. // Update Heights[DefMI] to be the maximum height seen.
  847. MIHeightMap::iterator I;
  848. bool New;
  849. std::tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
  850. if (New)
  851. return true;
  852. // DefMI has been pushed before. Give it the max height.
  853. if (I->second < UseHeight)
  854. I->second = UseHeight;
  855. return false;
  856. }
  857. /// Assuming that the virtual register defined by DefMI:DefOp was used by
  858. /// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
  859. /// when reaching the block that contains DefMI.
  860. void MachineTraceMetrics::Ensemble::
  861. addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
  862. ArrayRef<const MachineBasicBlock*> Trace) {
  863. assert(!Trace.empty() && "Trace should contain at least one block");
  864. unsigned Reg = DefMI->getOperand(DefOp).getReg();
  865. assert(TargetRegisterInfo::isVirtualRegister(Reg));
  866. const MachineBasicBlock *DefMBB = DefMI->getParent();
  867. // Reg is live-in to all blocks in Trace that follow DefMBB.
  868. for (unsigned i = Trace.size(); i; --i) {
  869. const MachineBasicBlock *MBB = Trace[i-1];
  870. if (MBB == DefMBB)
  871. return;
  872. TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
  873. // Just add the register. The height will be updated later.
  874. TBI.LiveIns.push_back(Reg);
  875. }
  876. }
/// Compute instruction heights in the trace through MBB. This updates MBB and
/// the blocks below it in the trace. It is assumed that the trace has already
/// been computed.
void MachineTraceMetrics::Ensemble::
computeInstrHeights(const MachineBasicBlock *MBB) {
  // The bottom of the trace may already be computed.
  // Find the blocks that need updating.
  SmallVector<const MachineBasicBlock*, 8> Stack;
  do {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    assert(TBI.hasValidHeight() && "Incomplete trace");
    if (TBI.HasValidInstrHeights)
      break;
    Stack.push_back(MBB);
    TBI.LiveIns.clear();
    MBB = TBI.Succ;
  } while (MBB);

  // As we move upwards in the trace, keep track of instructions that are
  // required by deeper trace instructions. Map MI -> height required so far.
  MIHeightMap Heights;

  // For physregs, the def isn't known when we see the use.
  // Instead, keep track of the highest use of each regunit.
  SparseSet<LiveRegUnit> RegUnits;
  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());

  // If the bottom of the trace was already precomputed, initialize heights
  // from its live-in list.
  // MBB is the highest precomputed block in the trace.
  if (MBB) {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    for (LiveInReg &LI : TBI.LiveIns) {
      if (TargetRegisterInfo::isVirtualRegister(LI.Reg)) {
        // For virtual registers, the def latency is included.
        unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
        if (Height < LI.Height)
          Height = LI.Height;
      } else {
        // For register units, the def latency is not included because we don't
        // know the def yet.
        RegUnits[LI.Reg].Cycle = LI.Height;
      }
    }
  }

  // Go through the trace blocks in bottom-up order.
  SmallVector<DataDep, 8> Deps;
  for (;!Stack.empty(); Stack.pop_back()) {
    MBB = Stack.back();
    DEBUG(dbgs() << "Heights for BB#" << MBB->getNumber() << ":\n");
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    TBI.HasValidInstrHeights = true;
    TBI.CriticalPath = 0;

    DEBUG({
      dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
      ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
      for (unsigned K = 0; K != PRHeights.size(); ++K)
        if (PRHeights[K]) {
          unsigned Factor = MTM.SchedModel.getResourceFactor(K);
          dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
                 << MTM.SchedModel.getProcResource(K)->Name << " ("
                 << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
        }
    });

    // Get dependencies from PHIs in the trace successor.
    const MachineBasicBlock *Succ = TBI.Succ;
    // If MBB is the last block in the trace, and it has a back-edge to the
    // loop header, get loop-carried dependencies from PHIs in the header. For
    // that purpose, pretend that all the loop header PHIs have height 0.
    if (!Succ)
      if (const MachineLoop *Loop = getLoopFor(MBB))
        if (MBB->isSuccessor(Loop->getHeader()))
          Succ = Loop->getHeader();

    if (Succ) {
      for (const auto &PHI : *Succ) {
        // PHIs are grouped at the top of the block; stop at the first non-PHI.
        if (!PHI.isPHI())
          break;
        Deps.clear();
        getPHIDeps(&PHI, Deps, MBB, MTM.MRI);
        if (!Deps.empty()) {
          // Loop header PHI heights are all 0.
          unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0;
          DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI);
          if (pushDepHeight(Deps.front(), &PHI, Height,
                            Heights, MTM.SchedModel, MTM.TII))
            addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
        }
      }
    }

    // Go through the block backwards.
    for (MachineBasicBlock::const_iterator BI = MBB->end(), BB = MBB->begin();
         BI != BB;) {
      const MachineInstr *MI = --BI;

      // Find the MI height as determined by virtual register uses in the
      // trace below.
      unsigned Cycle = 0;
      MIHeightMap::iterator HeightI = Heights.find(MI);
      if (HeightI != Heights.end()) {
        Cycle = HeightI->second;
        // We won't be seeing any more MI uses.
        Heights.erase(HeightI);
      }

      // Don't process PHI deps. They depend on the specific predecessor, and
      // we'll get them when visiting the predecessor.
      Deps.clear();
      bool HasPhysRegs = !MI->isPHI() && getDataDeps(MI, Deps, MTM.MRI);

      // There may also be regunit dependencies to include in the height.
      if (HasPhysRegs)
        Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits,
                                      MTM.SchedModel, MTM.TII, MTM.TRI);

      // Update the required height of any virtual registers read by MI.
      for (const DataDep &Dep : Deps)
        if (pushDepHeight(Dep, MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
          addLiveIns(Dep.DefMI, Dep.DefOp, Stack);

      InstrCycles &MICycles = Cycles[MI];
      MICycles.Height = Cycle;
      if (!TBI.HasValidInstrDepths) {
        DEBUG(dbgs() << Cycle << '\t' << *MI);
        continue;
      }
      // Update critical path length.
      TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
      DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *MI);
    }

    // Update virtual live-in heights. They were added by addLiveIns() with a 0
    // height because the final height isn't known until now.
    DEBUG(dbgs() << "BB#" << MBB->getNumber() << " Live-ins:");
    for (LiveInReg &LIR : TBI.LiveIns) {
      const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
      LIR.Height = Heights.lookup(DefMI);
      DEBUG(dbgs() << ' ' << PrintReg(LIR.Reg) << '@' << LIR.Height);
    }

    // Transfer the live regunits to the live-in list.
    for (SparseSet<LiveRegUnit>::const_iterator
         RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) {
      TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle));
      DEBUG(dbgs() << ' ' << PrintRegUnit(RI->RegUnit, MTM.TRI)
                   << '@' << RI->Cycle);
    }
    DEBUG(dbgs() << '\n');

    if (!TBI.HasValidInstrDepths)
      continue;
    // Add live-ins to the critical path length.
    TBI.CriticalPath = std::max(TBI.CriticalPath,
                                computeCrossBlockCriticalPath(TBI));
    DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
  }
}
// Return the trace through MBB, lazily computing any missing pieces.
// The three steps must run in this order: trace shape first, then
// per-instruction depths, then heights.
MachineTraceMetrics::Trace
MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
  TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
  if (!TBI.hasValidDepth() || !TBI.hasValidHeight())
    computeTrace(MBB);
  if (!TBI.HasValidInstrDepths)
    computeInstrDepths(MBB);
  if (!TBI.HasValidInstrHeights)
    computeInstrHeights(MBB);
  return Trace(*this, TBI);
}
  1033. unsigned
  1034. MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr *MI) const {
  1035. assert(MI && "Not an instruction.");
  1036. assert(getBlockNum() == unsigned(MI->getParent()->getNumber()) &&
  1037. "MI must be in the trace center block");
  1038. InstrCycles Cyc = getInstrCycles(MI);
  1039. return getCriticalPath() - (Cyc.Depth + Cyc.Height);
  1040. }
  1041. unsigned
  1042. MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const {
  1043. const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
  1044. SmallVector<DataDep, 1> Deps;
  1045. getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
  1046. assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
  1047. DataDep &Dep = Deps.front();
  1048. unsigned DepCycle = getInstrCycles(Dep.DefMI).Depth;
  1049. // Add latency if DefMI is a real instruction. Transients get latency 0.
  1050. if (!Dep.DefMI->isTransient())
  1051. DepCycle += TE.MTM.SchedModel
  1052. .computeOperandLatency(Dep.DefMI, Dep.DefOp, PHI, Dep.UseOp);
  1053. return DepCycle;
  1054. }
  1055. /// When bottom is set include instructions in current block in estimate.
  1056. unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
  1057. // Find the limiting processor resource.
  1058. // Numbers have been pre-scaled to be comparable.
  1059. unsigned PRMax = 0;
  1060. ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
  1061. if (Bottom) {
  1062. ArrayRef<unsigned> PRCycles = TE.MTM.getProcResourceCycles(getBlockNum());
  1063. for (unsigned K = 0; K != PRDepths.size(); ++K)
  1064. PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
  1065. } else {
  1066. for (unsigned K = 0; K != PRDepths.size(); ++K)
  1067. PRMax = std::max(PRMax, PRDepths[K]);
  1068. }
  1069. // Convert to cycle count.
  1070. PRMax = TE.MTM.getCycles(PRMax);
  1071. /// All instructions before current block
  1072. unsigned Instrs = TBI.InstrDepth;
  1073. // plus instructions in current block
  1074. if (Bottom)
  1075. Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
  1076. if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
  1077. Instrs /= IW;
  1078. // Assume issue width 1 without a schedule model.
  1079. return std::max(Instrs, PRMax);
  1080. }
// Estimate the resource-limited length of the whole trace, optionally
// extended with the given blocks and with instructions added/removed.
unsigned MachineTraceMetrics::Trace::getResourceLength(
    ArrayRef<const MachineBasicBlock *> Extrablocks,
    ArrayRef<const MCSchedClassDesc *> ExtraInstrs,
    ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const {
  // Add up resources above and below the center block.
  ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
  ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
  unsigned PRMax = 0;

  // Capture computing cycles from extra instructions: sum the pre-scaled
  // cycles the given sched classes spend on resource ResourceIdx.
  auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs,
                            unsigned ResourceIdx)
      ->unsigned {
    unsigned Cycles = 0;
    for (const MCSchedClassDesc *SC : Instrs) {
      if (!SC->isValid())
        continue;
      for (TargetSchedModel::ProcResIter
               PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
               PE = TE.MTM.SchedModel.getWriteProcResEnd(SC);
           PI != PE; ++PI) {
        if (PI->ProcResourceIdx != ResourceIdx)
          continue;
        Cycles +=
            (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
      }
    }
    return Cycles;
  };

  // For each resource, combine trace, extra-block, and added/removed
  // instruction cycles, keeping the maximum over all resources.
  for (unsigned K = 0; K != PRDepths.size(); ++K) {
    unsigned PRCycles = PRDepths[K] + PRHeights[K];
    for (const MachineBasicBlock *MBB : Extrablocks)
      PRCycles += TE.MTM.getProcResourceCycles(MBB->getNumber())[K];
    PRCycles += extraCycles(ExtraInstrs, K);
    PRCycles -= extraCycles(RemoveInstrs, K);
    PRMax = std::max(PRMax, PRCycles);
  }
  // Convert to cycle count.
  PRMax = TE.MTM.getCycles(PRMax);

  // Instrs: #instructions in current trace outside current block.
  unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
  // Add instruction count from the extra blocks.
  for (const MachineBasicBlock *MBB : Extrablocks)
    Instrs += TE.MTM.getResources(MBB)->InstrCount;
  Instrs += ExtraInstrs.size();
  Instrs -= RemoveInstrs.size();
  if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
    Instrs /= IW;
  // Assume issue width 1 without a schedule model.
  return std::max(Instrs, PRMax);
}
  1131. bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr *DefMI,
  1132. const MachineInstr *UseMI) const {
  1133. if (DefMI->getParent() == UseMI->getParent())
  1134. return true;
  1135. const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI->getParent()->getNumber()];
  1136. const TraceBlockInfo &TBI = TE.BlockInfo[UseMI->getParent()->getNumber()];
  1137. return DepTBI.isUsefulDominator(TBI);
  1138. }
  1139. void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
  1140. OS << getName() << " ensemble:\n";
  1141. for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
  1142. OS << " BB#" << i << '\t';
  1143. BlockInfo[i].print(OS);
  1144. OS << '\n';
  1145. }
  1146. }
  1147. void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const {
  1148. if (hasValidDepth()) {
  1149. OS << "depth=" << InstrDepth;
  1150. if (Pred)
  1151. OS << " pred=BB#" << Pred->getNumber();
  1152. else
  1153. OS << " pred=null";
  1154. OS << " head=BB#" << Head;
  1155. if (HasValidInstrDepths)
  1156. OS << " +instrs";
  1157. } else
  1158. OS << "depth invalid";
  1159. OS << ", ";
  1160. if (hasValidHeight()) {
  1161. OS << "height=" << InstrHeight;
  1162. if (Succ)
  1163. OS << " succ=BB#" << Succ->getNumber();
  1164. else
  1165. OS << " succ=null";
  1166. OS << " tail=BB#" << Tail;
  1167. if (HasValidInstrHeights)
  1168. OS << " +instrs";
  1169. } else
  1170. OS << "height invalid";
  1171. if (HasValidInstrDepths && HasValidInstrHeights)
  1172. OS << ", crit=" << CriticalPath;
  1173. }
  1174. void MachineTraceMetrics::Trace::print(raw_ostream &OS) const {
  1175. unsigned MBBNum = &TBI - &TE.BlockInfo[0];
  1176. OS << TE.getName() << " trace BB#" << TBI.Head << " --> BB#" << MBBNum
  1177. << " --> BB#" << TBI.Tail << ':';
  1178. if (TBI.hasValidHeight() && TBI.hasValidDepth())
  1179. OS << ' ' << getInstrCount() << " instrs.";
  1180. if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
  1181. OS << ' ' << TBI.CriticalPath << " cycles.";
  1182. const MachineTraceMetrics::TraceBlockInfo *Block = &TBI;
  1183. OS << "\nBB#" << MBBNum;
  1184. while (Block->hasValidDepth() && Block->Pred) {
  1185. unsigned Num = Block->Pred->getNumber();
  1186. OS << " <- BB#" << Num;
  1187. Block = &TE.BlockInfo[Num];
  1188. }
  1189. Block = &TBI;
  1190. OS << "\n ";
  1191. while (Block->hasValidHeight() && Block->Succ) {
  1192. unsigned Num = Block->Succ->getNumber();
  1193. OS << " -> BB#" << Num;
  1194. Block = &TE.BlockInfo[Num];
  1195. }
  1196. OS << '\n';
  1197. }