2
0

MachineLICM.cpp 53 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469
  1. //===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This pass performs loop invariant code motion on machine instructions. We
  11. // attempt to remove as much code from the body of a loop as possible.
  12. //
  13. // This pass is not intended to be a replacement or a complete alternative
  14. // for the LLVM-IR-level LICM pass. It is only designed to hoist simple
  15. // constructs that are not exposed before lowering and instruction selection.
  16. //
  17. //===----------------------------------------------------------------------===//
  18. #include "llvm/CodeGen/Passes.h"
  19. #include "llvm/ADT/DenseMap.h"
  20. #include "llvm/ADT/SmallSet.h"
  21. #include "llvm/ADT/Statistic.h"
  22. #include "llvm/Analysis/AliasAnalysis.h"
  23. #include "llvm/CodeGen/MachineDominators.h"
  24. #include "llvm/CodeGen/MachineFrameInfo.h"
  25. #include "llvm/CodeGen/MachineLoopInfo.h"
  26. #include "llvm/CodeGen/MachineMemOperand.h"
  27. #include "llvm/CodeGen/MachineRegisterInfo.h"
  28. #include "llvm/CodeGen/PseudoSourceValue.h"
  29. #include "llvm/CodeGen/TargetSchedule.h"
  30. #include "llvm/Support/CommandLine.h"
  31. #include "llvm/Support/Debug.h"
  32. #include "llvm/Support/raw_ostream.h"
  33. #include "llvm/Target/TargetInstrInfo.h"
  34. #include "llvm/Target/TargetLowering.h"
  35. #include "llvm/Target/TargetMachine.h"
  36. #include "llvm/Target/TargetRegisterInfo.h"
  37. #include "llvm/Target/TargetSubtargetInfo.h"
  38. using namespace llvm;
  39. #define DEBUG_TYPE "machine-licm"
  40. static cl::opt<bool>
  41. AvoidSpeculation("avoid-speculation",
  42. cl::desc("MachineLICM should avoid speculation"),
  43. cl::init(true), cl::Hidden);
  44. static cl::opt<bool>
  45. HoistCheapInsts("hoist-cheap-insts",
  46. cl::desc("MachineLICM should hoist even cheap instructions"),
  47. cl::init(false), cl::Hidden);
  48. static cl::opt<bool>
  49. SinkInstsToAvoidSpills("sink-insts-to-avoid-spills",
  50. cl::desc("MachineLICM should sink instructions into "
  51. "loops to avoid register spills"),
  52. cl::init(false), cl::Hidden);
  53. STATISTIC(NumHoisted,
  54. "Number of machine instructions hoisted out of loops");
  55. STATISTIC(NumLowRP,
  56. "Number of instructions hoisted in low reg pressure situation");
  57. STATISTIC(NumHighLatency,
  58. "Number of high latency instructions hoisted");
  59. STATISTIC(NumCSEed,
  60. "Number of hoisted machine instructions CSEed");
  61. STATISTIC(NumPostRAHoisted,
  62. "Number of machine instructions hoisted out of loops post regalloc");
  63. namespace {
  64. class MachineLICM : public MachineFunctionPass {
  65. const TargetInstrInfo *TII;
  66. const TargetLoweringBase *TLI;
  67. const TargetRegisterInfo *TRI;
  68. const MachineFrameInfo *MFI;
  69. MachineRegisterInfo *MRI;
  70. TargetSchedModel SchedModel;
  71. bool PreRegAlloc;
  72. // Various analyses that we use...
  73. AliasAnalysis *AA; // Alias analysis info.
  74. MachineLoopInfo *MLI; // Current MachineLoopInfo
  75. MachineDominatorTree *DT; // Machine dominator tree for the cur loop
  76. // State that is updated as we process loops
  77. bool Changed; // True if a loop is changed.
  78. bool FirstInLoop; // True if it's the first LICM in the loop.
  79. MachineLoop *CurLoop; // The current loop we are working on.
  80. MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
  81. // Exit blocks for CurLoop.
  82. SmallVector<MachineBasicBlock*, 8> ExitBlocks;
  83. bool isExitBlock(const MachineBasicBlock *MBB) const {
  84. return std::find(ExitBlocks.begin(), ExitBlocks.end(), MBB) !=
  85. ExitBlocks.end();
  86. }
  87. // Track 'estimated' register pressure.
  88. SmallSet<unsigned, 32> RegSeen;
  89. SmallVector<unsigned, 8> RegPressure;
  90. // Register pressure "limit" per register pressure set. If the pressure
  91. // is higher than the limit, then it's considered high.
  92. SmallVector<unsigned, 8> RegLimit;
  93. // Register pressure on path leading from loop preheader to current BB.
  94. SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;
  95. // For each opcode, keep a list of potential CSE instructions.
  96. DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap;
  97. enum {
  98. SpeculateFalse = 0,
  99. SpeculateTrue = 1,
  100. SpeculateUnknown = 2
  101. };
  102. // If a MBB does not dominate loop exiting blocks then it may not safe
  103. // to hoist loads from this block.
  104. // Tri-state: 0 - false, 1 - true, 2 - unknown
  105. unsigned SpeculationState;
  106. public:
  107. static char ID; // Pass identification, replacement for typeid
  108. MachineLICM() :
  109. MachineFunctionPass(ID), PreRegAlloc(true) {
  110. initializeMachineLICMPass(*PassRegistry::getPassRegistry());
  111. }
  112. explicit MachineLICM(bool PreRA) :
  113. MachineFunctionPass(ID), PreRegAlloc(PreRA) {
  114. initializeMachineLICMPass(*PassRegistry::getPassRegistry());
  115. }
  116. bool runOnMachineFunction(MachineFunction &MF) override;
  117. void getAnalysisUsage(AnalysisUsage &AU) const override {
  118. AU.addRequired<MachineLoopInfo>();
  119. AU.addRequired<MachineDominatorTree>();
  120. AU.addRequired<AliasAnalysis>();
  121. AU.addPreserved<MachineLoopInfo>();
  122. AU.addPreserved<MachineDominatorTree>();
  123. MachineFunctionPass::getAnalysisUsage(AU);
  124. }
  125. void releaseMemory() override {
  126. RegSeen.clear();
  127. RegPressure.clear();
  128. RegLimit.clear();
  129. BackTrace.clear();
  130. CSEMap.clear();
  131. }
  132. private:
  133. /// CandidateInfo - Keep track of information about hoisting candidates.
  134. struct CandidateInfo {
  135. MachineInstr *MI;
  136. unsigned Def;
  137. int FI;
  138. CandidateInfo(MachineInstr *mi, unsigned def, int fi)
  139. : MI(mi), Def(def), FI(fi) {}
  140. };
  141. /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
  142. /// invariants out to the preheader.
  143. void HoistRegionPostRA();
  144. /// HoistPostRA - When an instruction is found to only use loop invariant
  145. /// operands that is safe to hoist, this instruction is called to do the
  146. /// dirty work.
  147. void HoistPostRA(MachineInstr *MI, unsigned Def);
  148. /// ProcessMI - Examine the instruction for potentai LICM candidate. Also
  149. /// gather register def and frame object update information.
  150. void ProcessMI(MachineInstr *MI,
  151. BitVector &PhysRegDefs,
  152. BitVector &PhysRegClobbers,
  153. SmallSet<int, 32> &StoredFIs,
  154. SmallVectorImpl<CandidateInfo> &Candidates);
  155. /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the
  156. /// current loop.
  157. void AddToLiveIns(unsigned Reg);
  158. /// IsLICMCandidate - Returns true if the instruction may be a suitable
  159. /// candidate for LICM. e.g. If the instruction is a call, then it's
  160. /// obviously not safe to hoist it.
  161. bool IsLICMCandidate(MachineInstr &I);
  162. /// IsLoopInvariantInst - Returns true if the instruction is loop
  163. /// invariant. I.e., all virtual register operands are defined outside of
  164. /// the loop, physical registers aren't accessed (explicitly or implicitly),
  165. /// and the instruction is hoistable.
  166. ///
  167. bool IsLoopInvariantInst(MachineInstr &I);
  168. /// HasLoopPHIUse - Return true if the specified instruction is used by any
  169. /// phi node in the current loop.
  170. bool HasLoopPHIUse(const MachineInstr *MI) const;
  171. /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
  172. /// and an use in the current loop, return true if the target considered
  173. /// it 'high'.
  174. bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
  175. unsigned Reg) const;
  176. bool IsCheapInstruction(MachineInstr &MI) const;
  177. /// CanCauseHighRegPressure - Visit BBs from header to current BB,
  178. /// check if hoisting an instruction of the given cost matrix can cause high
  179. /// register pressure.
  180. bool CanCauseHighRegPressure(const DenseMap<unsigned, int> &Cost,
  181. bool Cheap);
  182. /// UpdateBackTraceRegPressure - Traverse the back trace from header to
  183. /// the current block and update their register pressures to reflect the
  184. /// effect of hoisting MI from the current block to the preheader.
  185. void UpdateBackTraceRegPressure(const MachineInstr *MI);
  186. /// IsProfitableToHoist - Return true if it is potentially profitable to
  187. /// hoist the given loop invariant.
  188. bool IsProfitableToHoist(MachineInstr &MI);
  189. /// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute.
  190. /// If not then a load from this mbb may not be safe to hoist.
  191. bool IsGuaranteedToExecute(MachineBasicBlock *BB);
  192. void EnterScope(MachineBasicBlock *MBB);
  193. void ExitScope(MachineBasicBlock *MBB);
  194. /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to given
  195. /// dominator tree node if its a leaf or all of its children are done. Walk
  196. /// up the dominator tree to destroy ancestors which are now done.
  197. void ExitScopeIfDone(MachineDomTreeNode *Node,
  198. DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
  199. DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap);
  200. /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all
  201. /// blocks dominated by the specified header block, and that are in the
  202. /// current loop) in depth first order w.r.t the DominatorTree. This allows
  203. /// us to visit definitions before uses, allowing us to hoist a loop body in
  204. /// one pass without iteration.
  205. ///
  206. void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode);
  207. void HoistRegion(MachineDomTreeNode *N, bool IsHeader);
  208. /// SinkIntoLoop - Sink instructions into loops if profitable. This
  209. /// especially tries to prevent register spills caused by register pressure
  210. /// if there is little to no overhead moving instructions into loops.
  211. void SinkIntoLoop();
  212. /// InitRegPressure - Find all virtual register references that are liveout
  213. /// of the preheader to initialize the starting "register pressure". Note
  214. /// this does not count live through (livein but not used) registers.
  215. void InitRegPressure(MachineBasicBlock *BB);
  216. /// calcRegisterCost - Calculate the additional register pressure that the
  217. /// registers used in MI cause.
  218. ///
  219. /// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to
  220. /// figure out which usages are live-ins.
  221. /// FIXME: Figure out a way to consider 'RegSeen' from all code paths.
  222. DenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI,
  223. bool ConsiderSeen,
  224. bool ConsiderUnseenAsDef);
  225. /// UpdateRegPressure - Update estimate of register pressure after the
  226. /// specified instruction.
  227. void UpdateRegPressure(const MachineInstr *MI,
  228. bool ConsiderUnseenAsDef = false);
  229. /// ExtractHoistableLoad - Unfold a load from the given machineinstr if
  230. /// the load itself could be hoisted. Return the unfolded and hoistable
  231. /// load, or null if the load couldn't be unfolded or if it wouldn't
  232. /// be hoistable.
  233. MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
  234. /// LookForDuplicate - Find an instruction amount PrevMIs that is a
  235. /// duplicate of MI. Return this instruction if it's found.
  236. const MachineInstr *LookForDuplicate(const MachineInstr *MI,
  237. std::vector<const MachineInstr*> &PrevMIs);
  238. /// EliminateCSE - Given a LICM'ed instruction, look for an instruction on
  239. /// the preheader that compute the same value. If it's found, do a RAU on
  240. /// with the definition of the existing instruction rather than hoisting
  241. /// the instruction to the preheader.
  242. bool EliminateCSE(MachineInstr *MI,
  243. DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI);
  244. /// MayCSE - Return true if the given instruction will be CSE'd if it's
  245. /// hoisted out of the loop.
  246. bool MayCSE(MachineInstr *MI);
  247. /// Hoist - When an instruction is found to only use loop invariant operands
  248. /// that is safe to hoist, this instruction is called to do the dirty work.
  249. /// It returns true if the instruction is hoisted.
  250. bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);
  251. /// InitCSEMap - Initialize the CSE map with instructions that are in the
  252. /// current loop preheader that may become duplicates of instructions that
  253. /// are hoisted out of the loop.
  254. void InitCSEMap(MachineBasicBlock *BB);
  255. /// getCurPreheader - Get the preheader for the current loop, splitting
  256. /// a critical edge if needed.
  257. MachineBasicBlock *getCurPreheader();
  258. };
  259. } // end anonymous namespace
  260. char MachineLICM::ID = 0;
  261. char &llvm::MachineLICMID = MachineLICM::ID;
  262. INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm",
  263. "Machine Loop Invariant Code Motion", false, false)
  264. INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
  265. INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
  266. INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
  267. INITIALIZE_PASS_END(MachineLICM, "machinelicm",
  268. "Machine Loop Invariant Code Motion", false, false)
  269. /// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most
  270. /// loop that has a unique predecessor.
  271. static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
  272. // Check whether this loop even has a unique predecessor.
  273. if (!CurLoop->getLoopPredecessor())
  274. return false;
  275. // Ok, now check to see if any of its outer loops do.
  276. for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
  277. if (L->getLoopPredecessor())
  278. return false;
  279. // None of them did, so this is the outermost with a unique predecessor.
  280. return true;
  281. }
  282. bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
  283. if (skipOptnoneFunction(*MF.getFunction()))
  284. return false;
  285. Changed = FirstInLoop = false;
  286. const TargetSubtargetInfo &ST = MF.getSubtarget();
  287. TII = ST.getInstrInfo();
  288. TLI = ST.getTargetLowering();
  289. TRI = ST.getRegisterInfo();
  290. MFI = MF.getFrameInfo();
  291. MRI = &MF.getRegInfo();
  292. SchedModel.init(ST.getSchedModel(), &ST, TII);
  293. PreRegAlloc = MRI->isSSA();
  294. if (PreRegAlloc)
  295. DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
  296. else
  297. DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
  298. DEBUG(dbgs() << MF.getName() << " ********\n");
  299. if (PreRegAlloc) {
  300. // Estimate register pressure during pre-regalloc pass.
  301. unsigned NumRPS = TRI->getNumRegPressureSets();
  302. RegPressure.resize(NumRPS);
  303. std::fill(RegPressure.begin(), RegPressure.end(), 0);
  304. RegLimit.resize(NumRPS);
  305. for (unsigned i = 0, e = NumRPS; i != e; ++i)
  306. RegLimit[i] = TRI->getRegPressureSetLimit(MF, i);
  307. }
  308. // Get our Loop information...
  309. MLI = &getAnalysis<MachineLoopInfo>();
  310. DT = &getAnalysis<MachineDominatorTree>();
  311. AA = &getAnalysis<AliasAnalysis>();
  312. SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
  313. while (!Worklist.empty()) {
  314. CurLoop = Worklist.pop_back_val();
  315. CurPreheader = nullptr;
  316. ExitBlocks.clear();
  317. // If this is done before regalloc, only visit outer-most preheader-sporting
  318. // loops.
  319. if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
  320. Worklist.append(CurLoop->begin(), CurLoop->end());
  321. continue;
  322. }
  323. CurLoop->getExitBlocks(ExitBlocks);
  324. if (!PreRegAlloc)
  325. HoistRegionPostRA();
  326. else {
  327. // CSEMap is initialized for loop header when the first instruction is
  328. // being hoisted.
  329. MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
  330. FirstInLoop = true;
  331. HoistOutOfLoop(N);
  332. CSEMap.clear();
  333. if (SinkInstsToAvoidSpills)
  334. SinkIntoLoop();
  335. }
  336. }
  337. return Changed;
  338. }
  339. /// InstructionStoresToFI - Return true if instruction stores to the
  340. /// specified frame.
  341. static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
  342. for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
  343. oe = MI->memoperands_end(); o != oe; ++o) {
  344. if (!(*o)->isStore() || !(*o)->getPseudoValue())
  345. continue;
  346. if (const FixedStackPseudoSourceValue *Value =
  347. dyn_cast<FixedStackPseudoSourceValue>((*o)->getPseudoValue())) {
  348. if (Value->getFrameIndex() == FI)
  349. return true;
  350. }
  351. }
  352. return false;
  353. }
  354. /// ProcessMI - Examine the instruction for potentai LICM candidate. Also
  355. /// gather register def and frame object update information.
  356. void MachineLICM::ProcessMI(MachineInstr *MI,
  357. BitVector &PhysRegDefs,
  358. BitVector &PhysRegClobbers,
  359. SmallSet<int, 32> &StoredFIs,
  360. SmallVectorImpl<CandidateInfo> &Candidates) {
  361. bool RuledOut = false;
  362. bool HasNonInvariantUse = false;
  363. unsigned Def = 0;
  364. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
  365. const MachineOperand &MO = MI->getOperand(i);
  366. if (MO.isFI()) {
  367. // Remember if the instruction stores to the frame index.
  368. int FI = MO.getIndex();
  369. if (!StoredFIs.count(FI) &&
  370. MFI->isSpillSlotObjectIndex(FI) &&
  371. InstructionStoresToFI(MI, FI))
  372. StoredFIs.insert(FI);
  373. HasNonInvariantUse = true;
  374. continue;
  375. }
  376. // We can't hoist an instruction defining a physreg that is clobbered in
  377. // the loop.
  378. if (MO.isRegMask()) {
  379. PhysRegClobbers.setBitsNotInMask(MO.getRegMask());
  380. continue;
  381. }
  382. if (!MO.isReg())
  383. continue;
  384. unsigned Reg = MO.getReg();
  385. if (!Reg)
  386. continue;
  387. assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
  388. "Not expecting virtual register!");
  389. if (!MO.isDef()) {
  390. if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg)))
  391. // If it's using a non-loop-invariant register, then it's obviously not
  392. // safe to hoist.
  393. HasNonInvariantUse = true;
  394. continue;
  395. }
  396. if (MO.isImplicit()) {
  397. for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
  398. PhysRegClobbers.set(*AI);
  399. if (!MO.isDead())
  400. // Non-dead implicit def? This cannot be hoisted.
  401. RuledOut = true;
  402. // No need to check if a dead implicit def is also defined by
  403. // another instruction.
  404. continue;
  405. }
  406. // FIXME: For now, avoid instructions with multiple defs, unless
  407. // it's a dead implicit def.
  408. if (Def)
  409. RuledOut = true;
  410. else
  411. Def = Reg;
  412. // If we have already seen another instruction that defines the same
  413. // register, then this is not safe. Two defs is indicated by setting a
  414. // PhysRegClobbers bit.
  415. for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) {
  416. if (PhysRegDefs.test(*AS))
  417. PhysRegClobbers.set(*AS);
  418. PhysRegDefs.set(*AS);
  419. }
  420. if (PhysRegClobbers.test(Reg))
  421. // MI defined register is seen defined by another instruction in
  422. // the loop, it cannot be a LICM candidate.
  423. RuledOut = true;
  424. }
  425. // Only consider reloads for now and remats which do not have register
  426. // operands. FIXME: Consider unfold load folding instructions.
  427. if (Def && !RuledOut) {
  428. int FI = INT_MIN;
  429. if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
  430. (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
  431. Candidates.push_back(CandidateInfo(MI, Def, FI));
  432. }
  433. }
  434. /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
  435. /// invariants out to the preheader.
  436. void MachineLICM::HoistRegionPostRA() {
  437. MachineBasicBlock *Preheader = getCurPreheader();
  438. if (!Preheader)
  439. return;
  440. unsigned NumRegs = TRI->getNumRegs();
  441. BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop.
  442. BitVector PhysRegClobbers(NumRegs); // Regs defined more than once.
  443. SmallVector<CandidateInfo, 32> Candidates;
  444. SmallSet<int, 32> StoredFIs;
  445. // Walk the entire region, count number of defs for each register, and
  446. // collect potential LICM candidates.
  447. const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
  448. for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
  449. MachineBasicBlock *BB = Blocks[i];
  450. // If the header of the loop containing this basic block is a landing pad,
  451. // then don't try to hoist instructions out of this loop.
  452. const MachineLoop *ML = MLI->getLoopFor(BB);
  453. if (ML && ML->getHeader()->isLandingPad()) continue;
  454. // Conservatively treat live-in's as an external def.
  455. // FIXME: That means a reload that're reused in successor block(s) will not
  456. // be LICM'ed.
  457. for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
  458. E = BB->livein_end(); I != E; ++I) {
  459. unsigned Reg = *I;
  460. for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
  461. PhysRegDefs.set(*AI);
  462. }
  463. SpeculationState = SpeculateUnknown;
  464. for (MachineBasicBlock::iterator
  465. MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
  466. MachineInstr *MI = &*MII;
  467. ProcessMI(MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
  468. }
  469. }
  470. // Gather the registers read / clobbered by the terminator.
  471. BitVector TermRegs(NumRegs);
  472. MachineBasicBlock::iterator TI = Preheader->getFirstTerminator();
  473. if (TI != Preheader->end()) {
  474. for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) {
  475. const MachineOperand &MO = TI->getOperand(i);
  476. if (!MO.isReg())
  477. continue;
  478. unsigned Reg = MO.getReg();
  479. if (!Reg)
  480. continue;
  481. for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
  482. TermRegs.set(*AI);
  483. }
  484. }
  485. // Now evaluate whether the potential candidates qualify.
  486. // 1. Check if the candidate defined register is defined by another
  487. // instruction in the loop.
  488. // 2. If the candidate is a load from stack slot (always true for now),
  489. // check if the slot is stored anywhere in the loop.
  490. // 3. Make sure candidate def should not clobber
  491. // registers read by the terminator. Similarly its def should not be
  492. // clobbered by the terminator.
  493. for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
  494. if (Candidates[i].FI != INT_MIN &&
  495. StoredFIs.count(Candidates[i].FI))
  496. continue;
  497. unsigned Def = Candidates[i].Def;
  498. if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
  499. bool Safe = true;
  500. MachineInstr *MI = Candidates[i].MI;
  501. for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
  502. const MachineOperand &MO = MI->getOperand(j);
  503. if (!MO.isReg() || MO.isDef() || !MO.getReg())
  504. continue;
  505. unsigned Reg = MO.getReg();
  506. if (PhysRegDefs.test(Reg) ||
  507. PhysRegClobbers.test(Reg)) {
  508. // If it's using a non-loop-invariant register, then it's obviously
  509. // not safe to hoist.
  510. Safe = false;
  511. break;
  512. }
  513. }
  514. if (Safe)
  515. HoistPostRA(MI, Candidates[i].Def);
  516. }
  517. }
  518. }
  519. /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current
  520. /// loop, and make sure it is not killed by any instructions in the loop.
  521. void MachineLICM::AddToLiveIns(unsigned Reg) {
  522. const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
  523. for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
  524. MachineBasicBlock *BB = Blocks[i];
  525. if (!BB->isLiveIn(Reg))
  526. BB->addLiveIn(Reg);
  527. for (MachineBasicBlock::iterator
  528. MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
  529. MachineInstr *MI = &*MII;
  530. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
  531. MachineOperand &MO = MI->getOperand(i);
  532. if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue;
  533. if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg()))
  534. MO.setIsKill(false);
  535. }
  536. }
  537. }
  538. }
  539. /// HoistPostRA - When an instruction is found to only use loop invariant
  540. /// operands that is safe to hoist, this instruction is called to do the
  541. /// dirty work.
  542. void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
  543. MachineBasicBlock *Preheader = getCurPreheader();
  544. // Now move the instructions to the predecessor, inserting it before any
  545. // terminator instructions.
  546. DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#"
  547. << MI->getParent()->getNumber() << ": " << *MI);
  548. // Splice the instruction to the preheader.
  549. MachineBasicBlock *MBB = MI->getParent();
  550. Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);
  551. // Add register to livein list to all the BBs in the current loop since a
  552. // loop invariant must be kept live throughout the whole loop. This is
  553. // important to ensure later passes do not scavenge the def register.
  554. AddToLiveIns(Def);
  555. ++NumPostRAHoisted;
  556. Changed = true;
  557. }
  558. // IsGuaranteedToExecute - Check if this mbb is guaranteed to execute.
  559. // If not then a load from this mbb may not be safe to hoist.
  560. bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
  561. if (SpeculationState != SpeculateUnknown)
  562. return SpeculationState == SpeculateFalse;
  563. if (BB != CurLoop->getHeader()) {
  564. // Check loop exiting blocks.
  565. SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
  566. CurLoop->getExitingBlocks(CurrentLoopExitingBlocks);
  567. for (unsigned i = 0, e = CurrentLoopExitingBlocks.size(); i != e; ++i)
  568. if (!DT->dominates(BB, CurrentLoopExitingBlocks[i])) {
  569. SpeculationState = SpeculateTrue;
  570. return false;
  571. }
  572. }
  573. SpeculationState = SpeculateFalse;
  574. return true;
  575. }
  576. void MachineLICM::EnterScope(MachineBasicBlock *MBB) {
  577. DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
  578. // Remember livein register pressure.
  579. BackTrace.push_back(RegPressure);
  580. }
  581. void MachineLICM::ExitScope(MachineBasicBlock *MBB) {
  582. DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
  583. BackTrace.pop_back();
  584. }
  585. /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
  586. /// dominator tree node if its a leaf or all of its children are done. Walk
  587. /// up the dominator tree to destroy ancestors which are now done.
  588. void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node,
  589. DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
  590. DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
  591. if (OpenChildren[Node])
  592. return;
  593. // Pop scope.
  594. ExitScope(Node->getBlock());
  595. // Now traverse upwards to pop ancestors whose offsprings are all done.
  596. while (MachineDomTreeNode *Parent = ParentMap[Node]) {
  597. unsigned Left = --OpenChildren[Parent];
  598. if (Left != 0)
  599. break;
  600. ExitScope(Parent->getBlock());
  601. Node = Parent;
  602. }
  603. }
  604. /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all
  605. /// blocks dominated by the specified header block, and that are in the
  606. /// current loop) in depth first order w.r.t the DominatorTree. This allows
  607. /// us to visit definitions before uses, allowing us to hoist a loop body in
  608. /// one pass without iteration.
  609. ///
  610. void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
  611. MachineBasicBlock *Preheader = getCurPreheader();
  612. if (!Preheader)
  613. return;
  614. SmallVector<MachineDomTreeNode*, 32> Scopes;
  615. SmallVector<MachineDomTreeNode*, 8> WorkList;
  616. DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
  617. DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
  618. // Perform a DFS walk to determine the order of visit.
  619. WorkList.push_back(HeaderN);
  620. while (!WorkList.empty()) {
  621. MachineDomTreeNode *Node = WorkList.pop_back_val();
  622. assert(Node && "Null dominator tree node?");
  623. MachineBasicBlock *BB = Node->getBlock();
  624. // If the header of the loop containing this basic block is a landing pad,
  625. // then don't try to hoist instructions out of this loop.
  626. const MachineLoop *ML = MLI->getLoopFor(BB);
  627. if (ML && ML->getHeader()->isLandingPad())
  628. continue;
  629. // If this subregion is not in the top level loop at all, exit.
  630. if (!CurLoop->contains(BB))
  631. continue;
  632. Scopes.push_back(Node);
  633. const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
  634. unsigned NumChildren = Children.size();
  635. // Don't hoist things out of a large switch statement. This often causes
  636. // code to be hoisted that wasn't going to be executed, and increases
  637. // register pressure in a situation where it's likely to matter.
  638. if (BB->succ_size() >= 25)
  639. NumChildren = 0;
  640. OpenChildren[Node] = NumChildren;
  641. // Add children in reverse order as then the next popped worklist node is
  642. // the first child of this node. This means we ultimately traverse the
  643. // DOM tree in exactly the same order as if we'd recursed.
  644. for (int i = (int)NumChildren-1; i >= 0; --i) {
  645. MachineDomTreeNode *Child = Children[i];
  646. ParentMap[Child] = Node;
  647. WorkList.push_back(Child);
  648. }
  649. }
  650. if (Scopes.size() == 0)
  651. return;
  652. // Compute registers which are livein into the loop headers.
  653. RegSeen.clear();
  654. BackTrace.clear();
  655. InitRegPressure(Preheader);
  656. // Now perform LICM.
  657. for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
  658. MachineDomTreeNode *Node = Scopes[i];
  659. MachineBasicBlock *MBB = Node->getBlock();
  660. EnterScope(MBB);
  661. // Process the block
  662. SpeculationState = SpeculateUnknown;
  663. for (MachineBasicBlock::iterator
  664. MII = MBB->begin(), E = MBB->end(); MII != E; ) {
  665. MachineBasicBlock::iterator NextMII = MII; ++NextMII;
  666. MachineInstr *MI = &*MII;
  667. if (!Hoist(MI, Preheader))
  668. UpdateRegPressure(MI);
  669. MII = NextMII;
  670. }
  671. // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
  672. ExitScopeIfDone(Node, OpenChildren, ParentMap);
  673. }
  674. }
  675. void MachineLICM::SinkIntoLoop() {
  676. MachineBasicBlock *Preheader = getCurPreheader();
  677. if (!Preheader)
  678. return;
  679. SmallVector<MachineInstr *, 8> Candidates;
  680. for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin();
  681. I != Preheader->instr_end(); ++I) {
  682. // We need to ensure that we can safely move this instruction into the loop.
  683. // As such, it must not have side-effects, e.g. such as a call has.
  684. if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(I))
  685. Candidates.push_back(I);
  686. }
  687. for (MachineInstr *I : Candidates) {
  688. const MachineOperand &MO = I->getOperand(0);
  689. if (!MO.isDef() || !MO.isReg() || !MO.getReg())
  690. continue;
  691. if (!MRI->hasOneDef(MO.getReg()))
  692. continue;
  693. bool CanSink = true;
  694. MachineBasicBlock *B = nullptr;
  695. for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
  696. // FIXME: Come up with a proper cost model that estimates whether sinking
  697. // the instruction (and thus possibly executing it on every loop
  698. // iteration) is more expensive than a register.
  699. // For now assumes that copies are cheap and thus almost always worth it.
  700. if (!MI.isCopy()) {
  701. CanSink = false;
  702. break;
  703. }
  704. if (!B) {
  705. B = MI.getParent();
  706. continue;
  707. }
  708. B = DT->findNearestCommonDominator(B, MI.getParent());
  709. if (!B) {
  710. CanSink = false;
  711. break;
  712. }
  713. }
  714. if (!CanSink || !B || B == Preheader)
  715. continue;
  716. B->splice(B->getFirstNonPHI(), Preheader, I);
  717. }
  718. }
  719. static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
  720. return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
  721. }
  722. /// InitRegPressure - Find all virtual register references that are liveout of
  723. /// the preheader to initialize the starting "register pressure". Note this
  724. /// does not count live through (livein but not used) registers.
  725. void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
  726. std::fill(RegPressure.begin(), RegPressure.end(), 0);
  727. // If the preheader has only a single predecessor and it ends with a
  728. // fallthrough or an unconditional branch, then scan its predecessor for live
  729. // defs as well. This happens whenever the preheader is created by splitting
  730. // the critical edge from the loop predecessor to the loop header.
  731. if (BB->pred_size() == 1) {
  732. MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  733. SmallVector<MachineOperand, 4> Cond;
  734. if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
  735. InitRegPressure(*BB->pred_begin());
  736. }
  737. for (const MachineInstr &MI : *BB)
  738. UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true);
  739. }
  740. /// UpdateRegPressure - Update estimate of register pressure after the
  741. /// specified instruction.
  742. void MachineLICM::UpdateRegPressure(const MachineInstr *MI,
  743. bool ConsiderUnseenAsDef) {
  744. auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef);
  745. for (const auto &RPIdAndCost : Cost) {
  746. unsigned Class = RPIdAndCost.first;
  747. if (static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second)
  748. RegPressure[Class] = 0;
  749. else
  750. RegPressure[Class] += RPIdAndCost.second;
  751. }
  752. }
  753. DenseMap<unsigned, int>
  754. MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
  755. bool ConsiderUnseenAsDef) {
  756. DenseMap<unsigned, int> Cost;
  757. if (MI->isImplicitDef())
  758. return Cost;
  759. for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
  760. const MachineOperand &MO = MI->getOperand(i);
  761. if (!MO.isReg() || MO.isImplicit())
  762. continue;
  763. unsigned Reg = MO.getReg();
  764. if (!TargetRegisterInfo::isVirtualRegister(Reg))
  765. continue;
  766. // FIXME: It seems bad to use RegSeen only for some of these calculations.
  767. bool isNew = ConsiderSeen ? RegSeen.insert(Reg).second : false;
  768. const TargetRegisterClass *RC = MRI->getRegClass(Reg);
  769. RegClassWeight W = TRI->getRegClassWeight(RC);
  770. int RCCost = 0;
  771. if (MO.isDef())
  772. RCCost = W.RegWeight;
  773. else {
  774. bool isKill = isOperandKill(MO, MRI);
  775. if (isNew && !isKill && ConsiderUnseenAsDef)
  776. // Haven't seen this, it must be a livein.
  777. RCCost = W.RegWeight;
  778. else if (!isNew && isKill)
  779. RCCost = -W.RegWeight;
  780. }
  781. if (RCCost == 0)
  782. continue;
  783. const int *PS = TRI->getRegClassPressureSets(RC);
  784. for (; *PS != -1; ++PS) {
  785. if (Cost.find(*PS) == Cost.end())
  786. Cost[*PS] = RCCost;
  787. else
  788. Cost[*PS] += RCCost;
  789. }
  790. }
  791. return Cost;
  792. }
  793. /// isLoadFromGOTOrConstantPool - Return true if this machine instruction
  794. /// loads from global offset table or constant pool.
  795. static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) {
  796. assert (MI.mayLoad() && "Expected MI that loads!");
  797. for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
  798. E = MI.memoperands_end(); I != E; ++I) {
  799. if (const PseudoSourceValue *PSV = (*I)->getPseudoValue()) {
  800. if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool())
  801. return true;
  802. }
  803. }
  804. return false;
  805. }
  806. /// IsLICMCandidate - Returns true if the instruction may be a suitable
  807. /// candidate for LICM. e.g. If the instruction is a call, then it's obviously
  808. /// not safe to hoist it.
  809. bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
  810. // Check if it's safe to move the instruction.
  811. bool DontMoveAcrossStore = true;
  812. if (!I.isSafeToMove(AA, DontMoveAcrossStore))
  813. return false;
  814. // If it is load then check if it is guaranteed to execute by making sure that
  815. // it dominates all exiting blocks. If it doesn't, then there is a path out of
  816. // the loop which does not execute this load, so we can't hoist it. Loads
  817. // from constant memory are not safe to speculate all the time, for example
  818. // indexed load from a jump table.
  819. // Stores and side effects are already checked by isSafeToMove.
  820. if (I.mayLoad() && !isLoadFromGOTOrConstantPool(I) &&
  821. !IsGuaranteedToExecute(I.getParent()))
  822. return false;
  823. return true;
  824. }
  825. /// IsLoopInvariantInst - Returns true if the instruction is loop
  826. /// invariant. I.e., all virtual register operands are defined outside of the
  827. /// loop, physical registers aren't accessed explicitly, and there are no side
  828. /// effects that aren't captured by the operands or other flags.
  829. ///
  830. bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
  831. if (!IsLICMCandidate(I))
  832. return false;
  833. // The instruction is loop invariant if all of its operands are.
  834. for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
  835. const MachineOperand &MO = I.getOperand(i);
  836. if (!MO.isReg())
  837. continue;
  838. unsigned Reg = MO.getReg();
  839. if (Reg == 0) continue;
  840. // Don't hoist an instruction that uses or defines a physical register.
  841. if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
  842. if (MO.isUse()) {
  843. // If the physreg has no defs anywhere, it's just an ambient register
  844. // and we can freely move its uses. Alternatively, if it's allocatable,
  845. // it could get allocated to something with a def during allocation.
  846. if (!MRI->isConstantPhysReg(Reg, *I.getParent()->getParent()))
  847. return false;
  848. // Otherwise it's safe to move.
  849. continue;
  850. } else if (!MO.isDead()) {
  851. // A def that isn't dead. We can't move it.
  852. return false;
  853. } else if (CurLoop->getHeader()->isLiveIn(Reg)) {
  854. // If the reg is live into the loop, we can't hoist an instruction
  855. // which would clobber it.
  856. return false;
  857. }
  858. }
  859. if (!MO.isUse())
  860. continue;
  861. assert(MRI->getVRegDef(Reg) &&
  862. "Machine instr not mapped for this vreg?!");
  863. // If the loop contains the definition of an operand, then the instruction
  864. // isn't loop invariant.
  865. if (CurLoop->contains(MRI->getVRegDef(Reg)))
  866. return false;
  867. }
  868. // If we got this far, the instruction is loop invariant!
  869. return true;
  870. }
  871. /// HasLoopPHIUse - Return true if the specified instruction is used by a
  872. /// phi node and hoisting it could cause a copy to be inserted.
  873. bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const {
  874. SmallVector<const MachineInstr*, 8> Work(1, MI);
  875. do {
  876. MI = Work.pop_back_val();
  877. for (const MachineOperand &MO : MI->operands()) {
  878. if (!MO.isReg() || !MO.isDef())
  879. continue;
  880. unsigned Reg = MO.getReg();
  881. if (!TargetRegisterInfo::isVirtualRegister(Reg))
  882. continue;
  883. for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
  884. // A PHI may cause a copy to be inserted.
  885. if (UseMI.isPHI()) {
  886. // A PHI inside the loop causes a copy because the live range of Reg is
  887. // extended across the PHI.
  888. if (CurLoop->contains(&UseMI))
  889. return true;
  890. // A PHI in an exit block can cause a copy to be inserted if the PHI
  891. // has multiple predecessors in the loop with different values.
  892. // For now, approximate by rejecting all exit blocks.
  893. if (isExitBlock(UseMI.getParent()))
  894. return true;
  895. continue;
  896. }
  897. // Look past copies as well.
  898. if (UseMI.isCopy() && CurLoop->contains(&UseMI))
  899. Work.push_back(&UseMI);
  900. }
  901. }
  902. } while (!Work.empty());
  903. return false;
  904. }
  905. /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
  906. /// and an use in the current loop, return true if the target considered
  907. /// it 'high'.
  908. bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
  909. unsigned DefIdx, unsigned Reg) const {
  910. if (MRI->use_nodbg_empty(Reg))
  911. return false;
  912. for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) {
  913. if (UseMI.isCopyLike())
  914. continue;
  915. if (!CurLoop->contains(UseMI.getParent()))
  916. continue;
  917. for (unsigned i = 0, e = UseMI.getNumOperands(); i != e; ++i) {
  918. const MachineOperand &MO = UseMI.getOperand(i);
  919. if (!MO.isReg() || !MO.isUse())
  920. continue;
  921. unsigned MOReg = MO.getReg();
  922. if (MOReg != Reg)
  923. continue;
  924. if (TII->hasHighOperandLatency(SchedModel, MRI, &MI, DefIdx, &UseMI, i))
  925. return true;
  926. }
  927. // Only look at the first in loop use.
  928. break;
  929. }
  930. return false;
  931. }
  932. /// IsCheapInstruction - Return true if the instruction is marked "cheap" or
  933. /// the operand latency between its def and a use is one or less.
  934. bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
  935. if (TII->isAsCheapAsAMove(&MI) || MI.isCopyLike())
  936. return true;
  937. bool isCheap = false;
  938. unsigned NumDefs = MI.getDesc().getNumDefs();
  939. for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
  940. MachineOperand &DefMO = MI.getOperand(i);
  941. if (!DefMO.isReg() || !DefMO.isDef())
  942. continue;
  943. --NumDefs;
  944. unsigned Reg = DefMO.getReg();
  945. if (TargetRegisterInfo::isPhysicalRegister(Reg))
  946. continue;
  947. if (!TII->hasLowDefLatency(SchedModel, &MI, i))
  948. return false;
  949. isCheap = true;
  950. }
  951. return isCheap;
  952. }
  953. /// CanCauseHighRegPressure - Visit BBs from header to current BB, check
  954. /// if hoisting an instruction of the given cost matrix can cause high
  955. /// register pressure.
  956. bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost,
  957. bool CheapInstr) {
  958. for (const auto &RPIdAndCost : Cost) {
  959. if (RPIdAndCost.second <= 0)
  960. continue;
  961. unsigned Class = RPIdAndCost.first;
  962. int Limit = RegLimit[Class];
  963. // Don't hoist cheap instructions if they would increase register pressure,
  964. // even if we're under the limit.
  965. if (CheapInstr && !HoistCheapInsts)
  966. return true;
  967. for (const auto &RP : BackTrace)
  968. if (static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit)
  969. return true;
  970. }
  971. return false;
  972. }
  973. /// UpdateBackTraceRegPressure - Traverse the back trace from header to the
  974. /// current block and update their register pressures to reflect the effect
  975. /// of hoisting MI from the current block to the preheader.
  976. void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
  977. // First compute the 'cost' of the instruction, i.e. its contribution
  978. // to register pressure.
  979. auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/false,
  980. /*ConsiderUnseenAsDef=*/false);
  981. // Update register pressure of blocks from loop header to current block.
  982. for (auto &RP : BackTrace)
  983. for (const auto &RPIdAndCost : Cost)
  984. RP[RPIdAndCost.first] += RPIdAndCost.second;
  985. }
  986. /// IsProfitableToHoist - Return true if it is potentially profitable to hoist
  987. /// the given loop invariant.
  988. bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
  989. if (MI.isImplicitDef())
  990. return true;
  991. // Besides removing computation from the loop, hoisting an instruction has
  992. // these effects:
  993. //
  994. // - The value defined by the instruction becomes live across the entire
  995. // loop. This increases register pressure in the loop.
  996. //
  997. // - If the value is used by a PHI in the loop, a copy will be required for
  998. // lowering the PHI after extending the live range.
  999. //
  1000. // - When hoisting the last use of a value in the loop, that value no longer
  1001. // needs to be live in the loop. This lowers register pressure in the loop.
  1002. bool CheapInstr = IsCheapInstruction(MI);
  1003. bool CreatesCopy = HasLoopPHIUse(&MI);
  1004. // Don't hoist a cheap instruction if it would create a copy in the loop.
  1005. if (CheapInstr && CreatesCopy) {
  1006. DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
  1007. return false;
  1008. }
  1009. // Rematerializable instructions should always be hoisted since the register
  1010. // allocator can just pull them down again when needed.
  1011. if (TII->isTriviallyReMaterializable(&MI, AA))
  1012. return true;
  1013. // FIXME: If there are long latency loop-invariant instructions inside the
  1014. // loop at this point, why didn't the optimizer's LICM hoist them?
  1015. for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
  1016. const MachineOperand &MO = MI.getOperand(i);
  1017. if (!MO.isReg() || MO.isImplicit())
  1018. continue;
  1019. unsigned Reg = MO.getReg();
  1020. if (!TargetRegisterInfo::isVirtualRegister(Reg))
  1021. continue;
  1022. if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) {
  1023. DEBUG(dbgs() << "Hoist High Latency: " << MI);
  1024. ++NumHighLatency;
  1025. return true;
  1026. }
  1027. }
  1028. // Estimate register pressure to determine whether to LICM the instruction.
  1029. // In low register pressure situation, we can be more aggressive about
  1030. // hoisting. Also, favors hoisting long latency instructions even in
  1031. // moderately high pressure situation.
  1032. // Cheap instructions will only be hoisted if they don't increase register
  1033. // pressure at all.
  1034. auto Cost = calcRegisterCost(&MI, /*ConsiderSeen=*/false,
  1035. /*ConsiderUnseenAsDef=*/false);
  1036. // Visit BBs from header to current BB, if hoisting this doesn't cause
  1037. // high register pressure, then it's safe to proceed.
  1038. if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
  1039. DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
  1040. ++NumLowRP;
  1041. return true;
  1042. }
  1043. // Don't risk increasing register pressure if it would create copies.
  1044. if (CreatesCopy) {
  1045. DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
  1046. return false;
  1047. }
  1048. // Do not "speculate" in high register pressure situation. If an
  1049. // instruction is not guaranteed to be executed in the loop, it's best to be
  1050. // conservative.
  1051. if (AvoidSpeculation &&
  1052. (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) {
  1053. DEBUG(dbgs() << "Won't speculate: " << MI);
  1054. return false;
  1055. }
  1056. // High register pressure situation, only hoist if the instruction is going
  1057. // to be remat'ed.
  1058. if (!TII->isTriviallyReMaterializable(&MI, AA) &&
  1059. !MI.isInvariantLoad(AA)) {
  1060. DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
  1061. return false;
  1062. }
  1063. return true;
  1064. }
  1065. MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
  1066. // Don't unfold simple loads.
  1067. if (MI->canFoldAsLoad())
  1068. return nullptr;
  1069. // If not, we may be able to unfold a load and hoist that.
  1070. // First test whether the instruction is loading from an amenable
  1071. // memory location.
  1072. if (!MI->isInvariantLoad(AA))
  1073. return nullptr;
  1074. // Next determine the register class for a temporary register.
  1075. unsigned LoadRegIndex;
  1076. unsigned NewOpc =
  1077. TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
  1078. /*UnfoldLoad=*/true,
  1079. /*UnfoldStore=*/false,
  1080. &LoadRegIndex);
  1081. if (NewOpc == 0) return nullptr;
  1082. const MCInstrDesc &MID = TII->get(NewOpc);
  1083. if (MID.getNumDefs() != 1) return nullptr;
  1084. MachineFunction &MF = *MI->getParent()->getParent();
  1085. const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
  1086. // Ok, we're unfolding. Create a temporary register and do the unfold.
  1087. unsigned Reg = MRI->createVirtualRegister(RC);
  1088. SmallVector<MachineInstr *, 2> NewMIs;
  1089. bool Success =
  1090. TII->unfoldMemoryOperand(MF, MI, Reg,
  1091. /*UnfoldLoad=*/true, /*UnfoldStore=*/false,
  1092. NewMIs);
  1093. (void)Success;
  1094. assert(Success &&
  1095. "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
  1096. "succeeded!");
  1097. assert(NewMIs.size() == 2 &&
  1098. "Unfolded a load into multiple instructions!");
  1099. MachineBasicBlock *MBB = MI->getParent();
  1100. MachineBasicBlock::iterator Pos = MI;
  1101. MBB->insert(Pos, NewMIs[0]);
  1102. MBB->insert(Pos, NewMIs[1]);
  1103. // If unfolding produced a load that wasn't loop-invariant or profitable to
  1104. // hoist, discard the new instructions and bail.
  1105. if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
  1106. NewMIs[0]->eraseFromParent();
  1107. NewMIs[1]->eraseFromParent();
  1108. return nullptr;
  1109. }
  1110. // Update register pressure for the unfolded instruction.
  1111. UpdateRegPressure(NewMIs[1]);
  1112. // Otherwise we successfully unfolded a load that we can hoist.
  1113. MI->eraseFromParent();
  1114. return NewMIs[0];
  1115. }
  1116. void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
  1117. for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) {
  1118. const MachineInstr *MI = &*I;
  1119. unsigned Opcode = MI->getOpcode();
  1120. CSEMap[Opcode].push_back(MI);
  1121. }
  1122. }
  1123. const MachineInstr*
  1124. MachineLICM::LookForDuplicate(const MachineInstr *MI,
  1125. std::vector<const MachineInstr*> &PrevMIs) {
  1126. for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
  1127. const MachineInstr *PrevMI = PrevMIs[i];
  1128. if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : nullptr)))
  1129. return PrevMI;
  1130. }
  1131. return nullptr;
  1132. }
  1133. bool MachineLICM::EliminateCSE(MachineInstr *MI,
  1134. DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
  1135. // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
  1136. // the undef property onto uses.
  1137. if (CI == CSEMap.end() || MI->isImplicitDef())
  1138. return false;
  1139. if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
  1140. DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
  1141. // Replace virtual registers defined by MI by their counterparts defined
  1142. // by Dup.
  1143. SmallVector<unsigned, 2> Defs;
  1144. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
  1145. const MachineOperand &MO = MI->getOperand(i);
  1146. // Physical registers may not differ here.
  1147. assert((!MO.isReg() || MO.getReg() == 0 ||
  1148. !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
  1149. MO.getReg() == Dup->getOperand(i).getReg()) &&
  1150. "Instructions with different phys regs are not identical!");
  1151. if (MO.isReg() && MO.isDef() &&
  1152. !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
  1153. Defs.push_back(i);
  1154. }
  1155. SmallVector<const TargetRegisterClass*, 2> OrigRCs;
  1156. for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
  1157. unsigned Idx = Defs[i];
  1158. unsigned Reg = MI->getOperand(Idx).getReg();
  1159. unsigned DupReg = Dup->getOperand(Idx).getReg();
  1160. OrigRCs.push_back(MRI->getRegClass(DupReg));
  1161. if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {
  1162. // Restore old RCs if more than one defs.
  1163. for (unsigned j = 0; j != i; ++j)
  1164. MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
  1165. return false;
  1166. }
  1167. }
  1168. for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
  1169. unsigned Idx = Defs[i];
  1170. unsigned Reg = MI->getOperand(Idx).getReg();
  1171. unsigned DupReg = Dup->getOperand(Idx).getReg();
  1172. MRI->replaceRegWith(Reg, DupReg);
  1173. MRI->clearKillFlags(DupReg);
  1174. }
  1175. MI->eraseFromParent();
  1176. ++NumCSEed;
  1177. return true;
  1178. }
  1179. return false;
  1180. }
  1181. /// MayCSE - Return true if the given instruction will be CSE'd if it's
  1182. /// hoisted out of the loop.
  1183. bool MachineLICM::MayCSE(MachineInstr *MI) {
  1184. unsigned Opcode = MI->getOpcode();
  1185. DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
  1186. CI = CSEMap.find(Opcode);
  1187. // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
  1188. // the undef property onto uses.
  1189. if (CI == CSEMap.end() || MI->isImplicitDef())
  1190. return false;
  1191. return LookForDuplicate(MI, CI->second) != nullptr;
  1192. }
  1193. /// Hoist - When an instruction is found to use only loop invariant operands
  1194. /// that are safe to hoist, this instruction is called to do the dirty work.
  1195. ///
  1196. bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
  1197. // First check whether we should hoist this instruction.
  1198. if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
  1199. // If not, try unfolding a hoistable load.
  1200. MI = ExtractHoistableLoad(MI);
  1201. if (!MI) return false;
  1202. }
  1203. // Now move the instructions to the predecessor, inserting it before any
  1204. // terminator instructions.
  1205. DEBUG({
  1206. dbgs() << "Hoisting " << *MI;
  1207. if (Preheader->getBasicBlock())
  1208. dbgs() << " to MachineBasicBlock "
  1209. << Preheader->getName();
  1210. if (MI->getParent()->getBasicBlock())
  1211. dbgs() << " from MachineBasicBlock "
  1212. << MI->getParent()->getName();
  1213. dbgs() << "\n";
  1214. });
  1215. // If this is the first instruction being hoisted to the preheader,
  1216. // initialize the CSE map with potential common expressions.
  1217. if (FirstInLoop) {
  1218. InitCSEMap(Preheader);
  1219. FirstInLoop = false;
  1220. }
  1221. // Look for opportunity to CSE the hoisted instruction.
  1222. unsigned Opcode = MI->getOpcode();
  1223. DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
  1224. CI = CSEMap.find(Opcode);
  1225. if (!EliminateCSE(MI, CI)) {
  1226. // Otherwise, splice the instruction to the preheader.
  1227. Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
  1228. // Update register pressure for BBs from header to this block.
  1229. UpdateBackTraceRegPressure(MI);
  1230. // Clear the kill flags of any register this instruction defines,
  1231. // since they may need to be live throughout the entire loop
  1232. // rather than just live for part of it.
  1233. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
  1234. MachineOperand &MO = MI->getOperand(i);
  1235. if (MO.isReg() && MO.isDef() && !MO.isDead())
  1236. MRI->clearKillFlags(MO.getReg());
  1237. }
  1238. // Add to the CSE map.
  1239. if (CI != CSEMap.end())
  1240. CI->second.push_back(MI);
  1241. else
  1242. CSEMap[Opcode].push_back(MI);
  1243. }
  1244. ++NumHoisted;
  1245. Changed = true;
  1246. return true;
  1247. }
  1248. MachineBasicBlock *MachineLICM::getCurPreheader() {
  1249. // Determine the block to which to hoist instructions. If we can't find a
  1250. // suitable loop predecessor, we can't do any hoisting.
  1251. // If we've tried to get a preheader and failed, don't try again.
  1252. if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
  1253. return nullptr;
  1254. if (!CurPreheader) {
  1255. CurPreheader = CurLoop->getLoopPreheader();
  1256. if (!CurPreheader) {
  1257. MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
  1258. if (!Pred) {
  1259. CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
  1260. return nullptr;
  1261. }
  1262. CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this);
  1263. if (!CurPreheader) {
  1264. CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
  1265. return nullptr;
  1266. }
  1267. }
  1268. }
  1269. return CurPreheader;
  1270. }