MachineCSE.cpp 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721
  1. //===-- MachineCSE.cpp - Machine Common Subexpression Elimination Pass ----===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This pass performs global common subexpression elimination on machine
  11. // instructions using a scoped hash table based value numbering scheme. It
  12. // must be run while the machine function is still in SSA form.
  13. //
  14. //===----------------------------------------------------------------------===//
  15. #include "llvm/CodeGen/Passes.h"
  16. #include "llvm/ADT/DenseMap.h"
  17. #include "llvm/ADT/ScopedHashTable.h"
  18. #include "llvm/ADT/SmallSet.h"
  19. #include "llvm/ADT/Statistic.h"
  20. #include "llvm/Analysis/AliasAnalysis.h"
  21. #include "llvm/CodeGen/MachineDominators.h"
  22. #include "llvm/CodeGen/MachineInstr.h"
  23. #include "llvm/CodeGen/MachineRegisterInfo.h"
  24. #include "llvm/Support/Debug.h"
  25. #include "llvm/Support/RecyclingAllocator.h"
  26. #include "llvm/Support/raw_ostream.h"
  27. #include "llvm/Target/TargetInstrInfo.h"
  28. #include "llvm/Target/TargetSubtargetInfo.h"
  29. using namespace llvm;
  30. #define DEBUG_TYPE "machine-cse"
  31. STATISTIC(NumCoalesces, "Number of copies coalesced");
  32. STATISTIC(NumCSEs, "Number of common subexpression eliminated");
  33. STATISTIC(NumPhysCSEs,
  34. "Number of physreg referencing common subexpr eliminated");
  35. STATISTIC(NumCrossBBCSEs,
  36. "Number of cross-MBB physreg referencing CS eliminated");
  37. STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
  38. namespace {
  39. class MachineCSE : public MachineFunctionPass {
  40. const TargetInstrInfo *TII;
  41. const TargetRegisterInfo *TRI;
  42. AliasAnalysis *AA;
  43. MachineDominatorTree *DT;
  44. MachineRegisterInfo *MRI;
  45. public:
  46. static char ID; // Pass identification
  47. MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(0), CurrVN(0) {
  48. initializeMachineCSEPass(*PassRegistry::getPassRegistry());
  49. }
  50. bool runOnMachineFunction(MachineFunction &MF) override;
  51. void getAnalysisUsage(AnalysisUsage &AU) const override {
  52. AU.setPreservesCFG();
  53. MachineFunctionPass::getAnalysisUsage(AU);
  54. AU.addRequired<AliasAnalysis>();
  55. AU.addPreservedID(MachineLoopInfoID);
  56. AU.addRequired<MachineDominatorTree>();
  57. AU.addPreserved<MachineDominatorTree>();
  58. }
  59. void releaseMemory() override {
  60. ScopeMap.clear();
  61. Exps.clear();
  62. }
  63. private:
  64. unsigned LookAheadLimit;
  65. typedef RecyclingAllocator<BumpPtrAllocator,
  66. ScopedHashTableVal<MachineInstr*, unsigned> > AllocatorTy;
  67. typedef ScopedHashTable<MachineInstr*, unsigned,
  68. MachineInstrExpressionTrait, AllocatorTy> ScopedHTType;
  69. typedef ScopedHTType::ScopeTy ScopeType;
  70. DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap;
  71. ScopedHTType VNT;
  72. SmallVector<MachineInstr*, 64> Exps;
  73. unsigned CurrVN;
  74. bool PerformTrivialCopyPropagation(MachineInstr *MI,
  75. MachineBasicBlock *MBB);
  76. bool isPhysDefTriviallyDead(unsigned Reg,
  77. MachineBasicBlock::const_iterator I,
  78. MachineBasicBlock::const_iterator E) const;
  79. bool hasLivePhysRegDefUses(const MachineInstr *MI,
  80. const MachineBasicBlock *MBB,
  81. SmallSet<unsigned,8> &PhysRefs,
  82. SmallVectorImpl<unsigned> &PhysDefs,
  83. bool &PhysUseDef) const;
  84. bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
  85. SmallSet<unsigned,8> &PhysRefs,
  86. SmallVectorImpl<unsigned> &PhysDefs,
  87. bool &NonLocal) const;
  88. bool isCSECandidate(MachineInstr *MI);
  89. bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
  90. MachineInstr *CSMI, MachineInstr *MI);
  91. void EnterScope(MachineBasicBlock *MBB);
  92. void ExitScope(MachineBasicBlock *MBB);
  93. bool ProcessBlock(MachineBasicBlock *MBB);
  94. void ExitScopeIfDone(MachineDomTreeNode *Node,
  95. DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
  96. bool PerformCSE(MachineDomTreeNode *Node);
  97. };
  98. } // end anonymous namespace
  99. char MachineCSE::ID = 0;
  100. char &llvm::MachineCSEID = MachineCSE::ID;
  101. INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse",
  102. "Machine Common Subexpression Elimination", false, false)
  103. INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
  104. INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
  105. INITIALIZE_PASS_END(MachineCSE, "machine-cse",
  106. "Machine Common Subexpression Elimination", false, false)
  107. /// The source register of a COPY machine instruction can be propagated to all
  108. /// its users, and this propagation could increase the probability of finding
  109. /// common subexpressions. If the COPY has only one user, the COPY itself can
  110. /// be removed.
  111. bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
  112. MachineBasicBlock *MBB) {
  113. bool Changed = false;
  114. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
  115. MachineOperand &MO = MI->getOperand(i);
  116. if (!MO.isReg() || !MO.isUse())
  117. continue;
  118. unsigned Reg = MO.getReg();
  119. if (!TargetRegisterInfo::isVirtualRegister(Reg))
  120. continue;
  121. bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
  122. MachineInstr *DefMI = MRI->getVRegDef(Reg);
  123. if (!DefMI->isCopy())
  124. continue;
  125. unsigned SrcReg = DefMI->getOperand(1).getReg();
  126. if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
  127. continue;
  128. if (DefMI->getOperand(0).getSubReg())
  129. continue;
  130. // FIXME: We should trivially coalesce subregister copies to expose CSE
  131. // opportunities on instructions with truncated operands (see
  132. // cse-add-with-overflow.ll). This can be done here as follows:
  133. // if (SrcSubReg)
  134. // RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
  135. // SrcSubReg);
  136. // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
  137. //
  138. // The 2-addr pass has been updated to handle coalesced subregs. However,
  139. // some machine-specific code still can't handle it.
  140. // To handle it properly we also need a way find a constrained subregister
  141. // class given a super-reg class and subreg index.
  142. if (DefMI->getOperand(1).getSubReg())
  143. continue;
  144. const TargetRegisterClass *RC = MRI->getRegClass(Reg);
  145. if (!MRI->constrainRegClass(SrcReg, RC))
  146. continue;
  147. DEBUG(dbgs() << "Coalescing: " << *DefMI);
  148. DEBUG(dbgs() << "*** to: " << *MI);
  149. // Propagate SrcReg of copies to MI.
  150. MO.setReg(SrcReg);
  151. MRI->clearKillFlags(SrcReg);
  152. // Coalesce single use copies.
  153. if (OnlyOneUse) {
  154. DefMI->eraseFromParent();
  155. ++NumCoalesces;
  156. }
  157. Changed = true;
  158. }
  159. return Changed;
  160. }
  161. bool
  162. MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
  163. MachineBasicBlock::const_iterator I,
  164. MachineBasicBlock::const_iterator E) const {
  165. unsigned LookAheadLeft = LookAheadLimit;
  166. while (LookAheadLeft) {
  167. // Skip over dbg_value's.
  168. while (I != E && I->isDebugValue())
  169. ++I;
  170. if (I == E)
  171. // Reached end of block, register is obviously dead.
  172. return true;
  173. bool SeenDef = false;
  174. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
  175. const MachineOperand &MO = I->getOperand(i);
  176. if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
  177. SeenDef = true;
  178. if (!MO.isReg() || !MO.getReg())
  179. continue;
  180. if (!TRI->regsOverlap(MO.getReg(), Reg))
  181. continue;
  182. if (MO.isUse())
  183. // Found a use!
  184. return false;
  185. SeenDef = true;
  186. }
  187. if (SeenDef)
  188. // See a def of Reg (or an alias) before encountering any use, it's
  189. // trivially dead.
  190. return true;
  191. --LookAheadLeft;
  192. ++I;
  193. }
  194. return false;
  195. }
  196. /// hasLivePhysRegDefUses - Return true if the specified instruction read/write
  197. /// physical registers (except for dead defs of physical registers). It also
  198. /// returns the physical register def by reference if it's the only one and the
  199. /// instruction does not uses a physical register.
  200. bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
  201. const MachineBasicBlock *MBB,
  202. SmallSet<unsigned,8> &PhysRefs,
  203. SmallVectorImpl<unsigned> &PhysDefs,
  204. bool &PhysUseDef) const{
  205. // First, add all uses to PhysRefs.
  206. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
  207. const MachineOperand &MO = MI->getOperand(i);
  208. if (!MO.isReg() || MO.isDef())
  209. continue;
  210. unsigned Reg = MO.getReg();
  211. if (!Reg)
  212. continue;
  213. if (TargetRegisterInfo::isVirtualRegister(Reg))
  214. continue;
  215. // Reading constant physregs is ok.
  216. if (!MRI->isConstantPhysReg(Reg, *MBB->getParent()))
  217. for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
  218. PhysRefs.insert(*AI);
  219. }
  220. // Next, collect all defs into PhysDefs. If any is already in PhysRefs
  221. // (which currently contains only uses), set the PhysUseDef flag.
  222. PhysUseDef = false;
  223. MachineBasicBlock::const_iterator I = MI; I = std::next(I);
  224. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
  225. const MachineOperand &MO = MI->getOperand(i);
  226. if (!MO.isReg() || !MO.isDef())
  227. continue;
  228. unsigned Reg = MO.getReg();
  229. if (!Reg)
  230. continue;
  231. if (TargetRegisterInfo::isVirtualRegister(Reg))
  232. continue;
  233. // Check against PhysRefs even if the def is "dead".
  234. if (PhysRefs.count(Reg))
  235. PhysUseDef = true;
  236. // If the def is dead, it's ok. But the def may not marked "dead". That's
  237. // common since this pass is run before livevariables. We can scan
  238. // forward a few instructions and check if it is obviously dead.
  239. if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end()))
  240. PhysDefs.push_back(Reg);
  241. }
  242. // Finally, add all defs to PhysRefs as well.
  243. for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
  244. for (MCRegAliasIterator AI(PhysDefs[i], TRI, true); AI.isValid(); ++AI)
  245. PhysRefs.insert(*AI);
  246. return !PhysRefs.empty();
  247. }
  248. bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
  249. SmallSet<unsigned,8> &PhysRefs,
  250. SmallVectorImpl<unsigned> &PhysDefs,
  251. bool &NonLocal) const {
  252. // For now conservatively returns false if the common subexpression is
  253. // not in the same basic block as the given instruction. The only exception
  254. // is if the common subexpression is in the sole predecessor block.
  255. const MachineBasicBlock *MBB = MI->getParent();
  256. const MachineBasicBlock *CSMBB = CSMI->getParent();
  257. bool CrossMBB = false;
  258. if (CSMBB != MBB) {
  259. if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
  260. return false;
  261. for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
  262. if (MRI->isAllocatable(PhysDefs[i]) || MRI->isReserved(PhysDefs[i]))
  263. // Avoid extending live range of physical registers if they are
  264. //allocatable or reserved.
  265. return false;
  266. }
  267. CrossMBB = true;
  268. }
  269. MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
  270. MachineBasicBlock::const_iterator E = MI;
  271. MachineBasicBlock::const_iterator EE = CSMBB->end();
  272. unsigned LookAheadLeft = LookAheadLimit;
  273. while (LookAheadLeft) {
  274. // Skip over dbg_value's.
  275. while (I != E && I != EE && I->isDebugValue())
  276. ++I;
  277. if (I == EE) {
  278. assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
  279. (void)CrossMBB;
  280. CrossMBB = false;
  281. NonLocal = true;
  282. I = MBB->begin();
  283. EE = MBB->end();
  284. continue;
  285. }
  286. if (I == E)
  287. return true;
  288. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
  289. const MachineOperand &MO = I->getOperand(i);
  290. // RegMasks go on instructions like calls that clobber lots of physregs.
  291. // Don't attempt to CSE across such an instruction.
  292. if (MO.isRegMask())
  293. return false;
  294. if (!MO.isReg() || !MO.isDef())
  295. continue;
  296. unsigned MOReg = MO.getReg();
  297. if (TargetRegisterInfo::isVirtualRegister(MOReg))
  298. continue;
  299. if (PhysRefs.count(MOReg))
  300. return false;
  301. }
  302. --LookAheadLeft;
  303. ++I;
  304. }
  305. return false;
  306. }
  307. bool MachineCSE::isCSECandidate(MachineInstr *MI) {
  308. if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
  309. MI->isInlineAsm() || MI->isDebugValue())
  310. return false;
  311. // Ignore copies.
  312. if (MI->isCopyLike())
  313. return false;
  314. // Ignore stuff that we obviously can't move.
  315. if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
  316. MI->hasUnmodeledSideEffects())
  317. return false;
  318. if (MI->mayLoad()) {
  319. // Okay, this instruction does a load. As a refinement, we allow the target
  320. // to decide whether the loaded value is actually a constant. If so, we can
  321. // actually use it as a load.
  322. if (!MI->isInvariantLoad(AA))
  323. // FIXME: we should be able to hoist loads with no other side effects if
  324. // there are no other instructions which can change memory in this loop.
  325. // This is a trivial form of alias analysis.
  326. return false;
  327. }
  328. return true;
  329. }
  330. /// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
  331. /// common expression that defines Reg.
  332. bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
  333. MachineInstr *CSMI, MachineInstr *MI) {
  334. // FIXME: Heuristics that works around the lack the live range splitting.
  335. // If CSReg is used at all uses of Reg, CSE should not increase register
  336. // pressure of CSReg.
  337. bool MayIncreasePressure = true;
  338. if (TargetRegisterInfo::isVirtualRegister(CSReg) &&
  339. TargetRegisterInfo::isVirtualRegister(Reg)) {
  340. MayIncreasePressure = false;
  341. SmallPtrSet<MachineInstr*, 8> CSUses;
  342. for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
  343. CSUses.insert(&MI);
  344. }
  345. for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
  346. if (!CSUses.count(&MI)) {
  347. MayIncreasePressure = true;
  348. break;
  349. }
  350. }
  351. }
  352. if (!MayIncreasePressure) return true;
  353. // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
  354. // an immediate predecessor. We don't want to increase register pressure and
  355. // end up causing other computation to be spilled.
  356. if (TII->isAsCheapAsAMove(MI)) {
  357. MachineBasicBlock *CSBB = CSMI->getParent();
  358. MachineBasicBlock *BB = MI->getParent();
  359. if (CSBB != BB && !CSBB->isSuccessor(BB))
  360. return false;
  361. }
  362. // Heuristics #2: If the expression doesn't not use a vr and the only use
  363. // of the redundant computation are copies, do not cse.
  364. bool HasVRegUse = false;
  365. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
  366. const MachineOperand &MO = MI->getOperand(i);
  367. if (MO.isReg() && MO.isUse() &&
  368. TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
  369. HasVRegUse = true;
  370. break;
  371. }
  372. }
  373. if (!HasVRegUse) {
  374. bool HasNonCopyUse = false;
  375. for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
  376. // Ignore copies.
  377. if (!MI.isCopyLike()) {
  378. HasNonCopyUse = true;
  379. break;
  380. }
  381. }
  382. if (!HasNonCopyUse)
  383. return false;
  384. }
  385. // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
  386. // it unless the defined value is already used in the BB of the new use.
  387. bool HasPHI = false;
  388. SmallPtrSet<MachineBasicBlock*, 4> CSBBs;
  389. for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
  390. HasPHI |= MI.isPHI();
  391. CSBBs.insert(MI.getParent());
  392. }
  393. if (!HasPHI)
  394. return true;
  395. return CSBBs.count(MI->getParent());
  396. }
  397. void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
  398. DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
  399. ScopeType *Scope = new ScopeType(VNT);
  400. ScopeMap[MBB] = Scope;
  401. }
  402. void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
  403. DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
  404. DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
  405. assert(SI != ScopeMap.end());
  406. delete SI->second;
  407. ScopeMap.erase(SI);
  408. }
  409. bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
  410. bool Changed = false;
  411. SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
  412. SmallVector<unsigned, 2> ImplicitDefsToUpdate;
  413. SmallVector<unsigned, 2> ImplicitDefs;
  414. for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
  415. MachineInstr *MI = &*I;
  416. ++I;
  417. if (!isCSECandidate(MI))
  418. continue;
  419. bool FoundCSE = VNT.count(MI);
  420. if (!FoundCSE) {
  421. // Using trivial copy propagation to find more CSE opportunities.
  422. if (PerformTrivialCopyPropagation(MI, MBB)) {
  423. Changed = true;
  424. // After coalescing MI itself may become a copy.
  425. if (MI->isCopyLike())
  426. continue;
  427. // Try again to see if CSE is possible.
  428. FoundCSE = VNT.count(MI);
  429. }
  430. }
  431. // Commute commutable instructions.
  432. bool Commuted = false;
  433. if (!FoundCSE && MI->isCommutable()) {
  434. MachineInstr *NewMI = TII->commuteInstruction(MI);
  435. if (NewMI) {
  436. Commuted = true;
  437. FoundCSE = VNT.count(NewMI);
  438. if (NewMI != MI) {
  439. // New instruction. It doesn't need to be kept.
  440. NewMI->eraseFromParent();
  441. Changed = true;
  442. } else if (!FoundCSE)
  443. // MI was changed but it didn't help, commute it back!
  444. (void)TII->commuteInstruction(MI);
  445. }
  446. }
  447. // If the instruction defines physical registers and the values *may* be
  448. // used, then it's not safe to replace it with a common subexpression.
  449. // It's also not safe if the instruction uses physical registers.
  450. bool CrossMBBPhysDef = false;
  451. SmallSet<unsigned, 8> PhysRefs;
  452. SmallVector<unsigned, 2> PhysDefs;
  453. bool PhysUseDef = false;
  454. if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
  455. PhysDefs, PhysUseDef)) {
  456. FoundCSE = false;
  457. // ... Unless the CS is local or is in the sole predecessor block
  458. // and it also defines the physical register which is not clobbered
  459. // in between and the physical register uses were not clobbered.
  460. // This can never be the case if the instruction both uses and
  461. // defines the same physical register, which was detected above.
  462. if (!PhysUseDef) {
  463. unsigned CSVN = VNT.lookup(MI);
  464. MachineInstr *CSMI = Exps[CSVN];
  465. if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
  466. FoundCSE = true;
  467. }
  468. }
  469. if (!FoundCSE) {
  470. VNT.insert(MI, CurrVN++);
  471. Exps.push_back(MI);
  472. continue;
  473. }
  474. // Found a common subexpression, eliminate it.
  475. unsigned CSVN = VNT.lookup(MI);
  476. MachineInstr *CSMI = Exps[CSVN];
  477. DEBUG(dbgs() << "Examining: " << *MI);
  478. DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
  479. // Check if it's profitable to perform this CSE.
  480. bool DoCSE = true;
  481. unsigned NumDefs = MI->getDesc().getNumDefs() +
  482. MI->getDesc().getNumImplicitDefs();
  483. for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
  484. MachineOperand &MO = MI->getOperand(i);
  485. if (!MO.isReg() || !MO.isDef())
  486. continue;
  487. unsigned OldReg = MO.getReg();
  488. unsigned NewReg = CSMI->getOperand(i).getReg();
  489. // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
  490. // we should make sure it is not dead at CSMI.
  491. if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
  492. ImplicitDefsToUpdate.push_back(i);
  493. // Keep track of implicit defs of CSMI and MI, to clear possibly
  494. // made-redundant kill flags.
  495. if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
  496. ImplicitDefs.push_back(OldReg);
  497. if (OldReg == NewReg) {
  498. --NumDefs;
  499. continue;
  500. }
  501. assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
  502. TargetRegisterInfo::isVirtualRegister(NewReg) &&
  503. "Do not CSE physical register defs!");
  504. if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
  505. DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
  506. DoCSE = false;
  507. break;
  508. }
  509. // Don't perform CSE if the result of the old instruction cannot exist
  510. // within the register class of the new instruction.
  511. const TargetRegisterClass *OldRC = MRI->getRegClass(OldReg);
  512. if (!MRI->constrainRegClass(NewReg, OldRC)) {
  513. DEBUG(dbgs() << "*** Not the same register class, avoid CSE!\n");
  514. DoCSE = false;
  515. break;
  516. }
  517. CSEPairs.push_back(std::make_pair(OldReg, NewReg));
  518. --NumDefs;
  519. }
  520. // Actually perform the elimination.
  521. if (DoCSE) {
  522. for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) {
  523. unsigned OldReg = CSEPairs[i].first;
  524. unsigned NewReg = CSEPairs[i].second;
  525. // OldReg may have been unused but is used now, clear the Dead flag
  526. MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
  527. assert(Def != nullptr && "CSEd register has no unique definition?");
  528. Def->clearRegisterDeads(NewReg);
  529. // Replace with NewReg and clear kill flags which may be wrong now.
  530. MRI->replaceRegWith(OldReg, NewReg);
  531. MRI->clearKillFlags(NewReg);
  532. }
  533. // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
  534. // we should make sure it is not dead at CSMI.
  535. for (unsigned i = 0, e = ImplicitDefsToUpdate.size(); i != e; ++i)
  536. CSMI->getOperand(ImplicitDefsToUpdate[i]).setIsDead(false);
  537. // Go through implicit defs of CSMI and MI, and clear the kill flags on
  538. // their uses in all the instructions between CSMI and MI.
  539. // We might have made some of the kill flags redundant, consider:
  540. // subs ... %NZCV<imp-def> <- CSMI
  541. // csinc ... %NZCV<imp-use,kill> <- this kill flag isn't valid anymore
  542. // subs ... %NZCV<imp-def> <- MI, to be eliminated
  543. // csinc ... %NZCV<imp-use,kill>
  544. // Since we eliminated MI, and reused a register imp-def'd by CSMI
  545. // (here %NZCV), that register, if it was killed before MI, should have
  546. // that kill flag removed, because it's lifetime was extended.
  547. if (CSMI->getParent() == MI->getParent()) {
  548. for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; ++II)
  549. for (auto ImplicitDef : ImplicitDefs)
  550. if (MachineOperand *MO = II->findRegisterUseOperand(
  551. ImplicitDef, /*isKill=*/true, TRI))
  552. MO->setIsKill(false);
  553. } else {
  554. // If the instructions aren't in the same BB, bail out and clear the
  555. // kill flag on all uses of the imp-def'd register.
  556. for (auto ImplicitDef : ImplicitDefs)
  557. MRI->clearKillFlags(ImplicitDef);
  558. }
  559. if (CrossMBBPhysDef) {
  560. // Add physical register defs now coming in from a predecessor to MBB
  561. // livein list.
  562. while (!PhysDefs.empty()) {
  563. unsigned LiveIn = PhysDefs.pop_back_val();
  564. if (!MBB->isLiveIn(LiveIn))
  565. MBB->addLiveIn(LiveIn);
  566. }
  567. ++NumCrossBBCSEs;
  568. }
  569. MI->eraseFromParent();
  570. ++NumCSEs;
  571. if (!PhysRefs.empty())
  572. ++NumPhysCSEs;
  573. if (Commuted)
  574. ++NumCommutes;
  575. Changed = true;
  576. } else {
  577. VNT.insert(MI, CurrVN++);
  578. Exps.push_back(MI);
  579. }
  580. CSEPairs.clear();
  581. ImplicitDefsToUpdate.clear();
  582. ImplicitDefs.clear();
  583. }
  584. return Changed;
  585. }
  586. /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
  587. /// dominator tree node if its a leaf or all of its children are done. Walk
  588. /// up the dominator tree to destroy ancestors which are now done.
  589. void
  590. MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
  591. DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) {
  592. if (OpenChildren[Node])
  593. return;
  594. // Pop scope.
  595. ExitScope(Node->getBlock());
  596. // Now traverse upwards to pop ancestors whose offsprings are all done.
  597. while (MachineDomTreeNode *Parent = Node->getIDom()) {
  598. unsigned Left = --OpenChildren[Parent];
  599. if (Left != 0)
  600. break;
  601. ExitScope(Parent->getBlock());
  602. Node = Parent;
  603. }
  604. }
  605. bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
  606. SmallVector<MachineDomTreeNode*, 32> Scopes;
  607. SmallVector<MachineDomTreeNode*, 8> WorkList;
  608. DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
  609. CurrVN = 0;
  610. // Perform a DFS walk to determine the order of visit.
  611. WorkList.push_back(Node);
  612. do {
  613. Node = WorkList.pop_back_val();
  614. Scopes.push_back(Node);
  615. const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
  616. unsigned NumChildren = Children.size();
  617. OpenChildren[Node] = NumChildren;
  618. for (unsigned i = 0; i != NumChildren; ++i) {
  619. MachineDomTreeNode *Child = Children[i];
  620. WorkList.push_back(Child);
  621. }
  622. } while (!WorkList.empty());
  623. // Now perform CSE.
  624. bool Changed = false;
  625. for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
  626. MachineDomTreeNode *Node = Scopes[i];
  627. MachineBasicBlock *MBB = Node->getBlock();
  628. EnterScope(MBB);
  629. Changed |= ProcessBlock(MBB);
  630. // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
  631. ExitScopeIfDone(Node, OpenChildren);
  632. }
  633. return Changed;
  634. }
  635. bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
  636. if (skipOptnoneFunction(*MF.getFunction()))
  637. return false;
  638. TII = MF.getSubtarget().getInstrInfo();
  639. TRI = MF.getSubtarget().getRegisterInfo();
  640. MRI = &MF.getRegInfo();
  641. AA = &getAnalysis<AliasAnalysis>();
  642. DT = &getAnalysis<MachineDominatorTree>();
  643. LookAheadLimit = TII->getMachineCSELookAheadLimit();
  644. return PerformCSE(DT->getRootNode());
  645. }