2
0

PeepholeOptimizer.cpp 56 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464
  1. //===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // Perform peephole optimizations on the machine code:
  11. //
  12. // - Optimize Extensions
  13. //
  14. // Optimization of sign / zero extension instructions. It may be extended to
  15. // handle other instructions with similar properties.
  16. //
  17. // On some targets, some instructions, e.g. X86 sign / zero extension, may
  18. // leave the source value in the lower part of the result. This optimization
  19. // will replace some uses of the pre-extension value with uses of the
  20. // sub-register of the results.
  21. //
  22. // - Optimize Comparisons
  23. //
  24. // Optimization of comparison instructions. For instance, in this code:
  25. //
  26. // sub r1, 1
  27. // cmp r1, 0
  28. // bz L1
  29. //
  30. // If the "sub" instruction already sets (or could be modified to set) the
  31. // same flag that the "cmp" instruction sets and that "bz" uses, then we can
  32. // eliminate the "cmp" instruction.
  33. //
  34. // Another instance, in this code:
  35. //
  36. // sub r1, r3 | sub r1, imm
  37. // cmp r3, r1 or cmp r1, r3 | cmp r1, imm
  38. // bge L1
  39. //
  40. // If the branch instruction can use flag from "sub", then we can replace
  41. // "sub" with "subs" and eliminate the "cmp" instruction.
  42. //
  43. // - Optimize Loads:
  44. //
  45. // Loads that can be folded into a later instruction. A load is foldable
  46. // if it loads to virtual registers and the virtual register defined has
  47. // a single use.
  48. //
  49. // - Optimize Copies and Bitcast (more generally, target specific copies):
  50. //
  51. // Rewrite copies and bitcasts to avoid cross register bank copies
  52. // when possible.
  53. // E.g., Consider the following example, where capital and lower
  54. // letters denote different register file:
  55. // b = copy A <-- cross-bank copy
  56. // C = copy b <-- cross-bank copy
  57. // =>
  58. // b = copy A <-- cross-bank copy
  59. // C = copy A <-- same-bank copy
  60. //
  61. // E.g., for bitcast:
  62. // b = bitcast A <-- cross-bank copy
  63. // C = bitcast b <-- cross-bank copy
  64. // =>
  65. // b = bitcast A <-- cross-bank copy
  66. // C = copy A <-- same-bank copy
  67. //===----------------------------------------------------------------------===//
  68. #include "llvm/CodeGen/Passes.h"
  69. #include "llvm/ADT/DenseMap.h"
  70. #include "llvm/ADT/SmallPtrSet.h"
  71. #include "llvm/ADT/SmallSet.h"
  72. #include "llvm/ADT/Statistic.h"
  73. #include "llvm/CodeGen/MachineDominators.h"
  74. #include "llvm/CodeGen/MachineInstrBuilder.h"
  75. #include "llvm/CodeGen/MachineRegisterInfo.h"
  76. #include "llvm/Support/CommandLine.h"
  77. #include "llvm/Support/Debug.h"
  78. #include "llvm/Support/raw_ostream.h"
  79. #include "llvm/Target/TargetInstrInfo.h"
  80. #include "llvm/Target/TargetRegisterInfo.h"
  81. #include "llvm/Target/TargetSubtargetInfo.h"
  82. #include <utility>
  83. using namespace llvm;
  84. #define DEBUG_TYPE "peephole-opt"
  85. // Optimize Extensions
  86. static cl::opt<bool>
  87. Aggressive("aggressive-ext-opt", cl::Hidden,
  88. cl::desc("Aggressive extension optimization"));
  89. static cl::opt<bool>
  90. DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
  91. cl::desc("Disable the peephole optimizer"));
  92. static cl::opt<bool>
  93. DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false),
  94. cl::desc("Disable advanced copy optimization"));
  95. STATISTIC(NumReuse, "Number of extension results reused");
  96. STATISTIC(NumCmps, "Number of compares eliminated");
  97. STATISTIC(NumImmFold, "Number of move immediate folded");
  98. STATISTIC(NumLoadFold, "Number of loads folded");
  99. STATISTIC(NumSelects, "Number of selects optimized");
  100. STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized");
  101. STATISTIC(NumRewrittenCopies, "Number of copies rewritten");
  102. namespace {
  103. class PeepholeOptimizer : public MachineFunctionPass {
  104. const TargetInstrInfo *TII;
  105. const TargetRegisterInfo *TRI;
  106. MachineRegisterInfo *MRI;
  107. MachineDominatorTree *DT; // Machine dominator tree
  108. public:
  109. static char ID; // Pass identification
  110. PeepholeOptimizer() : MachineFunctionPass(ID) {
  111. initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
  112. }
  113. bool runOnMachineFunction(MachineFunction &MF) override;
  114. void getAnalysisUsage(AnalysisUsage &AU) const override {
  115. AU.setPreservesCFG();
  116. MachineFunctionPass::getAnalysisUsage(AU);
  117. if (Aggressive) {
  118. AU.addRequired<MachineDominatorTree>();
  119. AU.addPreserved<MachineDominatorTree>();
  120. }
  121. }
  122. private:
  123. bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
  124. bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
  125. SmallPtrSetImpl<MachineInstr*> &LocalMIs);
  126. bool optimizeSelect(MachineInstr *MI,
  127. SmallPtrSetImpl<MachineInstr *> &LocalMIs);
  128. bool optimizeCondBranch(MachineInstr *MI);
  129. bool optimizeCopyOrBitcast(MachineInstr *MI);
  130. bool optimizeCoalescableCopy(MachineInstr *MI);
  131. bool optimizeUncoalescableCopy(MachineInstr *MI,
  132. SmallPtrSetImpl<MachineInstr *> &LocalMIs);
  133. bool findNextSource(unsigned &Reg, unsigned &SubReg);
  134. bool isMoveImmediate(MachineInstr *MI,
  135. SmallSet<unsigned, 4> &ImmDefRegs,
  136. DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
  137. bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
  138. SmallSet<unsigned, 4> &ImmDefRegs,
  139. DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
  140. bool isLoadFoldable(MachineInstr *MI,
  141. SmallSet<unsigned, 16> &FoldAsLoadDefCandidates);
  142. /// \brief Check whether \p MI is understood by the register coalescer
  143. /// but may require some rewriting.
  144. bool isCoalescableCopy(const MachineInstr &MI) {
  145. // SubregToRegs are not interesting, because they are already register
  146. // coalescer friendly.
  147. return MI.isCopy() || (!DisableAdvCopyOpt &&
  148. (MI.isRegSequence() || MI.isInsertSubreg() ||
  149. MI.isExtractSubreg()));
  150. }
  151. /// \brief Check whether \p MI is a copy like instruction that is
  152. /// not recognized by the register coalescer.
  153. bool isUncoalescableCopy(const MachineInstr &MI) {
  154. return MI.isBitcast() ||
  155. (!DisableAdvCopyOpt &&
  156. (MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
  157. MI.isExtractSubregLike()));
  158. }
  159. };
  160. /// \brief Helper class to track the possible sources of a value defined by
  161. /// a (chain of) copy related instructions.
  162. /// Given a definition (instruction and definition index), this class
  163. /// follows the use-def chain to find successive suitable sources.
  164. /// The given source can be used to rewrite the definition into
  165. /// def = COPY src.
  166. ///
  167. /// For instance, let us consider the following snippet:
  168. /// v0 =
  169. /// v2 = INSERT_SUBREG v1, v0, sub0
  170. /// def = COPY v2.sub0
  171. ///
  172. /// Using a ValueTracker for def = COPY v2.sub0 will give the following
  173. /// suitable sources:
  174. /// v2.sub0 and v0.
  175. /// Then, def can be rewritten into def = COPY v0.
  176. class ValueTracker {
  177. private:
  178. /// The current point into the use-def chain.
  179. const MachineInstr *Def;
  180. /// The index of the definition in Def.
  181. unsigned DefIdx;
  182. /// The sub register index of the definition.
  183. unsigned DefSubReg;
  184. /// The register where the value can be found.
  185. unsigned Reg;
  186. /// Specifiy whether or not the value tracking looks through
  187. /// complex instructions. When this is false, the value tracker
  188. /// bails on everything that is not a copy or a bitcast.
  189. ///
  190. /// Note: This could have been implemented as a specialized version of
  191. /// the ValueTracker class but that would have complicated the code of
  192. /// the users of this class.
  193. bool UseAdvancedTracking;
  194. /// MachineRegisterInfo used to perform tracking.
  195. const MachineRegisterInfo &MRI;
  196. /// Optional TargetInstrInfo used to perform some complex
  197. /// tracking.
  198. const TargetInstrInfo *TII;
  199. /// \brief Dispatcher to the right underlying implementation of
  200. /// getNextSource.
  201. bool getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg);
  202. /// \brief Specialized version of getNextSource for Copy instructions.
  203. bool getNextSourceFromCopy(unsigned &SrcReg, unsigned &SrcSubReg);
  204. /// \brief Specialized version of getNextSource for Bitcast instructions.
  205. bool getNextSourceFromBitcast(unsigned &SrcReg, unsigned &SrcSubReg);
  206. /// \brief Specialized version of getNextSource for RegSequence
  207. /// instructions.
  208. bool getNextSourceFromRegSequence(unsigned &SrcReg, unsigned &SrcSubReg);
  209. /// \brief Specialized version of getNextSource for InsertSubreg
  210. /// instructions.
  211. bool getNextSourceFromInsertSubreg(unsigned &SrcReg, unsigned &SrcSubReg);
  212. /// \brief Specialized version of getNextSource for ExtractSubreg
  213. /// instructions.
  214. bool getNextSourceFromExtractSubreg(unsigned &SrcReg, unsigned &SrcSubReg);
  215. /// \brief Specialized version of getNextSource for SubregToReg
  216. /// instructions.
  217. bool getNextSourceFromSubregToReg(unsigned &SrcReg, unsigned &SrcSubReg);
  218. public:
  219. /// \brief Create a ValueTracker instance for the value defined by \p Reg.
  220. /// \p DefSubReg represents the sub register index the value tracker will
  221. /// track. It does not need to match the sub register index used in the
  222. /// definition of \p Reg.
  223. /// \p UseAdvancedTracking specifies whether or not the value tracker looks
  224. /// through complex instructions. By default (false), it handles only copy
  225. /// and bitcast instructions.
  226. /// If \p Reg is a physical register, a value tracker constructed with
  227. /// this constructor will not find any alternative source.
  228. /// Indeed, when \p Reg is a physical register that constructor does not
  229. /// know which definition of \p Reg it should track.
  230. /// Use the next constructor to track a physical register.
  231. ValueTracker(unsigned Reg, unsigned DefSubReg,
  232. const MachineRegisterInfo &MRI,
  233. bool UseAdvancedTracking = false,
  234. const TargetInstrInfo *TII = nullptr)
  235. : Def(nullptr), DefIdx(0), DefSubReg(DefSubReg), Reg(Reg),
  236. UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) {
  237. if (!TargetRegisterInfo::isPhysicalRegister(Reg)) {
  238. Def = MRI.getVRegDef(Reg);
  239. DefIdx = MRI.def_begin(Reg).getOperandNo();
  240. }
  241. }
  242. /// \brief Create a ValueTracker instance for the value defined by
  243. /// the pair \p MI, \p DefIdx.
  244. /// Unlike the other constructor, the value tracker produced by this one
  245. /// may be able to find a new source when the definition is a physical
  246. /// register.
  247. /// This could be useful to rewrite target specific instructions into
  248. /// generic copy instructions.
  249. ValueTracker(const MachineInstr &MI, unsigned DefIdx, unsigned DefSubReg,
  250. const MachineRegisterInfo &MRI,
  251. bool UseAdvancedTracking = false,
  252. const TargetInstrInfo *TII = nullptr)
  253. : Def(&MI), DefIdx(DefIdx), DefSubReg(DefSubReg),
  254. UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) {
  255. assert(DefIdx < Def->getDesc().getNumDefs() &&
  256. Def->getOperand(DefIdx).isReg() && "Invalid definition");
  257. Reg = Def->getOperand(DefIdx).getReg();
  258. }
  259. /// \brief Following the use-def chain, get the next available source
  260. /// for the tracked value.
  261. /// When the returned value is not nullptr, \p SrcReg gives the register
  262. /// that contain the tracked value.
  263. /// \note The sub register index returned in \p SrcSubReg must be used
  264. /// on \p SrcReg to access the actual value.
  265. /// \return Unless the returned value is nullptr (i.e., no source found),
  266. /// \p SrcReg gives the register of the next source used in the returned
  267. /// instruction and \p SrcSubReg the sub-register index to be used on that
  268. /// source to get the tracked value. When nullptr is returned, no
  269. /// alternative source has been found.
  270. const MachineInstr *getNextSource(unsigned &SrcReg, unsigned &SrcSubReg);
  271. /// \brief Get the last register where the initial value can be found.
  272. /// Initially this is the register of the definition.
  273. /// Then, after each successful call to getNextSource, this is the
  274. /// register of the last source.
  275. unsigned getReg() const { return Reg; }
  276. };
  277. }
  278. char PeepholeOptimizer::ID = 0;
  279. char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
  280. INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts",
  281. "Peephole Optimizations", false, false)
  282. INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
  283. INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
  284. "Peephole Optimizations", false, false)
  285. /// optimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
  286. /// a single register and writes a single register and it does not modify the
  287. /// source, and if the source value is preserved as a sub-register of the
  288. /// result, then replace all reachable uses of the source with the subreg of the
  289. /// result.
  290. ///
  291. /// Do not generate an EXTRACT that is used only in a debug use, as this changes
  292. /// the code. Since this code does not currently share EXTRACTs, just ignore all
  293. /// debug uses.
  294. bool PeepholeOptimizer::
  295. optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
  296. SmallPtrSetImpl<MachineInstr*> &LocalMIs) {
  297. unsigned SrcReg, DstReg, SubIdx;
  298. if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
  299. return false;
  300. if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
  301. TargetRegisterInfo::isPhysicalRegister(SrcReg))
  302. return false;
  303. if (MRI->hasOneNonDBGUse(SrcReg))
  304. // No other uses.
  305. return false;
  306. // Ensure DstReg can get a register class that actually supports
  307. // sub-registers. Don't change the class until we commit.
  308. const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
  309. DstRC = TRI->getSubClassWithSubReg(DstRC, SubIdx);
  310. if (!DstRC)
  311. return false;
  312. // The ext instr may be operating on a sub-register of SrcReg as well.
  313. // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit
  314. // register.
  315. // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of
  316. // SrcReg:SubIdx should be replaced.
  317. bool UseSrcSubIdx =
  318. TRI->getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr;
  319. // The source has other uses. See if we can replace the other uses with use of
  320. // the result of the extension.
  321. SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
  322. for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
  323. ReachedBBs.insert(UI.getParent());
  324. // Uses that are in the same BB of uses of the result of the instruction.
  325. SmallVector<MachineOperand*, 8> Uses;
  326. // Uses that the result of the instruction can reach.
  327. SmallVector<MachineOperand*, 8> ExtendedUses;
  328. bool ExtendLife = true;
  329. for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
  330. MachineInstr *UseMI = UseMO.getParent();
  331. if (UseMI == MI)
  332. continue;
  333. if (UseMI->isPHI()) {
  334. ExtendLife = false;
  335. continue;
  336. }
  337. // Only accept uses of SrcReg:SubIdx.
  338. if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
  339. continue;
  340. // It's an error to translate this:
  341. //
  342. // %reg1025 = <sext> %reg1024
  343. // ...
  344. // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
  345. //
  346. // into this:
  347. //
  348. // %reg1025 = <sext> %reg1024
  349. // ...
  350. // %reg1027 = COPY %reg1025:4
  351. // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
  352. //
  353. // The problem here is that SUBREG_TO_REG is there to assert that an
  354. // implicit zext occurs. It doesn't insert a zext instruction. If we allow
  355. // the COPY here, it will give us the value after the <sext>, not the
  356. // original value of %reg1024 before <sext>.
  357. if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
  358. continue;
  359. MachineBasicBlock *UseMBB = UseMI->getParent();
  360. if (UseMBB == MBB) {
  361. // Local uses that come after the extension.
  362. if (!LocalMIs.count(UseMI))
  363. Uses.push_back(&UseMO);
  364. } else if (ReachedBBs.count(UseMBB)) {
  365. // Non-local uses where the result of the extension is used. Always
  366. // replace these unless it's a PHI.
  367. Uses.push_back(&UseMO);
  368. } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
  369. // We may want to extend the live range of the extension result in order
  370. // to replace these uses.
  371. ExtendedUses.push_back(&UseMO);
  372. } else {
  373. // Both will be live out of the def MBB anyway. Don't extend live range of
  374. // the extension result.
  375. ExtendLife = false;
  376. break;
  377. }
  378. }
  379. if (ExtendLife && !ExtendedUses.empty())
  380. // Extend the liveness of the extension result.
  381. Uses.append(ExtendedUses.begin(), ExtendedUses.end());
  382. // Now replace all uses.
  383. bool Changed = false;
  384. if (!Uses.empty()) {
  385. SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;
  386. // Look for PHI uses of the extended result, we don't want to extend the
  387. // liveness of a PHI input. It breaks all kinds of assumptions down
  388. // stream. A PHI use is expected to be the kill of its source values.
  389. for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
  390. if (UI.isPHI())
  391. PHIBBs.insert(UI.getParent());
  392. const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
  393. for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
  394. MachineOperand *UseMO = Uses[i];
  395. MachineInstr *UseMI = UseMO->getParent();
  396. MachineBasicBlock *UseMBB = UseMI->getParent();
  397. if (PHIBBs.count(UseMBB))
  398. continue;
  399. // About to add uses of DstReg, clear DstReg's kill flags.
  400. if (!Changed) {
  401. MRI->clearKillFlags(DstReg);
  402. MRI->constrainRegClass(DstReg, DstRC);
  403. }
  404. unsigned NewVR = MRI->createVirtualRegister(RC);
  405. MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
  406. TII->get(TargetOpcode::COPY), NewVR)
  407. .addReg(DstReg, 0, SubIdx);
  408. // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set.
  409. if (UseSrcSubIdx) {
  410. Copy->getOperand(0).setSubReg(SubIdx);
  411. Copy->getOperand(0).setIsUndef();
  412. }
  413. UseMO->setReg(NewVR);
  414. ++NumReuse;
  415. Changed = true;
  416. }
  417. }
  418. return Changed;
  419. }
  420. /// optimizeCmpInstr - If the instruction is a compare and the previous
  421. /// instruction it's comparing against all ready sets (or could be modified to
  422. /// set) the same flag as the compare, then we can remove the comparison and use
  423. /// the flag from the previous instruction.
  424. bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI,
  425. MachineBasicBlock *MBB) {
  426. // If this instruction is a comparison against zero and isn't comparing a
  427. // physical register, we can try to optimize it.
  428. unsigned SrcReg, SrcReg2;
  429. int CmpMask, CmpValue;
  430. if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
  431. TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
  432. (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2)))
  433. return false;
  434. // Attempt to optimize the comparison instruction.
  435. if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) {
  436. ++NumCmps;
  437. return true;
  438. }
  439. return false;
  440. }
  441. /// Optimize a select instruction.
  442. bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI,
  443. SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
  444. unsigned TrueOp = 0;
  445. unsigned FalseOp = 0;
  446. bool Optimizable = false;
  447. SmallVector<MachineOperand, 4> Cond;
  448. if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable))
  449. return false;
  450. if (!Optimizable)
  451. return false;
  452. if (!TII->optimizeSelect(MI, LocalMIs))
  453. return false;
  454. MI->eraseFromParent();
  455. ++NumSelects;
  456. return true;
  457. }
  458. /// \brief Check if a simpler conditional branch can be
  459. // generated
  460. bool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) {
  461. return TII->optimizeCondBranch(MI);
  462. }
  463. /// \brief Check if the registers defined by the pair (RegisterClass, SubReg)
  464. /// share the same register file.
  465. static bool shareSameRegisterFile(const TargetRegisterInfo &TRI,
  466. const TargetRegisterClass *DefRC,
  467. unsigned DefSubReg,
  468. const TargetRegisterClass *SrcRC,
  469. unsigned SrcSubReg) {
  470. // Same register class.
  471. if (DefRC == SrcRC)
  472. return true;
  473. // Both operands are sub registers. Check if they share a register class.
  474. unsigned SrcIdx, DefIdx;
  475. if (SrcSubReg && DefSubReg)
  476. return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg,
  477. SrcIdx, DefIdx) != nullptr;
  478. // At most one of the register is a sub register, make it Src to avoid
  479. // duplicating the test.
  480. if (!SrcSubReg) {
  481. std::swap(DefSubReg, SrcSubReg);
  482. std::swap(DefRC, SrcRC);
  483. }
  484. // One of the register is a sub register, check if we can get a superclass.
  485. if (SrcSubReg)
  486. return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr;
  487. // Plain copy.
  488. return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr;
  489. }
  490. /// \brief Try to find the next source that share the same register file
  491. /// for the value defined by \p Reg and \p SubReg.
  492. /// When true is returned, \p Reg and \p SubReg are updated with the
  493. /// register number and sub-register index of the new source.
  494. /// \return False if no alternative sources are available. True otherwise.
  495. bool PeepholeOptimizer::findNextSource(unsigned &Reg, unsigned &SubReg) {
  496. // Do not try to find a new source for a physical register.
  497. // So far we do not have any motivating example for doing that.
  498. // Thus, instead of maintaining untested code, we will revisit that if
  499. // that changes at some point.
  500. if (TargetRegisterInfo::isPhysicalRegister(Reg))
  501. return false;
  502. const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
  503. unsigned DefSubReg = SubReg;
  504. unsigned Src;
  505. unsigned SrcSubReg;
  506. bool ShouldRewrite = false;
  507. // Follow the chain of copies until we reach the top of the use-def chain
  508. // or find a more suitable source.
  509. ValueTracker ValTracker(Reg, DefSubReg, *MRI, !DisableAdvCopyOpt, TII);
  510. do {
  511. unsigned CopySrcReg, CopySrcSubReg;
  512. if (!ValTracker.getNextSource(CopySrcReg, CopySrcSubReg))
  513. break;
  514. Src = CopySrcReg;
  515. SrcSubReg = CopySrcSubReg;
  516. // Do not extend the live-ranges of physical registers as they add
  517. // constraints to the register allocator.
  518. // Moreover, if we want to extend the live-range of a physical register,
  519. // unlike SSA virtual register, we will have to check that they are not
  520. // redefine before the related use.
  521. if (TargetRegisterInfo::isPhysicalRegister(Src))
  522. break;
  523. const TargetRegisterClass *SrcRC = MRI->getRegClass(Src);
  524. // If this source does not incur a cross register bank copy, use it.
  525. ShouldRewrite = shareSameRegisterFile(*TRI, DefRC, DefSubReg, SrcRC,
  526. SrcSubReg);
  527. } while (!ShouldRewrite);
  528. // If we did not find a more suitable source, there is nothing to optimize.
  529. if (!ShouldRewrite || Src == Reg)
  530. return false;
  531. Reg = Src;
  532. SubReg = SrcSubReg;
  533. return true;
  534. }
  535. namespace {
  536. /// \brief Helper class to rewrite the arguments of a copy-like instruction.
  537. class CopyRewriter {
  538. protected:
  539. /// The copy-like instruction.
  540. MachineInstr &CopyLike;
  541. /// The index of the source being rewritten.
  542. unsigned CurrentSrcIdx;
  543. public:
  544. CopyRewriter(MachineInstr &MI) : CopyLike(MI), CurrentSrcIdx(0) {}
  545. virtual ~CopyRewriter() {}
  546. /// \brief Get the next rewritable source (SrcReg, SrcSubReg) and
  547. /// the related value that it affects (TrackReg, TrackSubReg).
  548. /// A source is considered rewritable if its register class and the
  549. /// register class of the related TrackReg may not be register
  550. /// coalescer friendly. In other words, given a copy-like instruction
  551. /// not all the arguments may be returned at rewritable source, since
  552. /// some arguments are none to be register coalescer friendly.
  553. ///
  554. /// Each call of this method moves the current source to the next
  555. /// rewritable source.
  556. /// For instance, let CopyLike be the instruction to rewrite.
  557. /// CopyLike has one definition and one source:
  558. /// dst.dstSubIdx = CopyLike src.srcSubIdx.
  559. ///
  560. /// The first call will give the first rewritable source, i.e.,
  561. /// the only source this instruction has:
  562. /// (SrcReg, SrcSubReg) = (src, srcSubIdx).
  563. /// This source defines the whole definition, i.e.,
  564. /// (TrackReg, TrackSubReg) = (dst, dstSubIdx).
  565. ///
  566. /// The second and subsequent calls will return false, has there is only one
  567. /// rewritable source.
  568. ///
  569. /// \return True if a rewritable source has been found, false otherwise.
  570. /// The output arguments are valid if and only if true is returned.
  571. virtual bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
  572. unsigned &TrackReg,
  573. unsigned &TrackSubReg) {
  574. // If CurrentSrcIdx == 1, this means this function has already been
  575. // called once. CopyLike has one defintiion and one argument, thus,
  576. // there is nothing else to rewrite.
  577. if (!CopyLike.isCopy() || CurrentSrcIdx == 1)
  578. return false;
  579. // This is the first call to getNextRewritableSource.
  580. // Move the CurrentSrcIdx to remember that we made that call.
  581. CurrentSrcIdx = 1;
  582. // The rewritable source is the argument.
  583. const MachineOperand &MOSrc = CopyLike.getOperand(1);
  584. SrcReg = MOSrc.getReg();
  585. SrcSubReg = MOSrc.getSubReg();
  586. // What we track are the alternative sources of the definition.
  587. const MachineOperand &MODef = CopyLike.getOperand(0);
  588. TrackReg = MODef.getReg();
  589. TrackSubReg = MODef.getSubReg();
  590. return true;
  591. }
  592. /// \brief Rewrite the current source with \p NewReg and \p NewSubReg
  593. /// if possible.
  594. /// \return True if the rewritting was possible, false otherwise.
  595. virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) {
  596. if (!CopyLike.isCopy() || CurrentSrcIdx != 1)
  597. return false;
  598. MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
  599. MOSrc.setReg(NewReg);
  600. MOSrc.setSubReg(NewSubReg);
  601. return true;
  602. }
  603. };
  604. /// \brief Specialized rewriter for INSERT_SUBREG instruction.
  605. class InsertSubregRewriter : public CopyRewriter {
  606. public:
  607. InsertSubregRewriter(MachineInstr &MI) : CopyRewriter(MI) {
  608. assert(MI.isInsertSubreg() && "Invalid instruction");
  609. }
  610. /// \brief See CopyRewriter::getNextRewritableSource.
  611. /// Here CopyLike has the following form:
  612. /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx.
  613. /// Src1 has the same register class has dst, hence, there is
  614. /// nothing to rewrite.
  615. /// Src2.src2SubIdx, may not be register coalescer friendly.
  616. /// Therefore, the first call to this method returns:
  617. /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
  618. /// (TrackReg, TrackSubReg) = (dst, subIdx).
  619. ///
  620. /// Subsequence calls will return false.
  621. bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
  622. unsigned &TrackReg,
  623. unsigned &TrackSubReg) override {
  624. // If we already get the only source we can rewrite, return false.
  625. if (CurrentSrcIdx == 2)
  626. return false;
  627. // We are looking at v2 = INSERT_SUBREG v0, v1, sub0.
  628. CurrentSrcIdx = 2;
  629. const MachineOperand &MOInsertedReg = CopyLike.getOperand(2);
  630. SrcReg = MOInsertedReg.getReg();
  631. SrcSubReg = MOInsertedReg.getSubReg();
  632. const MachineOperand &MODef = CopyLike.getOperand(0);
  633. // We want to track something that is compatible with the
  634. // partial definition.
  635. TrackReg = MODef.getReg();
  636. if (MODef.getSubReg())
  637. // Bails if we have to compose sub-register indices.
  638. return false;
  639. TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm();
  640. return true;
  641. }
  642. bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
  643. if (CurrentSrcIdx != 2)
  644. return false;
  645. // We are rewriting the inserted reg.
  646. MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
  647. MO.setReg(NewReg);
  648. MO.setSubReg(NewSubReg);
  649. return true;
  650. }
  651. };
  652. /// \brief Specialized rewriter for EXTRACT_SUBREG instruction.
  653. class ExtractSubregRewriter : public CopyRewriter {
  654. const TargetInstrInfo &TII;
  655. public:
  656. ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII)
  657. : CopyRewriter(MI), TII(TII) {
  658. assert(MI.isExtractSubreg() && "Invalid instruction");
  659. }
  660. /// \brief See CopyRewriter::getNextRewritableSource.
  661. /// Here CopyLike has the following form:
  662. /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx.
  663. /// There is only one rewritable source: Src.subIdx,
  664. /// which defines dst.dstSubIdx.
  665. bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
  666. unsigned &TrackReg,
  667. unsigned &TrackSubReg) override {
  668. // If we already get the only source we can rewrite, return false.
  669. if (CurrentSrcIdx == 1)
  670. return false;
  671. // We are looking at v1 = EXTRACT_SUBREG v0, sub0.
  672. CurrentSrcIdx = 1;
  673. const MachineOperand &MOExtractedReg = CopyLike.getOperand(1);
  674. SrcReg = MOExtractedReg.getReg();
  675. // If we have to compose sub-register indices, bails out.
  676. if (MOExtractedReg.getSubReg())
  677. return false;
  678. SrcSubReg = CopyLike.getOperand(2).getImm();
  679. // We want to track something that is compatible with the definition.
  680. const MachineOperand &MODef = CopyLike.getOperand(0);
  681. TrackReg = MODef.getReg();
  682. TrackSubReg = MODef.getSubReg();
  683. return true;
  684. }
  685. bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
  686. // The only source we can rewrite is the input register.
  687. if (CurrentSrcIdx != 1)
  688. return false;
  689. CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg);
  690. // If we find a source that does not require to extract something,
  691. // rewrite the operation with a copy.
  692. if (!NewSubReg) {
  693. // Move the current index to an invalid position.
  694. // We do not want another call to this method to be able
  695. // to do any change.
  696. CurrentSrcIdx = -1;
  697. // Rewrite the operation as a COPY.
  698. // Get rid of the sub-register index.
  699. CopyLike.RemoveOperand(2);
  700. // Morph the operation into a COPY.
  701. CopyLike.setDesc(TII.get(TargetOpcode::COPY));
  702. return true;
  703. }
  704. CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg);
  705. return true;
  706. }
  707. };
  708. /// \brief Specialized rewriter for REG_SEQUENCE instruction.
  709. class RegSequenceRewriter : public CopyRewriter {
  710. public:
  711. RegSequenceRewriter(MachineInstr &MI) : CopyRewriter(MI) {
  712. assert(MI.isRegSequence() && "Invalid instruction");
  713. }
  714. /// \brief See CopyRewriter::getNextRewritableSource.
  715. /// Here CopyLike has the following form:
  716. /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2.
  717. /// Each call will return a different source, walking all the available
  718. /// source.
  719. ///
  720. /// The first call returns:
  721. /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx).
  722. /// (TrackReg, TrackSubReg) = (dst, subIdx1).
  723. ///
  724. /// The second call returns:
  725. /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
  726. /// (TrackReg, TrackSubReg) = (dst, subIdx2).
  727. ///
  728. /// And so on, until all the sources have been traversed, then
  729. /// it returns false.
  730. bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
  731. unsigned &TrackReg,
  732. unsigned &TrackSubReg) override {
  733. // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc.
  734. // If this is the first call, move to the first argument.
  735. if (CurrentSrcIdx == 0) {
  736. CurrentSrcIdx = 1;
  737. } else {
  738. // Otherwise, move to the next argument and check that it is valid.
  739. CurrentSrcIdx += 2;
  740. if (CurrentSrcIdx >= CopyLike.getNumOperands())
  741. return false;
  742. }
  743. const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
  744. SrcReg = MOInsertedReg.getReg();
  745. // If we have to compose sub-register indices, bails out.
  746. if ((SrcSubReg = MOInsertedReg.getSubReg()))
  747. return false;
  748. // We want to track something that is compatible with the related
  749. // partial definition.
  750. TrackSubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm();
  751. const MachineOperand &MODef = CopyLike.getOperand(0);
  752. TrackReg = MODef.getReg();
  753. // If we have to compose sub-registers, bails.
  754. return MODef.getSubReg() == 0;
  755. }
  756. bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
  757. // We cannot rewrite out of bound operands.
  758. // Moreover, rewritable sources are at odd positions.
  759. if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands())
  760. return false;
  761. MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
  762. MO.setReg(NewReg);
  763. MO.setSubReg(NewSubReg);
  764. return true;
  765. }
  766. };
  767. } // End namespace.
  768. /// \brief Get the appropriated CopyRewriter for \p MI.
  769. /// \return A pointer to a dynamically allocated CopyRewriter or nullptr
  770. /// if no rewriter works for \p MI.
  771. static CopyRewriter *getCopyRewriter(MachineInstr &MI,
  772. const TargetInstrInfo &TII) {
  773. switch (MI.getOpcode()) {
  774. default:
  775. return nullptr;
  776. case TargetOpcode::COPY:
  777. return new CopyRewriter(MI);
  778. case TargetOpcode::INSERT_SUBREG:
  779. return new InsertSubregRewriter(MI);
  780. case TargetOpcode::EXTRACT_SUBREG:
  781. return new ExtractSubregRewriter(MI, TII);
  782. case TargetOpcode::REG_SEQUENCE:
  783. return new RegSequenceRewriter(MI);
  784. }
  785. llvm_unreachable(nullptr);
  786. }
  787. /// \brief Optimize generic copy instructions to avoid cross
  788. /// register bank copy. The optimization looks through a chain of
  789. /// copies and tries to find a source that has a compatible register
  790. /// class.
  791. /// Two register classes are considered to be compatible if they share
  792. /// the same register bank.
  793. /// New copies issued by this optimization are register allocator
  794. /// friendly. This optimization does not remove any copy as it may
  795. /// overconstraint the register allocator, but replaces some operands
  796. /// when possible.
  797. /// \pre isCoalescableCopy(*MI) is true.
  798. /// \return True, when \p MI has been rewritten. False otherwise.
  799. bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) {
  800. assert(MI && isCoalescableCopy(*MI) && "Invalid argument");
  801. assert(MI->getDesc().getNumDefs() == 1 &&
  802. "Coalescer can understand multiple defs?!");
  803. const MachineOperand &MODef = MI->getOperand(0);
  804. // Do not rewrite physical definitions.
  805. if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg()))
  806. return false;
  807. bool Changed = false;
  808. // Get the right rewriter for the current copy.
  809. std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII));
  810. // If none exists, bails out.
  811. if (!CpyRewriter)
  812. return false;
  813. // Rewrite each rewritable source.
  814. unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg;
  815. while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg,
  816. TrackSubReg)) {
  817. unsigned NewSrc = TrackReg;
  818. unsigned NewSubReg = TrackSubReg;
  819. // Try to find a more suitable source.
  820. // If we failed to do so, or get the actual source,
  821. // move to the next source.
  822. if (!findNextSource(NewSrc, NewSubReg) || SrcReg == NewSrc)
  823. continue;
  824. // Rewrite source.
  825. if (CpyRewriter->RewriteCurrentSource(NewSrc, NewSubReg)) {
  826. // We may have extended the live-range of NewSrc, account for that.
  827. MRI->clearKillFlags(NewSrc);
  828. Changed = true;
  829. }
  830. }
  831. // TODO: We could have a clean-up method to tidy the instruction.
  832. // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0
  833. // => v0 = COPY v1
  834. // Currently we haven't seen motivating example for that and we
  835. // want to avoid untested code.
  836. NumRewrittenCopies += Changed;
  837. return Changed;
  838. }
  839. /// \brief Optimize copy-like instructions to create
  840. /// register coalescer friendly instruction.
  841. /// The optimization tries to kill-off the \p MI by looking
  842. /// through a chain of copies to find a source that has a compatible
  843. /// register class.
  844. /// If such a source is found, it replace \p MI by a generic COPY
  845. /// operation.
  846. /// \pre isUncoalescableCopy(*MI) is true.
  847. /// \return True, when \p MI has been optimized. In that case, \p MI has
  848. /// been removed from its parent.
  849. /// All COPY instructions created, are inserted in \p LocalMIs.
  850. bool PeepholeOptimizer::optimizeUncoalescableCopy(
  851. MachineInstr *MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
  852. assert(MI && isUncoalescableCopy(*MI) && "Invalid argument");
  853. // Check if we can rewrite all the values defined by this instruction.
  854. SmallVector<
  855. std::pair<TargetInstrInfo::RegSubRegPair, TargetInstrInfo::RegSubRegPair>,
  856. 4> RewritePairs;
  857. for (const MachineOperand &MODef : MI->defs()) {
  858. if (MODef.isDead())
  859. // We can ignore those.
  860. continue;
  861. // If a physical register is here, this is probably for a good reason.
  862. // Do not rewrite that.
  863. if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg()))
  864. return false;
  865. // If we do not know how to rewrite this definition, there is no point
  866. // in trying to kill this instruction.
  867. TargetInstrInfo::RegSubRegPair Def(MODef.getReg(), MODef.getSubReg());
  868. TargetInstrInfo::RegSubRegPair Src = Def;
  869. if (!findNextSource(Src.Reg, Src.SubReg))
  870. return false;
  871. RewritePairs.push_back(std::make_pair(Def, Src));
  872. }
  873. // The change is possible for all defs, do it.
  874. for (const auto &PairDefSrc : RewritePairs) {
  875. const auto &Def = PairDefSrc.first;
  876. const auto &Src = PairDefSrc.second;
  877. // Rewrite the "copy" in a way the register coalescer understands.
  878. assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) &&
  879. "We do not rewrite physical registers");
  880. const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);
  881. unsigned NewVR = MRI->createVirtualRegister(DefRC);
  882. MachineInstr *NewCopy = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
  883. TII->get(TargetOpcode::COPY),
  884. NewVR).addReg(Src.Reg, 0, Src.SubReg);
  885. NewCopy->getOperand(0).setSubReg(Def.SubReg);
  886. if (Def.SubReg)
  887. NewCopy->getOperand(0).setIsUndef();
  888. LocalMIs.insert(NewCopy);
  889. MRI->replaceRegWith(Def.Reg, NewVR);
  890. MRI->clearKillFlags(NewVR);
  891. // We extended the lifetime of Src.
  892. // Clear the kill flags to account for that.
  893. MRI->clearKillFlags(Src.Reg);
  894. }
  895. // MI is now dead.
  896. MI->eraseFromParent();
  897. ++NumUncoalescableCopies;
  898. return true;
  899. }
  900. /// isLoadFoldable - Check whether MI is a candidate for folding into a later
  901. /// instruction. We only fold loads to virtual registers and the virtual
  902. /// register defined has a single use.
  903. bool PeepholeOptimizer::isLoadFoldable(
  904. MachineInstr *MI,
  905. SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) {
  906. if (!MI->canFoldAsLoad() || !MI->mayLoad())
  907. return false;
  908. const MCInstrDesc &MCID = MI->getDesc();
  909. if (MCID.getNumDefs() != 1)
  910. return false;
  911. unsigned Reg = MI->getOperand(0).getReg();
  912. // To reduce compilation time, we check MRI->hasOneNonDBGUse when inserting
  913. // loads. It should be checked when processing uses of the load, since
  914. // uses can be removed during peephole.
  915. if (!MI->getOperand(0).getSubReg() &&
  916. TargetRegisterInfo::isVirtualRegister(Reg) &&
  917. MRI->hasOneNonDBGUse(Reg)) {
  918. FoldAsLoadDefCandidates.insert(Reg);
  919. return true;
  920. }
  921. return false;
  922. }
  923. bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
  924. SmallSet<unsigned, 4> &ImmDefRegs,
  925. DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
  926. const MCInstrDesc &MCID = MI->getDesc();
  927. if (!MI->isMoveImmediate())
  928. return false;
  929. if (MCID.getNumDefs() != 1)
  930. return false;
  931. unsigned Reg = MI->getOperand(0).getReg();
  932. if (TargetRegisterInfo::isVirtualRegister(Reg)) {
  933. ImmDefMIs.insert(std::make_pair(Reg, MI));
  934. ImmDefRegs.insert(Reg);
  935. return true;
  936. }
  937. return false;
  938. }
  939. /// foldImmediate - Try folding register operands that are defined by move
  940. /// immediate instructions, i.e. a trivial constant folding optimization, if
  941. /// and only if the def and use are in the same BB.
  942. bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
  943. SmallSet<unsigned, 4> &ImmDefRegs,
  944. DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
  945. for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
  946. MachineOperand &MO = MI->getOperand(i);
  947. if (!MO.isReg() || MO.isDef())
  948. continue;
  949. unsigned Reg = MO.getReg();
  950. if (!TargetRegisterInfo::isVirtualRegister(Reg))
  951. continue;
  952. if (ImmDefRegs.count(Reg) == 0)
  953. continue;
  954. DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
  955. assert(II != ImmDefMIs.end());
  956. if (TII->FoldImmediate(MI, II->second, Reg, MRI)) {
  957. ++NumImmFold;
  958. return true;
  959. }
  960. }
  961. return false;
  962. }
/// \brief Pass entry point: walk every basic block forward, applying the
/// peephole optimizations (copy rewriting, compare/select/branch
/// optimization, immediate folding, and load folding).
/// \return True if any instruction was changed, erased, or created.
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
  // Respect optnone: this is an optimization-only pass.
  if (skipOptnoneFunction(*MF.getFunction()))
    return false;

  DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
  DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');

  if (DisablePeephole)
    return false;

  // Cache the target hooks used by the helper methods.
  TII = MF.getSubtarget().getInstrInfo();
  TRI = MF.getSubtarget().getRegisterInfo();
  MRI = &MF.getRegInfo();
  // The dominator tree is only requested in aggressive mode.
  DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr;

  bool Changed = false;

  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    MachineBasicBlock *MBB = &*I;
    bool SeenMoveImm = false;

    // During this forward scan, at some point it needs to answer the question
    // "given a pointer to an MI in the current BB, is it located before or
    // after the current instruction".
    // To perform this, the following set keeps track of the MIs already seen
    // during the scan, if a MI is not in the set, it is assumed to be located
    // after. Newly created MIs have to be inserted in the set as well.
    SmallPtrSet<MachineInstr*, 16> LocalMIs;
    SmallSet<unsigned, 4> ImmDefRegs;
    DenseMap<unsigned, MachineInstr*> ImmDefMIs;
    SmallSet<unsigned, 16> FoldAsLoadDefCandidates;

    for (MachineBasicBlock::iterator
           MII = I->begin(), MIE = I->end(); MII != MIE; ) {
      MachineInstr *MI = &*MII;
      // We may be erasing MI below, increment MII now.
      ++MII;
      LocalMIs.insert(MI);

      // Skip debug values. They should not affect this peephole optimization.
      if (MI->isDebugValue())
        continue;

      // If there exists an instruction which belongs to the following
      // categories, we will discard the load candidates.
      if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() ||
          MI->isKill() || MI->isInlineAsm() ||
          MI->hasUnmodeledSideEffects()) {
        FoldAsLoadDefCandidates.clear();
        continue;
      }
      if (MI->mayStore() || MI->isCall())
        FoldAsLoadDefCandidates.clear();

      // These three optimizations erase MI on success; the set must be
      // updated so the "seen before" query above stays accurate.
      if ((isUncoalescableCopy(*MI) &&
           optimizeUncoalescableCopy(MI, LocalMIs)) ||
          (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
          (MI->isSelect() && optimizeSelect(MI, LocalMIs))) {
        // MI is deleted.
        LocalMIs.erase(MI);
        Changed = true;
        continue;
      }

      if (MI->isConditionalBranch() && optimizeCondBranch(MI)) {
        Changed = true;
        continue;
      }

      if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(MI)) {
        // MI is just rewritten.
        Changed = true;
        continue;
      }

      if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
        SeenMoveImm = true;
      } else {
        Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
        // optimizeExtInstr might have created new instructions after MI
        // and before the already incremented MII. Adjust MII so that the
        // next iteration sees the new instructions.
        MII = MI;
        ++MII;
        if (SeenMoveImm)
          Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
      }

      // Check whether MI is a load candidate for folding into a later
      // instruction. If MI is not a candidate, check whether we can fold an
      // earlier load into MI.
      if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) &&
          !FoldAsLoadDefCandidates.empty()) {
        const MCInstrDesc &MIDesc = MI->getDesc();
        // Scan the use operands only (defs come first in the operand list).
        for (unsigned i = MIDesc.getNumDefs(); i != MIDesc.getNumOperands();
             ++i) {
          const MachineOperand &MOp = MI->getOperand(i);
          if (!MOp.isReg())
            continue;
          unsigned FoldAsLoadDefReg = MOp.getReg();
          if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) {
            // We need to fold load after optimizeCmpInstr, since
            // optimizeCmpInstr can enable folding by converting SUB to CMP.
            // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and
            // we need it for markUsesInDebugValueAsUndef().
            unsigned FoldedReg = FoldAsLoadDefReg;
            MachineInstr *DefMI = nullptr;
            MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
                                                          FoldAsLoadDefReg,
                                                          DefMI);
            if (FoldMI) {
              // Update LocalMIs since we replaced MI with FoldMI and deleted
              // DefMI.
              DEBUG(dbgs() << "Replacing: " << *MI);
              DEBUG(dbgs() << " With: " << *FoldMI);
              LocalMIs.erase(MI);
              LocalMIs.erase(DefMI);
              LocalMIs.insert(FoldMI);
              MI->eraseFromParent();
              DefMI->eraseFromParent();
              MRI->markUsesInDebugValueAsUndef(FoldedReg);
              FoldAsLoadDefCandidates.erase(FoldedReg);
              ++NumLoadFold;
              // MI is replaced with FoldMI.
              Changed = true;
              break;
            }
          }
        }
      }
    }
  }

  return Changed;
}
  1083. bool ValueTracker::getNextSourceFromCopy(unsigned &SrcReg,
  1084. unsigned &SrcSubReg) {
  1085. assert(Def->isCopy() && "Invalid definition");
  1086. // Copy instruction are supposed to be: Def = Src.
  1087. // If someone breaks this assumption, bad things will happen everywhere.
  1088. assert(Def->getNumOperands() == 2 && "Invalid number of operands");
  1089. if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
  1090. // If we look for a different subreg, it means we want a subreg of src.
  1091. // Bails as we do not support composing subreg yet.
  1092. return false;
  1093. // Otherwise, we want the whole source.
  1094. const MachineOperand &Src = Def->getOperand(1);
  1095. SrcReg = Src.getReg();
  1096. SrcSubReg = Src.getSubReg();
  1097. return true;
  1098. }
  1099. bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcReg,
  1100. unsigned &SrcSubReg) {
  1101. assert(Def->isBitcast() && "Invalid definition");
  1102. // Bail if there are effects that a plain copy will not expose.
  1103. if (Def->hasUnmodeledSideEffects())
  1104. return false;
  1105. // Bitcasts with more than one def are not supported.
  1106. if (Def->getDesc().getNumDefs() != 1)
  1107. return false;
  1108. if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
  1109. // If we look for a different subreg, it means we want a subreg of the src.
  1110. // Bails as we do not support composing subreg yet.
  1111. return false;
  1112. unsigned SrcIdx = Def->getNumOperands();
  1113. for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx;
  1114. ++OpIdx) {
  1115. const MachineOperand &MO = Def->getOperand(OpIdx);
  1116. if (!MO.isReg() || !MO.getReg())
  1117. continue;
  1118. assert(!MO.isDef() && "We should have skipped all the definitions by now");
  1119. if (SrcIdx != EndOpIdx)
  1120. // Multiple sources?
  1121. return false;
  1122. SrcIdx = OpIdx;
  1123. }
  1124. const MachineOperand &Src = Def->getOperand(SrcIdx);
  1125. SrcReg = Src.getReg();
  1126. SrcSubReg = Src.getSubReg();
  1127. return true;
  1128. }
  1129. bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcReg,
  1130. unsigned &SrcSubReg) {
  1131. assert((Def->isRegSequence() || Def->isRegSequenceLike()) &&
  1132. "Invalid definition");
  1133. if (Def->getOperand(DefIdx).getSubReg())
  1134. // If we are composing subreg, bails out.
  1135. // The case we are checking is Def.<subreg> = REG_SEQUENCE.
  1136. // This should almost never happen as the SSA property is tracked at
  1137. // the register level (as opposed to the subreg level).
  1138. // I.e.,
  1139. // Def.sub0 =
  1140. // Def.sub1 =
  1141. // is a valid SSA representation for Def.sub0 and Def.sub1, but not for
  1142. // Def. Thus, it must not be generated.
  1143. // However, some code could theoretically generates a single
  1144. // Def.sub0 (i.e, not defining the other subregs) and we would
  1145. // have this case.
  1146. // If we can ascertain (or force) that this never happens, we could
  1147. // turn that into an assertion.
  1148. return false;
  1149. if (!TII)
  1150. // We could handle the REG_SEQUENCE here, but we do not want to
  1151. // duplicate the code from the generic TII.
  1152. return false;
  1153. SmallVector<TargetInstrInfo::RegSubRegPairAndIdx, 8> RegSeqInputRegs;
  1154. if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs))
  1155. return false;
  1156. // We are looking at:
  1157. // Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
  1158. // Check if one of the operand defines the subreg we are interested in.
  1159. for (auto &RegSeqInput : RegSeqInputRegs) {
  1160. if (RegSeqInput.SubIdx == DefSubReg) {
  1161. if (RegSeqInput.SubReg)
  1162. // Bails if we have to compose sub registers.
  1163. return false;
  1164. SrcReg = RegSeqInput.Reg;
  1165. SrcSubReg = RegSeqInput.SubReg;
  1166. return true;
  1167. }
  1168. }
  1169. // If the subreg we are tracking is super-defined by another subreg,
  1170. // we could follow this value. However, this would require to compose
  1171. // the subreg and we do not do that for now.
  1172. return false;
  1173. }
  1174. bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg,
  1175. unsigned &SrcSubReg) {
  1176. assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) &&
  1177. "Invalid definition");
  1178. if (Def->getOperand(DefIdx).getSubReg())
  1179. // If we are composing subreg, bails out.
  1180. // Same remark as getNextSourceFromRegSequence.
  1181. // I.e., this may be turned into an assert.
  1182. return false;
  1183. if (!TII)
  1184. // We could handle the REG_SEQUENCE here, but we do not want to
  1185. // duplicate the code from the generic TII.
  1186. return false;
  1187. TargetInstrInfo::RegSubRegPair BaseReg;
  1188. TargetInstrInfo::RegSubRegPairAndIdx InsertedReg;
  1189. if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg))
  1190. return false;
  1191. // We are looking at:
  1192. // Def = INSERT_SUBREG v0, v1, sub1
  1193. // There are two cases:
  1194. // 1. DefSubReg == sub1, get v1.
  1195. // 2. DefSubReg != sub1, the value may be available through v0.
  1196. // #1 Check if the inserted register matches the required sub index.
  1197. if (InsertedReg.SubIdx == DefSubReg) {
  1198. SrcReg = InsertedReg.Reg;
  1199. SrcSubReg = InsertedReg.SubReg;
  1200. return true;
  1201. }
  1202. // #2 Otherwise, if the sub register we are looking for is not partial
  1203. // defined by the inserted element, we can look through the main
  1204. // register (v0).
  1205. const MachineOperand &MODef = Def->getOperand(DefIdx);
  1206. // If the result register (Def) and the base register (v0) do not
  1207. // have the same register class or if we have to compose
  1208. // subregisters, bails out.
  1209. if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) ||
  1210. BaseReg.SubReg)
  1211. return false;
  1212. // Get the TRI and check if the inserted sub-register overlaps with the
  1213. // sub-register we are tracking.
  1214. const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
  1215. if (!TRI ||
  1216. (TRI->getSubRegIndexLaneMask(DefSubReg) &
  1217. TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)) != 0)
  1218. return false;
  1219. // At this point, the value is available in v0 via the same subreg
  1220. // we used for Def.
  1221. SrcReg = BaseReg.Reg;
  1222. SrcSubReg = DefSubReg;
  1223. return true;
  1224. }
  1225. bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcReg,
  1226. unsigned &SrcSubReg) {
  1227. assert((Def->isExtractSubreg() ||
  1228. Def->isExtractSubregLike()) && "Invalid definition");
  1229. // We are looking at:
  1230. // Def = EXTRACT_SUBREG v0, sub0
  1231. // Bails if we have to compose sub registers.
  1232. // Indeed, if DefSubReg != 0, we would have to compose it with sub0.
  1233. if (DefSubReg)
  1234. return false;
  1235. if (!TII)
  1236. // We could handle the EXTRACT_SUBREG here, but we do not want to
  1237. // duplicate the code from the generic TII.
  1238. return false;
  1239. TargetInstrInfo::RegSubRegPairAndIdx ExtractSubregInputReg;
  1240. if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg))
  1241. return false;
  1242. // Bails if we have to compose sub registers.
  1243. // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0.
  1244. if (ExtractSubregInputReg.SubReg)
  1245. return false;
  1246. // Otherwise, the value is available in the v0.sub0.
  1247. SrcReg = ExtractSubregInputReg.Reg;
  1248. SrcSubReg = ExtractSubregInputReg.SubIdx;
  1249. return true;
  1250. }
  1251. bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcReg,
  1252. unsigned &SrcSubReg) {
  1253. assert(Def->isSubregToReg() && "Invalid definition");
  1254. // We are looking at:
  1255. // Def = SUBREG_TO_REG Imm, v0, sub0
  1256. // Bails if we have to compose sub registers.
  1257. // If DefSubReg != sub0, we would have to check that all the bits
  1258. // we track are included in sub0 and if yes, we would have to
  1259. // determine the right subreg in v0.
  1260. if (DefSubReg != Def->getOperand(3).getImm())
  1261. return false;
  1262. // Bails if we have to compose sub registers.
  1263. // Likewise, if v0.subreg != 0, we would have to compose it with sub0.
  1264. if (Def->getOperand(2).getSubReg())
  1265. return false;
  1266. SrcReg = Def->getOperand(2).getReg();
  1267. SrcSubReg = Def->getOperand(3).getImm();
  1268. return true;
  1269. }
/// \brief Dispatch to the per-opcode source-tracking helper for the
/// current definition \p Def.
/// \return True if a source was found and stored in \p SrcReg and
/// \p SrcSubReg, false otherwise.
bool ValueTracker::getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg) {
  assert(Def && "This method needs a valid definition");
  // DefIdx must denote an actual definition of Def (variadic instructions
  // may carry defs beyond getNumDefs()).
  assert(
      (DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) &&
      Def->getOperand(DefIdx).isDef() && "Invalid DefIdx");
  // Plain copies and bitcasts are always looked through.
  if (Def->isCopy())
    return getNextSourceFromCopy(SrcReg, SrcSubReg);
  if (Def->isBitcast())
    return getNextSourceFromBitcast(SrcReg, SrcSubReg);
  // All the remaining cases involve "complex" instructions.
  // Bails if we did not ask for the advanced tracking.
  if (!UseAdvancedTracking)
    return false;
  if (Def->isRegSequence() || Def->isRegSequenceLike())
    return getNextSourceFromRegSequence(SrcReg, SrcSubReg);
  if (Def->isInsertSubreg() || Def->isInsertSubregLike())
    return getNextSourceFromInsertSubreg(SrcReg, SrcSubReg);
  if (Def->isExtractSubreg() || Def->isExtractSubregLike())
    return getNextSourceFromExtractSubreg(SrcReg, SrcSubReg);
  if (Def->isSubregToReg())
    return getNextSourceFromSubregToReg(SrcReg, SrcSubReg);
  // Unhandled opcode: the chain stops here.
  return false;
}
  1293. const MachineInstr *ValueTracker::getNextSource(unsigned &SrcReg,
  1294. unsigned &SrcSubReg) {
  1295. // If we reach a point where we cannot move up in the use-def chain,
  1296. // there is nothing we can get.
  1297. if (!Def)
  1298. return nullptr;
  1299. const MachineInstr *PrevDef = nullptr;
  1300. // Try to find the next source.
  1301. if (getNextSourceImpl(SrcReg, SrcSubReg)) {
  1302. // Update definition, definition index, and subregister for the
  1303. // next call of getNextSource.
  1304. // Update the current register.
  1305. Reg = SrcReg;
  1306. // Update the return value before moving up in the use-def chain.
  1307. PrevDef = Def;
  1308. // If we can still move up in the use-def chain, move to the next
  1309. // defintion.
  1310. if (!TargetRegisterInfo::isPhysicalRegister(Reg)) {
  1311. Def = MRI.getVRegDef(Reg);
  1312. DefIdx = MRI.def_begin(Reg).getOperandNo();
  1313. DefSubReg = SrcSubReg;
  1314. return PrevDef;
  1315. }
  1316. }
  1317. // If we end up here, this means we will not be able to find another source
  1318. // for the next iteration.
  1319. // Make sure any new call to getNextSource bails out early by cutting the
  1320. // use-def chain.
  1321. Def = nullptr;
  1322. return PrevDef;
  1323. }