//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value. This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable. If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
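//      For example (illustrative, not from the original comment), given
//        'for (i = 0; i != n; ++i) s += 4;' followed by a use of 's',
//      the use is rewritten to compute 's = 4*n' outside the loop, at which
//      point the loop computes nothing and becomes dead.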
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
using namespace llvm;
#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

#if 0 // HLSL Change Starts - option pending
// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
    "verify-indvars", cl::Hidden,
    cl::desc("Verify the ScalarEvolution result after running indvars"));

static cl::opt<bool> ReduceLiveIVs("liv-reduce", cl::Hidden,
    cl::desc("Reduce live induction variables."));

enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, AlwaysRepl };

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible"),
               clEnumValEnd));
#else
static const bool VerifyIndvars = false;
static const bool ReduceLiveIVs = false;
enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, AlwaysRepl };
static const ReplaceExitVal ReplaceExitValue = OnlyCheapRepl;
#endif

namespace {
struct RewritePhi;
}
namespace {
  class IndVarSimplify : public LoopPass {
    LoopInfo                  *LI;
    ScalarEvolution           *SE;
    DominatorTree             *DT;
    TargetLibraryInfo         *TLI;
    const TargetTransformInfo *TTI;

    SmallVector<WeakVH, 16> DeadInsts;
    bool Changed;
  public:
    static char ID; // Pass identification, replacement for typeid
    IndVarSimplify()
        : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr), Changed(false) {
      initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
    }

    bool runOnLoop(Loop *L, LPPassManager &LPM) override;

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<DominatorTreeWrapperPass>();
      AU.addRequired<LoopInfoWrapperPass>();
      AU.addRequired<ScalarEvolution>();
      AU.addRequiredID(LoopSimplifyID);
      AU.addRequiredID(LCSSAID);
      AU.addPreserved<ScalarEvolution>();
      AU.addPreservedID(LoopSimplifyID);
      AU.addPreservedID(LCSSAID);
      AU.setPreservesCFG();
    }

  private:
    void releaseMemory() override {
      DeadInsts.clear();
    }

    bool isValidRewrite(Value *FromVal, Value *ToVal);

    void HandleFloatingPointIV(Loop *L, PHINode *PH);
    void RewriteNonIntegerIVs(Loop *L);

    void SimplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LPPassManager &LPM);

    bool CanLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
    void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);

    Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
                                     PHINode *IndVar, SCEVExpander &Rewriter);

    void SinkUnusedInvariants(Loop *L);

    Value *ExpandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L,
                              Instruction *InsertPt, Type *Ty,
                              bool &IsHighCostExpansion);
  };
}
char IndVarSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars",
                "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(IndVarSimplify, "indvars",
                "Induction Variable Simplification", false, false)

Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplify();
}
/// isValidRewrite - Return true if the SCEV expansion generated by the
/// rewriter can replace the original value. SCEV guarantees that it
/// produces the same value, but the way it is produced may be illegal IR.
/// Ideally, this function will only be called for verification.
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
  // If an SCEV expression subsumed multiple pointers, its expansion could
  // reassociate the GEP changing the base pointer. This is illegal because the
  // final address produced by a GEP chain must be inbounds relative to its
  // underlying object. Otherwise basic alias analysis, among other things,
  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
  // producing an expression involving multiple pointers. Until then, we must
  // bail out here.
  //
  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
  // because it understands lcssa phis while SCEV does not.
  Value *FromPtr = FromVal;
  Value *ToPtr = ToVal;
  if (GEPOperator *GEP = dyn_cast<GEPOperator>(FromVal)) {
    FromPtr = GEP->getPointerOperand();
  }
  if (GEPOperator *GEP = dyn_cast<GEPOperator>(ToVal)) {
    ToPtr = GEP->getPointerOperand();
  }
  if (FromPtr != FromVal || ToPtr != ToVal) {
    // Quickly check the common case
    if (FromPtr == ToPtr)
      return true;

    // SCEV may have rewritten an expression that produces the GEP's pointer
    // operand. That's ok as long as the pointer operand has the same base
    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
    // base of a recurrence. This handles the case in which SCEV expansion
    // converts a pointer type recurrence into a nonrecurrent pointer base
    // indexed by an integer recurrence.

    // If the GEP base pointer is a vector of pointers, abort.
    if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
      return false;

    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
    if (FromBase == ToBase)
      return true;

    DEBUG(dbgs() << "INDVARS: GEP rewrite bail out "
          << *FromBase << " != " << *ToBase << "\n");

    return false;
  }
  return true;
}
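// Illustrative sketch of the bail-out above (names hypothetical): if the
// original value is 'gep %A, %i' but the expansion produces 'gep %B, %j',
// getPointerBase() yields different SCEVs for the two bases, and we refuse
// the rewrite rather than risk computing an address that is not inbounds
// relative to %A's underlying object.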
/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);
    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }
  assert(InsertPt && "Missing phi operand");
  assert((!isa<Instruction>(Def) ||
          DT->dominates(cast<Instruction>(Def), InsertPt)) &&
         "def does not dominate all uses");
  return InsertPt;
}
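// Illustrative example (hypothetical IR): for a user such as
//   %u = phi i32 [ %def, %bb1 ], [ %def, %bb2 ]
// the loop above visits both incoming edges carrying %def and returns the
// terminator of the nearest common dominator of %bb1 and %bb2 -- the latest
// point that still dominates every edge over which %def flows into the phi.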
//===----------------------------------------------------------------------===//
// RewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// ConvertToSInt - Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero,
                           &isExact) != APFloat::opOK || !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}
/// HandleFloatingPointIV - If the loop has a floating-point induction
/// variable, then insert a corresponding integer induction variable if
/// possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
///
void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  ConstantFP *InitValueVal =
    dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return;

  // Check IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value cannot be represented as an integer.
  BinaryOperator *Incr =
    dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return;

  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop. If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can. This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations. Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return;

  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return;
  }
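  // Illustrative example of the Leftover check above (not from the original
  // comments): for 'for (double d = 0; d != 10; d += 3)' the fp IV takes the
  // values 0, 3, 6, 9, 12, ... and never exits, whereas an i32 IV with an
  // ICMP_NE test would wrap around and eventually hit 10, changing behavior.
  // Leftover = 10 % 3 != 0 makes us bail out on exactly this case.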
  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakVH to observe whether this happens.
  WeakVH WeakPH = PN;

  // Delete the old floating point exit comparison. The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value. In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
  }
  Changed = true;
}
void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
  // First step. Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  //
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakVH, 8> PHIs;
  for (BasicBlock::iterator I = Header->begin();
       PHINode *PN = dyn_cast<PHINode>(I); ++I)
    PHIs.push_back(PN);

  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      HandleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // rewriting, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
}
namespace {
// Collect information about PHI nodes which can be transformed in
// RewriteLoopExitValues.
struct RewritePhi {
  PHINode *PN;
  unsigned Ith;   // Ith incoming value.
  Value *Val;     // Exit value after expansion.
  bool HighCost;  // High cost when expanded.
  bool SafePhi;   // LCSSASafePhiForRAUW.

  RewritePhi(PHINode *P, unsigned I, Value *V, bool H, bool S)
      : PN(P), Ith(I), Val(V), HighCost(H), SafePhi(S) {}
};
}
Value *IndVarSimplify::ExpandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S,
                                          Loop *L, Instruction *InsertPt,
                                          Type *ResultTy,
                                          bool &IsHighCostExpansion) {
  using namespace llvm::PatternMatch;

  if (!Rewriter.isHighCostExpansion(S, L)) {
    IsHighCostExpansion = false;
    return Rewriter.expandCodeFor(S, ResultTy, InsertPt);
  }

  // Before expanding S into an expensive LLVM expression, see if we can use an
  // already existing value as the expansion for S. There is potential to make
  // this significantly smarter, but this simple heuristic already gets some
  // interesting cases.
  SmallVector<BasicBlock *, 4> Latches;
  L->getLoopLatches(Latches);

  for (BasicBlock *BB : Latches) {
    ICmpInst::Predicate Pred;
    Instruction *LHS, *RHS;
    BasicBlock *TrueBB, *FalseBB;

    if (!match(BB->getTerminator(),
               m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)),
                    TrueBB, FalseBB)))
      continue;

    if (SE->getSCEV(LHS) == S && DT->dominates(LHS, InsertPt)) {
      IsHighCostExpansion = false;
      return LHS;
    }

    if (SE->getSCEV(RHS) == S && DT->dominates(RHS, InsertPt)) {
      IsHighCostExpansion = false;
      return RHS;
    }
  }

  // We didn't find anything, fall back to using SCEVExpander.
  assert(Rewriter.isHighCostExpansion(S, L) && "this should not have changed!");
  IsHighCostExpansion = true;
  return Rewriter.expandCodeFor(S, ResultTy, InsertPt);
}
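// Illustrative case for the reuse heuristic above (hypothetical IR): when a
// loop latch ends in
//   %cmp = icmp slt i64 %iv.next, %n
//   br i1 %cmp, label %loop, label %exit
// and the SCEV of %iv.next equals S, we return %iv.next directly instead of
// expanding a second, possibly expensive, copy of the same expression.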
//===----------------------------------------------------------------------===//
// RewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//

/// RewriteLoopExitValues - Check to see if this loop has a computable
/// loop-invariant execution count. If so, this means that we can compute the
/// final value of any expressions that are recurrent in the loop, and
/// substitute the exit values from the loop into any instructions outside of
/// the loop that use the final values of the current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
  // Verify that the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  SmallVector<RewritePhi, 8> RewritePhiSet;
  // Find all values that are computed inside the loop, but used outside of it.
  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
  // the exit blocks of the loop to find them.
  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
    BasicBlock *ExitBB = ExitBlocks[i];

    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path, skip it.
    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    if (!PN) continue;

    unsigned NumPreds = PN->getNumIncomingValues();

    // We would like to be able to RAUW single-incoming value PHI nodes. We
    // have to be certain this is safe even when this is an LCSSA PHI node.
    // While the computed exit value is no longer varying in *this* loop, the
    // exit block may be an exit block for an outer containing loop as well,
    // the exit value may be varying in the outer loop, and thus it may still
    // require an LCSSA PHI node. The safe case is when this is a
    // single-predecessor PHI node (LCSSA) and the exit block containing it is
    // part of the enclosing loop, or this is the outermost loop of the nest.
    // In either case the exit value could (at most) be varying in the same
    // loop body as the phi node itself. Thus if it is in turn used outside of
    // an enclosing loop it will only be via a separate LCSSA node.
    bool LCSSASafePhiForRAUW =
        NumPreds == 1 &&
        (!L->getParentLoop() || L->getParentLoop() == LI->getLoopFor(ExitBB));
    // Iterate over all of the PHI nodes.
    BasicBlock::iterator BBI = ExitBB->begin();
    while ((PN = dyn_cast<PHINode>(BBI++))) {
      if (PN->use_empty())
        continue; // dead use, don't replace it

      // SCEV only supports integer expressions for now.
      if (!PN->getType()->isIntegerTy() && !PN->getType()->isPointerTy())
        continue;

      // It's necessary to tell ScalarEvolution about this explicitly so that
      // it can walk the def-use list and forget all SCEVs, as it may not be
      // watching the PHI itself. Once the new exit value is in place, there
      // may not be a def-use connection between the loop and every instruction
      // which got a SCEVAddRecExpr for that loop.
      SE->forgetValue(PN);

      // Iterate over all of the values in all the PHI nodes.
      for (unsigned i = 0; i != NumPreds; ++i) {
        // If the value being merged in is not integer or is not defined
        // in the loop, skip it.
        Value *InVal = PN->getIncomingValue(i);
        if (!isa<Instruction>(InVal))
          continue;

        // If this pred is for a subloop, not L itself, skip it.
        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
          continue; // The Block is in a subloop, skip it.

        // Check that InVal is defined in the loop.
        Instruction *Inst = cast<Instruction>(InVal);
        if (!L->contains(Inst))
          continue;

        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop. Evaluate the value it
        // contains when the loop exits, if possible.
        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
        if (!SE->isLoopInvariant(ExitValue, L) ||
            !isSafeToExpand(ExitValue, *SE))
          continue;

        // Computing the value outside of the loop brings no benefit if:
        //  - it is definitely used inside the loop in a way which cannot be
        //    optimized away.
        //  - no use outside of the loop can take advantage of hoisting the
        //    computation out of the loop.
        if (ExitValue->getSCEVType() >= scMulExpr) {
          unsigned NumHardInternalUses = 0;
          unsigned NumSoftExternalUses = 0;
          unsigned NumUses = 0;
          for (auto IB = Inst->user_begin(), IE = Inst->user_end();
               IB != IE && NumUses <= 6; ++IB) {
            Instruction *UseInstr = cast<Instruction>(*IB);
            unsigned Opc = UseInstr->getOpcode();
            NumUses++;
            if (L->contains(UseInstr)) {
              if (Opc == Instruction::Call || Opc == Instruction::Ret)
                NumHardInternalUses++;
            } else {
              if (Opc == Instruction::PHI) {
                // Do not count the Phi as a use. LCSSA may have inserted
                // plenty of trivial ones.
                NumUses--;
                for (auto PB = UseInstr->user_begin(),
                          PE = UseInstr->user_end();
                     PB != PE && NumUses <= 6; ++PB, ++NumUses) {
                  unsigned PhiOpc = cast<Instruction>(*PB)->getOpcode();
                  if (PhiOpc != Instruction::Call && PhiOpc != Instruction::Ret)
                    NumSoftExternalUses++;
                }
                continue;
              }
              if (Opc != Instruction::Call && Opc != Instruction::Ret)
                NumSoftExternalUses++;
            }
          }
          if (NumUses <= 6 && NumHardInternalUses && !NumSoftExternalUses)
            continue;
        }
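        // Illustrative reading of the heuristic above (not from the original
        // comments): counting at most 6 uses, an exit value whose in-loop
        // users include a call or return ("hard" uses we cannot optimize
        // away) and which has no "soft" out-of-loop user that would profit
        // from hoisting is left alone, since rewriting it would only add code.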
        bool HighCost = false;
        Value *ExitVal = ExpandSCEVIfNeeded(Rewriter, ExitValue, L, Inst,
                                            PN->getType(), HighCost);

        DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
                     << "  LoopVal = " << *Inst << "\n");

        if (!isValidRewrite(Inst, ExitVal)) {
          DeadInsts.push_back(ExitVal);
          continue;
        }

        // Collect all the candidate PHINodes to be rewritten.
        RewritePhiSet.push_back(
            RewritePhi(PN, i, ExitVal, HighCost, LCSSASafePhiForRAUW));
      }
    }
  }

  bool LoopCanBeDel = CanLoopBeDeleted(L, RewritePhiSet);

  // Transformation.
  for (const RewritePhi &Phi : RewritePhiSet) {
    PHINode *PN = Phi.PN;
    Value *ExitVal = Phi.Val;

    // Only do the rewrite when the ExitValue can be expanded cheaply.
    // If LoopCanBeDel is true, rewrite exit value aggressively.
    if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
      DeadInsts.push_back(ExitVal);
      continue;
    }

    Changed = true;
    ++NumReplaced;
    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
    PN->setIncomingValue(Phi.Ith, ExitVal);

    // If this instruction is dead now, delete it. Don't do it now to avoid
    // invalidating iterators.
    if (isInstructionTriviallyDead(Inst, TLI))
      DeadInsts.push_back(Inst);

    // If we determined that this PHI is safe to replace even if an LCSSA
    // PHI, do so.
    if (Phi.SafePhi) {
      PN->replaceAllUsesWith(ExitVal);
      PN->eraseFromParent();
    }
  }

  // The insertion point instruction may have been deleted; clear it out
  // so that the rewriter doesn't trip over it later.
  Rewriter.clearInsertPoint();
}
/// CanLoopBeDeleted - Check whether it is possible to delete the loop after
/// rewriting exit value. If it is possible, ignore ReplaceExitValue and
/// do rewriting aggressively.
bool IndVarSimplify::CanLoopBeDeleted(
    Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {

  BasicBlock *Preheader = L->getLoopPreheader();
  // If there is no preheader, the loop will not be deleted.
  if (!Preheader)
    return false;

  // In the LoopDeletion pass, a loop can be deleted even when
  // ExitingBlocks.size() > 1. We sidestep the multiple-ExitingBlocks case
  // here for simplicity.
  // TODO: If we see a test case where a loop with multiple exiting blocks can
  // be deleted after exit value rewriting, we can enhance the logic here.
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1)
    return false;

  BasicBlock *ExitBlock = ExitBlocks[0];
  BasicBlock::iterator BI = ExitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);

    // If the Incoming value of P is found in RewritePhiSet, we know it
    // could be rewritten to use a loop invariant value in transformation
    // phase later. Skip it in the loop invariant check below.
    bool found = false;
    for (const RewritePhi &Phi : RewritePhiSet) {
      unsigned i = Phi.Ith;
      if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
        found = true;
        break;
      }
    }

    Instruction *I;
    if (!found && (I = dyn_cast<Instruction>(Incoming)))
      if (!L->hasLoopInvariantOperands(I))
        return false;

    ++BI;
  }

  for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
       LI != LE; ++LI) {
    for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end(); BI != BE;
         ++BI) {
      if (BI->mayHaveSideEffects())
        return false;
    }
  }

  return true;
}
//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {
// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by CollectExtend and
// provides the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV;
  Type *WidestNativeType; // Widest integer type created by a [sz]ext.
  bool IsSigned;          // Was a sext user seen before a zext?

  WideIVInfo() : NarrowIV(nullptr), WidestNativeType(nullptr),
                 IsSigned(false) {}
};
}
/// visitIVCast - Update information about the induction variable that is
/// extended by this sign or zero extend operation. This is used to determine
/// the final width of the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable.
  // We could compute the cost of all instructions on the induction variable
  // more comprehensively when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}
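// Illustrative walk of visitIVCast (not from the original comments): seeing
// 'sext i32 %iv to i64' first sets WidestNativeType = i64 and IsSigned = true;
// a later 'zext i32 %iv to i64' is then ignored because its signedness differs
// from the first user, while a later, wider sext grows WidestNativeType only
// if its destination is a legal integer type for the target.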
namespace {

/// NarrowIVDefUse - Record a link in the Narrow IV def-use chain along with the
/// WideIV that computes the same value as the Narrow IV def. This avoids
/// caching Use* pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef;
  Instruction *NarrowUse;
  Instruction *WideDef;

  NarrowIVDefUse(): NarrowDef(nullptr), NarrowUse(nullptr), WideDef(nullptr) {}

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD):
    NarrowDef(ND), NarrowUse(NU), WideDef(WD) {}
};

/// WidenIV - The goal of this transform is to remove sign and zero extends
/// without creating any new induction variables. To do this, it creates a new
/// phi of the wider type and redirects all users, either removing extends or
/// inserting truncs whenever we stop propagating the type.
///
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;
  bool IsSigned;

  // Context
  LoopInfo        *LI;
  Loop            *L;
  ScalarEvolution *SE;
  DominatorTree   *DT;

  // Result
  PHINode *WidePhi;
  Instruction *WideInc;
  const SCEV *WideIncExpr;
  SmallVectorImpl<WeakVH> &DeadInsts;

  SmallPtrSet<Instruction*,16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo,
          ScalarEvolution *SEv, DominatorTree *DTree,
          SmallVectorImpl<WeakVH> &DI) :
    OrigPhi(WI.NarrowIV),
    WideType(WI.WidestNativeType),
    IsSigned(WI.IsSigned),
    LI(LInfo),
    L(LI->getLoopFor(OrigPhi->getParent())),
    SE(SEv),
    DT(DTree),
    WidePhi(nullptr),
    WideInc(nullptr),
    WideIncExpr(nullptr),
    DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
  }

  PHINode *CreateWideIV(SCEVExpander &Rewriter);

protected:
  Value *getExtend(Value *NarrowOper, Type *WideType, bool IsSigned,
                   Instruction *Use);

  Instruction *CloneIVUser(NarrowIVDefUse DU);

  const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse);

  const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool WidenLoopCompare(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};
} // anonymous namespace
/// isLoopInvariant - Perform a quick domtree based check for loop invariance
/// assuming that V is used within the loop. LoopInfo::isLoopInvariant() seems
/// gratuitous for this purpose.
static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) {
  Instruction *Inst = dyn_cast<Instruction>(V);
  if (!Inst)
    return true;

  return DT->properlyDominates(Inst->getParent(), L->getHeader());
}

Value *WidenIV::getExtend(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && isLoopInvariant(NarrowOper, L, DT);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}
/// CloneIVUser - Instantiate a wide operation to replace a narrow
/// operation. This only needs to handle operations that can evaluate to
/// SCEVAddRec. It can safely return nullptr for any operation we decide not to
/// clone.
Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    DEBUG(dbgs() << "Cloning IVUser: " << *DU.NarrowUse << "\n");

    // Replace NarrowDef operands with WideDef. Otherwise, we don't know
    // anything about the narrow operand yet so must insert a [sz]ext. It is
    // probably loop invariant and will be folded or hoisted. If it actually
    // comes from a widened IV, it should be removed during a future call to
    // WidenIVUse.
    Value *LHS = (DU.NarrowUse->getOperand(0) == DU.NarrowDef) ? DU.WideDef :
      getExtend(DU.NarrowUse->getOperand(0), WideType, IsSigned, DU.NarrowUse);
    Value *RHS = (DU.NarrowUse->getOperand(1) == DU.NarrowDef) ? DU.WideDef :
      getExtend(DU.NarrowUse->getOperand(1), WideType, IsSigned, DU.NarrowUse);

    BinaryOperator *NarrowBO = cast<BinaryOperator>(DU.NarrowUse);
    BinaryOperator *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(),
                                                    LHS, RHS,
                                                    NarrowBO->getName());
    IRBuilder<> Builder(DU.NarrowUse);
    Builder.Insert(WideBO);
    if (const OverflowingBinaryOperator *OBO =
        dyn_cast<OverflowingBinaryOperator>(NarrowBO)) {
      if (OBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap();
      if (OBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap();
    }
    return WideBO;
  }
}
const SCEV *WidenIV::GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}
/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return it.
const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported yet.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return nullptr;

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
    cast<OverflowingBinaryOperator>(DU.NarrowUse);
  if (IsSigned && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (!IsSigned && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return nullptr;

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the
  // no-wrap behavior depends on. Non-control-equivalent instructions can be
  // mapped to the same SCEV expression, and it would be incorrect to transfer
  // NSW/NUW semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(GetSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return nullptr;
  return AddRec;
}
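// Illustrative example (hypothetical IR): for 'add nsw i32 %iv, %x' where %iv
// has been widened to i64, the nsw flag lets us model the widened result as
//   sext(%iv) + sext(%x)
// and if ScalarEvolution folds that sum into an AddRec on this loop, the add
// itself can later be cloned in the wide type without changing its value.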
/// GetWideRecurrence - Is this instruction potentially interesting for further
/// simplification after widening its type? In other words, can the
/// extend be safely hoisted out of the loop with SCEV reducing the value to a
/// recurrence on the same loop? If so, return the sign or zero extended
/// recurrence. Otherwise return NULL.
const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
  if (!SE->isSCEVable(NarrowUse->getType()))
    return nullptr;

  const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType())
      >= SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return nullptr;
  }

  const SCEV *WideExpr = IsSigned ?
    SE->getSignExtendExpr(NarrowExpr, WideType) :
    SE->getZeroExtendExpr(NarrowExpr, WideType);
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return nullptr;
  return AddRec;
}
/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT) {
  DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef
        << " for user " << *DU.NarrowUse << "\n");
  IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT));
  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}
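// Illustrative example (hypothetical IR): a use that does not widen to a
// recurrence, such as '%r = urem i32 %iv, %q', gets its i32 operand back via
// '%t = trunc i64 %iv.wide to i32'; rewriting the use to %t cuts it out of
// the narrow IV's def-use chain so the narrow IV can eventually be deleted.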
/// WidenLoopCompare - If the narrow use is a compare instruction, then widen
/// the compare (and possibly the other operand). The extend operation is
/// hoisted into the loop preheader as far as possible.
bool WidenIV::WidenLoopCompare(NarrowIVDefUse DU) {
  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
  if (!Cmp)
    return false;

  // Sign of IV user and compare must match.
  if (IsSigned != CmpInst::isSigned(Cmp->getPredicate()))
    return false;

  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");

  // Widen the compare instruction.
  IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT));
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);

  // Widen the other operand of the compare, if necessary.
  if (CastWidth < IVWidth) {
    Value *ExtOp = getExtend(Op, WideType, IsSigned, Cmp);
    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
  }
  return true;
}
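// Illustrative example (hypothetical IR): with a sign-extended wide IV,
//   %cmp = icmp slt i32 %iv, %bound
// becomes
//   %cmp = icmp slt i64 %iv.wide, (sext i32 %bound to i64)
// which is valid because signed comparison is invariant under sign extension
// of both operands (and likewise unsigned comparison under zero extension).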
/// WidenIVUse - Determine whether an individual user of the narrow IV can be
/// widened. If so, return the wide clone of the user.
Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {

  // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
  if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
    if (LI->getLoopFor(UsePhi->getParent()) != L) {
      // For LCSSA phis, sink the truncate outside the loop.
      // After SimplifyCFG most loop exit targets have a single predecessor.
      // Otherwise fall back to a truncate within the loop.
      if (UsePhi->getNumOperands() != 1)
        truncateIVUse(DU, DT);
      else {
        PHINode *WidePhi =
          PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
                          UsePhi);
        WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
        IRBuilder<> Builder(WidePhi->getParent()->getFirstInsertionPt());
        Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
        UsePhi->replaceAllUsesWith(Trunc);
        DeadInsts.emplace_back(UsePhi);
        DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi
              << " to " << *WidePhi << "\n");
      }
      return nullptr;
    }
  }
  // Our raison d'etre! Eliminate sign and zero extension.
  if (IsSigned ? isa<SExtInst>(DU.NarrowUse) : isa<ZExtInst>(DU.NarrowUse)) {
    Value *NewDef = DU.WideDef;
    if (DU.NarrowUse->getType() != WideType) {
      unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
      unsigned IVWidth = SE->getTypeSizeInBits(WideType);
      if (CastWidth < IVWidth) {
        // The cast isn't as wide as the IV, so insert a Trunc.
        IRBuilder<> Builder(DU.NarrowUse);
        NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
      }
      else {
        // A wider extend was hidden behind a narrower one. This may induce
        // another round of IV widening in which the intermediate IV becomes
        // dead. It should be very rare.
        DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
              << " not wide enough to subsume " << *DU.NarrowUse << "\n");
        DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
        NewDef = DU.NarrowUse;
      }
    }
    if (NewDef != DU.NarrowUse) {
      DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
            << " replaced by " << *DU.WideDef << "\n");
      ++NumElimExt;
      DU.NarrowUse->replaceAllUsesWith(NewDef);
      DeadInsts.emplace_back(DU.NarrowUse);
    }
    // Now that the extend is gone, we want to expose its uses for potential
    // further simplification. We don't need to directly inform SimplifyIVUsers
    // of the new users, because their parent IV will be processed later as a
    // new loop phi. If we preserved IVUsers analysis, we would also want to
    // push the uses of WideDef here.

    // No further widening is needed. The deceased [sz]ext had done it for us.
    return nullptr;
  }
  1036. // Does this user itself evaluate to a recurrence after widening?
  1037. const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(DU.NarrowUse);
  1038. if (!WideAddRec)
  1039. WideAddRec = GetExtendedOperandRecurrence(DU);
  1040. if (!WideAddRec) {
  1041. // If use is a loop condition, try to promote the condition instead of
  1042. // truncating the IV first.
  1043. if (WidenLoopCompare(DU))
  1044. return nullptr;
  1045. // This user does not evaluate to a recurence after widening, so don't
  1046. // follow it. Instead insert a Trunc to kill off the original use,
  1047. // eventually isolating the original narrow IV so it can be removed.
  1048. truncateIVUse(DU, DT);
  1049. return nullptr;
  1050. }
  1051. // Assume block terminators cannot evaluate to a recurrence. We can't to
  1052. // insert a Trunc after a terminator if there happens to be a critical edge.
  1053. assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
  1054. "SCEV is not expected to evaluate a block terminator");
  1055. // Reuse the IV increment that SCEVExpander created as long as it dominates
  1056. // NarrowUse.
  1057. Instruction *WideUse = nullptr;
  1058. if (WideAddRec == WideIncExpr
  1059. && Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
  1060. WideUse = WideInc;
  1061. else {
  1062. WideUse = CloneIVUser(DU);
  1063. if (!WideUse)
  1064. return nullptr;
  1065. }
  1066. // Evaluation of WideAddRec ensured that the narrow expression could be
  1067. // extended outside the loop without overflow. This suggests that the wide use
  1068. // evaluates to the same expression as the extended narrow use, but doesn't
  1069. // absolutely guarantee it. Hence the following failsafe check. In rare cases
  1070. // where it fails, we simply throw away the newly created wide use.
  1071. if (WideAddRec != SE->getSCEV(WideUse)) {
  1072. DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
  1073. << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n");
  1074. DeadInsts.emplace_back(WideUse);
  1075. return nullptr;
  1076. }
  1077. // Returning WideUse pushes it on the worklist.
  1078. return WideUse;
  1079. }
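
// Illustrative example (hypothetical IR): the extension-elimination case above
// is the payoff of widening. Once %iv has a wide twin %iv.wide, a use such as
//   %iv.ext = sext i32 %iv to i64
// is not cloned; all of its uses are redirected to %iv.wide and the sext is
// queued on DeadInsts for deletion.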
/// pushNarrowIVUsers - Add eligible users of NarrowDef to NarrowIVUsers.
///
void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
  for (User *U : NarrowDef->users()) {
    Instruction *NarrowUser = cast<Instruction>(U);

    // Handle data flow merges and bizarre phi cycles.
    if (!Widened.insert(NarrowUser).second)
      continue;

    NarrowIVUsers.push_back(NarrowIVDefUse(NarrowDef, NarrowUser, WideDef));
  }
}
/// CreateWideIV - Process a single induction variable. First use the
/// SCEVExpander to create a wide induction variable that evaluates to the same
/// recurrence as the original narrow IV. Then use a worklist to forward
/// traverse the narrow IV's def-use chain. After WidenIVUse has processed all
/// interesting IV users, the narrow IV will be isolated for removal by
/// DeleteDeadPHIs.
///
/// It would be simpler to delete uses as they are processed, but we must avoid
/// invalidating SCEV expressions.
///
PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
  // Is this phi an induction variable?
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
  if (!AddRec)
    return nullptr;

  // Widen the induction variable expression.
  const SCEV *WideIVExpr = IsSigned ?
    SE->getSignExtendExpr(AddRec, WideType) :
    SE->getZeroExtendExpr(AddRec, WideType);

  assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
         "Expect the new IV expression to preserve its type");

  // Can the IV be extended outside the loop without overflow?
  AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return nullptr;

  // An AddRec must have loop-invariant operands. Since this AddRec is
  // materialized by a loop header phi, the expression cannot have any post-loop
  // operands, so they must dominate the loop header.
  assert(SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
         SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader())
         && "Loop header phi recurrence inputs do not dominate the loop");

  // The rewriter provides a value for the desired IV expression. This may
  // either find an existing phi or materialize a new one. Either way, we
  // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
  // of the phi-SCC dominates the loop entry.
  Instruction *InsertPt = L->getHeader()->begin();
  WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));

  // Remembering the WideIV increment generated by SCEVExpander allows
  // WidenIVUse to reuse it when widening the narrow IV's increment. We don't
  // employ a general reuse mechanism because the call above is the only call to
  // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
  if (BasicBlock *LatchBlock = L->getLoopLatch()) {
    WideInc =
      cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
    WideIncExpr = SE->getSCEV(WideInc);
  }

  DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
  ++NumWidened;

  // Traverse the def-use chain using a worklist starting at the original IV.
  assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");

  Widened.insert(OrigPhi);
  pushNarrowIVUsers(OrigPhi, WidePhi);

  while (!NarrowIVUsers.empty()) {
    NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();

    // Process a def-use edge. This may replace the use, so don't hold a
    // use_iterator across it.
    Instruction *WideUse = WidenIVUse(DU, Rewriter);

    // Follow all def-use edges from the previous narrow use.
    if (WideUse)
      pushNarrowIVUsers(DU.NarrowUse, WideUse);

    // WidenIVUse may have removed the def-use edge.
    if (DU.NarrowDef->use_empty())
      DeadInsts.emplace_back(DU.NarrowDef);
  }
  return WidePhi;
}
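
// Illustrative example (hypothetical IR): for a narrow IV
//   %iv = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
// CreateWideIV expands the sign- or zero-extended recurrence into a sibling
//   %iv.wide = phi i64 [ 0, %preheader ], [ %iv.wide.next, %latch ]
// and the worklist above then rewrites each interesting narrow use in terms
// of %iv.wide, isolating %iv for removal.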
//===----------------------------------------------------------------------===//
//  Live IV Reduction - Minimize IVs live across the loop.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//  Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//

namespace {
  class IndVarSimplifyVisitor : public IVVisitor {
    ScalarEvolution *SE;
    const TargetTransformInfo *TTI;
    PHINode *IVPhi;

  public:
    WideIVInfo WI;

    IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                          const TargetTransformInfo *TTI,
                          const DominatorTree *DTree)
      : SE(SCEV), TTI(TTI), IVPhi(IV) {
      DT = DTree;
      WI.NarrowIV = IVPhi;
      if (ReduceLiveIVs)
        setSplitOverflowIntrinsics();
    }

    // Implement the interface used by simplifyUsersOfIV.
    void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
  };
}
/// SimplifyAndExtend - Iteratively perform simplification on a worklist of IV
/// users. Each successive simplification may push more users which may
/// themselves be candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
///
void IndVarSimplify::SimplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LPPassManager &LPM) {
  SmallVector<WideIVInfo, 8> WideIVs;

  SmallVector<PHINode*, 8> LoopPhis;
  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    LoopPhis.push_back(cast<PHINode>(I));
  }
  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs. This
    // forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // evaluation of sign/zero extension until needed, and avoid running other
    // SCEV based analysis prior to SimplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &Visitor);

      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while (!LoopPhis.empty());

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts);
      if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) {
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
}
//===----------------------------------------------------------------------===//
//  LinearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//

/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
/// count expression can be safely and cheaply expanded into an instruction
/// sequence that can be used by LinearFunctionTestReplace.
///
/// TODO: This fails for pointer-type loop counters with greater than one byte
/// strides, consequently preventing LFTR from running. For the purpose of LFTR
/// we could skip this check in the case that the LFTR loop counter (chosen by
/// FindLoopCounter) is also pointer type. Instead, we could directly convert
/// the loop test to an inequality test by checking the target data's alignment
/// of element types (given that the initial pointer value originates from or is
/// used by an ABI constrained operation, as opposed to inttoptr/ptrtoint).
/// However, we don't yet have a strong motivation for converting loop tests
/// into inequality tests.
static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE,
                                        SCEVExpander &Rewriter) {
  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
      BackedgeTakenCount->isZero())
    return false;

  if (!L->getExitingBlock())
    return false;

  // Can't rewrite non-branch yet.
  if (!isa<BranchInst>(L->getExitingBlock()->getTerminator()))
    return false;

  if (Rewriter.isHighCostExpansion(BackedgeTakenCount, L))
    return false;

  return true;
}
/// getLoopPhiForCounter - Return the loop header phi IFF IncV adds a loop
/// invariant value to the phi.
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
  Instruction *IncI = dyn_cast<Instruction>(IncV);
  if (!IncI)
    return nullptr;

  switch (IncI->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
    break;
  case Instruction::GetElementPtr:
    // An IV counter must preserve its type.
    if (IncI->getNumOperands() == 2)
      break;
  default:
    return nullptr;
  }

  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (isLoopInvariant(IncI->getOperand(1), L, DT))
      return Phi;
    return nullptr;
  }
  if (IncI->getOpcode() == Instruction::GetElementPtr)
    return nullptr;

  // Allow add/sub to be commuted.
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (isLoopInvariant(IncI->getOperand(0), L, DT))
      return Phi;
  }
  return nullptr;
}
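
// Illustrative example (hypothetical IR): for the increment
//   %iv.next = add i32 %iv, %stride
// getLoopPhiForCounter returns %iv when %iv is a phi in the loop header and
// %stride is loop invariant (the operands may be commuted for add/sub);
// anything else yields nullptr.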
/// Return the compare guarding the loop latch, or NULL for unrecognized tests.
static ICmpInst *getLoopTest(Loop *L) {
  assert(L->getExitingBlock() && "expected loop exit");

  BasicBlock *LatchBlock = L->getLoopLatch();
  // Don't bother with LFTR if the loop is not properly simplified.
  if (!LatchBlock)
    return nullptr;

  BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
  assert(BI && "expected exit branch");

  return dyn_cast<ICmpInst>(BI->getCondition());
}
/// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show
/// that the current exit test is already sufficiently canonical.
static bool needsLFTR(Loop *L, DominatorTree *DT) {
  // Do LFTR to simplify the exit condition to an ICMP.
  ICmpInst *Cond = getLoopTest(L);
  if (!Cond)
    return true;

  // Do LFTR to simplify the exit ICMP to EQ/NE.
  ICmpInst::Predicate Pred = Cond->getPredicate();
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
    return true;

  // Look for a loop invariant RHS.
  Value *LHS = Cond->getOperand(0);
  Value *RHS = Cond->getOperand(1);
  if (!isLoopInvariant(RHS, L, DT)) {
    if (!isLoopInvariant(LHS, L, DT))
      return true;
    std::swap(LHS, RHS);
  }
  // Look for a simple IV counter LHS.
  PHINode *Phi = dyn_cast<PHINode>(LHS);
  if (!Phi)
    Phi = getLoopPhiForCounter(LHS, L, DT);

  if (!Phi)
    return true;

  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
  if (Idx < 0)
    return true;

  // Do LFTR if the exit condition's IV is *not* a simple counter.
  Value *IncV = Phi->getIncomingValue(Idx);
  return Phi != getLoopPhiForCounter(IncV, L, DT);
}
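
// Illustrative example (hypothetical IR): an exit test that is already
// canonical, so needsLFTR returns false:
//   %iv.next = add i32 %iv, 1                  ; %iv is the latch header phi
//   %exitcond = icmp ne i32 %iv.next, %n       ; %n is loop invariant
// Any other predicate, a loop-variant RHS, or a non-counter LHS triggers LFTR.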
/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
/// down to checking that all operands are constant and listing instructions
/// that may hide undef.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
                               unsigned Depth) {
  if (isa<Constant>(V))
    return !isa<UndefValue>(V);

  if (Depth >= 6)
    return false;

  // Conservatively handle non-constant non-instructions. For example, Arguments
  // may be undef.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Load and return values may be undef.
  if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
    return false;

  // Optimistically handle other instructions.
  for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) {
    if (!Visited.insert(*OI).second)
      continue;
    if (!hasConcreteDefImpl(*OI, Visited, Depth+1))
      return false;
  }
  return true;
}

/// Return true if the given value is concrete. We must prove that undef can
/// never reach it.
///
/// TODO: If we decide that this is a good approach to checking for undef, we
/// may factor it into a common location.
static bool hasConcreteDef(Value *V) {
  SmallPtrSet<Value*, 8> Visited;
  Visited.insert(V);
  return hasConcreteDefImpl(V, Visited, 0);
}
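
// Illustrative example (hypothetical IR): hasConcreteDef walks a bounded depth
// of operands, so a chain of constant arithmetic is accepted, while
//   %v = select i1 %c, i32 0, i32 undef
// is rejected because undef can reach %v. Loads, calls, invokes, and
// non-instruction values such as function arguments are likewise rejected
// conservatively.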
/// AlmostDeadIV - Return true if this IV has any uses other than the (soon to
/// be rewritten) loop exit test.
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
  Value *IncV = Phi->getIncomingValue(LatchIdx);

  for (User *U : Phi->users())
    if (U != Cond && U != IncV) return false;

  for (User *U : IncV->users())
    if (U != Cond && U != Phi) return false;
  return true;
}
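
// Illustrative note (hypothetical IR): AlmostDeadIV returns true for
//   %iv = phi i32 ...          ; users: %iv.next and the exit compare only
//   %iv.next = add i32 %iv, 1  ; users: %iv and the exit compare only
// since after the exit test is rewritten, nothing else keeps the IV alive.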
/// FindLoopCounter - Find an affine IV in canonical form.
///
/// BECount may be an i8* pointer type. The pointer difference is already a
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
///
/// Currently only valid for LFTR. See the comments on hasConcreteDef above.
///
/// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount.
///
/// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
/// This is difficult in general for SCEV because of potential overflow. But we
/// could at least handle constant BECounts.
static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond =
    cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "needsLFTR should guarantee a loop latch");

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!SE->isSCEVable(Phi->getType()))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
    if (!AR || AR->getLoop() != L || !AR->isAffine())
      continue;

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth ||
        !L->getHeader()->getModule()->getDataLayout().isLegalInteger(PhiWidth))
      continue;

    const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
    if (!Step || !Step->isOne())
      continue;

    int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
    Value *IncV = Phi->getIncomingValue(LatchIdx);
    if (getLoopPhiForCounter(IncV, L, DT) != Phi)
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop test. In this case we assume that performing LFTR could not
      // increase the number of undef users.
      if (ICmpInst *Cond = getLoopTest(L)) {
        if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L, DT) &&
            Phi != getLoopPhiForCounter(Cond->getOperand(1), L, DT)) {
          continue;
        }
      }
    }
    const SCEV *Init = AR->getStart();

    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (AlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}
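
// Illustrative note: given two qualifying unit-stride counters, the loop above
// keeps the phi that starts at zero (the more canonical form, which also
// favors integer over pointer IVs); if both start at zero or both start
// elsewhere, it keeps the wider phi so the narrower, likely already-widened
// one can be eliminated.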
/// genLoopLimit - Help LinearFunctionTestReplace by generating a value that
/// holds the RHS of the new loop test.
static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
  const SCEV *IVInit = AR->getStart();

  // IVInit may be a pointer while IVCount is an integer when FindLoopCounter
  // finds a valid pointer IV. Extend BECount in order to materialize a
  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
  // the existing GEPs whenever possible.
  if (IndVar->getType()->isPointerTy()
      && !IVCount->getType()->isPointerTy()) {
    // IVOffset will be the new GEP offset that is interpreted by GEP as a
    // signed value. IVCount on the other hand represents the loop trip count,
    // which is an unsigned value. FindLoopCounter only allows induction
    // variables that have a positive stride of one. This means we don't have
    // to handle the case of negative offsets (yet) and just need to zero
    // extend IVCount.
    Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
    const SCEV *IVOffset = SE->getTruncateOrZeroExtend(IVCount, OfsTy);

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVOffset, L) &&
           "Computed iteration count is not loop invariant!");
    BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
    Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI);

    Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
    assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
    // We could handle pointer IVs other than i8*, but we need to compensate for
    // gep index scaling. See canExpandBackedgeTakenCount comments.
    assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
             cast<PointerType>(GEPBase->getType())->getElementType())->isOne()
           && "unit stride pointer IV must be i8*");

    IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
    return Builder.CreateGEP(nullptr, GEPBase, GEPOffset, "lftr.limit");
  }
  else {
    // In any other case, convert both IVInit and IVCount to integers before
    // comparing. This may result in SCEV expansion of pointers, but in practice
    // SCEV will fold the pointer arithmetic away as follows:
    // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
    //
    // Valid Cases: (1) both integers is most common; (2) both may be pointers
    // for simple memset-style loops.
    //
    // IVInit integer and IVCount pointer would only occur if a canonical IV
    // were generated on top of case #2, which is not expected.

    const SCEV *IVLimit = nullptr;
    // For unit stride, IVCount = Start + BECount with 2's complement overflow.
    // For non-zero Start, compute IVCount here.
    if (AR->getStart()->isZero())
      IVLimit = IVCount;
    else {
      assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
      const SCEV *IVInit = AR->getStart();

      // For integer IVs, truncate the IV before computing IVInit + BECount.
      if (SE->getTypeSizeInBits(IVInit->getType())
          > SE->getTypeSizeInBits(IVCount->getType()))
        IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());

      IVLimit = SE->getAddExpr(IVInit, IVCount);
    }
    // Expand the code for the iteration count.
    BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
    IRBuilder<> Builder(BI);
    assert(SE->isLoopInvariant(IVLimit, L) &&
           "Computed iteration count is not loop invariant!");
    // Ensure that we generate the same type as IndVar, or a smaller integer
    // type. In the presence of null pointer values, we have an integer type
    // SCEV expression (IVInit) for a pointer type IV value (IndVar).
    Type *LimitTy = IVCount->getType()->isPointerTy() ?
      IndVar->getType() : IVCount->getType();
    return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
  }
}
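
// Illustrative example (hypothetical IR): for an i32 IV starting at %init with
// iteration count %count, the integer path above has SCEVExpander materialize
// roughly
//   %limit = add i32 %init, %count
// in the exiting block, while a unit-stride i8* IV takes the pointer path and
// emits a GEP in the preheader:
//   %lftr.limit = getelementptr i8, i8* %base, i32 %count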
/// LinearFunctionTestReplace - This method rewrites the exit condition of the
/// loop to be a canonical != comparison against the incremented loop induction
/// variable. This pass is able to rewrite the exit tests of any loop where the
/// SCEV analysis can determine a loop-invariant trip count of the loop, which
/// is actually a much broader range than just linear tests.
Value *IndVarSimplify::
LinearFunctionTestReplace(Loop *L,
                          const SCEV *BackedgeTakenCount,
                          PHINode *IndVar,
                          SCEVExpander &Rewriter) {
  assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition");

  // Initialize CmpIndVar and IVCount to their preincremented values.
  Value *CmpIndVar = IndVar;
  const SCEV *IVCount = BackedgeTakenCount;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (L->getExitingBlock() == L->getLoopLatch()) {
    // Add one to the "backedge-taken" count to get the trip count.
    // This addition may overflow, which is valid as long as the comparison is
    // truncated to BackedgeTakenCount->getType().
    IVCount = SE->getAddExpr(BackedgeTakenCount,
                             SE->getConstant(BackedgeTakenCount->getType(), 1));
    // The BackedgeTaken expression contains the number of times that the
    // backedge branches to the loop header. This is one less than the
    // number of times the loop executes, so use the incremented indvar.
    CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
  }

  Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() == IndVar->getType()->isPointerTy()
         && "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
               << "      LHS:" << *CmpIndVar << '\n'
               << "       op:\t"
               << (P == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
               << "      RHS:\t" << *ExitCnt << "\n"
               << "  IVCount:\t" << *IVCount << "\n");

  IRBuilder<> Builder(BI);

  // LFTR can ignore IV overflow and truncate to the width of
  // BECount. This avoids materializing the add(zext(add)) expression.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
    const SCEV *ARStart = AR->getStart();
    const SCEV *ARStep = AR->getStepRecurrence(*SE);
    // For constant IVCount, avoid truncation.
    if (isa<SCEVConstant>(ARStart) && isa<SCEVConstant>(IVCount)) {
      const APInt &Start = cast<SCEVConstant>(ARStart)->getValue()->getValue();
      APInt Count = cast<SCEVConstant>(IVCount)->getValue()->getValue();
      // Note that the post-inc value of BackedgeTakenCount may have overflowed
      // above such that IVCount is now zero.
      if (IVCount != BackedgeTakenCount && Count == 0) {
        Count = APInt::getMaxValue(Count.getBitWidth()).zext(CmpIndVarSize);
        ++Count;
      }
      else
        Count = Count.zext(CmpIndVarSize);
      APInt NewLimit;
      if (cast<SCEVConstant>(ARStep)->getValue()->isNegative())
        NewLimit = Start - Count;
      else
        NewLimit = Start + Count;
      ExitCnt = ConstantInt::get(CmpIndVar->getType(), NewLimit);

      DEBUG(dbgs() << "  Widen RHS:\t" << *ExitCnt << "\n");
    } else {
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
    }
  }
  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make the old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.push_back(OrigCond);

  ++NumLFTR;
  Changed = true;
  return Cond;
}
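
// Illustrative example (hypothetical IR): when the exiting block is also the
// latch, LFTR replaces a test such as
//   %cmp = icmp slt i32 %iv, %n
//   br i1 %cmp, label %loop, label %exit
// with an equality test against the post-incremented IV and the materialized
// limit:
//   %exitcond = icmp ne i32 %iv.next, %limit
//   br i1 %exitcond, label %loop, label %exit
// The old compare is merely unhooked from the branch and left for the dead
// instruction sweep, per the dominance caveat above.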
//===----------------------------------------------------------------------===//
//  SinkUnusedInvariants. A late subpass to cleanup loop preheaders.
//===----------------------------------------------------------------------===//

/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return;

  Instruction *InsertPt = ExitBlock->getFirstInsertionPt();
  BasicBlock::iterator I = Preheader->getTerminator();
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop. Also don't
    // move instructions which might read memory, since the loop may modify
    // memory. Note that it's okay if the instruction might have undefined
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
    // block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip landingpad instructions.
    if (isa<LandingPadInst>(I))
      continue;

    // Don't sink alloca: we never want to sink static allocas out of the
    // entry block, and correctly sinking dynamic allocas requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
          PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = I;
    bool Done = false;

    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());

      if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    ToMove->moveBefore(InsertPt);
    if (Done) break;
    InsertPt = ToMove;
  }
}
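
// Illustrative example: if the preheader computes a value with no uses in the
// loop, e.g. the hypothetical
//   preheader:
//     %inv = mul i32 %a, %b     ; only used after the loop
// SinkUnusedInvariants moves %inv down into the unique exit block, so its
// live range no longer spans the loop body.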
//===----------------------------------------------------------------------===//
//  IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
  if (skipOptnoneFunction(L))
    return false;

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //    and we're in trouble if we can't find the induction variable even when
  //    we've manually inserted one.
  if (!L->isLoopSimplifyForm())
    return false;

  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  SE = &getAnalysis<ScalarEvolution>();
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
  TLI = TLIP ? &TLIP->getTLI() : nullptr;
  auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
  TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  DeadInsts.clear();
  Changed = false;

  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  RewriteNonIntegerIVs(L);

  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  SimplifyAndExtend(L, Rewriter, LPM);

  // Check to see if this loop has a computable loop-invariant execution count.
  // If so, this means that we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values of
  // the current expressions.
  //
  if (ReplaceExitValue != NeverRepl &&
      !isa<SCEVCouldNotCompute>(BackedgeTakenCount))
    RewriteLoopExitValues(L, Rewriter);

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it. We can currently only handle loops with a single exit.
  if (canExpandBackedgeTakenCount(L, SE, Rewriter) && needsLFTR(L, DT)) {
    PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT);
    if (IndVar) {
      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
      // pass that uses the SCEVExpander must do it. This does not work well for
      // loop passes because SCEVExpander makes assumptions about all loops,
      // while LoopPassManager only forces the current loop to be simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        (void)LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
                                        Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty())
    if (Instruction *Inst =
          dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
      RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  SinkUnusedInvariants(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
  // Check a post-condition.
  assert(L->isLCSSAForm(*DT) &&
         "Indvars did not leave the loop in lcssa form!");

  // Verify that LFTR and any other changes have not interfered with SCEV's
  // ability to compute trip count.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
  }
#endif
  return Changed;
}