InductiveRangeCheckElimination.cpp 52 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495
  1. //===-- InductiveRangeCheckElimination.cpp - ------------------------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. // The InductiveRangeCheckElimination pass splits a loop's iteration space into
  10. // three disjoint ranges. It does that in a way such that the loop running in
  11. // the middle loop provably does not need range checks. As an example, it will
  12. // convert
  13. //
  14. // len = < known positive >
  15. // for (i = 0; i < n; i++) {
  16. // if (0 <= i && i < len) {
  17. // do_something();
  18. // } else {
  19. // throw_out_of_bounds();
  20. // }
  21. // }
  22. //
  23. // to
  24. //
  25. // len = < known positive >
  26. // limit = smin(n, len)
  27. // // no first segment
  28. // for (i = 0; i < limit; i++) {
  29. // if (0 <= i && i < len) { // this check is fully redundant
  30. // do_something();
  31. // } else {
  32. // throw_out_of_bounds();
  33. // }
  34. // }
  35. // for (i = limit; i < n; i++) {
  36. // if (0 <= i && i < len) {
  37. // do_something();
  38. // } else {
  39. // throw_out_of_bounds();
  40. // }
  41. // }
  42. //===----------------------------------------------------------------------===//
  43. #include "llvm/ADT/Optional.h"
  44. #include "llvm/Analysis/BranchProbabilityInfo.h"
  45. #include "llvm/Analysis/InstructionSimplify.h"
  46. #include "llvm/Analysis/LoopInfo.h"
  47. #include "llvm/Analysis/LoopPass.h"
  48. #include "llvm/Analysis/ScalarEvolution.h"
  49. #include "llvm/Analysis/ScalarEvolutionExpander.h"
  50. #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  51. #include "llvm/Analysis/ValueTracking.h"
  52. #include "llvm/IR/Dominators.h"
  53. #include "llvm/IR/Function.h"
  54. #include "llvm/IR/IRBuilder.h"
  55. #include "llvm/IR/Instructions.h"
  56. #include "llvm/IR/Module.h"
  57. #include "llvm/IR/PatternMatch.h"
  58. #include "llvm/IR/ValueHandle.h"
  59. #include "llvm/IR/Verifier.h"
  60. #include "llvm/Pass.h"
  61. #include "llvm/Support/Debug.h"
  62. #include "llvm/Support/raw_ostream.h"
  63. #include "llvm/Transforms/Scalar.h"
  64. #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  65. #include "llvm/Transforms/Utils/Cloning.h"
  66. #include "llvm/Transforms/Utils/LoopUtils.h"
  67. #include "llvm/Transforms/Utils/SimplifyIndVar.h"
  68. #include "llvm/Transforms/Utils/UnrollLoop.h"
  69. #include <array>
using namespace llvm;

// Size threshold above which a loop is skipped as too large to be worth
// transforming.  NOTE(review): presumably measured in instructions; consumed
// by runOnLoop (not visible in this chunk) -- confirm.
static cl::opt<unsigned> LoopSizeCutoff("irce-loop-size-cutoff", cl::Hidden,
cl::init(64));

// Debug aid: print the loops this pass changed.
static cl::opt<bool> PrintChangedLoops("irce-print-changed-loops", cl::Hidden,
cl::init(false));

// Debug aid: print the inductive range checks recognized in each loop.
static cl::opt<bool> PrintRangeChecks("irce-print-range-checks", cl::Hidden,
cl::init(false));

// If the latch's exit edge is taken with probability greater than
// 1/MaxExitProbReciprocal, the loop is considered short-running and not
// profitable (see parseLoopStructure).
static cl::opt<int> MaxExitProbReciprocal("irce-max-exit-prob-reciprocal",
cl::Hidden, cl::init(10));

#define DEBUG_TYPE "irce"
  80. namespace {
  81. /// An inductive range check is conditional branch in a loop with
  82. ///
  83. /// 1. a very cold successor (i.e. the branch jumps to that successor very
  84. /// rarely)
  85. ///
  86. /// and
  87. ///
  88. /// 2. a condition that is provably true for some contiguous range of values
  89. /// taken by the containing loop's induction variable.
  90. ///
  91. class InductiveRangeCheck {
  92. // Classifies a range check
  93. enum RangeCheckKind : unsigned {
  94. // Range check of the form "0 <= I".
  95. RANGE_CHECK_LOWER = 1,
  96. // Range check of the form "I < L" where L is known positive.
  97. RANGE_CHECK_UPPER = 2,
  98. // The logical and of the RANGE_CHECK_LOWER and RANGE_CHECK_UPPER
  99. // conditions.
  100. RANGE_CHECK_BOTH = RANGE_CHECK_LOWER | RANGE_CHECK_UPPER,
  101. // Unrecognized range check condition.
  102. RANGE_CHECK_UNKNOWN = (unsigned)-1
  103. };
  104. static const char *rangeCheckKindToStr(RangeCheckKind);
  105. const SCEV *Offset;
  106. const SCEV *Scale;
  107. Value *Length;
  108. BranchInst *Branch;
  109. RangeCheckKind Kind;
  110. static RangeCheckKind parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
  111. ScalarEvolution &SE, Value *&Index,
  112. Value *&Length);
  113. static InductiveRangeCheck::RangeCheckKind
  114. parseRangeCheck(Loop *L, ScalarEvolution &SE, Value *Condition,
  115. const SCEV *&Index, Value *&UpperLimit);
  116. InductiveRangeCheck() :
  117. Offset(nullptr), Scale(nullptr), Length(nullptr), Branch(nullptr) { }
  118. public:
  119. const SCEV *getOffset() const { return Offset; }
  120. const SCEV *getScale() const { return Scale; }
  121. Value *getLength() const { return Length; }
  122. void print(raw_ostream &OS) const {
  123. OS << "InductiveRangeCheck:\n";
  124. OS << " Kind: " << rangeCheckKindToStr(Kind) << "\n";
  125. OS << " Offset: ";
  126. Offset->print(OS);
  127. OS << " Scale: ";
  128. Scale->print(OS);
  129. OS << " Length: ";
  130. if (Length)
  131. Length->print(OS);
  132. else
  133. OS << "(null)";
  134. OS << "\n Branch: ";
  135. getBranch()->print(OS);
  136. OS << "\n";
  137. }
  138. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  139. void dump() {
  140. print(dbgs());
  141. }
  142. #endif
  143. BranchInst *getBranch() const { return Branch; }
  144. /// Represents an signed integer range [Range.getBegin(), Range.getEnd()). If
  145. /// R.getEnd() sle R.getBegin(), then R denotes the empty range.
  146. class Range {
  147. const SCEV *Begin;
  148. const SCEV *End;
  149. public:
  150. Range(const SCEV *Begin, const SCEV *End) : Begin(Begin), End(End) {
  151. assert(Begin->getType() == End->getType() && "ill-typed range!");
  152. }
  153. Type *getType() const { return Begin->getType(); }
  154. const SCEV *getBegin() const { return Begin; }
  155. const SCEV *getEnd() const { return End; }
  156. };
  157. typedef SpecificBumpPtrAllocator<InductiveRangeCheck> AllocatorTy;
  158. /// This is the value the condition of the branch needs to evaluate to for the
  159. /// branch to take the hot successor (see (1) above).
  160. bool getPassingDirection() { return true; }
  161. /// Computes a range for the induction variable (IndVar) in which the range
  162. /// check is redundant and can be constant-folded away. The induction
  163. /// variable is not required to be the canonical {0,+,1} induction variable.
  164. Optional<Range> computeSafeIterationSpace(ScalarEvolution &SE,
  165. const SCEVAddRecExpr *IndVar,
  166. IRBuilder<> &B) const;
  167. /// Create an inductive range check out of BI if possible, else return
  168. /// nullptr.
  169. static InductiveRangeCheck *create(AllocatorTy &Alloc, BranchInst *BI,
  170. Loop *L, ScalarEvolution &SE,
  171. BranchProbabilityInfo &BPI);
  172. };
// The legacy-pass-manager driver for IRCE.  Per-loop entry point is
// runOnLoop (defined later in this file, outside this chunk).
class InductiveRangeCheckElimination : public LoopPass {
  // Arena for InductiveRangeCheck objects created during the run; freed
  // wholesale when the pass object is destroyed.
  InductiveRangeCheck::AllocatorTy Allocator;

public:
  static char ID;

  InductiveRangeCheckElimination() : LoopPass(ID) {
    initializeInductiveRangeCheckEliminationPass(
        *PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Loops must be in loop-simplify + LCSSA form (dedicated preheader/latch,
    // out-of-loop uses routed through PHIs); SCEV and branch probabilities
    // drive the recognition and profitability checks.
    AU.addRequired<LoopInfoWrapperPass>();
    AU.addRequiredID(LoopSimplifyID);
    AU.addRequiredID(LCSSAID);
    AU.addRequired<ScalarEvolution>();
    AU.addRequired<BranchProbabilityInfo>();
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
};
  190. char InductiveRangeCheckElimination::ID = 0;
  191. }
  192. INITIALIZE_PASS(InductiveRangeCheckElimination, "irce",
  193. "Inductive range check elimination", false, false)
  194. const char *InductiveRangeCheck::rangeCheckKindToStr(
  195. InductiveRangeCheck::RangeCheckKind RCK) {
  196. switch (RCK) {
  197. case InductiveRangeCheck::RANGE_CHECK_UNKNOWN:
  198. return "RANGE_CHECK_UNKNOWN";
  199. case InductiveRangeCheck::RANGE_CHECK_UPPER:
  200. return "RANGE_CHECK_UPPER";
  201. case InductiveRangeCheck::RANGE_CHECK_LOWER:
  202. return "RANGE_CHECK_LOWER";
  203. case InductiveRangeCheck::RANGE_CHECK_BOTH:
  204. return "RANGE_CHECK_BOTH";
  205. }
  206. llvm_unreachable("unknown range check type!");
  207. }
/// Parse a single ICmp instruction, `ICI`, into a range check.  If `ICI`
/// cannot be interpreted as a range check, return `RANGE_CHECK_UNKNOWN` and
/// leave `Index` and `Length` unchanged (callers pre-initialize them to
/// nullptr).  Otherwise set `Index` to the value being range checked, and set
/// `Length` to the upper limit `Index` is being range checked with if (and
/// only if) the range check type is stronger or equal to RANGE_CHECK_UPPER.
///
InductiveRangeCheck::RangeCheckKind
InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
                                         ScalarEvolution &SE, Value *&Index,
                                         Value *&Length) {

  // A value may serve as a range-check limit only if SCEV can reason about
  // it, it does not vary within `L', and it is provably non-negative.
  auto IsNonNegativeAndNotLoopVarying = [&SE, L](Value *V) {
    const SCEV *S = SE.getSCEV(V);
    if (isa<SCEVCouldNotCompute>(S))
      return false;

    return SE.getLoopDisposition(S, L) == ScalarEvolution::LoopInvariant &&
           SE.isKnownNonNegative(S);
  };

  using namespace llvm::PatternMatch;

  ICmpInst::Predicate Pred = ICI->getPredicate();
  Value *LHS = ICI->getOperand(0);
  Value *RHS = ICI->getOperand(1);

  // Each `sle'/`slt'/`ult' case canonicalizes the comparison by swapping the
  // operands, then deliberately falls through to the swapped-predicate case.
  switch (Pred) {
  default:
    return RANGE_CHECK_UNKNOWN;

  case ICmpInst::ICMP_SLE:
    std::swap(LHS, RHS);
  // fallthrough
  case ICmpInst::ICMP_SGE:
    // "I >= 0" (i.e. "0 <= I") is a pure lower-bound check.
    if (match(RHS, m_ConstantInt<0>())) {
      Index = LHS;
      return RANGE_CHECK_LOWER;
    }
    return RANGE_CHECK_UNKNOWN;

  case ICmpInst::ICMP_SLT:
    std::swap(LHS, RHS);
  // fallthrough
  case ICmpInst::ICMP_SGT:
    // "I > -1" is the same lower-bound check in sgt form.
    if (match(RHS, m_ConstantInt<-1>())) {
      Index = LHS;
      return RANGE_CHECK_LOWER;
    }

    // "L > I" with L loop-invariant and known non-negative is an upper-bound
    // check.
    if (IsNonNegativeAndNotLoopVarying(LHS)) {
      Index = RHS;
      Length = LHS;
      return RANGE_CHECK_UPPER;
    }
    return RANGE_CHECK_UNKNOWN;

  case ICmpInst::ICMP_ULT:
    std::swap(LHS, RHS);
  // fallthrough
  case ICmpInst::ICMP_UGT:
    // An unsigned "I u< L" with L known non-negative subsumes both the lower
    // and the upper bound check (a negative I would wrap to a huge unsigned
    // value and fail the comparison).
    if (IsNonNegativeAndNotLoopVarying(LHS)) {
      Index = RHS;
      Length = LHS;
      return RANGE_CHECK_BOTH;
    }
    return RANGE_CHECK_UNKNOWN;
  }

  llvm_unreachable("default clause returns!");
}
/// Parses an arbitrary condition into a range check.  `Length` is set only if
/// the range check is recognized to be `RANGE_CHECK_UPPER` or stronger.
InductiveRangeCheck::RangeCheckKind
InductiveRangeCheck::parseRangeCheck(Loop *L, ScalarEvolution &SE,
                                     Value *Condition, const SCEV *&Index,
                                     Value *&Length) {
  using namespace llvm::PatternMatch;

  Value *A = nullptr;
  Value *B = nullptr;

  // Case 1: "icmp && icmp" -- e.g. "0 <= I && I < L".  Both halves must parse
  // as range checks over the same index, and the combined kind is the union
  // of the two.
  if (match(Condition, m_And(m_Value(A), m_Value(B)))) {
    Value *IndexA = nullptr, *IndexB = nullptr;
    Value *LengthA = nullptr, *LengthB = nullptr;

    ICmpInst *ICmpA = dyn_cast<ICmpInst>(A), *ICmpB = dyn_cast<ICmpInst>(B);

    if (!ICmpA || !ICmpB)
      return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;

    auto RCKindA = parseRangeCheckICmp(L, ICmpA, SE, IndexA, LengthA);
    auto RCKindB = parseRangeCheckICmp(L, ICmpB, SE, IndexB, LengthB);

    if (RCKindA == InductiveRangeCheck::RANGE_CHECK_UNKNOWN ||
        RCKindB == InductiveRangeCheck::RANGE_CHECK_UNKNOWN)
      return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;

    // Both halves must check the same index value...
    if (IndexA != IndexB)
      return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;

    // ...and if both supplied an upper limit, the limits must agree.
    if (LengthA != nullptr && LengthB != nullptr && LengthA != LengthB)
      return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;

    Index = SE.getSCEV(IndexA);
    if (isa<SCEVCouldNotCompute>(Index))
      return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;

    // At most one half carries a length; take whichever is set (may be null
    // if neither half was an upper-bound check).
    Length = LengthA == nullptr ? LengthB : LengthA;

    return (InductiveRangeCheck::RangeCheckKind)(RCKindA | RCKindB);
  }

  // Case 2: a bare icmp, e.g. "I u< L".
  if (ICmpInst *ICI = dyn_cast<ICmpInst>(Condition)) {
    Value *IndexVal = nullptr;

    auto RCKind = parseRangeCheckICmp(L, ICI, SE, IndexVal, Length);

    if (RCKind == InductiveRangeCheck::RANGE_CHECK_UNKNOWN)
      return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;

    Index = SE.getSCEV(IndexVal);
    if (isa<SCEVCouldNotCompute>(Index))
      return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;

    return RCKind;
  }

  return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
}
// Factory: recognize `BI' as an inductive range check within `L' and build an
// InductiveRangeCheck in arena `A', or return nullptr if it does not qualify.
InductiveRangeCheck *
InductiveRangeCheck::create(InductiveRangeCheck::AllocatorTy &A, BranchInst *BI,
                            Loop *L, ScalarEvolution &SE,
                            BranchProbabilityInfo &BPI) {

  // Only conditional branches inside the loop body qualify; the latch
  // terminator is the loop's own exit test, not a range check.
  if (BI->isUnconditional() || BI->getParent() == L->getLoopLatch())
    return nullptr;

  // Successor 0 must be very hot (taken with probability >= 15/16), i.e. the
  // failing path of the check is very cold -- property (1) of the class doc.
  BranchProbability LikelyTaken(15, 16);

  if (BPI.getEdgeProbability(BI->getParent(), (unsigned) 0) < LikelyTaken)
    return nullptr;

  Value *Length = nullptr;
  const SCEV *IndexSCEV = nullptr;

  auto RCKind = InductiveRangeCheck::parseRangeCheck(L, SE, BI->getCondition(),
                                                     IndexSCEV, Length);

  if (RCKind == InductiveRangeCheck::RANGE_CHECK_UNKNOWN)
    return nullptr;

  assert(IndexSCEV && "contract with SplitRangeCheckCondition!");
  assert((!(RCKind & InductiveRangeCheck::RANGE_CHECK_UPPER) || Length) &&
         "contract with SplitRangeCheckCondition!");

  // The checked index must be an affine add recurrence of this loop so we can
  // decompose it into Offset + Scale * iteration.
  const SCEVAddRecExpr *IndexAddRec = dyn_cast<SCEVAddRecExpr>(IndexSCEV);
  bool IsAffineIndex =
      IndexAddRec && (IndexAddRec->getLoop() == L) && IndexAddRec->isAffine();

  if (!IsAffineIndex)
    return nullptr;

  InductiveRangeCheck *IRC = new (A.Allocate()) InductiveRangeCheck;
  IRC->Length = Length;
  IRC->Offset = IndexAddRec->getStart();
  IRC->Scale = IndexAddRec->getStepRecurrence(SE);
  IRC->Branch = BI;
  IRC->Kind = RCKind;
  return IRC;
}
  344. namespace {
// Keeps track of the structure of a loop.  This is similar to llvm::Loop,
// except that it is more lightweight and can track the state of a loop through
// changing and potentially invalid IR.  This structure also formalizes the
// kinds of loops we can deal with -- ones that have a single latch that is also
// an exiting block *and* have a canonical induction variable.
struct LoopStructure {
  // Label attached to blocks derived from this loop.  NOTE(review):
  // presumably used for naming cloned blocks (see createPreheader's Tag
  // parameter) -- confirm against the cloning code.
  const char *Tag;

  BasicBlock *Header;
  BasicBlock *Latch;

  // `Latch's terminator instruction is `LatchBr', and it's `LatchBrExitIdx'th
  // successor is `LatchExit', the exit block of the loop.
  BranchInst *LatchBr;
  BasicBlock *LatchExit;
  unsigned LatchBrExitIdx;

  // The post-increment induction variable, its entry value, and the value the
  // loop runs until.  NOTE(review): inferred from names and from
  // parseLoopStructure interpreting the latch icmp against the *next* IV
  // value -- confirm against the rest of parseLoopStructure.
  Value *IndVarNext;
  Value *IndVarStart;
  Value *LoopExitAt;

  // True if the induction variable steps by +1, false for -1 (these are the
  // only step values parseLoopStructure accepts).
  bool IndVarIncreasing;

  // Note: LatchBrExitIdx(-1) stores an all-ones sentinel in the unsigned
  // field, marking "not yet parsed".
  LoopStructure()
      : Tag(""), Header(nullptr), Latch(nullptr), LatchBr(nullptr),
        LatchExit(nullptr), LatchBrExitIdx(-1), IndVarNext(nullptr),
        IndVarStart(nullptr), LoopExitAt(nullptr), IndVarIncreasing(false) {}

  // Produce a copy of this structure with every IR entity translated through
  // `Map' -- used to describe a cloned loop in terms of the original's
  // structure.
  template <typename M> LoopStructure map(M Map) const {
    LoopStructure Result;
    Result.Tag = Tag;
    Result.Header = cast<BasicBlock>(Map(Header));
    Result.Latch = cast<BasicBlock>(Map(Latch));
    Result.LatchBr = cast<BranchInst>(Map(LatchBr));
    Result.LatchExit = cast<BasicBlock>(Map(LatchExit));
    Result.LatchBrExitIdx = LatchBrExitIdx;
    Result.IndVarNext = Map(IndVarNext);
    Result.IndVarStart = Map(IndVarStart);
    Result.LoopExitAt = Map(LoopExitAt);
    Result.IndVarIncreasing = IndVarIncreasing;
    return Result;
  }

  // Analyze `Loop' and return its LoopStructure, or None (with FailureReason
  // pointing at a static diagnostic string) if it is not a shape we handle.
  static Optional<LoopStructure> parseLoopStructure(ScalarEvolution &,
                                                    BranchProbabilityInfo &BPI,
                                                    Loop &,
                                                    const char *&);
};
/// This class is used to constrain loops to run within a given iteration space.
/// The algorithm this class implements is given a Loop and a range [Begin,
/// End).  The algorithm then tries to break out a "main loop" out of the loop
/// it is given in a way that the "main loop" runs with the induction variable
/// in a subset of [Begin, End).  The algorithm emits appropriate pre and post
/// loops to run any remaining iterations.  The pre loop runs any iterations in
/// which the induction variable is < Begin, and the post loop runs any
/// iterations in which the induction variable is >= End.
///
class LoopConstrainer {
  // The representation of a clone of the original loop we started out with.
  struct ClonedLoop {
    // The cloned blocks
    std::vector<BasicBlock *> Blocks;

    // `Map` maps values in the clonee into values in the cloned version
    ValueToValueMapTy Map;

    // An instance of `LoopStructure` for the cloned loop
    LoopStructure Structure;
  };

  // Result of rewriting the range of a loop.  See changeIterationSpaceEnd for
  // more details on what these fields mean.
  struct RewrittenRangeInfo {
    BasicBlock *PseudoExit;
    BasicBlock *ExitSelector;
    std::vector<PHINode *> PHIValuesAtPseudoExit;
    PHINode *IndVarEnd;

    RewrittenRangeInfo()
        : PseudoExit(nullptr), ExitSelector(nullptr), IndVarEnd(nullptr) {}
  };

  // Calculated subranges we restrict the iteration space of the main loop to.
  // See the implementation of `calculateSubRanges' for more details on how
  // these fields are computed.  `LowLimit` is None if there is no restriction
  // on low end of the restricted iteration space of the main loop.  `HighLimit`
  // is None if there is no restriction on high end of the restricted iteration
  // space of the main loop.
  struct SubRanges {
    Optional<const SCEV *> LowLimit;
    Optional<const SCEV *> HighLimit;
  };

  // A utility function that does a `replaceUsesOfWith' on the incoming block
  // set of a `PHINode' -- replaces instances of `Block' in the `PHINode's
  // incoming block list with `ReplaceBy'.
  static void replacePHIBlock(PHINode *PN, BasicBlock *Block,
                              BasicBlock *ReplaceBy);

  // Compute a safe set of limits for the main loop to run in -- effectively the
  // intersection of `Range' and the iteration space of the original loop.
  // Return None if unable to compute the set of subranges.
  //
  Optional<SubRanges> calculateSubRanges() const;

  // Clone `OriginalLoop' and return the result in CLResult.  The IR after
  // running `cloneLoop' is well formed except for the PHI nodes in CLResult --
  // the PHI nodes say that there is an incoming edge from `OriginalPreheader`
  // but there is no such edge.
  //
  void cloneLoop(ClonedLoop &CLResult, const char *Tag) const;

  // Rewrite the iteration space of the loop denoted by (LS, Preheader).  The
  // iteration space of the rewritten loop ends at ExitLoopAt.  The start of the
  // iteration space is not changed.  `ExitLoopAt' is assumed to be slt
  // `OriginalHeaderCount'.
  //
  // If there are iterations left to execute, control is made to jump to
  // `ContinuationBlock', otherwise they take the normal loop exit.  The
  // returned `RewrittenRangeInfo' object is populated as follows:
  //
  //  .PseudoExit is a basic block that unconditionally branches to
  //  `ContinuationBlock'.
  //
  //  .ExitSelector is a basic block that decides, on exit from the loop,
  //  whether to branch to the "true" exit or to `PseudoExit'.
  //
  //  .PHIValuesAtPseudoExit are PHINodes in `PseudoExit' that compute the value
  //  for each PHINode in the loop header on taking the pseudo exit.
  //
  // After changeIterationSpaceEnd, `Preheader' is no longer a legitimate
  // preheader because it is made to branch to the loop header only
  // conditionally.
  //
  RewrittenRangeInfo
  changeIterationSpaceEnd(const LoopStructure &LS, BasicBlock *Preheader,
                          Value *ExitLoopAt,
                          BasicBlock *ContinuationBlock) const;

  // The loop denoted by `LS' has `OldPreheader' as its preheader.  This
  // function creates a new preheader for `LS' and returns it.
  //
  BasicBlock *createPreheader(const LoopStructure &LS, BasicBlock *OldPreheader,
                              const char *Tag) const;

  // `ContinuationBlockAndPreheader' was the continuation block for some call to
  // `changeIterationSpaceEnd' and is the preheader to the loop denoted by `LS'.
  // This function rewrites the PHI nodes in `LS.Header' to start with the
  // correct value.
  void rewriteIncomingValuesForPHIs(
      LoopStructure &LS, BasicBlock *ContinuationBlockAndPreheader,
      const LoopConstrainer::RewrittenRangeInfo &RRI) const;

  // Even though we do not preserve any passes at this time, we at least need to
  // keep the parent loop structure consistent.  The `LPPassManager' seems to
  // verify this after running a loop pass.  This function adds the list of
  // blocks denoted by BBs to this loops parent loop if required.
  void addToParentLoopIfNeeded(ArrayRef<BasicBlock *> BBs);

  // Some global state.
  Function &F;           // Function containing the loop being constrained.
  LLVMContext &Ctx;      // Context of `F', cached for IR construction.
  ScalarEvolution &SE;

  // Information about the original loop we started out with.
  Loop &OriginalLoop;
  LoopInfo &OriginalLoopInfo;
  const SCEV *LatchTakenCount;
  BasicBlock *OriginalPreheader;

  // The preheader of the main loop.  This may or may not be different from
  // `OriginalPreheader'.
  BasicBlock *MainLoopPreheader;

  // The range we need to run the main loop in.
  InductiveRangeCheck::Range Range;

  // The structure of the main loop (see comment at the beginning of this class
  // for a definition)
  LoopStructure MainLoopStructure;

public:
  // All analysis state is captured by reference; `LatchTakenCount',
  // `OriginalPreheader' and `MainLoopPreheader' are filled in by run().
  LoopConstrainer(Loop &L, LoopInfo &LI, const LoopStructure &LS,
                  ScalarEvolution &SE, InductiveRangeCheck::Range R)
      : F(*L.getHeader()->getParent()), Ctx(L.getHeader()->getContext()),
        SE(SE), OriginalLoop(L), OriginalLoopInfo(LI), LatchTakenCount(nullptr),
        OriginalPreheader(nullptr), MainLoopPreheader(nullptr), Range(R),
        MainLoopStructure(LS) {}

  // Entry point for the algorithm.  Returns true on success.
  bool run();
};
  511. }
  512. void LoopConstrainer::replacePHIBlock(PHINode *PN, BasicBlock *Block,
  513. BasicBlock *ReplaceBy) {
  514. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
  515. if (PN->getIncomingBlock(i) == Block)
  516. PN->setIncomingBlock(i, ReplaceBy);
  517. }
  518. static bool CanBeSMax(ScalarEvolution &SE, const SCEV *S) {
  519. APInt SMax =
  520. APInt::getSignedMaxValue(cast<IntegerType>(S->getType())->getBitWidth());
  521. return SE.getSignedRange(S).contains(SMax) &&
  522. SE.getUnsignedRange(S).contains(SMax);
  523. }
  524. static bool CanBeSMin(ScalarEvolution &SE, const SCEV *S) {
  525. APInt SMin =
  526. APInt::getSignedMinValue(cast<IntegerType>(S->getType())->getBitWidth());
  527. return SE.getSignedRange(S).contains(SMin) &&
  528. SE.getUnsignedRange(S).contains(SMin);
  529. }
  530. Optional<LoopStructure>
  531. LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BPI,
  532. Loop &L, const char *&FailureReason) {
  533. assert(L.isLoopSimplifyForm() && "should follow from addRequired<>");
  534. BasicBlock *Latch = L.getLoopLatch();
  535. if (!L.isLoopExiting(Latch)) {
  536. FailureReason = "no loop latch";
  537. return None;
  538. }
  539. BasicBlock *Header = L.getHeader();
  540. BasicBlock *Preheader = L.getLoopPreheader();
  541. if (!Preheader) {
  542. FailureReason = "no preheader";
  543. return None;
  544. }
  545. BranchInst *LatchBr = dyn_cast<BranchInst>(&*Latch->rbegin());
  546. if (!LatchBr || LatchBr->isUnconditional()) {
  547. FailureReason = "latch terminator not conditional branch";
  548. return None;
  549. }
  550. unsigned LatchBrExitIdx = LatchBr->getSuccessor(0) == Header ? 1 : 0;
  551. BranchProbability ExitProbability =
  552. BPI.getEdgeProbability(LatchBr->getParent(), LatchBrExitIdx);
  553. if (ExitProbability > BranchProbability(1, MaxExitProbReciprocal)) {
  554. FailureReason = "short running loop, not profitable";
  555. return None;
  556. }
  557. ICmpInst *ICI = dyn_cast<ICmpInst>(LatchBr->getCondition());
  558. if (!ICI || !isa<IntegerType>(ICI->getOperand(0)->getType())) {
  559. FailureReason = "latch terminator branch not conditional on integral icmp";
  560. return None;
  561. }
  562. const SCEV *LatchCount = SE.getExitCount(&L, Latch);
  563. if (isa<SCEVCouldNotCompute>(LatchCount)) {
  564. FailureReason = "could not compute latch count";
  565. return None;
  566. }
  567. ICmpInst::Predicate Pred = ICI->getPredicate();
  568. Value *LeftValue = ICI->getOperand(0);
  569. const SCEV *LeftSCEV = SE.getSCEV(LeftValue);
  570. IntegerType *IndVarTy = cast<IntegerType>(LeftValue->getType());
  571. Value *RightValue = ICI->getOperand(1);
  572. const SCEV *RightSCEV = SE.getSCEV(RightValue);
  573. // We canonicalize `ICI` such that `LeftSCEV` is an add recurrence.
  574. if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
  575. if (isa<SCEVAddRecExpr>(RightSCEV)) {
  576. std::swap(LeftSCEV, RightSCEV);
  577. std::swap(LeftValue, RightValue);
  578. Pred = ICmpInst::getSwappedPredicate(Pred);
  579. } else {
  580. FailureReason = "no add recurrences in the icmp";
  581. return None;
  582. }
  583. }
  584. auto HasNoSignedWrap = [&](const SCEVAddRecExpr *AR) {
  585. if (AR->getNoWrapFlags(SCEV::FlagNSW))
  586. return true;
  587. IntegerType *Ty = cast<IntegerType>(AR->getType());
  588. IntegerType *WideTy =
  589. IntegerType::get(Ty->getContext(), Ty->getBitWidth() * 2);
  590. const SCEVAddRecExpr *ExtendAfterOp =
  591. dyn_cast<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
  592. if (ExtendAfterOp) {
  593. const SCEV *ExtendedStart = SE.getSignExtendExpr(AR->getStart(), WideTy);
  594. const SCEV *ExtendedStep =
  595. SE.getSignExtendExpr(AR->getStepRecurrence(SE), WideTy);
  596. bool NoSignedWrap = ExtendAfterOp->getStart() == ExtendedStart &&
  597. ExtendAfterOp->getStepRecurrence(SE) == ExtendedStep;
  598. if (NoSignedWrap)
  599. return true;
  600. }
  601. // We may have proved this when computing the sign extension above.
  602. return AR->getNoWrapFlags(SCEV::FlagNSW) != SCEV::FlagAnyWrap;
  603. };
  604. auto IsInductionVar = [&](const SCEVAddRecExpr *AR, bool &IsIncreasing) {
  605. if (!AR->isAffine())
  606. return false;
  607. // Currently we only work with induction variables that have been proved to
  608. // not wrap. This restriction can potentially be lifted in the future.
  609. if (!HasNoSignedWrap(AR))
  610. return false;
  611. if (const SCEVConstant *StepExpr =
  612. dyn_cast<SCEVConstant>(AR->getStepRecurrence(SE))) {
  613. ConstantInt *StepCI = StepExpr->getValue();
  614. if (StepCI->isOne() || StepCI->isMinusOne()) {
  615. IsIncreasing = StepCI->isOne();
  616. return true;
  617. }
  618. }
  619. return false;
  620. };
  621. // `ICI` is interpreted as taking the backedge if the *next* value of the
  622. // induction variable satisfies some constraint.
  623. const SCEVAddRecExpr *IndVarNext = cast<SCEVAddRecExpr>(LeftSCEV);
  624. bool IsIncreasing = false;
  625. if (!IsInductionVar(IndVarNext, IsIncreasing)) {
  626. FailureReason = "LHS in icmp not induction variable";
  627. return None;
  628. }
  629. ConstantInt *One = ConstantInt::get(IndVarTy, 1);
  630. // TODO: generalize the predicates here to also match their unsigned variants.
  631. if (IsIncreasing) {
  632. bool FoundExpectedPred =
  633. (Pred == ICmpInst::ICMP_SLT && LatchBrExitIdx == 1) ||
  634. (Pred == ICmpInst::ICMP_SGT && LatchBrExitIdx == 0);
  635. if (!FoundExpectedPred) {
  636. FailureReason = "expected icmp slt semantically, found something else";
  637. return None;
  638. }
  639. if (LatchBrExitIdx == 0) {
  640. if (CanBeSMax(SE, RightSCEV)) {
  641. // TODO: this restriction is easily removable -- we just have to
  642. // remember that the icmp was an slt and not an sle.
  643. FailureReason = "limit may overflow when coercing sle to slt";
  644. return None;
  645. }
  646. IRBuilder<> B(&*Preheader->rbegin());
  647. RightValue = B.CreateAdd(RightValue, One);
  648. }
  649. } else {
  650. bool FoundExpectedPred =
  651. (Pred == ICmpInst::ICMP_SGT && LatchBrExitIdx == 1) ||
  652. (Pred == ICmpInst::ICMP_SLT && LatchBrExitIdx == 0);
  653. if (!FoundExpectedPred) {
  654. FailureReason = "expected icmp sgt semantically, found something else";
  655. return None;
  656. }
  657. if (LatchBrExitIdx == 0) {
  658. if (CanBeSMin(SE, RightSCEV)) {
  659. // TODO: this restriction is easily removable -- we just have to
  660. // remember that the icmp was an sgt and not an sge.
  661. FailureReason = "limit may overflow when coercing sge to sgt";
  662. return None;
  663. }
  664. IRBuilder<> B(&*Preheader->rbegin());
  665. RightValue = B.CreateSub(RightValue, One);
  666. }
  667. }
  668. const SCEV *StartNext = IndVarNext->getStart();
  669. const SCEV *Addend = SE.getNegativeSCEV(IndVarNext->getStepRecurrence(SE));
  670. const SCEV *IndVarStart = SE.getAddExpr(StartNext, Addend);
  671. BasicBlock *LatchExit = LatchBr->getSuccessor(LatchBrExitIdx);
  672. assert(SE.getLoopDisposition(LatchCount, &L) ==
  673. ScalarEvolution::LoopInvariant &&
  674. "loop variant exit count doesn't make sense!");
  675. assert(!L.contains(LatchExit) && "expected an exit block!");
  676. const DataLayout &DL = Preheader->getModule()->getDataLayout();
  677. Value *IndVarStartV =
  678. SCEVExpander(SE, DL, "irce")
  679. .expandCodeFor(IndVarStart, IndVarTy, &*Preheader->rbegin());
  680. IndVarStartV->setName("indvar.start");
  681. LoopStructure Result;
  682. Result.Tag = "main";
  683. Result.Header = Header;
  684. Result.Latch = Latch;
  685. Result.LatchBr = LatchBr;
  686. Result.LatchExit = LatchExit;
  687. Result.LatchBrExitIdx = LatchBrExitIdx;
  688. Result.IndVarStart = IndVarStartV;
  689. Result.IndVarNext = LeftValue;
  690. Result.IndVarIncreasing = IsIncreasing;
  691. Result.LoopExitAt = RightValue;
  692. FailureReason = nullptr;
  693. return Result;
  694. }
  695. Optional<LoopConstrainer::SubRanges>
  696. LoopConstrainer::calculateSubRanges() const {
  697. IntegerType *Ty = cast<IntegerType>(LatchTakenCount->getType());
  698. if (Range.getType() != Ty)
  699. return None;
  700. LoopConstrainer::SubRanges Result;
  701. // I think we can be more aggressive here and make this nuw / nsw if the
  702. // addition that feeds into the icmp for the latch's terminating branch is nuw
  703. // / nsw. In any case, a wrapping 2's complement addition is safe.
  704. ConstantInt *One = ConstantInt::get(Ty, 1);
  705. const SCEV *Start = SE.getSCEV(MainLoopStructure.IndVarStart);
  706. const SCEV *End = SE.getSCEV(MainLoopStructure.LoopExitAt);
  707. bool Increasing = MainLoopStructure.IndVarIncreasing;
  708. // We compute `Smallest` and `Greatest` such that [Smallest, Greatest) is the
  709. // range of values the induction variable takes.
  710. const SCEV *Smallest = nullptr, *Greatest = nullptr;
  711. if (Increasing) {
  712. Smallest = Start;
  713. Greatest = End;
  714. } else {
  715. // These two computations may sign-overflow. Here is why that is okay:
  716. //
  717. // We know that the induction variable does not sign-overflow on any
  718. // iteration except the last one, and it starts at `Start` and ends at
  719. // `End`, decrementing by one every time.
  720. //
  721. // * if `Smallest` sign-overflows we know `End` is `INT_SMAX`. Since the
  722. // induction variable is decreasing we know that that the smallest value
  723. // the loop body is actually executed with is `INT_SMIN` == `Smallest`.
  724. //
  725. // * if `Greatest` sign-overflows, we know it can only be `INT_SMIN`. In
  726. // that case, `Clamp` will always return `Smallest` and
  727. // [`Result.LowLimit`, `Result.HighLimit`) = [`Smallest`, `Smallest`)
  728. // will be an empty range. Returning an empty range is always safe.
  729. //
  730. Smallest = SE.getAddExpr(End, SE.getSCEV(One));
  731. Greatest = SE.getAddExpr(Start, SE.getSCEV(One));
  732. }
  733. auto Clamp = [this, Smallest, Greatest](const SCEV *S) {
  734. return SE.getSMaxExpr(Smallest, SE.getSMinExpr(Greatest, S));
  735. };
  736. // In some cases we can prove that we don't need a pre or post loop
  737. bool ProvablyNoPreloop =
  738. SE.isKnownPredicate(ICmpInst::ICMP_SLE, Range.getBegin(), Smallest);
  739. if (!ProvablyNoPreloop)
  740. Result.LowLimit = Clamp(Range.getBegin());
  741. bool ProvablyNoPostLoop =
  742. SE.isKnownPredicate(ICmpInst::ICMP_SLE, Greatest, Range.getEnd());
  743. if (!ProvablyNoPostLoop)
  744. Result.HighLimit = Clamp(Range.getEnd());
  745. return Result;
  746. }
// Clone every block of the original loop into `Result', tagging the clones
// with `Tag'.  `Result.Map' maps each original value to its clone;
// `Result.Structure' is the LoopStructure re-expressed in terms of the cloned
// values.  Exit-block PHI nodes are patched to account for the new (cloned)
// predecessors.
void LoopConstrainer::cloneLoop(LoopConstrainer::ClonedLoop &Result,
                                const char *Tag) const {
  // Pass 1: clone the blocks and record the original->clone mapping.  The
  // cloned instructions still reference original values at this point.
  for (BasicBlock *BB : OriginalLoop.getBlocks()) {
    BasicBlock *Clone = CloneBasicBlock(BB, Result.Map, Twine(".") + Tag, &F);
    Result.Blocks.push_back(Clone);
    Result.Map[BB] = Clone;
  }

  // Values with no entry in the map (defined outside the loop) map to
  // themselves.
  auto GetClonedValue = [&Result](Value *V) {
    assert(V && "null values not in domain!");
    auto It = Result.Map.find(V);
    if (It == Result.Map.end())
      return V;
    return static_cast<Value *>(It->second);
  };

  Result.Structure = MainLoopStructure.map(GetClonedValue);
  Result.Structure.Tag = Tag;

  // Pass 2: remap every cloned instruction's operands to the cloned values,
  // then fix up the loop's exit blocks.
  for (unsigned i = 0, e = Result.Blocks.size(); i != e; ++i) {
    BasicBlock *ClonedBB = Result.Blocks[i];
    BasicBlock *OriginalBB = OriginalLoop.getBlocks()[i];

    assert(Result.Map[OriginalBB] == ClonedBB && "invariant!");

    for (Instruction &I : *ClonedBB)
      RemapInstruction(&I, Result.Map,
                       RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);

    // Exit blocks will now have one more predecessor and their PHI nodes need
    // to be edited to reflect that.  No phi nodes need to be introduced
    // because the loop is in LCSSA.
    for (auto SBBI = succ_begin(OriginalBB), SBBE = succ_end(OriginalBB);
         SBBI != SBBE; ++SBBI) {
      if (OriginalLoop.contains(*SBBI))
        continue; // not an exit block

      // PHIs are grouped at the top of the block; stop at the first non-PHI.
      for (Instruction &I : **SBBI) {
        if (!isa<PHINode>(&I))
          break;

        PHINode *PN = cast<PHINode>(&I);
        // The clone contributes the cloned equivalent of whatever the
        // original block contributed along this exit edge.
        Value *OldIncoming = PN->getIncomingValueForBlock(OriginalBB);
        PN->addIncoming(GetClonedValue(OldIncoming), ClonedBB);
      }
    }
  }
}
// Rewrite the iteration space of the loop described by `LS' so that it stops
// at `ExitSubloopAt' and, if the original bound has not been reached yet,
// transfers control to `ContinuationBlock' instead of the loop's real exit.
// Returns the blocks/PHIs created in the process (see RewrittenRangeInfo).
LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
    const LoopStructure &LS, BasicBlock *Preheader, Value *ExitSubloopAt,
    BasicBlock *ContinuationBlock) const {
  // We start with a loop with a single latch:
  //
  //    +--------------------+
  //    |                    |
  //    |     preheader      |
  //    |                    |
  //    +--------+-----------+
  //             |      ----------------\
  //             |     /                |
  //    +--------v----v------+          |
  //    |                    |          |
  //    |      header        |          |
  //    |                    |          |
  //    +--------------------+          |
  //                                    |
  //            .....                   |
  //                                    |
  //    +--------------------+          |
  //    |                    |          |
  //    |       latch        >----------/
  //    |                    |
  //    +-------v------------+
  //            |
  //            |
  //            |   +--------------------+
  //            |   |                    |
  //            +--->  original exit     |
  //                |                    |
  //                +--------------------+
  //
  // We change the control flow to look like
  //
  //    +--------------------+
  //    |                    |
  //    |     preheader      >-------------------------+
  //    |                    |                         |
  //    +--------v-----------+                         |
  //             |    /-------------+                  |
  //             |   /              |                  |
  //    +--------v--v--------+      |                  |
  //    |                    |      |                  |
  //    |      header        |      |   +--------+     |
  //    |                    |      |   |        |     |
  //    +--------------------+      |   |  +-----v-----v-----------+
  //                                |   |  |                       |
  //                                |   |  |     .pseudo.exit      |
  //                                |   |  |                       |
  //                                |   |  +-----------v-----------+
  //                                |   |              |
  //            .....               |   |              |
  //                                |   |     +--------v-------------+
  //    +--------------------+      |   |     |                      |
  //    |                    |      |   |     |  ContinuationBlock   |
  //    |       latch        >------+   |     |                      |
  //    |                    |          |     +----------------------+
  //    +---------v----------+          |
  //              |                     |
  //              |                     |
  //              |   +---------------^-----+
  //              |   |                     |
  //              +----->  .exit.selector   |
  //                  |                     |
  //                  +----------v----------+
  //                             |
  //    +--------------------+   |
  //    |                    |   |
  //    |   original exit    <---+
  //    |                    |
  //    +--------------------+
  //
  RewrittenRangeInfo RRI;

  // Create the two new blocks immediately after the latch so the layout stays
  // readable.
  auto BBInsertLocation = std::next(Function::iterator(LS.Latch));
  RRI.ExitSelector = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".exit.selector",
                                        &F, BBInsertLocation);
  RRI.PseudoExit = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".pseudo.exit", &F,
                                      BBInsertLocation);

  BranchInst *PreheaderJump = cast<BranchInst>(&*Preheader->rbegin());
  bool Increasing = LS.IndVarIncreasing;

  IRBuilder<> B(PreheaderJump);

  // EnterLoopCond - is it okay to start executing this `LS'?  If not, go
  // straight to the pseudo exit (which forwards to ContinuationBlock).
  Value *EnterLoopCond = Increasing
                             ? B.CreateICmpSLT(LS.IndVarStart, ExitSubloopAt)
                             : B.CreateICmpSGT(LS.IndVarStart, ExitSubloopAt);

  B.CreateCondBr(EnterLoopCond, LS.Header, RRI.PseudoExit);
  PreheaderJump->eraseFromParent();

  // The latch now exits to the exit selector instead of the real exit, and
  // its condition compares against the new (possibly tighter) bound.
  LS.LatchBr->setSuccessor(LS.LatchBrExitIdx, RRI.ExitSelector);
  B.SetInsertPoint(LS.LatchBr);
  Value *TakeBackedgeLoopCond =
      Increasing ? B.CreateICmpSLT(LS.IndVarNext, ExitSubloopAt)
                 : B.CreateICmpSGT(LS.IndVarNext, ExitSubloopAt);
  // If the exit was successor 0, the branch condition's sense is inverted.
  Value *CondForBranch = LS.LatchBrExitIdx == 1
                             ? TakeBackedgeLoopCond
                             : B.CreateNot(TakeBackedgeLoopCond);

  LS.LatchBr->setCondition(CondForBranch);

  B.SetInsertPoint(RRI.ExitSelector);

  // IterationsLeft - are there any more iterations left, given the original
  // upper bound on the induction variable?  If not, we branch to the "real"
  // exit.
  Value *IterationsLeft = Increasing
                              ? B.CreateICmpSLT(LS.IndVarNext, LS.LoopExitAt)
                              : B.CreateICmpSGT(LS.IndVarNext, LS.LoopExitAt);
  B.CreateCondBr(IterationsLeft, RRI.PseudoExit, LS.LatchExit);

  BranchInst *BranchToContinuation =
      BranchInst::Create(ContinuationBlock, RRI.PseudoExit);

  // We emit PHI nodes into `RRI.PseudoExit' that compute the "latest" value
  // of each of the PHI nodes in the loop header.  This feeds into the initial
  // value of the same PHI nodes if/when we continue execution.
  for (Instruction &I : *LS.Header) {
    if (!isa<PHINode>(&I))
      break;

    PHINode *PN = cast<PHINode>(&I);

    PHINode *NewPHI = PHINode::Create(PN->getType(), 2, PN->getName() + ".copy",
                                      BranchToContinuation);

    // Two ways into the pseudo exit: straight from the preheader (loop never
    // entered) or via the exit selector (loop ran to its new bound).
    NewPHI->addIncoming(PN->getIncomingValueForBlock(Preheader), Preheader);
    NewPHI->addIncoming(PN->getIncomingValueForBlock(LS.Latch),
                        RRI.ExitSelector);
    RRI.PHIValuesAtPseudoExit.push_back(NewPHI);
  }

  // Same "latest value" treatment for the induction variable itself.
  RRI.IndVarEnd = PHINode::Create(LS.IndVarNext->getType(), 2, "indvar.end",
                                  BranchToContinuation);
  RRI.IndVarEnd->addIncoming(LS.IndVarStart, Preheader);
  RRI.IndVarEnd->addIncoming(LS.IndVarNext, RRI.ExitSelector);

  // The latch exit now has a branch from `RRI.ExitSelector' instead of
  // `LS.Latch'.  The PHI nodes need to be updated to reflect that.
  for (Instruction &I : *LS.LatchExit) {
    if (PHINode *PN = dyn_cast<PHINode>(&I))
      replacePHIBlock(PN, LS.Latch, RRI.ExitSelector);
    else
      break;
  }

  return RRI;
}
  923. void LoopConstrainer::rewriteIncomingValuesForPHIs(
  924. LoopStructure &LS, BasicBlock *ContinuationBlock,
  925. const LoopConstrainer::RewrittenRangeInfo &RRI) const {
  926. unsigned PHIIndex = 0;
  927. for (Instruction &I : *LS.Header) {
  928. if (!isa<PHINode>(&I))
  929. break;
  930. PHINode *PN = cast<PHINode>(&I);
  931. for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i)
  932. if (PN->getIncomingBlock(i) == ContinuationBlock)
  933. PN->setIncomingValue(i, RRI.PHIValuesAtPseudoExit[PHIIndex++]);
  934. }
  935. LS.IndVarStart = RRI.IndVarEnd;
  936. }
  937. BasicBlock *LoopConstrainer::createPreheader(const LoopStructure &LS,
  938. BasicBlock *OldPreheader,
  939. const char *Tag) const {
  940. BasicBlock *Preheader = BasicBlock::Create(Ctx, Tag, &F, LS.Header);
  941. BranchInst::Create(LS.Header, Preheader);
  942. for (Instruction &I : *LS.Header) {
  943. if (!isa<PHINode>(&I))
  944. break;
  945. PHINode *PN = cast<PHINode>(&I);
  946. for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i)
  947. replacePHIBlock(PN, OldPreheader, Preheader);
  948. }
  949. return Preheader;
  950. }
  951. void LoopConstrainer::addToParentLoopIfNeeded(ArrayRef<BasicBlock *> BBs) {
  952. Loop *ParentLoop = OriginalLoop.getParentLoop();
  953. if (!ParentLoop)
  954. return;
  955. for (BasicBlock *BB : BBs)
  956. ParentLoop->addBasicBlockToLoop(BB, OriginalLoopInfo);
  957. }
// Top-level driver: split the original loop into an optional pre loop, the
// constrained main loop, and an optional post loop, so that the main loop
// iterates only within the safe `Range'.  Returns true if the IR was changed.
bool LoopConstrainer::run() {
  BasicBlock *Preheader = nullptr;
  LatchTakenCount = SE.getExitCount(&OriginalLoop, MainLoopStructure.Latch);
  Preheader = OriginalLoop.getLoopPreheader();
  assert(!isa<SCEVCouldNotCompute>(LatchTakenCount) && Preheader != nullptr &&
         "preconditions!");

  OriginalPreheader = Preheader;
  MainLoopPreheader = Preheader;

  // Figure out where the main loop must start/stop relative to `Range'.
  Optional<SubRanges> MaybeSR = calculateSubRanges();
  if (!MaybeSR.hasValue()) {
    DEBUG(dbgs() << "irce: could not compute subranges\n");
    return false;
  }
  SubRanges SR = MaybeSR.getValue();

  bool Increasing = MainLoopStructure.IndVarIncreasing;
  IntegerType *IVTy =
      cast<IntegerType>(MainLoopStructure.IndVarNext->getType());

  SCEVExpander Expander(SE, F.getParent()->getDataLayout(), "irce");
  Instruction *InsertPt = OriginalPreheader->getTerminator();

  // It would have been better to make `PreLoop' and `PostLoop'
  // `Optional<ClonedLoop>'s, but `ValueToValueMapTy' does not have a copy
  // constructor.
  ClonedLoop PreLoop, PostLoop;
  // For a decreasing IV the roles of the low/high limits are swapped.
  bool NeedsPreLoop =
      Increasing ? SR.LowLimit.hasValue() : SR.HighLimit.hasValue();
  bool NeedsPostLoop =
      Increasing ? SR.HighLimit.hasValue() : SR.LowLimit.hasValue();

  Value *ExitPreLoopAt = nullptr;
  Value *ExitMainLoopAt = nullptr;
  const SCEVConstant *MinusOneS =
      cast<SCEVConstant>(SE.getConstant(IVTy, -1, true /* isSigned */));

  // Materialize the bound at which the pre loop hands off to the main loop.
  if (NeedsPreLoop) {
    const SCEV *ExitPreLoopAtSCEV = nullptr;

    if (Increasing)
      ExitPreLoopAtSCEV = *SR.LowLimit;
    else {
      // Decreasing case: the limit is HighLimit - 1, which must not wrap.
      if (CanBeSMin(SE, *SR.HighLimit)) {
        DEBUG(dbgs() << "irce: could not prove no-overflow when computing "
                     << "preloop exit limit. HighLimit = " << *(*SR.HighLimit)
                     << "\n");
        return false;
      }
      ExitPreLoopAtSCEV = SE.getAddExpr(*SR.HighLimit, MinusOneS);
    }

    ExitPreLoopAt = Expander.expandCodeFor(ExitPreLoopAtSCEV, IVTy, InsertPt);
    ExitPreLoopAt->setName("exit.preloop.at");
  }

  // Materialize the bound at which the main loop hands off to the post loop.
  if (NeedsPostLoop) {
    const SCEV *ExitMainLoopAtSCEV = nullptr;

    if (Increasing)
      ExitMainLoopAtSCEV = *SR.HighLimit;
    else {
      // Decreasing case: the limit is LowLimit - 1, which must not wrap.
      if (CanBeSMin(SE, *SR.LowLimit)) {
        DEBUG(dbgs() << "irce: could not prove no-overflow when computing "
                     << "mainloop exit limit. LowLimit = " << *(*SR.LowLimit)
                     << "\n");
        return false;
      }
      ExitMainLoopAtSCEV = SE.getAddExpr(*SR.LowLimit, MinusOneS);
    }

    ExitMainLoopAt = Expander.expandCodeFor(ExitMainLoopAtSCEV, IVTy, InsertPt);
    ExitMainLoopAt->setName("exit.mainloop.at");
  }

  // We clone these ahead of time so that we don't have to deal with changing
  // and temporarily invalid IR as we transform the loops.
  if (NeedsPreLoop)
    cloneLoop(PreLoop, "preloop");
  if (NeedsPostLoop)
    cloneLoop(PostLoop, "postloop");

  RewrittenRangeInfo PreLoopRRI;

  // Splice in the pre loop: old preheader -> pre loop -> new main loop
  // preheader, and make the main loop's header PHIs pick up the pre loop's
  // final values.
  if (NeedsPreLoop) {
    Preheader->getTerminator()->replaceUsesOfWith(MainLoopStructure.Header,
                                                  PreLoop.Structure.Header);

    MainLoopPreheader =
        createPreheader(MainLoopStructure, Preheader, "mainloop");
    PreLoopRRI = changeIterationSpaceEnd(PreLoop.Structure, Preheader,
                                         ExitPreLoopAt, MainLoopPreheader);
    rewriteIncomingValuesForPHIs(MainLoopStructure, MainLoopPreheader,
                                 PreLoopRRI);
  }

  BasicBlock *PostLoopPreheader = nullptr;
  RewrittenRangeInfo PostLoopRRI;

  // Splice in the post loop after the (now constrained) main loop.
  if (NeedsPostLoop) {
    PostLoopPreheader =
        createPreheader(PostLoop.Structure, Preheader, "postloop");
    PostLoopRRI = changeIterationSpaceEnd(MainLoopStructure, MainLoopPreheader,
                                          ExitMainLoopAt, PostLoopPreheader);
    rewriteIncomingValuesForPHIs(PostLoop.Structure, PostLoopPreheader,
                                 PostLoopRRI);
  }

  BasicBlock *NewMainLoopPreheader =
      MainLoopPreheader != Preheader ? MainLoopPreheader : nullptr;
  BasicBlock *NewBlocks[] = {PostLoopPreheader, PreLoopRRI.PseudoExit,
                             PreLoopRRI.ExitSelector, PostLoopRRI.PseudoExit,
                             PostLoopRRI.ExitSelector, NewMainLoopPreheader};

  // Some of the above may be nullptr, filter them out before passing to
  // addToParentLoopIfNeeded.
  auto NewBlocksEnd =
      std::remove(std::begin(NewBlocks), std::end(NewBlocks), nullptr);

  addToParentLoopIfNeeded(makeArrayRef(std::begin(NewBlocks), NewBlocksEnd));
  addToParentLoopIfNeeded(PreLoop.Blocks);
  addToParentLoopIfNeeded(PostLoop.Blocks);

  return true;
}
  1062. /// Computes and returns a range of values for the induction variable (IndVar)
  1063. /// in which the range check can be safely elided. If it cannot compute such a
  1064. /// range, returns None.
  1065. Optional<InductiveRangeCheck::Range>
  1066. InductiveRangeCheck::computeSafeIterationSpace(ScalarEvolution &SE,
  1067. const SCEVAddRecExpr *IndVar,
  1068. IRBuilder<> &) const {
  1069. // IndVar is of the form "A + B * I" (where "I" is the canonical induction
  1070. // variable, that may or may not exist as a real llvm::Value in the loop) and
  1071. // this inductive range check is a range check on the "C + D * I" ("C" is
  1072. // getOffset() and "D" is getScale()). We rewrite the value being range
  1073. // checked to "M + N * IndVar" where "N" = "D * B^(-1)" and "M" = "C - NA".
  1074. // Currently we support this only for "B" = "D" = { 1 or -1 }, but the code
  1075. // can be generalized as needed.
  1076. //
  1077. // The actual inequalities we solve are of the form
  1078. //
  1079. // 0 <= M + 1 * IndVar < L given L >= 0 (i.e. N == 1)
  1080. //
  1081. // The inequality is satisfied by -M <= IndVar < (L - M) [^1]. All additions
  1082. // and subtractions are twos-complement wrapping and comparisons are signed.
  1083. //
  1084. // Proof:
  1085. //
  1086. // If there exists IndVar such that -M <= IndVar < (L - M) then it follows
  1087. // that -M <= (-M + L) [== Eq. 1]. Since L >= 0, if (-M + L) sign-overflows
  1088. // then (-M + L) < (-M). Hence by [Eq. 1], (-M + L) could not have
  1089. // overflown.
  1090. //
  1091. // This means IndVar = t + (-M) for t in [0, L). Hence (IndVar + M) = t.
  1092. // Hence 0 <= (IndVar + M) < L
  1093. // [^1]: Note that the solution does _not_ apply if L < 0; consider values M =
  1094. // 127, IndVar = 126 and L = -2 in an i8 world.
  1095. if (!IndVar->isAffine())
  1096. return None;
  1097. const SCEV *A = IndVar->getStart();
  1098. const SCEVConstant *B = dyn_cast<SCEVConstant>(IndVar->getStepRecurrence(SE));
  1099. if (!B)
  1100. return None;
  1101. const SCEV *C = getOffset();
  1102. const SCEVConstant *D = dyn_cast<SCEVConstant>(getScale());
  1103. if (D != B)
  1104. return None;
  1105. ConstantInt *ConstD = D->getValue();
  1106. if (!(ConstD->isMinusOne() || ConstD->isOne()))
  1107. return None;
  1108. const SCEV *M = SE.getMinusSCEV(C, A);
  1109. const SCEV *Begin = SE.getNegativeSCEV(M);
  1110. const SCEV *UpperLimit = nullptr;
  1111. // We strengthen "0 <= I" to "0 <= I < INT_SMAX" and "I < L" to "0 <= I < L".
  1112. // We can potentially do much better here.
  1113. if (Value *V = getLength()) {
  1114. UpperLimit = SE.getSCEV(V);
  1115. } else {
  1116. assert(Kind == InductiveRangeCheck::RANGE_CHECK_LOWER && "invariant!");
  1117. unsigned BitWidth = cast<IntegerType>(IndVar->getType())->getBitWidth();
  1118. UpperLimit = SE.getConstant(APInt::getSignedMaxValue(BitWidth));
  1119. }
  1120. const SCEV *End = SE.getMinusSCEV(UpperLimit, M);
  1121. return InductiveRangeCheck::Range(Begin, End);
  1122. }
  1123. static Optional<InductiveRangeCheck::Range>
  1124. IntersectRange(ScalarEvolution &SE,
  1125. const Optional<InductiveRangeCheck::Range> &R1,
  1126. const InductiveRangeCheck::Range &R2, IRBuilder<> &B) {
  1127. if (!R1.hasValue())
  1128. return R2;
  1129. auto &R1Value = R1.getValue();
  1130. // TODO: we could widen the smaller range and have this work; but for now we
  1131. // bail out to keep things simple.
  1132. if (R1Value.getType() != R2.getType())
  1133. return None;
  1134. const SCEV *NewBegin = SE.getSMaxExpr(R1Value.getBegin(), R2.getBegin());
  1135. const SCEV *NewEnd = SE.getSMinExpr(R1Value.getEnd(), R2.getEnd());
  1136. return InductiveRangeCheck::Range(NewBegin, NewEnd);
  1137. }
// Pass entry point: recognize the loop's inductive range checks, compute the
// intersection of their safe iteration spaces, constrain the loop to that
// space via LoopConstrainer, and fold the now-redundant checks to constants.
bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) {
  // Constraining clones the body up to twice (pre and post loop); skip loops
  // too large for that to be worthwhile.
  if (L->getBlocks().size() >= LoopSizeCutoff) {
    DEBUG(dbgs() << "irce: giving up constraining loop, too large\n";);
    return false;
  }

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) {
    DEBUG(dbgs() << "irce: loop has no preheader, leaving\n");
    return false;
  }

  LLVMContext &Context = Preheader->getContext();
  InductiveRangeCheck::AllocatorTy IRCAlloc;
  SmallVector<InductiveRangeCheck *, 16> RangeChecks;
  ScalarEvolution &SE = getAnalysis<ScalarEvolution>();
  BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();

  // Collect every conditional branch in the loop recognizable as an inductive
  // range check.
  for (auto BBI : L->getBlocks())
    if (BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator()))
      if (InductiveRangeCheck *IRC =
              InductiveRangeCheck::create(IRCAlloc, TBI, L, SE, BPI))
        RangeChecks.push_back(IRC);

  if (RangeChecks.empty())
    return false;

  auto PrintRecognizedRangeChecks = [&](raw_ostream &OS) {
    OS << "irce: looking at loop "; L->print(OS);
    OS << "irce: loop has " << RangeChecks.size()
       << " inductive range checks: \n";
    for (InductiveRangeCheck *IRC : RangeChecks)
      IRC->print(OS);
  };

  DEBUG(PrintRecognizedRangeChecks(dbgs()));

  if (PrintRangeChecks)
    PrintRecognizedRangeChecks(errs());

  const char *FailureReason = nullptr;
  Optional<LoopStructure> MaybeLoopStructure =
      LoopStructure::parseLoopStructure(SE, BPI, *L, FailureReason);
  if (!MaybeLoopStructure.hasValue()) {
    DEBUG(dbgs() << "irce: could not parse loop structure: " << FailureReason
                 << "\n";);
    return false;
  }
  LoopStructure LS = MaybeLoopStructure.getValue();
  bool Increasing = LS.IndVarIncreasing;
  // IndVarNext is the IV value *after* the increment; undo one step (note:
  // `MinusOne' is actually +1 for a decreasing IV) to recover the add
  // recurrence for the IV itself.
  const SCEV *MinusOne =
      SE.getConstant(LS.IndVarNext->getType(), Increasing ? -1 : 1, true);
  const SCEVAddRecExpr *IndVar =
      cast<SCEVAddRecExpr>(SE.getAddExpr(SE.getSCEV(LS.IndVarNext), MinusOne));

  Optional<InductiveRangeCheck::Range> SafeIterRange;
  Instruction *ExprInsertPt = Preheader->getTerminator();

  SmallVector<InductiveRangeCheck *, 4> RangeChecksToEliminate;

  IRBuilder<> B(ExprInsertPt);
  // Greedily intersect the safe ranges; a check is only eliminable if its
  // range made it into the running intersection.
  for (InductiveRangeCheck *IRC : RangeChecks) {
    auto Result = IRC->computeSafeIterationSpace(SE, IndVar, B);
    if (Result.hasValue()) {
      auto MaybeSafeIterRange =
          IntersectRange(SE, SafeIterRange, Result.getValue(), B);
      if (MaybeSafeIterRange.hasValue()) {
        RangeChecksToEliminate.push_back(IRC);
        SafeIterRange = MaybeSafeIterRange.getValue();
      }
    }
  }

  if (!SafeIterRange.hasValue())
    return false;

  LoopConstrainer LC(*L, getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), LS,
                     SE, SafeIterRange.getValue());
  bool Changed = LC.run();

  if (Changed) {
    auto PrintConstrainedLoopInfo = [L]() {
      dbgs() << "irce: in function ";
      dbgs() << L->getHeader()->getParent()->getName() << ": ";
      dbgs() << "constrained ";
      L->print(dbgs());
    };

    DEBUG(PrintConstrainedLoopInfo());

    if (PrintChangedLoops)
      PrintConstrainedLoopInfo();

    // Optimize away the now-redundant range checks.
    for (InductiveRangeCheck *IRC : RangeChecksToEliminate) {
      // Inside the constrained main loop each check always takes its passing
      // direction, so its condition folds to a constant.
      ConstantInt *FoldedRangeCheck = IRC->getPassingDirection()
                                          ? ConstantInt::getTrue(Context)
                                          : ConstantInt::getFalse(Context);
      IRC->getBranch()->setCondition(FoldedRangeCheck);
    }
  }

  return Changed;
}
  1224. Pass *llvm::createInductiveRangeCheckEliminationPass() {
  1225. return new InductiveRangeCheckElimination;
  1226. }