LoopInterchange.cpp 45 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300
  1. //===- LoopInterchange.cpp - Loop interchange pass------------------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This Pass handles loop interchange transform.
  11. // This pass interchanges loops to provide a more cache-friendly memory access
  12. // patterns.
  13. //
  14. //===----------------------------------------------------------------------===//
  15. #include "llvm/ADT/SmallVector.h"
  16. #include "llvm/Analysis/AliasAnalysis.h"
  17. #include "llvm/Analysis/AliasSetTracker.h"
  18. #include "llvm/Analysis/AssumptionCache.h"
  19. #include "llvm/Analysis/BlockFrequencyInfo.h"
  20. #include "llvm/Analysis/CodeMetrics.h"
  21. #include "llvm/Analysis/DependenceAnalysis.h"
  22. #include "llvm/Analysis/LoopInfo.h"
  23. #include "llvm/Analysis/LoopIterator.h"
  24. #include "llvm/Analysis/LoopPass.h"
  25. #include "llvm/Analysis/ScalarEvolution.h"
  26. #include "llvm/Analysis/ScalarEvolutionExpander.h"
  27. #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  28. #include "llvm/Analysis/TargetTransformInfo.h"
  29. #include "llvm/Analysis/ValueTracking.h"
  30. #include "llvm/IR/Dominators.h"
  31. #include "llvm/IR/Function.h"
  32. #include "llvm/IR/IRBuilder.h"
  33. #include "llvm/IR/InstIterator.h"
  34. #include "llvm/IR/IntrinsicInst.h"
  35. #include "llvm/IR/Module.h"
  36. #include "llvm/Pass.h"
  37. #include "llvm/Support/Debug.h"
  38. #include "llvm/Support/raw_ostream.h"
  39. #include "llvm/Transforms/Scalar.h"
  40. #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  41. #include "llvm/Transforms/Utils/LoopUtils.h"
  42. #include "llvm/Transforms/Utils/SSAUpdater.h"
  43. using namespace llvm;
  44. #define DEBUG_TYPE "loop-interchange"
  45. namespace {
  46. typedef SmallVector<Loop *, 8> LoopVector;
  47. // TODO: Check if we can use a sparse matrix here.
  48. typedef std::vector<std::vector<char>> CharMatrix;
  49. // Maximum number of dependencies that can be handled in the dependency matrix.
  50. static const unsigned MaxMemInstrCount = 100;
  51. // Maximum loop depth supported.
  52. static const unsigned MaxLoopNestDepth = 10;
  53. struct LoopInterchange;
  54. #ifdef DUMP_DEP_MATRICIES
  55. void printDepMatrix(CharMatrix &DepMatrix) {
  56. for (auto I = DepMatrix.begin(), E = DepMatrix.end(); I != E; ++I) {
  57. std::vector<char> Vec = *I;
  58. for (auto II = Vec.begin(), EE = Vec.end(); II != EE; ++II)
  59. DEBUG(dbgs() << *II << " ");
  60. DEBUG(dbgs() << "\n");
  61. }
  62. }
  63. #endif
  64. static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
  65. Loop *L, DependenceAnalysis *DA) {
  66. typedef SmallVector<Value *, 16> ValueVector;
  67. ValueVector MemInstr;
  68. if (Level > MaxLoopNestDepth) {
  69. DEBUG(dbgs() << "Cannot handle loops of depth greater than "
  70. << MaxLoopNestDepth << "\n");
  71. return false;
  72. }
  73. // For each block.
  74. for (Loop::block_iterator BB = L->block_begin(), BE = L->block_end();
  75. BB != BE; ++BB) {
  76. // Scan the BB and collect legal loads and stores.
  77. for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E;
  78. ++I) {
  79. Instruction *Ins = dyn_cast<Instruction>(I);
  80. if (!Ins)
  81. return false;
  82. LoadInst *Ld = dyn_cast<LoadInst>(I);
  83. StoreInst *St = dyn_cast<StoreInst>(I);
  84. if (!St && !Ld)
  85. continue;
  86. if (Ld && !Ld->isSimple())
  87. return false;
  88. if (St && !St->isSimple())
  89. return false;
  90. MemInstr.push_back(I);
  91. }
  92. }
  93. DEBUG(dbgs() << "Found " << MemInstr.size()
  94. << " Loads and Stores to analyze\n");
  95. ValueVector::iterator I, IE, J, JE;
  96. for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
  97. for (J = I, JE = MemInstr.end(); J != JE; ++J) {
  98. std::vector<char> Dep;
  99. Instruction *Src = dyn_cast<Instruction>(*I);
  100. Instruction *Des = dyn_cast<Instruction>(*J);
  101. if (Src == Des)
  102. continue;
  103. if (isa<LoadInst>(Src) && isa<LoadInst>(Des))
  104. continue;
  105. if (auto D = DA->depends(Src, Des, true)) {
  106. DEBUG(dbgs() << "Found Dependency between Src=" << Src << " Des=" << Des
  107. << "\n");
  108. if (D->isFlow()) {
  109. // TODO: Handle Flow dependence.Check if it is sufficient to populate
  110. // the Dependence Matrix with the direction reversed.
  111. DEBUG(dbgs() << "Flow dependence not handled");
  112. return false;
  113. }
  114. if (D->isAnti()) {
  115. DEBUG(dbgs() << "Found Anti dependence \n");
  116. unsigned Levels = D->getLevels();
  117. char Direction;
  118. for (unsigned II = 1; II <= Levels; ++II) {
  119. const SCEV *Distance = D->getDistance(II);
  120. const SCEVConstant *SCEVConst =
  121. dyn_cast_or_null<SCEVConstant>(Distance);
  122. if (SCEVConst) {
  123. const ConstantInt *CI = SCEVConst->getValue();
  124. if (CI->isNegative())
  125. Direction = '<';
  126. else if (CI->isZero())
  127. Direction = '=';
  128. else
  129. Direction = '>';
  130. Dep.push_back(Direction);
  131. } else if (D->isScalar(II)) {
  132. Direction = 'S';
  133. Dep.push_back(Direction);
  134. } else {
  135. unsigned Dir = D->getDirection(II);
  136. if (Dir == Dependence::DVEntry::LT ||
  137. Dir == Dependence::DVEntry::LE)
  138. Direction = '<';
  139. else if (Dir == Dependence::DVEntry::GT ||
  140. Dir == Dependence::DVEntry::GE)
  141. Direction = '>';
  142. else if (Dir == Dependence::DVEntry::EQ)
  143. Direction = '=';
  144. else
  145. Direction = '*';
  146. Dep.push_back(Direction);
  147. }
  148. }
  149. while (Dep.size() != Level) {
  150. Dep.push_back('I');
  151. }
  152. DepMatrix.push_back(Dep);
  153. if (DepMatrix.size() > MaxMemInstrCount) {
  154. DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
  155. << " dependencies inside loop\n");
  156. return false;
  157. }
  158. }
  159. }
  160. }
  161. }
  162. // We don't have a DepMatrix to check legality return false
  163. if (DepMatrix.size() == 0)
  164. return false;
  165. return true;
  166. }
  167. // A loop is moved from index 'from' to an index 'to'. Update the Dependence
  168. // matrix by exchanging the two columns.
  169. static void interChangeDepedencies(CharMatrix &DepMatrix, unsigned FromIndx,
  170. unsigned ToIndx) {
  171. unsigned numRows = DepMatrix.size();
  172. for (unsigned i = 0; i < numRows; ++i) {
  173. char TmpVal = DepMatrix[i][ToIndx];
  174. DepMatrix[i][ToIndx] = DepMatrix[i][FromIndx];
  175. DepMatrix[i][FromIndx] = TmpVal;
  176. }
  177. }
  178. // Checks if outermost non '=','S'or'I' dependence in the dependence matrix is
  179. // '>'
  180. static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row,
  181. unsigned Column) {
  182. for (unsigned i = 0; i <= Column; ++i) {
  183. if (DepMatrix[Row][i] == '<')
  184. return false;
  185. if (DepMatrix[Row][i] == '>')
  186. return true;
  187. }
  188. // All dependencies were '=','S' or 'I'
  189. return false;
  190. }
  191. // Checks if no dependence exist in the dependency matrix in Row before Column.
  192. static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row,
  193. unsigned Column) {
  194. for (unsigned i = 0; i < Column; ++i) {
  195. if (DepMatrix[Row][i] != '=' || DepMatrix[Row][i] != 'S' ||
  196. DepMatrix[Row][i] != 'I')
  197. return false;
  198. }
  199. return true;
  200. }
  201. static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row,
  202. unsigned OuterLoopId, char InnerDep,
  203. char OuterDep) {
  204. if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId))
  205. return false;
  206. if (InnerDep == OuterDep)
  207. return true;
  208. // It is legal to interchange if and only if after interchange no row has a
  209. // '>' direction as the leftmost non-'='.
  210. if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I')
  211. return true;
  212. if (InnerDep == '<')
  213. return true;
  214. if (InnerDep == '>') {
  215. // If OuterLoopId represents outermost loop then interchanging will make the
  216. // 1st dependency as '>'
  217. if (OuterLoopId == 0)
  218. return false;
  219. // If all dependencies before OuterloopId are '=','S'or 'I'. Then
  220. // interchanging will result in this row having an outermost non '='
  221. // dependency of '>'
  222. if (!containsNoDependence(DepMatrix, Row, OuterLoopId))
  223. return true;
  224. }
  225. return false;
  226. }
  227. // Checks if it is legal to interchange 2 loops.
  228. // [Theorem] A permutation of the loops in a perfect nest is legal if and only
  229. // if
  230. // the direction matrix, after the same permutation is applied to its columns,
  231. // has no ">" direction as the leftmost non-"=" direction in any row.
  232. static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
  233. unsigned InnerLoopId,
  234. unsigned OuterLoopId) {
  235. unsigned NumRows = DepMatrix.size();
  236. // For each row check if it is valid to interchange.
  237. for (unsigned Row = 0; Row < NumRows; ++Row) {
  238. char InnerDep = DepMatrix[Row][InnerLoopId];
  239. char OuterDep = DepMatrix[Row][OuterLoopId];
  240. if (InnerDep == '*' || OuterDep == '*')
  241. return false;
  242. else if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep,
  243. OuterDep))
  244. return false;
  245. }
  246. return true;
  247. }
  248. static void populateWorklist(Loop &L, SmallVector<LoopVector, 8> &V) {
  249. DEBUG(dbgs() << "Calling populateWorklist called\n");
  250. LoopVector LoopList;
  251. Loop *CurrentLoop = &L;
  252. const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
  253. while (!Vec->empty()) {
  254. // The current loop has multiple subloops in it hence it is not tightly
  255. // nested.
  256. // Discard all loops above it added into Worklist.
  257. if (Vec->size() != 1) {
  258. LoopList.clear();
  259. return;
  260. }
  261. LoopList.push_back(CurrentLoop);
  262. CurrentLoop = Vec->front();
  263. Vec = &CurrentLoop->getSubLoops();
  264. }
  265. LoopList.push_back(CurrentLoop);
  266. V.push_back(std::move(LoopList));
  267. }
  268. static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) {
  269. PHINode *InnerIndexVar = L->getCanonicalInductionVariable();
  270. if (InnerIndexVar)
  271. return InnerIndexVar;
  272. if (L->getLoopLatch() == nullptr || L->getLoopPredecessor() == nullptr)
  273. return nullptr;
  274. for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
  275. PHINode *PhiVar = cast<PHINode>(I);
  276. Type *PhiTy = PhiVar->getType();
  277. if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
  278. !PhiTy->isPointerTy())
  279. return nullptr;
  280. const SCEVAddRecExpr *AddRec =
  281. dyn_cast<SCEVAddRecExpr>(SE->getSCEV(PhiVar));
  282. if (!AddRec || !AddRec->isAffine())
  283. continue;
  284. const SCEV *Step = AddRec->getStepRecurrence(*SE);
  285. const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
  286. if (!C)
  287. continue;
  288. // Found the induction variable.
  289. // FIXME: Handle loops with more than one induction variable. Note that,
  290. // currently, legality makes sure we have only one induction variable.
  291. return PhiVar;
  292. }
  293. return nullptr;
  294. }
  295. /// LoopInterchangeLegality checks if it is legal to interchange the loop.
  296. class LoopInterchangeLegality {
  297. public:
  298. LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
  299. LoopInterchange *Pass)
  300. : OuterLoop(Outer), InnerLoop(Inner), SE(SE), CurrentPass(Pass),
  301. InnerLoopHasReduction(false) {}
  302. /// Check if the loops can be interchanged.
  303. bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
  304. CharMatrix &DepMatrix);
  305. /// Check if the loop structure is understood. We do not handle triangular
  306. /// loops for now.
  307. bool isLoopStructureUnderstood(PHINode *InnerInductionVar);
  308. bool currentLimitations();
  309. bool hasInnerLoopReduction() { return InnerLoopHasReduction; }
  310. private:
  311. bool tightlyNested(Loop *Outer, Loop *Inner);
  312. bool containsUnsafeInstructionsInHeader(BasicBlock *BB);
  313. bool areAllUsesReductions(Instruction *Ins, Loop *L);
  314. bool containsUnsafeInstructionsInLatch(BasicBlock *BB);
  315. bool findInductionAndReductions(Loop *L,
  316. SmallVector<PHINode *, 8> &Inductions,
  317. SmallVector<PHINode *, 8> &Reductions);
  318. Loop *OuterLoop;
  319. Loop *InnerLoop;
  320. /// Scev analysis.
  321. ScalarEvolution *SE;
  322. LoopInterchange *CurrentPass;
  323. bool InnerLoopHasReduction;
  324. };
  325. /// LoopInterchangeProfitability checks if it is profitable to interchange the
  326. /// loop.
  327. class LoopInterchangeProfitability {
  328. public:
  329. LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE)
  330. : OuterLoop(Outer), InnerLoop(Inner), SE(SE) {}
  331. /// Check if the loop interchange is profitable
  332. bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId,
  333. CharMatrix &DepMatrix);
  334. private:
  335. int getInstrOrderCost();
  336. Loop *OuterLoop;
  337. Loop *InnerLoop;
  338. /// Scev analysis.
  339. ScalarEvolution *SE;
  340. };
  341. /// LoopInterchangeTransform interchanges the loop
  342. class LoopInterchangeTransform {
  343. public:
  344. LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
  345. LoopInfo *LI, DominatorTree *DT,
  346. LoopInterchange *Pass, BasicBlock *LoopNestExit,
  347. bool InnerLoopContainsReductions)
  348. : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT),
  349. LoopExit(LoopNestExit),
  350. InnerLoopHasReduction(InnerLoopContainsReductions) {}
  351. /// Interchange OuterLoop and InnerLoop.
  352. bool transform();
  353. void restructureLoops(Loop *InnerLoop, Loop *OuterLoop);
  354. void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
  355. private:
  356. void splitInnerLoopLatch(Instruction *);
  357. void splitOuterLoopLatch();
  358. void splitInnerLoopHeader();
  359. bool adjustLoopLinks();
  360. void adjustLoopPreheaders();
  361. void adjustOuterLoopPreheader();
  362. void adjustInnerLoopPreheader();
  363. bool adjustLoopBranches();
  364. void updateIncomingBlock(BasicBlock *CurrBlock, BasicBlock *OldPred,
  365. BasicBlock *NewPred);
  366. Loop *OuterLoop;
  367. Loop *InnerLoop;
  368. /// Scev analysis.
  369. ScalarEvolution *SE;
  370. LoopInfo *LI;
  371. DominatorTree *DT;
  372. BasicBlock *LoopExit;
  373. bool InnerLoopHasReduction;
  374. };
  375. // Main LoopInterchange Pass
  376. struct LoopInterchange : public FunctionPass {
  377. static char ID;
  378. ScalarEvolution *SE;
  379. LoopInfo *LI;
  380. DependenceAnalysis *DA;
  381. DominatorTree *DT;
  382. LoopInterchange()
  383. : FunctionPass(ID), SE(nullptr), LI(nullptr), DA(nullptr), DT(nullptr) {
  384. initializeLoopInterchangePass(*PassRegistry::getPassRegistry());
  385. }
  386. void getAnalysisUsage(AnalysisUsage &AU) const override {
  387. AU.addRequired<ScalarEvolution>();
  388. AU.addRequired<AliasAnalysis>();
  389. AU.addRequired<DominatorTreeWrapperPass>();
  390. AU.addRequired<LoopInfoWrapperPass>();
  391. AU.addRequired<DependenceAnalysis>();
  392. AU.addRequiredID(LoopSimplifyID);
  393. AU.addRequiredID(LCSSAID);
  394. }
  395. bool runOnFunction(Function &F) override {
  396. SE = &getAnalysis<ScalarEvolution>();
  397. LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  398. DA = &getAnalysis<DependenceAnalysis>();
  399. auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
  400. DT = DTWP ? &DTWP->getDomTree() : nullptr;
  401. // Build up a worklist of loop pairs to analyze.
  402. SmallVector<LoopVector, 8> Worklist;
  403. for (Loop *L : *LI)
  404. populateWorklist(*L, Worklist);
  405. DEBUG(dbgs() << "Worklist size = " << Worklist.size() << "\n");
  406. bool Changed = true;
  407. while (!Worklist.empty()) {
  408. LoopVector LoopList = Worklist.pop_back_val();
  409. Changed = processLoopList(LoopList, F);
  410. }
  411. return Changed;
  412. }
  413. bool isComputableLoopNest(LoopVector LoopList) {
  414. for (auto I = LoopList.begin(), E = LoopList.end(); I != E; ++I) {
  415. Loop *L = *I;
  416. const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
  417. if (ExitCountOuter == SE->getCouldNotCompute()) {
  418. DEBUG(dbgs() << "Couldn't compute Backedge count\n");
  419. return false;
  420. }
  421. if (L->getNumBackEdges() != 1) {
  422. DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
  423. return false;
  424. }
  425. if (!L->getExitingBlock()) {
  426. DEBUG(dbgs() << "Loop Doesn't have unique exit block\n");
  427. return false;
  428. }
  429. }
  430. return true;
  431. }
  432. unsigned selectLoopForInterchange(LoopVector LoopList) {
  433. // TODO: Add a better heuristic to select the loop to be interchanged based
  434. // on the dependece matrix. Currently we select the innermost loop.
  435. return LoopList.size() - 1;
  436. }
  437. bool processLoopList(LoopVector LoopList, Function &F) {
  438. bool Changed = false;
  439. CharMatrix DependencyMatrix;
  440. if (LoopList.size() < 2) {
  441. DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
  442. return false;
  443. }
  444. if (!isComputableLoopNest(LoopList)) {
  445. DEBUG(dbgs() << "Not vaild loop candidate for interchange\n");
  446. return false;
  447. }
  448. Loop *OuterMostLoop = *(LoopList.begin());
  449. DEBUG(dbgs() << "Processing LoopList of size = " << LoopList.size()
  450. << "\n");
  451. if (!populateDependencyMatrix(DependencyMatrix, LoopList.size(),
  452. OuterMostLoop, DA)) {
  453. DEBUG(dbgs() << "Populating Dependency matrix failed\n");
  454. return false;
  455. }
  456. #ifdef DUMP_DEP_MATRICIES
  457. DEBUG(dbgs() << "Dependence before inter change \n");
  458. printDepMatrix(DependencyMatrix);
  459. #endif
  460. BasicBlock *OuterMostLoopLatch = OuterMostLoop->getLoopLatch();
  461. BranchInst *OuterMostLoopLatchBI =
  462. dyn_cast<BranchInst>(OuterMostLoopLatch->getTerminator());
  463. if (!OuterMostLoopLatchBI)
  464. return false;
  465. // Since we currently do not handle LCSSA PHI's any failure in loop
  466. // condition will now branch to LoopNestExit.
  467. // TODO: This should be removed once we handle LCSSA PHI nodes.
  468. // Get the Outermost loop exit.
  469. BasicBlock *LoopNestExit;
  470. if (OuterMostLoopLatchBI->getSuccessor(0) == OuterMostLoop->getHeader())
  471. LoopNestExit = OuterMostLoopLatchBI->getSuccessor(1);
  472. else
  473. LoopNestExit = OuterMostLoopLatchBI->getSuccessor(0);
  474. if (isa<PHINode>(LoopNestExit->begin())) {
  475. DEBUG(dbgs() << "PHI Nodes in loop nest exit is not handled for now "
  476. "since on failure all loops branch to loop nest exit.\n");
  477. return false;
  478. }
  479. unsigned SelecLoopId = selectLoopForInterchange(LoopList);
  480. // Move the selected loop outwards to the best posible position.
  481. for (unsigned i = SelecLoopId; i > 0; i--) {
  482. bool Interchanged =
  483. processLoop(LoopList, i, i - 1, LoopNestExit, DependencyMatrix);
  484. if (!Interchanged)
  485. return Changed;
  486. // Loops interchanged reflect the same in LoopList
  487. std::swap(LoopList[i - 1], LoopList[i]);
  488. // Update the DependencyMatrix
  489. interChangeDepedencies(DependencyMatrix, i, i - 1);
  490. DT->recalculate(F);
  491. #ifdef DUMP_DEP_MATRICIES
  492. DEBUG(dbgs() << "Dependence after inter change \n");
  493. printDepMatrix(DependencyMatrix);
  494. #endif
  495. Changed |= Interchanged;
  496. }
  497. return Changed;
  498. }
  499. bool processLoop(LoopVector LoopList, unsigned InnerLoopId,
  500. unsigned OuterLoopId, BasicBlock *LoopNestExit,
  501. std::vector<std::vector<char>> &DependencyMatrix) {
  502. DEBUG(dbgs() << "Processing Innder Loop Id = " << InnerLoopId
  503. << " and OuterLoopId = " << OuterLoopId << "\n");
  504. Loop *InnerLoop = LoopList[InnerLoopId];
  505. Loop *OuterLoop = LoopList[OuterLoopId];
  506. LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, this);
  507. if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
  508. DEBUG(dbgs() << "Not interchanging Loops. Cannot prove legality\n");
  509. return false;
  510. }
  511. DEBUG(dbgs() << "Loops are legal to interchange\n");
  512. LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE);
  513. if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) {
  514. DEBUG(dbgs() << "Interchanging Loops not profitable\n");
  515. return false;
  516. }
  517. LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, this,
  518. LoopNestExit, LIL.hasInnerLoopReduction());
  519. LIT.transform();
  520. DEBUG(dbgs() << "Loops interchanged\n");
  521. return true;
  522. }
  523. };
  524. } // end of namespace
  525. bool LoopInterchangeLegality::areAllUsesReductions(Instruction *Ins, Loop *L) {
  526. return !std::any_of(Ins->user_begin(), Ins->user_end(), [=](User *U) -> bool {
  527. PHINode *UserIns = dyn_cast<PHINode>(U);
  528. RecurrenceDescriptor RD;
  529. return !UserIns || !RecurrenceDescriptor::isReductionPHI(UserIns, L, RD);
  530. });
  531. }
  532. bool LoopInterchangeLegality::containsUnsafeInstructionsInHeader(
  533. BasicBlock *BB) {
  534. for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
  535. // Load corresponding to reduction PHI's are safe while concluding if
  536. // tightly nested.
  537. if (LoadInst *L = dyn_cast<LoadInst>(I)) {
  538. if (!areAllUsesReductions(L, InnerLoop))
  539. return true;
  540. } else if (I->mayHaveSideEffects() || I->mayReadFromMemory())
  541. return true;
  542. }
  543. return false;
  544. }
  545. bool LoopInterchangeLegality::containsUnsafeInstructionsInLatch(
  546. BasicBlock *BB) {
  547. for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
  548. // Stores corresponding to reductions are safe while concluding if tightly
  549. // nested.
  550. if (StoreInst *L = dyn_cast<StoreInst>(I)) {
  551. PHINode *PHI = dyn_cast<PHINode>(L->getOperand(0));
  552. if (!PHI)
  553. return true;
  554. } else if (I->mayHaveSideEffects() || I->mayReadFromMemory())
  555. return true;
  556. }
  557. return false;
  558. }
  559. bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
  560. BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
  561. BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
  562. BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
  563. DEBUG(dbgs() << "Checking if Loops are Tightly Nested\n");
  564. // A perfectly nested loop will not have any branch in between the outer and
  565. // inner block i.e. outer header will branch to either inner preheader and
  566. // outerloop latch.
  567. BranchInst *outerLoopHeaderBI =
  568. dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
  569. if (!outerLoopHeaderBI)
  570. return false;
  571. unsigned num = outerLoopHeaderBI->getNumSuccessors();
  572. for (unsigned i = 0; i < num; i++) {
  573. if (outerLoopHeaderBI->getSuccessor(i) != InnerLoopPreHeader &&
  574. outerLoopHeaderBI->getSuccessor(i) != OuterLoopLatch)
  575. return false;
  576. }
  577. DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch \n");
  578. // We do not have any basic block in between now make sure the outer header
  579. // and outer loop latch doesnt contain any unsafe instructions.
  580. if (containsUnsafeInstructionsInHeader(OuterLoopHeader) ||
  581. containsUnsafeInstructionsInLatch(OuterLoopLatch))
  582. return false;
  583. DEBUG(dbgs() << "Loops are perfectly nested \n");
  584. // We have a perfect loop nest.
  585. return true;
  586. }
  587. bool LoopInterchangeLegality::isLoopStructureUnderstood(
  588. PHINode *InnerInduction) {
  589. unsigned Num = InnerInduction->getNumOperands();
  590. BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
  591. for (unsigned i = 0; i < Num; ++i) {
  592. Value *Val = InnerInduction->getOperand(i);
  593. if (isa<Constant>(Val))
  594. continue;
  595. Instruction *I = dyn_cast<Instruction>(Val);
  596. if (!I)
  597. return false;
  598. // TODO: Handle triangular loops.
  599. // e.g. for(int i=0;i<N;i++)
  600. // for(int j=i;j<N;j++)
  601. unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
  602. if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
  603. InnerLoopPreheader &&
  604. !OuterLoop->isLoopInvariant(I)) {
  605. return false;
  606. }
  607. }
  608. return true;
  609. }
  610. bool LoopInterchangeLegality::findInductionAndReductions(
  611. Loop *L, SmallVector<PHINode *, 8> &Inductions,
  612. SmallVector<PHINode *, 8> &Reductions) {
  613. if (!L->getLoopLatch() || !L->getLoopPredecessor())
  614. return false;
  615. for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
  616. RecurrenceDescriptor RD;
  617. PHINode *PHI = cast<PHINode>(I);
  618. ConstantInt *StepValue = nullptr;
  619. if (isInductionPHI(PHI, SE, StepValue))
  620. Inductions.push_back(PHI);
  621. else if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD))
  622. Reductions.push_back(PHI);
  623. else {
  624. DEBUG(
  625. dbgs() << "Failed to recognize PHI as an induction or reduction.\n");
  626. return false;
  627. }
  628. }
  629. return true;
  630. }
  631. static bool containsSafePHI(BasicBlock *Block, bool isOuterLoopExitBlock) {
  632. for (auto I = Block->begin(); isa<PHINode>(I); ++I) {
  633. PHINode *PHI = cast<PHINode>(I);
  634. // Reduction lcssa phi will have only 1 incoming block that from loop latch.
  635. if (PHI->getNumIncomingValues() > 1)
  636. return false;
  637. Instruction *Ins = dyn_cast<Instruction>(PHI->getIncomingValue(0));
  638. if (!Ins)
  639. return false;
  640. // Incoming value for lcssa phi's in outer loop exit can only be inner loop
  641. // exits lcssa phi else it would not be tightly nested.
  642. if (!isa<PHINode>(Ins) && isOuterLoopExitBlock)
  643. return false;
  644. }
  645. return true;
  646. }
  647. static BasicBlock *getLoopLatchExitBlock(BasicBlock *LatchBlock,
  648. BasicBlock *LoopHeader) {
  649. if (BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator())) {
  650. unsigned Num = BI->getNumSuccessors();
  651. assert(Num == 2);
  652. for (unsigned i = 0; i < Num; ++i) {
  653. if (BI->getSuccessor(i) == LoopHeader)
  654. continue;
  655. return BI->getSuccessor(i);
  656. }
  657. }
  658. return nullptr;
  659. }
  660. // This function indicates the current limitations in the transform as a result
  661. // of which we do not proceed.
  662. bool LoopInterchangeLegality::currentLimitations() {
  663. BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
  664. BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
  665. BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
  666. BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
  667. BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
  668. PHINode *InnerInductionVar;
  669. SmallVector<PHINode *, 8> Inductions;
  670. SmallVector<PHINode *, 8> Reductions;
  671. if (!findInductionAndReductions(InnerLoop, Inductions, Reductions))
  672. return true;
  673. // TODO: Currently we handle only loops with 1 induction variable.
  674. if (Inductions.size() != 1) {
  675. DEBUG(dbgs() << "We currently only support loops with 1 induction variable."
  676. << "Failed to interchange due to current limitation\n");
  677. return true;
  678. }
  679. if (Reductions.size() > 0)
  680. InnerLoopHasReduction = true;
  681. InnerInductionVar = Inductions.pop_back_val();
  682. Reductions.clear();
  683. if (!findInductionAndReductions(OuterLoop, Inductions, Reductions))
  684. return true;
  685. // Outer loop cannot have reduction because then loops will not be tightly
  686. // nested.
  687. if (!Reductions.empty())
  688. return true;
  689. // TODO: Currently we handle only loops with 1 induction variable.
  690. if (Inductions.size() != 1)
  691. return true;
  692. // TODO: Triangular loops are not handled for now.
  693. if (!isLoopStructureUnderstood(InnerInductionVar)) {
  694. DEBUG(dbgs() << "Loop structure not understood by pass\n");
  695. return true;
  696. }
  697. // TODO: We only handle LCSSA PHI's corresponding to reduction for now.
  698. BasicBlock *LoopExitBlock =
  699. getLoopLatchExitBlock(OuterLoopLatch, OuterLoopHeader);
  700. if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, true))
  701. return true;
  702. LoopExitBlock = getLoopLatchExitBlock(InnerLoopLatch, InnerLoopHeader);
  703. if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, false))
  704. return true;
  705. // TODO: Current limitation: Since we split the inner loop latch at the point
  706. // were induction variable is incremented (induction.next); We cannot have
  707. // more than 1 user of induction.next since it would result in broken code
  708. // after split.
  709. // e.g.
  710. // for(i=0;i<N;i++) {
  711. // for(j = 0;j<M;j++) {
  712. // A[j+1][i+2] = A[j][i]+k;
  713. // }
  714. // }
  715. bool FoundInduction = false;
  716. Instruction *InnerIndexVarInc = nullptr;
  717. if (InnerInductionVar->getIncomingBlock(0) == InnerLoopPreHeader)
  718. InnerIndexVarInc =
  719. dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(1));
  720. else
  721. InnerIndexVarInc =
  722. dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(0));
  723. if (!InnerIndexVarInc)
  724. return true;
  725. // Since we split the inner loop latch on this induction variable. Make sure
  726. // we do not have any instruction between the induction variable and branch
  727. // instruction.
  728. for (auto I = InnerLoopLatch->rbegin(), E = InnerLoopLatch->rend();
  729. I != E && !FoundInduction; ++I) {
  730. if (isa<BranchInst>(*I) || isa<CmpInst>(*I) || isa<TruncInst>(*I))
  731. continue;
  732. const Instruction &Ins = *I;
  733. // We found an instruction. If this is not induction variable then it is not
  734. // safe to split this loop latch.
  735. if (!Ins.isIdenticalTo(InnerIndexVarInc))
  736. return true;
  737. else
  738. FoundInduction = true;
  739. }
  740. // The loop latch ended and we didnt find the induction variable return as
  741. // current limitation.
  742. if (!FoundInduction)
  743. return true;
  744. return false;
  745. }
  746. bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
  747. unsigned OuterLoopId,
  748. CharMatrix &DepMatrix) {
  749. if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
  750. DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId
  751. << "and OuterLoopId = " << OuterLoopId
  752. << "due to dependence\n");
  753. return false;
  754. }
  755. // Create unique Preheaders if we already do not have one.
  756. BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
  757. BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
  758. // Create a unique outer preheader -
  759. // 1) If OuterLoop preheader is not present.
  760. // 2) If OuterLoop Preheader is same as OuterLoop Header
  761. // 3) If OuterLoop Preheader is same as Header of the previous loop.
  762. // 4) If OuterLoop Preheader is Entry node.
  763. if (!OuterLoopPreHeader || OuterLoopPreHeader == OuterLoop->getHeader() ||
  764. isa<PHINode>(OuterLoopPreHeader->begin()) ||
  765. !OuterLoopPreHeader->getUniquePredecessor()) {
  766. OuterLoopPreHeader = InsertPreheaderForLoop(OuterLoop, CurrentPass);
  767. }
  768. if (!InnerLoopPreHeader || InnerLoopPreHeader == InnerLoop->getHeader() ||
  769. InnerLoopPreHeader == OuterLoop->getHeader()) {
  770. InnerLoopPreHeader = InsertPreheaderForLoop(InnerLoop, CurrentPass);
  771. }
  772. // TODO: The loops could not be interchanged due to current limitations in the
  773. // transform module.
  774. if (currentLimitations()) {
  775. DEBUG(dbgs() << "Not legal because of current transform limitation\n");
  776. return false;
  777. }
  778. // Check if the loops are tightly nested.
  779. if (!tightlyNested(OuterLoop, InnerLoop)) {
  780. DEBUG(dbgs() << "Loops not tightly nested\n");
  781. return false;
  782. }
  783. return true;
  784. }
  785. int LoopInterchangeProfitability::getInstrOrderCost() {
  786. unsigned GoodOrder, BadOrder;
  787. BadOrder = GoodOrder = 0;
  788. for (auto BI = InnerLoop->block_begin(), BE = InnerLoop->block_end();
  789. BI != BE; ++BI) {
  790. for (auto I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I) {
  791. const Instruction &Ins = *I;
  792. if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
  793. unsigned NumOp = GEP->getNumOperands();
  794. bool FoundInnerInduction = false;
  795. bool FoundOuterInduction = false;
  796. for (unsigned i = 0; i < NumOp; ++i) {
  797. const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
  798. const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
  799. if (!AR)
  800. continue;
  801. // If we find the inner induction after an outer induction e.g.
  802. // for(int i=0;i<N;i++)
  803. // for(int j=0;j<N;j++)
  804. // A[i][j] = A[i-1][j-1]+k;
  805. // then it is a good order.
  806. if (AR->getLoop() == InnerLoop) {
  807. // We found an InnerLoop induction after OuterLoop induction. It is
  808. // a good order.
  809. FoundInnerInduction = true;
  810. if (FoundOuterInduction) {
  811. GoodOrder++;
  812. break;
  813. }
  814. }
  815. // If we find the outer induction after an inner induction e.g.
  816. // for(int i=0;i<N;i++)
  817. // for(int j=0;j<N;j++)
  818. // A[j][i] = A[j-1][i-1]+k;
  819. // then it is a bad order.
  820. if (AR->getLoop() == OuterLoop) {
  821. // We found an OuterLoop induction after InnerLoop induction. It is
  822. // a bad order.
  823. FoundOuterInduction = true;
  824. if (FoundInnerInduction) {
  825. BadOrder++;
  826. break;
  827. }
  828. }
  829. }
  830. }
  831. }
  832. }
  833. return GoodOrder - BadOrder;
  834. }
  835. static bool isProfitabileForVectorization(unsigned InnerLoopId,
  836. unsigned OuterLoopId,
  837. CharMatrix &DepMatrix) {
  838. // TODO: Improve this heuristic to catch more cases.
  839. // If the inner loop is loop independent or doesn't carry any dependency it is
  840. // profitable to move this to outer position.
  841. unsigned Row = DepMatrix.size();
  842. for (unsigned i = 0; i < Row; ++i) {
  843. if (DepMatrix[i][InnerLoopId] != 'S' && DepMatrix[i][InnerLoopId] != 'I')
  844. return false;
  845. // TODO: We need to improve this heuristic.
  846. if (DepMatrix[i][OuterLoopId] != '=')
  847. return false;
  848. }
  849. // If outer loop has dependence and inner loop is loop independent then it is
  850. // profitable to interchange to enable parallelism.
  851. return true;
  852. }
  853. bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
  854. unsigned OuterLoopId,
  855. CharMatrix &DepMatrix) {
  856. // TODO: Add Better Profitibility checks.
  857. // e.g
  858. // 1) Construct dependency matrix and move the one with no loop carried dep
  859. // inside to enable vectorization.
  860. // This is rough cost estimation algorithm. It counts the good and bad order
  861. // of induction variables in the instruction and allows reordering if number
  862. // of bad orders is more than good.
  863. int Cost = 0;
  864. Cost += getInstrOrderCost();
  865. DEBUG(dbgs() << "Cost = " << Cost << "\n");
  866. if (Cost < 0)
  867. return true;
  868. // It is not profitable as per current cache profitibility model. But check if
  869. // we can move this loop outside to improve parallelism.
  870. bool ImprovesPar =
  871. isProfitabileForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
  872. return ImprovesPar;
  873. }
  874. void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
  875. Loop *InnerLoop) {
  876. for (Loop::iterator I = OuterLoop->begin(), E = OuterLoop->end(); I != E;
  877. ++I) {
  878. if (*I == InnerLoop) {
  879. OuterLoop->removeChildLoop(I);
  880. return;
  881. }
  882. }
  883. assert(false && "Couldn't find loop");
  884. }
  885. void LoopInterchangeTransform::restructureLoops(Loop *InnerLoop,
  886. Loop *OuterLoop) {
  887. Loop *OuterLoopParent = OuterLoop->getParentLoop();
  888. if (OuterLoopParent) {
  889. // Remove the loop from its parent loop.
  890. removeChildLoop(OuterLoopParent, OuterLoop);
  891. removeChildLoop(OuterLoop, InnerLoop);
  892. OuterLoopParent->addChildLoop(InnerLoop);
  893. } else {
  894. removeChildLoop(OuterLoop, InnerLoop);
  895. LI->changeTopLevelLoop(OuterLoop, InnerLoop);
  896. }
  897. while (!InnerLoop->empty())
  898. OuterLoop->addChildLoop(InnerLoop->removeChildLoop(InnerLoop->begin()));
  899. InnerLoop->addChildLoop(OuterLoop);
  900. }
  901. bool LoopInterchangeTransform::transform() {
  902. DEBUG(dbgs() << "transform\n");
  903. bool Transformed = false;
  904. Instruction *InnerIndexVar;
  905. if (InnerLoop->getSubLoops().size() == 0) {
  906. BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
  907. DEBUG(dbgs() << "Calling Split Inner Loop\n");
  908. PHINode *InductionPHI = getInductionVariable(InnerLoop, SE);
  909. if (!InductionPHI) {
  910. DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
  911. return false;
  912. }
  913. if (InductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
  914. InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(1));
  915. else
  916. InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(0));
  917. //
  918. // Split at the place were the induction variable is
  919. // incremented/decremented.
  920. // TODO: This splitting logic may not work always. Fix this.
  921. splitInnerLoopLatch(InnerIndexVar);
  922. DEBUG(dbgs() << "splitInnerLoopLatch Done\n");
  923. // Splits the inner loops phi nodes out into a seperate basic block.
  924. splitInnerLoopHeader();
  925. DEBUG(dbgs() << "splitInnerLoopHeader Done\n");
  926. }
  927. Transformed |= adjustLoopLinks();
  928. if (!Transformed) {
  929. DEBUG(dbgs() << "adjustLoopLinks Failed\n");
  930. return false;
  931. }
  932. restructureLoops(InnerLoop, OuterLoop);
  933. return true;
  934. }
  935. void LoopInterchangeTransform::splitInnerLoopLatch(Instruction *Inc) {
  936. BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
  937. BasicBlock *InnerLoopLatchPred = InnerLoopLatch;
  938. InnerLoopLatch = SplitBlock(InnerLoopLatchPred, Inc, DT, LI);
  939. }
  940. void LoopInterchangeTransform::splitOuterLoopLatch() {
  941. BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
  942. BasicBlock *OuterLatchLcssaPhiBlock = OuterLoopLatch;
  943. OuterLoopLatch = SplitBlock(OuterLatchLcssaPhiBlock,
  944. OuterLoopLatch->getFirstNonPHI(), DT, LI);
  945. }
  946. void LoopInterchangeTransform::splitInnerLoopHeader() {
  947. // Split the inner loop header out. Here make sure that the reduction PHI's
  948. // stay in the innerloop body.
  949. BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
  950. BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
  951. if (InnerLoopHasReduction) {
  952. // FIXME: Check if the induction PHI will always be the first PHI.
  953. BasicBlock *New = InnerLoopHeader->splitBasicBlock(
  954. ++(InnerLoopHeader->begin()), InnerLoopHeader->getName() + ".split");
  955. if (LI)
  956. if (Loop *L = LI->getLoopFor(InnerLoopHeader))
  957. L->addBasicBlockToLoop(New, *LI);
  958. // Adjust Reduction PHI's in the block.
  959. SmallVector<PHINode *, 8> PHIVec;
  960. for (auto I = New->begin(); isa<PHINode>(I); ++I) {
  961. PHINode *PHI = dyn_cast<PHINode>(I);
  962. Value *V = PHI->getIncomingValueForBlock(InnerLoopPreHeader);
  963. PHI->replaceAllUsesWith(V);
  964. PHIVec.push_back((PHI));
  965. }
  966. for (auto I = PHIVec.begin(), E = PHIVec.end(); I != E; ++I) {
  967. PHINode *P = *I;
  968. P->eraseFromParent();
  969. }
  970. } else {
  971. SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
  972. }
  973. DEBUG(dbgs() << "Output of splitInnerLoopHeader InnerLoopHeaderSucc & "
  974. "InnerLoopHeader \n");
  975. }
  976. /// \brief Move all instructions except the terminator from FromBB right before
  977. /// InsertBefore
  978. static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
  979. auto &ToList = InsertBefore->getParent()->getInstList();
  980. auto &FromList = FromBB->getInstList();
  981. ToList.splice(InsertBefore, FromList, FromList.begin(),
  982. FromBB->getTerminator());
  983. }
  984. void LoopInterchangeTransform::adjustOuterLoopPreheader() {
  985. BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
  986. BasicBlock *InnerPreHeader = InnerLoop->getLoopPreheader();
  987. moveBBContents(OuterLoopPreHeader, InnerPreHeader->getTerminator());
  988. }
  989. void LoopInterchangeTransform::adjustInnerLoopPreheader() {
  990. BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
  991. BasicBlock *OuterHeader = OuterLoop->getHeader();
  992. moveBBContents(InnerLoopPreHeader, OuterHeader->getTerminator());
  993. }
  994. void LoopInterchangeTransform::updateIncomingBlock(BasicBlock *CurrBlock,
  995. BasicBlock *OldPred,
  996. BasicBlock *NewPred) {
  997. for (auto I = CurrBlock->begin(); isa<PHINode>(I); ++I) {
  998. PHINode *PHI = cast<PHINode>(I);
  999. unsigned Num = PHI->getNumIncomingValues();
  1000. for (unsigned i = 0; i < Num; ++i) {
  1001. if (PHI->getIncomingBlock(i) == OldPred)
  1002. PHI->setIncomingBlock(i, NewPred);
  1003. }
  1004. }
  1005. }
  1006. bool LoopInterchangeTransform::adjustLoopBranches() {
  1007. DEBUG(dbgs() << "adjustLoopBranches called\n");
  1008. // Adjust the loop preheader
  1009. BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
  1010. BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
  1011. BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
  1012. BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
  1013. BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
  1014. BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
  1015. BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
  1016. BasicBlock *InnerLoopLatchPredecessor =
  1017. InnerLoopLatch->getUniquePredecessor();
  1018. BasicBlock *InnerLoopLatchSuccessor;
  1019. BasicBlock *OuterLoopLatchSuccessor;
  1020. BranchInst *OuterLoopLatchBI =
  1021. dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
  1022. BranchInst *InnerLoopLatchBI =
  1023. dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
  1024. BranchInst *OuterLoopHeaderBI =
  1025. dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
  1026. BranchInst *InnerLoopHeaderBI =
  1027. dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
  1028. if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
  1029. !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
  1030. !InnerLoopHeaderBI)
  1031. return false;
  1032. BranchInst *InnerLoopLatchPredecessorBI =
  1033. dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
  1034. BranchInst *OuterLoopPredecessorBI =
  1035. dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
  1036. if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
  1037. return false;
  1038. BasicBlock *InnerLoopHeaderSucessor = InnerLoopHeader->getUniqueSuccessor();
  1039. if (!InnerLoopHeaderSucessor)
  1040. return false;
  1041. // Adjust Loop Preheader and headers
  1042. unsigned NumSucc = OuterLoopPredecessorBI->getNumSuccessors();
  1043. for (unsigned i = 0; i < NumSucc; ++i) {
  1044. if (OuterLoopPredecessorBI->getSuccessor(i) == OuterLoopPreHeader)
  1045. OuterLoopPredecessorBI->setSuccessor(i, InnerLoopPreHeader);
  1046. }
  1047. NumSucc = OuterLoopHeaderBI->getNumSuccessors();
  1048. for (unsigned i = 0; i < NumSucc; ++i) {
  1049. if (OuterLoopHeaderBI->getSuccessor(i) == OuterLoopLatch)
  1050. OuterLoopHeaderBI->setSuccessor(i, LoopExit);
  1051. else if (OuterLoopHeaderBI->getSuccessor(i) == InnerLoopPreHeader)
  1052. OuterLoopHeaderBI->setSuccessor(i, InnerLoopHeaderSucessor);
  1053. }
  1054. // Adjust reduction PHI's now that the incoming block has changed.
  1055. updateIncomingBlock(InnerLoopHeaderSucessor, InnerLoopHeader,
  1056. OuterLoopHeader);
  1057. BranchInst::Create(OuterLoopPreHeader, InnerLoopHeaderBI);
  1058. InnerLoopHeaderBI->eraseFromParent();
  1059. // -------------Adjust loop latches-----------
  1060. if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
  1061. InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
  1062. else
  1063. InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
  1064. NumSucc = InnerLoopLatchPredecessorBI->getNumSuccessors();
  1065. for (unsigned i = 0; i < NumSucc; ++i) {
  1066. if (InnerLoopLatchPredecessorBI->getSuccessor(i) == InnerLoopLatch)
  1067. InnerLoopLatchPredecessorBI->setSuccessor(i, InnerLoopLatchSuccessor);
  1068. }
  1069. // Adjust PHI nodes in InnerLoopLatchSuccessor. Update all uses of PHI with
  1070. // the value and remove this PHI node from inner loop.
  1071. SmallVector<PHINode *, 8> LcssaVec;
  1072. for (auto I = InnerLoopLatchSuccessor->begin(); isa<PHINode>(I); ++I) {
  1073. PHINode *LcssaPhi = cast<PHINode>(I);
  1074. LcssaVec.push_back(LcssaPhi);
  1075. }
  1076. for (auto I = LcssaVec.begin(), E = LcssaVec.end(); I != E; ++I) {
  1077. PHINode *P = *I;
  1078. Value *Incoming = P->getIncomingValueForBlock(InnerLoopLatch);
  1079. P->replaceAllUsesWith(Incoming);
  1080. P->eraseFromParent();
  1081. }
  1082. if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
  1083. OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
  1084. else
  1085. OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
  1086. if (InnerLoopLatchBI->getSuccessor(1) == InnerLoopLatchSuccessor)
  1087. InnerLoopLatchBI->setSuccessor(1, OuterLoopLatchSuccessor);
  1088. else
  1089. InnerLoopLatchBI->setSuccessor(0, OuterLoopLatchSuccessor);
  1090. updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch);
  1091. if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopLatchSuccessor) {
  1092. OuterLoopLatchBI->setSuccessor(0, InnerLoopLatch);
  1093. } else {
  1094. OuterLoopLatchBI->setSuccessor(1, InnerLoopLatch);
  1095. }
  1096. return true;
  1097. }
  1098. void LoopInterchangeTransform::adjustLoopPreheaders() {
  1099. // We have interchanged the preheaders so we need to interchange the data in
  1100. // the preheader as well.
  1101. // This is because the content of inner preheader was previously executed
  1102. // inside the outer loop.
  1103. BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
  1104. BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
  1105. BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
  1106. BranchInst *InnerTermBI =
  1107. cast<BranchInst>(InnerLoopPreHeader->getTerminator());
  1108. // These instructions should now be executed inside the loop.
  1109. // Move instruction into a new block after outer header.
  1110. moveBBContents(InnerLoopPreHeader, OuterLoopHeader->getTerminator());
  1111. // These instructions were not executed previously in the loop so move them to
  1112. // the older inner loop preheader.
  1113. moveBBContents(OuterLoopPreHeader, InnerTermBI);
  1114. }
  1115. bool LoopInterchangeTransform::adjustLoopLinks() {
  1116. // Adjust all branches in the inner and outer loop.
  1117. bool Changed = adjustLoopBranches();
  1118. if (Changed)
  1119. adjustLoopPreheaders();
  1120. return Changed;
  1121. }
  1122. char LoopInterchange::ID = 0;
  1123. INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange",
  1124. "Interchanges loops for cache reuse", false, false)
  1125. INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
  1126. INITIALIZE_PASS_DEPENDENCY(DependenceAnalysis)
  1127. INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  1128. INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
  1129. INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
  1130. INITIALIZE_PASS_DEPENDENCY(LCSSA)
  1131. INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
  1132. INITIALIZE_PASS_END(LoopInterchange, "loop-interchange",
  1133. "Interchanges loops for cache reuse", false, false)
  1134. Pass *llvm::createLoopInterchangePass() { return new LoopInterchange(); }