PassManagerBuilder.cpp 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813
  1. //===- PassManagerBuilder.cpp - Build Standard Pass -----------------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file defines the PassManagerBuilder class, which is used to set up a
  11. // "standard" optimization sequence suitable for languages like C and C++.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "llvm/Transforms/IPO/PassManagerBuilder.h"
  15. #include "llvm-c/Transforms/PassManagerBuilder.h"
  16. #include "llvm/ADT/SmallVector.h"
  17. #include "llvm/Analysis/Passes.h"
  18. #include "llvm/IR/DataLayout.h"
  19. #include "llvm/IR/Verifier.h"
  20. #include "llvm/IR/LegacyPassManager.h"
  21. #include "llvm/Support/CommandLine.h"
  22. #include "llvm/Support/ManagedStatic.h"
  23. #include "llvm/Analysis/TargetLibraryInfo.h"
  24. #include "llvm/Target/TargetMachine.h"
  25. #include "llvm/Transforms/IPO.h"
  26. #include "llvm/Transforms/Scalar.h"
  27. #include "llvm/Transforms/Vectorize.h"
  28. #include "dxc/HLSL/DxilGenerationPass.h" // HLSL Change
  29. #include "dxc/HLSL/HLMatrixLowerPass.h" // HLSL Change
  30. #include "dxc/HLSL/ComputeViewIdState.h" // HLSL Change
  31. using namespace llvm;
  // Pipeline tuning knobs.
  //
  // HLSL Change: with HLSL_VECTORIZATION_ENABLED set, every knob is a hidden
  // cl::opt<bool> command-line flag. Otherwise no flags are declared: the knobs
  // that the rest of this file reads unconditionally become constants carrying
  // the same value as the corresponding cl::init (false where the flag has no
  // cl::init), and the vectorizer-only knobs are not declared at all.
  #if HLSL_VECTORIZATION_ENABLED // HLSL Change - don't build vectorization passes

  // Which vectorizers the builder enables by default.
  static cl::opt<bool> RunLoopVectorization(
      "vectorize-loops", cl::Hidden,
      cl::desc("Run the Loop vectorization passes"));

  static cl::opt<bool> RunSLPVectorization(
      "vectorize-slp", cl::Hidden,
      cl::desc("Run the SLP vectorization passes"));

  static cl::opt<bool> RunBBVectorization(
      "vectorize-slp-aggressive", cl::Hidden,
      cl::desc("Run the BB vectorization passes"));

  // Cleanup strategy after vectorization.
  static cl::opt<bool> UseGVNAfterVectorization(
      "use-gvn-after-vectorization", cl::init(false), cl::Hidden,
      cl::desc("Run GVN instead of Early CSE after vectorization passes"));

  static cl::opt<bool> ExtraVectorizerPasses(
      "extra-vectorizer-passes", cl::init(false), cl::Hidden,
      cl::desc("Run cleanup optimization passes after vectorization."));

  // Scalar / loop pass selection.
  static cl::opt<bool> UseNewSROA(
      "use-new-sroa", cl::init(true), cl::Hidden,
      cl::desc("Enable the new, experimental SROA pass"));

  static cl::opt<bool> RunLoopRerolling(
      "reroll-loops", cl::Hidden,
      cl::desc("Run the loop rerolling pass"));

  static cl::opt<bool> RunFloat2Int(
      "float-to-int", cl::Hidden, cl::init(true),
      cl::desc("Run the float2int (float demotion) pass"));

  static cl::opt<bool> RunLoadCombine(
      "combine-loads", cl::init(false), cl::Hidden,
      cl::desc("Run the load combining pass"));

  static cl::opt<bool> RunSLPAfterLoopVectorization(
      "run-slp-after-loop-vectorization", cl::init(true), cl::Hidden,
      cl::desc("Run the SLP vectorizer (and BB vectorizer) after the Loop "
               "vectorizer instead of before"));

  static cl::opt<bool> UseCFLAA(
      "use-cfl-aa", cl::init(false), cl::Hidden,
      cl::desc("Enable the new, experimental CFL alias analysis"));

  static cl::opt<bool> EnableMLSM(
      "mlsm", cl::init(true), cl::Hidden,
      cl::desc("Enable motion of merged load and store"));

  static cl::opt<bool> EnableLoopInterchange(
      "enable-loopinterchange", cl::init(false), cl::Hidden,
      cl::desc("Enable the new, experimental LoopInterchange Pass"));

  static cl::opt<bool> EnableLoopDistribute(
      "enable-loop-distribute", cl::init(false), cl::Hidden,
      cl::desc("Enable the new, experimental LoopDistribution Pass"));

  #else

  // Fixed settings used when the command-line flags above are not built.
  // The vectorizer-only knobs have no counterpart here - they are simply
  // never referenced in this configuration.
  static const bool UseNewSROA = true;
  static const bool RunLoopRerolling = false;
  static const bool RunFloat2Int = true;
  static const bool RunLoadCombine = false;
  static const bool RunSLPAfterLoopVectorization = true;
  static const bool UseCFLAA = false;
  static const bool EnableMLSM = true;
  static const bool EnableLoopInterchange = false;
  static const bool EnableLoopDistribute = false;

  #endif // HLSL Change - don't build vectorization passes
  90. PassManagerBuilder::PassManagerBuilder() {
  91. OptLevel = 2;
  92. SizeLevel = 0;
  93. LibraryInfo = nullptr;
  94. Inliner = nullptr;
  95. DisableUnitAtATime = false;
  96. DisableUnrollLoops = false;
  97. #if HLSL_VECTORIZATION_ENABLED // HLSL Change - don't build vectorization passes
  98. BBVectorize = RunBBVectorization;
  99. SLPVectorize = RunSLPVectorization;
  100. LoopVectorize = RunLoopVectorization;
  101. #else
  102. BBVectorize = SLPVectorize = LoopVectorize = false;
  103. #endif
  104. RerollLoops = RunLoopRerolling;
  105. LoadCombine = RunLoadCombine;
  106. DisableGVNLoadPRE = false;
  107. VerifyInput = false;
  108. VerifyOutput = false;
  109. MergeFunctions = false;
  110. PrepareForLTO = false;
  111. }
  112. PassManagerBuilder::~PassManagerBuilder() {
  113. delete LibraryInfo;
  114. delete Inliner;
  115. }
  // Process-wide extension registry. Compiled out for HLSL: neither the
  // registry nor addGlobalExtension() is built.
  #if 0 // HLSL Change Starts - no global extensions
  /// Set of global extensions, automatically added as part of the standard set.
  static ManagedStatic<SmallVector<std::pair<PassManagerBuilder::ExtensionPointTy,
                                             PassManagerBuilder::ExtensionFn>,
                                   8> >
      GlobalExtensions;

  void PassManagerBuilder::addGlobalExtension(
      PassManagerBuilder::ExtensionPointTy Ty,
      PassManagerBuilder::ExtensionFn Fn) {
    GlobalExtensions->push_back(std::make_pair(Ty, Fn));
  }
  #endif // HLSL Change Ends
  128. void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) {
  129. Extensions.push_back(std::make_pair(Ty, Fn));
  130. }
  131. void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
  132. legacy::PassManagerBase &PM) const {
  133. #if 0 // HLSL Change Starts - no global extensions
  134. for (unsigned i = 0, e = GlobalExtensions->size(); i != e; ++i)
  135. if ((*GlobalExtensions)[i].first == ETy)
  136. (*GlobalExtensions)[i].second(*this, PM);
  137. for (unsigned i = 0, e = Extensions.size(); i != e; ++i)
  138. if (Extensions[i].first == ETy)
  139. Extensions[i].second(*this, PM);
  140. #endif // HLSL Change Ends
  141. }
  142. void PassManagerBuilder::addInitialAliasAnalysisPasses(
  143. legacy::PassManagerBase &PM) const {
  144. // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
  145. // BasicAliasAnalysis wins if they disagree. This is intended to help
  146. // support "obvious" type-punning idioms.
  147. if (UseCFLAA)
  148. PM.add(createCFLAliasAnalysisPass());
  149. PM.add(createTypeBasedAliasAnalysisPass());
  150. PM.add(createScopedNoAliasAAPass());
  151. PM.add(createBasicAliasAnalysisPass());
  152. }
  153. void PassManagerBuilder::populateFunctionPassManager(
  154. legacy::FunctionPassManager &FPM) {
  155. addExtensionsToPM(EP_EarlyAsPossible, FPM);
  156. // Add LibraryInfo if we have some.
  157. if (LibraryInfo)
  158. FPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
  159. if (OptLevel == 0) return;
  160. addInitialAliasAnalysisPasses(FPM);
  161. FPM.add(createCFGSimplificationPass());
  162. // HLSL Change - don't run SROA.
  163. // HLSL uses special SROA added in addHLSLPasses.
  164. if (HLSLHighLevel) // HLSL Change
  165. if (UseNewSROA)
  166. FPM.add(createSROAPass());
  167. else
  168. FPM.add(createScalarReplAggregatesPass());
  169. // HLSL Change. FPM.add(createEarlyCSEPass());
  170. FPM.add(createLowerExpectIntrinsicPass());
  171. }
  172. // HLSL Change Starts
  173. static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, hlsl::HLSLExtensionsCodegenHelper *ExtHelper, legacy::PassManagerBase &MPM) {
  174. // Don't do any lowering if we're targeting high-level.
  175. if (HLSLHighLevel) {
  176. MPM.add(createHLEmitMetadataPass());
  177. return;
  178. }
  179. MPM.add(createHLPreprocessPass());
  180. bool NoOpt = OptLevel == 0;
  181. if (!NoOpt) {
  182. MPM.add(createHLDeadFunctionEliminationPass());
  183. }
  184. // Split struct and array of parameter.
  185. MPM.add(createSROA_Parameter_HLSL());
  186. // Split struct.
  187. MPM.add(createScalarReplAggregatesHLSLPass(/*UseDomTree*/ true,
  188. /*Promote*/ !NoOpt));
  189. MPM.add(createHLMatrixLowerPass());
  190. MPM.add(createResourceToHandlePass());
  191. // DCE should after SROA to remove unused element.
  192. MPM.add(createDeadCodeEliminationPass());
  193. MPM.add(createGlobalDCEPass());
  194. if (NoOpt) {
  195. // If not run mem2reg, try to promote allocas used by EvalOperations.
  196. // Do this before change vector to array.
  197. MPM.add(createDxilLegalizeEvalOperationsPass());
  198. }
  199. // Change dynamic indexing vector to array.
  200. MPM.add(createDynamicIndexingVectorToArrayPass(NoOpt));
  201. if (!NoOpt) {
  202. MPM.add(createLowerStaticGlobalIntoAlloca());
  203. // mem2reg
  204. MPM.add(createPromoteMemoryToRegisterPass());
  205. }
  206. if (OptLevel > 2) {
  207. MPM.add(createLoopRotatePass());
  208. MPM.add(createLoopUnrollPass());
  209. }
  210. MPM.add(createSimplifyInstPass());
  211. MPM.add(createCFGSimplificationPass());
  212. MPM.add(createDxilLegalizeResourceUsePass());
  213. MPM.add(createDxilLegalizeStaticResourceUsePass());
  214. MPM.add(createDxilGenerationPass(NoOpt, ExtHelper));
  215. MPM.add(createDxilLoadMetadataPass()); // Ensure DxilModule is loaded for optimizations.
  216. MPM.add(createSimplifyInstPass());
  217. // Propagate precise attribute.
  218. MPM.add(createDxilPrecisePropagatePass());
  219. // scalarize vector to scalar
  220. MPM.add(createScalarizerPass());
  221. MPM.add(createSimplifyInstPass());
  222. MPM.add(createCFGSimplificationPass());
  223. MPM.add(createDeadCodeEliminationPass());
  224. MPM.add(createDxilTranslateRawBuffer());
  225. }
  226. // HLSL Change Ends
  227. void PassManagerBuilder::populateModulePassManager(
  228. legacy::PassManagerBase &MPM) {
  229. // If all optimizations are disabled, just run the always-inline pass and,
  230. // if enabled, the function merging pass.
  231. if (OptLevel == 0) {
  232. if (!HLSLHighLevel) {
  233. MPM.add(createHLEnsureMetadataPass()); // HLSL Change - rehydrate metadata from high-level codegen
  234. }
  235. if (Inliner) {
  236. MPM.add(Inliner);
  237. Inliner = nullptr;
  238. }
  239. // FIXME: The BarrierNoopPass is a HACK! The inliner pass above implicitly
  240. // creates a CGSCC pass manager, but we don't want to add extensions into
  241. // that pass manager. To prevent this we insert a no-op module pass to reset
  242. // the pass manager to get the same behavior as EP_OptimizerLast in non-O0
  243. // builds. The function merging pass is
  244. if (MergeFunctions)
  245. MPM.add(createMergeFunctionsPass());
  246. else if (!Extensions.empty()) // HLSL Change - GlobalExtensions not considered
  247. MPM.add(createBarrierNoopPass());
  248. addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
  249. // HLSL Change Begins.
  250. addHLSLPasses(HLSLHighLevel, OptLevel, HLSLExtensionsCodeGen, MPM);
  251. if (!HLSLHighLevel) {
  252. MPM.add(createMultiDimArrayToOneDimArrayPass());
  253. MPM.add(createDxilCondenseResourcesPass());
  254. MPM.add(createDxilLegalizeSampleOffsetPass());
  255. MPM.add(createDxilFinalizeModulePass());
  256. MPM.add(createComputeViewIdStatePass());
  257. MPM.add(createDxilDeadFunctionEliminationPass());
  258. MPM.add(createNoPausePassesPass());
  259. MPM.add(createDxilEmitMetadataPass());
  260. }
  261. // HLSL Change Ends.
  262. return;
  263. }
  264. if (!HLSLHighLevel) {
  265. MPM.add(createHLEnsureMetadataPass()); // HLSL Change - rehydrate metadata from high-level codegen
  266. }
  267. // HLSL Change Begins
  268. MPM.add(createAlwaysInlinerPass(/*InsertLifeTime*/false));
  269. if (Inliner) {
  270. delete Inliner;
  271. Inliner = nullptr;
  272. }
  273. addHLSLPasses(HLSLHighLevel, OptLevel, HLSLExtensionsCodeGen, MPM); // HLSL Change
  274. // HLSL Change Ends
  275. // Add LibraryInfo if we have some.
  276. if (LibraryInfo)
  277. MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
  278. addInitialAliasAnalysisPasses(MPM);
  279. if (!DisableUnitAtATime) {
  280. addExtensionsToPM(EP_ModuleOptimizerEarly, MPM);
  281. MPM.add(createIPSCCPPass()); // IP SCCP
  282. MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
  283. MPM.add(createDeadArgEliminationPass()); // Dead argument elimination
  284. MPM.add(createInstructionCombiningPass());// Clean up after IPCP & DAE
  285. addExtensionsToPM(EP_Peephole, MPM);
  286. MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
  287. }
  288. // Start of CallGraph SCC passes.
  289. if (!DisableUnitAtATime)
  290. MPM.add(createPruneEHPass()); // Remove dead EH info
  291. if (Inliner) {
  292. MPM.add(Inliner);
  293. Inliner = nullptr;
  294. }
  295. if (!DisableUnitAtATime)
  296. MPM.add(createFunctionAttrsPass()); // Set readonly/readnone attrs
  297. if (OptLevel > 2)
  298. MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args
  299. // Start of function pass.
  300. // Break up aggregate allocas, using SSAUpdater.
  301. // HLSL Change - don't run SROA.
  302. // HLSL uses special SROA added in addHLSLPasses.
  303. if (HLSLHighLevel) // HLSL Change
  304. if (UseNewSROA)
  305. MPM.add(createSROAPass(/*RequiresDomTree*/ false));
  306. else
  307. MPM.add(createScalarReplAggregatesPass(-1, false));
  308. // HLSL Change. MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
  309. // HLSL Change. MPM.add(createJumpThreadingPass()); // Thread jumps.
  310. MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
  311. MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
  312. MPM.add(createInstructionCombiningPass()); // Combine silly seq's
  313. addExtensionsToPM(EP_Peephole, MPM);
  314. // HLSL Change Begins.
  315. // HLSL does not allow recursize functions.
  316. //MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
  317. // HLSL Change Ends.
  318. MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
  319. MPM.add(createReassociatePass()); // Reassociate expressions
  320. // Rotate Loop - disable header duplication at -Oz
  321. MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
  322. MPM.add(createLICMPass()); // Hoist loop invariants
  323. //MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3)); // HLSL Change - may move barrier inside divergent if.
  324. MPM.add(createInstructionCombiningPass());
  325. MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
  326. MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
  327. MPM.add(createLoopDeletionPass()); // Delete dead loops
  328. if (EnableLoopInterchange) {
  329. MPM.add(createLoopInterchangePass()); // Interchange loops
  330. MPM.add(createCFGSimplificationPass());
  331. }
  332. if (!DisableUnrollLoops)
  333. MPM.add(createSimpleLoopUnrollPass()); // Unroll small loops
  334. addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
  335. if (OptLevel > 1) {
  336. if (EnableMLSM)
  337. MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
  338. MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
  339. }
  340. // HLSL Change Begins.
  341. // HLSL don't allow memcpy and memset.
  342. //MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset
  343. // HLSL Change Ends.
  344. MPM.add(createSCCPPass()); // Constant prop with SCCP
  345. // Delete dead bit computations (instcombine runs after to fold away the dead
  346. // computations, and then ADCE will run later to exploit any new DCE
  347. // opportunities that creates).
  348. MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations
  349. // Run instcombine after redundancy elimination to exploit opportunities
  350. // opened up by them.
  351. MPM.add(createInstructionCombiningPass());
  352. addExtensionsToPM(EP_Peephole, MPM);
  353. // HLSL Change. MPM.add(createJumpThreadingPass()); // Thread jumps
  354. MPM.add(createCorrelatedValuePropagationPass());
  355. MPM.add(createDeadStoreEliminationPass()); // Delete dead stores
  356. MPM.add(createLICMPass());
  357. addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
  358. if (RerollLoops)
  359. MPM.add(createLoopRerollPass());
  360. #if HLSL_VECTORIZATION_ENABLED // HLSL Change - don't build vectorization passes
  361. if (!RunSLPAfterLoopVectorization) {
  362. if (SLPVectorize)
  363. MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
  364. if (BBVectorize) {
  365. MPM.add(createBBVectorizePass());
  366. MPM.add(createInstructionCombiningPass());
  367. addExtensionsToPM(EP_Peephole, MPM);
  368. if (OptLevel > 1 && UseGVNAfterVectorization)
  369. MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
  370. else
  371. MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
  372. // BBVectorize may have significantly shortened a loop body; unroll again.
  373. if (!DisableUnrollLoops)
  374. MPM.add(createLoopUnrollPass());
  375. }
  376. }
  377. #endif
  378. if (LoadCombine)
  379. MPM.add(createLoadCombinePass());
  380. MPM.add(createHoistConstantArrayPass()); // HLSL change
  381. MPM.add(createAggressiveDCEPass()); // Delete dead instructions
  382. MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
  383. MPM.add(createInstructionCombiningPass()); // Clean up after everything.
  384. addExtensionsToPM(EP_Peephole, MPM);
  385. // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
  386. // pass manager that we are specifically trying to avoid. To prevent this
  387. // we must insert a no-op module pass to reset the pass manager.
  388. MPM.add(createBarrierNoopPass());
  389. if (RunFloat2Int)
  390. MPM.add(createFloat2IntPass());
  391. // Re-rotate loops in all our loop nests. These may have fallout out of
  392. // rotated form due to GVN or other transformations, and the vectorizer relies
  393. // on the rotated form. Disable header duplication at -Oz.
  394. MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
  395. // Distribute loops to allow partial vectorization. I.e. isolate dependences
  396. // into separate loop that would otherwise inhibit vectorization.
  397. if (EnableLoopDistribute)
  398. MPM.add(createLoopDistributePass());
  399. #if HLSL_VECTORIZATION_ENABLED // HLSL Change - don't build vectorization passes
  400. MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize));
  401. #endif
  402. // FIXME: Because of #pragma vectorize enable, the passes below are always
  403. // inserted in the pipeline, even when the vectorizer doesn't run (ex. when
  404. // on -O1 and no #pragma is found). Would be good to have these two passes
  405. // as function calls, so that we can only pass them when the vectorizer
  406. // changed the code.
  407. MPM.add(createInstructionCombiningPass());
  408. #if HLSL_VECTORIZATION_ENABLED // HLSL Change - don't build vectorization passes
  409. if (OptLevel > 1 && ExtraVectorizerPasses) {
  410. // At higher optimization levels, try to clean up any runtime overlap and
  411. // alignment checks inserted by the vectorizer. We want to track correllated
  412. // runtime checks for two inner loops in the same outer loop, fold any
  413. // common computations, hoist loop-invariant aspects out of any outer loop,
  414. // and unswitch the runtime checks if possible. Once hoisted, we may have
  415. // dead (or speculatable) control flows or more combining opportunities.
  416. MPM.add(createEarlyCSEPass());
  417. MPM.add(createCorrelatedValuePropagationPass());
  418. MPM.add(createInstructionCombiningPass());
  419. MPM.add(createLICMPass());
  420. MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
  421. MPM.add(createCFGSimplificationPass());
  422. MPM.add(createInstructionCombiningPass());
  423. }
  424. if (RunSLPAfterLoopVectorization) {
  425. if (SLPVectorize) {
  426. MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
  427. if (OptLevel > 1 && ExtraVectorizerPasses) {
  428. MPM.add(createEarlyCSEPass());
  429. }
  430. }
  431. if (BBVectorize) {
  432. MPM.add(createBBVectorizePass());
  433. MPM.add(createInstructionCombiningPass());
  434. addExtensionsToPM(EP_Peephole, MPM);
  435. if (OptLevel > 1 && UseGVNAfterVectorization)
  436. MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
  437. else
  438. MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
  439. // BBVectorize may have significantly shortened a loop body; unroll again.
  440. if (!DisableUnrollLoops)
  441. MPM.add(createLoopUnrollPass());
  442. }
  443. }
  444. #endif // HLSL Change - don't build vectorization passes
  445. addExtensionsToPM(EP_Peephole, MPM);
  446. MPM.add(createCFGSimplificationPass());
  447. MPM.add(createInstructionCombiningPass());
  448. if (!DisableUnrollLoops) {
  449. MPM.add(createLoopUnrollPass()); // Unroll small loops
  450. // LoopUnroll may generate some redundency to cleanup.
  451. MPM.add(createInstructionCombiningPass());
  452. // Runtime unrolling will introduce runtime check in loop prologue. If the
  453. // unrolled loop is a inner loop, then the prologue will be inside the
  454. // outer loop. LICM pass can help to promote the runtime check out if the
  455. // checked value is loop invariant.
  456. MPM.add(createLICMPass());
  457. }
  458. // After vectorization and unrolling, assume intrinsics may tell us more
  459. // about pointer alignments.
  460. MPM.add(createAlignmentFromAssumptionsPass());
  461. if (!DisableUnitAtATime) {
  462. // FIXME: We shouldn't bother with this anymore.
  463. MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes
  464. // GlobalOpt already deletes dead functions and globals, at -O2 try a
  465. // late pass of GlobalDCE. It is capable of deleting dead cycles.
  466. if (OptLevel > 1) {
  467. if (!PrepareForLTO) {
  468. // Remove avail extern fns and globals definitions if we aren't
  469. // compiling an object file for later LTO. For LTO we want to preserve
  470. // these so they are eligible for inlining at link-time. Note if they
  471. // are unreferenced they will be removed by GlobalDCE below, so
  472. // this only impacts referenced available externally globals.
  473. // Eventually they will be suppressed during codegen, but eliminating
  474. // here enables more opportunity for GlobalDCE as it may make
  475. // globals referenced by available external functions dead.
  476. MPM.add(createEliminateAvailableExternallyPass());
  477. }
  478. MPM.add(createGlobalDCEPass()); // Remove dead fns and globals.
  479. MPM.add(createConstantMergePass()); // Merge dup global constants
  480. }
  481. }
  482. if (MergeFunctions)
  483. MPM.add(createMergeFunctionsPass());
  484. // HLSL Change Begins.
  485. if (!HLSLHighLevel) {
  486. MPM.add(createMultiDimArrayToOneDimArrayPass());
  487. MPM.add(createDxilCondenseResourcesPass());
  488. MPM.add(createDeadCodeEliminationPass());
  489. if (DisableUnrollLoops)
  490. MPM.add(createDxilLegalizeSampleOffsetPass());
  491. MPM.add(createDxilFinalizeModulePass());
  492. MPM.add(createComputeViewIdStatePass());
  493. MPM.add(createDxilDeadFunctionEliminationPass());
  494. MPM.add(createNoPausePassesPass());
  495. MPM.add(createDxilEmitMetadataPass());
  496. }
  497. // HLSL Change Ends.
  498. addExtensionsToPM(EP_OptimizerLast, MPM);
  499. }
  500. void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
  501. // Provide AliasAnalysis services for optimizations.
  502. addInitialAliasAnalysisPasses(PM);
  503. // Propagate constants at call sites into the functions they call. This
  504. // opens opportunities for globalopt (and inlining) by substituting function
  505. // pointers passed as arguments to direct uses of functions.
  506. PM.add(createIPSCCPPass());
  507. // Now that we internalized some globals, see if we can hack on them!
  508. PM.add(createGlobalOptimizerPass());
  509. // Linking modules together can lead to duplicated global constants, only
  510. // keep one copy of each constant.
  511. PM.add(createConstantMergePass());
  512. // Remove unused arguments from functions.
  513. PM.add(createDeadArgEliminationPass());
  514. // Reduce the code after globalopt and ipsccp. Both can open up significant
  515. // simplification opportunities, and both can propagate functions through
  516. // function pointers. When this happens, we often have to resolve varargs
  517. // calls, etc, so let instcombine do this.
  518. PM.add(createInstructionCombiningPass());
  519. addExtensionsToPM(EP_Peephole, PM);
  520. // Inline small functions
  521. bool RunInliner = Inliner;
  522. if (RunInliner) {
  523. PM.add(Inliner);
  524. Inliner = nullptr;
  525. }
  526. PM.add(createPruneEHPass()); // Remove dead EH info.
  527. // Optimize globals again if we ran the inliner.
  528. if (RunInliner)
  529. PM.add(createGlobalOptimizerPass());
  530. PM.add(createGlobalDCEPass()); // Remove dead functions.
  531. // If we didn't decide to inline a function, check to see if we can
  532. // transform it to pass arguments by value instead of by reference.
  533. PM.add(createArgumentPromotionPass());
  534. // The IPO passes may leave cruft around. Clean up after them.
  535. PM.add(createInstructionCombiningPass());
  536. addExtensionsToPM(EP_Peephole, PM);
  537. // HLSL Change. PM.add(createJumpThreadingPass());
  538. // Break up allocas
  539. if (UseNewSROA)
  540. PM.add(createSROAPass());
  541. else
  542. PM.add(createScalarReplAggregatesPass());
  543. // Run a few AA driven optimizations here and now, to cleanup the code.
  544. PM.add(createFunctionAttrsPass()); // Add nocapture.
  545. PM.add(createGlobalsModRefPass()); // IP alias analysis.
  546. PM.add(createLICMPass()); // Hoist loop invariants.
  547. if (EnableMLSM)
  548. PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds.
  549. PM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
  550. PM.add(createMemCpyOptPass()); // Remove dead memcpys.
  551. // Nuke dead stores.
  552. PM.add(createDeadStoreEliminationPass());
  553. // More loops are countable; try to optimize them.
  554. PM.add(createIndVarSimplifyPass());
  555. PM.add(createLoopDeletionPass());
  556. if (EnableLoopInterchange)
  557. PM.add(createLoopInterchangePass());
  558. #if HLSL_VECTORIZATION_ENABLED // HLSL Change - don't build vectorization passes
  559. PM.add(createLoopVectorizePass(true, LoopVectorize));
  560. // More scalar chains could be vectorized due to more alias information
  561. if (RunSLPAfterLoopVectorization)
  562. if (SLPVectorize)
  563. PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
  564. // After vectorization, assume intrinsics may tell us more about pointer
  565. // alignments.
  566. PM.add(createAlignmentFromAssumptionsPass());
  567. #endif
  568. if (LoadCombine)
  569. PM.add(createLoadCombinePass());
  570. // Cleanup and simplify the code after the scalar optimizations.
  571. PM.add(createInstructionCombiningPass());
  572. addExtensionsToPM(EP_Peephole, PM);
  573. // HLSL Change. PM.add(createJumpThreadingPass());
  574. }
  575. void PassManagerBuilder::addLateLTOOptimizationPasses(
  576. legacy::PassManagerBase &PM) {
  577. // Delete basic blocks, which optimization passes may have killed.
  578. PM.add(createCFGSimplificationPass());
  579. // Now that we have optimized the program, discard unreachable functions.
  580. PM.add(createGlobalDCEPass());
  581. // FIXME: this is profitable (for compiler time) to do at -O0 too, but
  582. // currently it damages debug info.
  583. if (MergeFunctions)
  584. PM.add(createMergeFunctionsPass());
  585. }
  586. void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
  587. if (LibraryInfo)
  588. PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
  589. if (VerifyInput)
  590. PM.add(createVerifierPass());
  591. if (OptLevel > 1)
  592. addLTOOptimizationPasses(PM);
  593. // Lower bit sets to globals. This pass supports Clang's control flow
  594. // integrity mechanisms (-fsanitize=cfi*) and needs to run at link time if CFI
  595. // is enabled. The pass does nothing if CFI is disabled.
  596. PM.add(createLowerBitSetsPass());
  597. if (OptLevel != 0)
  598. addLateLTOOptimizationPasses(PM);
  599. if (VerifyOutput)
  600. PM.add(createVerifierPass());
  601. }
  602. inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) {
  603. return reinterpret_cast<PassManagerBuilder*>(P);
  604. }
  605. inline LLVMPassManagerBuilderRef wrap(PassManagerBuilder *P) {
  606. return reinterpret_cast<LLVMPassManagerBuilderRef>(P);
  607. }
  608. LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() {
  609. PassManagerBuilder *PMB = new PassManagerBuilder();
  610. return wrap(PMB);
  611. }
  612. void LLVMPassManagerBuilderDispose(LLVMPassManagerBuilderRef PMB) {
  613. PassManagerBuilder *Builder = unwrap(PMB);
  614. delete Builder;
  615. }
  616. void
  617. LLVMPassManagerBuilderSetOptLevel(LLVMPassManagerBuilderRef PMB,
  618. unsigned OptLevel) {
  619. PassManagerBuilder *Builder = unwrap(PMB);
  620. Builder->OptLevel = OptLevel;
  621. }
  622. void
  623. LLVMPassManagerBuilderSetSizeLevel(LLVMPassManagerBuilderRef PMB,
  624. unsigned SizeLevel) {
  625. PassManagerBuilder *Builder = unwrap(PMB);
  626. Builder->SizeLevel = SizeLevel;
  627. }
  628. void
  629. LLVMPassManagerBuilderSetDisableUnitAtATime(LLVMPassManagerBuilderRef PMB,
  630. LLVMBool Value) {
  631. PassManagerBuilder *Builder = unwrap(PMB);
  632. Builder->DisableUnitAtATime = Value;
  633. }
  634. void
  635. LLVMPassManagerBuilderSetDisableUnrollLoops(LLVMPassManagerBuilderRef PMB,
  636. LLVMBool Value) {
  637. PassManagerBuilder *Builder = unwrap(PMB);
  638. Builder->DisableUnrollLoops = Value;
  639. }
  640. void
  641. LLVMPassManagerBuilderSetDisableSimplifyLibCalls(LLVMPassManagerBuilderRef PMB,
  642. LLVMBool Value) {
  643. // NOTE: The simplify-libcalls pass has been removed.
  644. }
  645. void
  646. LLVMPassManagerBuilderUseInlinerWithThreshold(LLVMPassManagerBuilderRef PMB,
  647. unsigned Threshold) {
  648. PassManagerBuilder *Builder = unwrap(PMB);
  649. Builder->Inliner = createFunctionInliningPass(Threshold);
  650. }
  651. void
  652. LLVMPassManagerBuilderPopulateFunctionPassManager(LLVMPassManagerBuilderRef PMB,
  653. LLVMPassManagerRef PM) {
  654. PassManagerBuilder *Builder = unwrap(PMB);
  655. legacy::FunctionPassManager *FPM = unwrap<legacy::FunctionPassManager>(PM);
  656. Builder->populateFunctionPassManager(*FPM);
  657. }
  658. void
  659. LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB,
  660. LLVMPassManagerRef PM) {
  661. PassManagerBuilder *Builder = unwrap(PMB);
  662. legacy::PassManagerBase *MPM = unwrap(PM);
  663. Builder->populateModulePassManager(*MPM);
  664. }
  665. void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
  666. LLVMPassManagerRef PM,
  667. LLVMBool Internalize,
  668. LLVMBool RunInliner) {
  669. PassManagerBuilder *Builder = unwrap(PMB);
  670. legacy::PassManagerBase *LPM = unwrap(PM);
  671. // A small backwards compatibility hack. populateLTOPassManager used to take
  672. // an RunInliner option.
  673. if (RunInliner && !Builder->Inliner)
  674. Builder->Inliner = createFunctionInliningPass();
  675. Builder->populateLTOPassManager(*LPM);
  676. }