PassManagerBuilder.cpp 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920
  //===- PassManagerBuilder.cpp - Build Standard Pass -----------------------===//
  //
  // The LLVM Compiler Infrastructure
  //
  // This file is distributed under the University of Illinois Open Source
  // License. See LICENSE.TXT for details.
  //
  //===----------------------------------------------------------------------===//
  //
  // This file defines the PassManagerBuilder class, which is used to set up a
  // "standard" optimization sequence suitable for languages like C and C++.
  //
  //===----------------------------------------------------------------------===//
  14. #include "llvm/Transforms/IPO/PassManagerBuilder.h"
  15. #include "llvm-c/Transforms/PassManagerBuilder.h"
  16. #include "llvm/ADT/SmallVector.h"
  17. #include "llvm/Analysis/Passes.h"
  18. #include "llvm/IR/DataLayout.h"
  19. #include "llvm/IR/Verifier.h"
  20. #include "llvm/IR/LegacyPassManager.h"
  21. #include "llvm/Support/CommandLine.h"
  22. #include "llvm/Support/ManagedStatic.h"
  23. #include "llvm/Analysis/TargetLibraryInfo.h"
  24. #include "llvm/Target/TargetMachine.h"
  25. #include "llvm/Transforms/IPO.h"
  26. #include "llvm/Transforms/Scalar.h"
  27. #include "llvm/Transforms/Vectorize.h"
  28. #include "dxc/HLSL/DxilGenerationPass.h" // HLSL Change
  29. #include "dxc/HLSL/HLMatrixLowerPass.h" // HLSL Change
  30. #include "dxc/HLSL/ComputeViewIdState.h" // HLSL Change
  31. #include "llvm/Analysis/DxilValueCache.h" // HLSL Change
  32. using namespace llvm;
  #if HLSL_VECTORIZATION_ENABLED // HLSL Change - don't build vectorization passes

  // Hidden command-line switches that tune optional parts of the standard
  // pipeline. They are only declared when the vectorization passes are built;
  // otherwise the fixed constants in the #else branch below are used.

  static cl::opt<bool> RunLoopVectorization(
      "vectorize-loops", cl::Hidden,
      cl::desc("Run the Loop vectorization passes"));

  static cl::opt<bool> RunSLPVectorization(
      "vectorize-slp", cl::Hidden,
      cl::desc("Run the SLP vectorization passes"));

  static cl::opt<bool> RunBBVectorization(
      "vectorize-slp-aggressive", cl::Hidden,
      cl::desc("Run the BB vectorization passes"));

  static cl::opt<bool> UseGVNAfterVectorization(
      "use-gvn-after-vectorization", cl::init(false), cl::Hidden,
      cl::desc("Run GVN instead of Early CSE after vectorization passes"));

  static cl::opt<bool> ExtraVectorizerPasses(
      "extra-vectorizer-passes", cl::init(false), cl::Hidden,
      cl::desc("Run cleanup optimization passes after vectorization."));

  static cl::opt<bool> UseNewSROA(
      "use-new-sroa", cl::init(true), cl::Hidden,
      cl::desc("Enable the new, experimental SROA pass"));

  static cl::opt<bool> RunLoopRerolling(
      "reroll-loops", cl::Hidden,
      cl::desc("Run the loop rerolling pass"));

  static cl::opt<bool> RunFloat2Int(
      "float-to-int", cl::Hidden, cl::init(true),
      cl::desc("Run the float2int (float demotion) pass"));

  static cl::opt<bool> RunLoadCombine(
      "combine-loads", cl::init(false), cl::Hidden,
      cl::desc("Run the load combining pass"));

  static cl::opt<bool> RunSLPAfterLoopVectorization(
      "run-slp-after-loop-vectorization", cl::init(true), cl::Hidden,
      cl::desc("Run the SLP vectorizer (and BB vectorizer) after the Loop "
               "vectorizer instead of before"));

  static cl::opt<bool> UseCFLAA(
      "use-cfl-aa", cl::init(false), cl::Hidden,
      cl::desc("Enable the new, experimental CFL alias analysis"));

  static cl::opt<bool> EnableMLSM(
      "mlsm", cl::init(true), cl::Hidden,
      cl::desc("Enable motion of merged load and store"));

  static cl::opt<bool> EnableLoopInterchange(
      "enable-loopinterchange", cl::init(false), cl::Hidden,
      cl::desc("Enable the new, experimental LoopInterchange Pass"));

  static cl::opt<bool> EnableLoopDistribute(
      "enable-loop-distribute", cl::init(false), cl::Hidden,
      cl::desc("Enable the new, experimental LoopDistribution Pass"));

  #else

  // Without the vectorization passes no command-line options are registered.
  // The switches the pipeline still consults are pinned to constants, and the
  // vectorizer-only switches are not declared at all.
  static const bool UseNewSROA = true;
  static const bool RunLoopRerolling = false;
  static const bool RunFloat2Int = true;
  static const bool RunLoadCombine = false;
  // This nested guard repeats the condition whose #else branch we are in, so
  // the declaration below is never compiled.
  #if HLSL_VECTORIZATION_ENABLED // HLSL Change - don't build vectorization passes
  static const bool RunSLPAfterLoopVectorization = true;
  #endif // HLSL Change
  static const bool UseCFLAA = false;
  static const bool EnableMLSM = true;
  static const bool EnableLoopInterchange = false;
  static const bool EnableLoopDistribute = false;

  #endif // HLSL Change - don't build vectorization passes
  93. PassManagerBuilder::PassManagerBuilder() {
  94. OptLevel = 2;
  95. SizeLevel = 0;
  96. LibraryInfo = nullptr;
  97. Inliner = nullptr;
  98. DisableUnitAtATime = false;
  99. DisableUnrollLoops = false;
  100. #if HLSL_VECTORIZATION_ENABLED // HLSL Change - don't build vectorization passes
  101. BBVectorize = RunBBVectorization;
  102. SLPVectorize = RunSLPVectorization;
  103. LoopVectorize = RunLoopVectorization;
  104. #else
  105. BBVectorize = SLPVectorize = LoopVectorize = false;
  106. #endif
  107. RerollLoops = RunLoopRerolling;
  108. LoadCombine = RunLoadCombine;
  109. DisableGVNLoadPRE = false;
  110. VerifyInput = false;
  111. VerifyOutput = false;
  112. MergeFunctions = false;
  113. PrepareForLTO = false;
  114. }
  115. PassManagerBuilder::~PassManagerBuilder() {
  116. delete LibraryInfo;
  117. delete Inliner;
  118. }
  #if 0 // HLSL Change Starts - no global extensions
  // Process-wide extension registry. It is compiled out for HLSL, together
  // with the function that registers into it.

  /// Set of global extensions, automatically added as part of the standard set.
  static ManagedStatic<SmallVector<std::pair<PassManagerBuilder::ExtensionPointTy,
                                             PassManagerBuilder::ExtensionFn>,
                                   8>>
      GlobalExtensions;

  /// Registers \p Fn to run at extension point \p Ty for every builder.
  void PassManagerBuilder::addGlobalExtension(
      PassManagerBuilder::ExtensionPointTy Ty,
      PassManagerBuilder::ExtensionFn Fn) {
    GlobalExtensions->push_back(std::make_pair(Ty, Fn));
  }
  #endif // HLSL Change Ends
  131. void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) {
  132. Extensions.push_back(std::make_pair(Ty, Fn));
  133. }
  134. void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
  135. legacy::PassManagerBase &PM) const {
  136. #if 0 // HLSL Change Starts - no global extensions
  137. for (unsigned i = 0, e = GlobalExtensions->size(); i != e; ++i)
  138. if ((*GlobalExtensions)[i].first == ETy)
  139. (*GlobalExtensions)[i].second(*this, PM);
  140. for (unsigned i = 0, e = Extensions.size(); i != e; ++i)
  141. if (Extensions[i].first == ETy)
  142. Extensions[i].second(*this, PM);
  143. #endif // HLSL Change Ends
  144. }
  145. void PassManagerBuilder::addInitialAliasAnalysisPasses(
  146. legacy::PassManagerBase &PM) const {
  147. // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
  148. // BasicAliasAnalysis wins if they disagree. This is intended to help
  149. // support "obvious" type-punning idioms.
  150. if (UseCFLAA)
  151. PM.add(createCFLAliasAnalysisPass());
  152. PM.add(createTypeBasedAliasAnalysisPass());
  153. PM.add(createScopedNoAliasAAPass());
  154. PM.add(createBasicAliasAnalysisPass());
  155. }
  156. void PassManagerBuilder::populateFunctionPassManager(
  157. legacy::FunctionPassManager &FPM) {
  158. addExtensionsToPM(EP_EarlyAsPossible, FPM);
  159. // Add LibraryInfo if we have some.
  160. if (LibraryInfo)
  161. FPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
  162. if (OptLevel == 0) return;
  163. addInitialAliasAnalysisPasses(FPM);
  164. FPM.add(createCFGSimplificationPass());
  165. // HLSL Change - don't run SROA.
  166. // HLSL uses special SROA added in addHLSLPasses.
  167. if (HLSLHighLevel) { // HLSL Change
  168. if (UseNewSROA)
  169. FPM.add(createSROAPass());
  170. else
  171. FPM.add(createScalarReplAggregatesPass());
  172. }
  173. // HLSL Change. FPM.add(createEarlyCSEPass());
  174. FPM.add(createLowerExpectIntrinsicPass());
  175. }
  176. // HLSL Change Starts
  177. static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, bool OnlyWarnOnUnrollFail, bool StructurizeLoopExitsForUnroll, bool EnableLifetimeMarkers, hlsl::HLSLExtensionsCodegenHelper *ExtHelper, legacy::PassManagerBase &MPM) {
  178. // Don't do any lowering if we're targeting high-level.
  179. if (HLSLHighLevel) {
  180. MPM.add(createHLEmitMetadataPass());
  181. return;
  182. }
  183. MPM.add(createDxilCleanupAddrSpaceCastPass());
  184. MPM.add(createHLPreprocessPass());
  185. bool NoOpt = OptLevel == 0;
  186. if (!NoOpt) {
  187. MPM.add(createHLDeadFunctionEliminationPass());
  188. }
  189. // Expand buffer store intrinsics before we SROA
  190. MPM.add(createHLExpandStoreIntrinsicsPass());
  191. // Split struct and array of parameter.
  192. MPM.add(createSROA_Parameter_HLSL());
  193. MPM.add(createHLMatrixLowerPass());
  194. // DCE should after SROA to remove unused element.
  195. MPM.add(createDeadCodeEliminationPass());
  196. MPM.add(createGlobalDCEPass());
  197. if (NoOpt) {
  198. // If not run mem2reg, try to promote allocas used by EvalOperations.
  199. // Do this before change vector to array.
  200. MPM.add(createDxilLegalizeEvalOperationsPass());
  201. }
  202. // This should go between matrix lower and dynamic indexing vector to array,
  203. // because matrix lower may create dynamically indexed global vectors,
  204. // which should become locals. If they are turned into arrays first,
  205. // this pass will ignore them as it only works on scalars and vectors.
  206. MPM.add(createLowerStaticGlobalIntoAlloca());
  207. // Change dynamic indexing vector to array.
  208. MPM.add(createDynamicIndexingVectorToArrayPass(false /* ReplaceAllVector */));
  209. // Rotate the loops before, mem2reg, since it messes up dbg.value's
  210. MPM.add(createLoopRotatePass());
  211. // mem2reg
  212. // Special Mem2Reg pass that skips precise marker.
  213. MPM.add(createDxilConditionalMem2RegPass(NoOpt));
  214. // Clean up inefficiencies that can cause unnecessary live values related to
  215. // lifetime marker cleanup blocks. This is the earliest possible location
  216. // without interfering with HLSL-specific lowering.
  217. if (!NoOpt && EnableLifetimeMarkers) {
  218. MPM.add(createSROAPass());
  219. MPM.add(createJumpThreadingPass());
  220. }
  221. // Remove unneeded dxbreak conditionals
  222. MPM.add(createCleanupDxBreakPass());
  223. if (!NoOpt) {
  224. MPM.add(createDxilConvergentMarkPass());
  225. }
  226. if (!NoOpt)
  227. MPM.add(createSimplifyInstPass());
  228. if (!NoOpt)
  229. MPM.add(createCFGSimplificationPass());
  230. MPM.add(createDxilPromoteLocalResources());
  231. MPM.add(createDxilPromoteStaticResources());
  232. // Verify no undef resource again after promotion
  233. MPM.add(createInvalidateUndefResourcesPass());
  234. MPM.add(createDxilGenerationPass(NoOpt, ExtHelper));
  235. // Propagate precise attribute.
  236. MPM.add(createDxilPrecisePropagatePass());
  237. if (!NoOpt)
  238. MPM.add(createSimplifyInstPass());
  239. // scalarize vector to scalar
  240. MPM.add(createScalarizerPass(!NoOpt /* AllowFolding */));
  241. // Remove vector instructions
  242. MPM.add(createDxilEliminateVectorPass());
  243. // Passes to handle [unroll]
  244. // Needs to happen after SROA since loop count may depend on
  245. // struct members.
  246. // Needs to happen before resources are lowered and before HL
  247. // module is gone.
  248. MPM.add(createDxilLoopUnrollPass(1024, OnlyWarnOnUnrollFail, StructurizeLoopExitsForUnroll));
  249. // Default unroll pass. This is purely for optimizing loops without
  250. // attributes.
  251. if (OptLevel > 2) {
  252. MPM.add(createLoopUnrollPass(-1, -1, -1, -1, StructurizeLoopExitsForUnroll));
  253. }
  254. if (!NoOpt)
  255. MPM.add(createSimplifyInstPass());
  256. if (!NoOpt)
  257. MPM.add(createCFGSimplificationPass());
  258. MPM.add(createDeadCodeEliminationPass());
  259. if (OptLevel > 0) {
  260. MPM.add(createDxilFixConstArrayInitializerPass());
  261. }
  262. }
  263. // HLSL Change Ends
  264. void PassManagerBuilder::populateModulePassManager(
  265. legacy::PassManagerBase &MPM) {
  266. // If all optimizations are disabled, just run the always-inline pass and,
  267. // if enabled, the function merging pass.
  268. if (OptLevel == 0) {
  269. if (!HLSLHighLevel) {
  270. MPM.add(createHLEnsureMetadataPass()); // HLSL Change - rehydrate metadata from high-level codegen
  271. }
  272. MPM.add(createDxilRewriteOutputArgDebugInfoPass()); // Fix output argument types.
  273. if (!HLSLHighLevel)
  274. if (HLSLEnableDebugNops) MPM.add(createDxilInsertPreservesPass(HLSLAllowPreserveValues)); // HLSL Change - insert preserve instructions
  275. if (Inliner) {
  276. MPM.add(createHLLegalizeParameter()); // HLSL Change - legalize parameters
  277. // before inline.
  278. MPM.add(Inliner);
  279. Inliner = nullptr;
  280. }
  281. // FIXME: The BarrierNoopPass is a HACK! The inliner pass above implicitly
  282. // creates a CGSCC pass manager, but we don't want to add extensions into
  283. // that pass manager. To prevent this we insert a no-op module pass to reset
  284. // the pass manager to get the same behavior as EP_OptimizerLast in non-O0
  285. // builds. The function merging pass is
  286. if (MergeFunctions)
  287. MPM.add(createMergeFunctionsPass());
  288. else if (!Extensions.empty()) // HLSL Change - GlobalExtensions not considered
  289. MPM.add(createBarrierNoopPass());
  290. if (!HLSLHighLevel)
  291. MPM.add(createDxilPreserveToSelectPass()); // HLSL Change - lower preserve instructions to selects
  292. addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
  293. // HLSL Change Begins.
  294. addHLSLPasses(HLSLHighLevel, OptLevel,
  295. this->HLSLOnlyWarnOnUnrollFail,
  296. this->StructurizeLoopExitsForUnroll,
  297. this->HLSLEnableLifetimeMarkers,
  298. this->HLSLExtensionsCodeGen,
  299. MPM);
  300. if (!HLSLHighLevel) {
  301. MPM.add(createDxilConvergentClearPass());
  302. MPM.add(createDxilRemoveDeadBlocksPass());
  303. MPM.add(createDxilNoOptSimplifyInstructionsPass());
  304. MPM.add(createGlobalOptimizerPass());
  305. MPM.add(createMultiDimArrayToOneDimArrayPass());
  306. MPM.add(createDeadCodeEliminationPass());
  307. MPM.add(createGlobalDCEPass());
  308. MPM.add(createDxilMutateResourceToHandlePass());
  309. MPM.add(createDxilLowerCreateHandleForLibPass());
  310. MPM.add(createDxilTranslateRawBuffer());
  311. MPM.add(createDxilLegalizeSampleOffsetPass());
  312. MPM.add(createDxilNoOptLegalizePass());
  313. MPM.add(createDxilFinalizePreservesPass());
  314. MPM.add(createDxilFinalizeModulePass());
  315. MPM.add(createComputeViewIdStatePass());
  316. MPM.add(createDxilDeadFunctionEliminationPass());
  317. MPM.add(createNoPausePassesPass());
  318. MPM.add(createDxilEmitMetadataPass());
  319. }
  320. // HLSL Change Ends.
  321. return;
  322. }
  323. if (!HLSLHighLevel) {
  324. MPM.add(createHLEnsureMetadataPass()); // HLSL Change - rehydrate metadata from high-level codegen
  325. }
  326. // HLSL Change Begins
  327. MPM.add(createDxilRewriteOutputArgDebugInfoPass()); // Fix output argument types.
  328. MPM.add(createHLLegalizeParameter()); // legalize parameters before inline.
  329. MPM.add(createAlwaysInlinerPass(/*InsertLifeTime*/this->HLSLEnableLifetimeMarkers));
  330. if (Inliner) {
  331. delete Inliner;
  332. Inliner = nullptr;
  333. }
  334. addHLSLPasses(HLSLHighLevel, OptLevel, this->HLSLOnlyWarnOnUnrollFail, this->StructurizeLoopExitsForUnroll, this->HLSLEnableLifetimeMarkers, HLSLExtensionsCodeGen, MPM); // HLSL Change
  335. // HLSL Change Ends
  336. // Add LibraryInfo if we have some.
  337. if (LibraryInfo)
  338. MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
  339. addInitialAliasAnalysisPasses(MPM);
  340. if (!DisableUnitAtATime) {
  341. addExtensionsToPM(EP_ModuleOptimizerEarly, MPM);
  342. MPM.add(createIPSCCPPass()); // IP SCCP
  343. MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
  344. MPM.add(createDeadArgEliminationPass()); // Dead argument elimination
  345. MPM.add(createInstructionCombiningPass());// Clean up after IPCP & DAE
  346. addExtensionsToPM(EP_Peephole, MPM);
  347. MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
  348. }
  349. // Start of CallGraph SCC passes.
  350. if (!DisableUnitAtATime)
  351. MPM.add(createPruneEHPass()); // Remove dead EH info
  352. if (Inliner) {
  353. MPM.add(Inliner);
  354. Inliner = nullptr;
  355. }
  356. if (!DisableUnitAtATime)
  357. MPM.add(createFunctionAttrsPass()); // Set readonly/readnone attrs
  358. if (OptLevel > 2)
  359. MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args
  360. // Start of function pass.
  361. // Break up aggregate allocas, using SSAUpdater.
  362. if (UseNewSROA)
  363. MPM.add(createSROAPass(/*RequiresDomTree*/ false));
  364. else
  365. MPM.add(createScalarReplAggregatesPass(-1, false));
  366. // HLSL Change. MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
  367. // HLSL Change. MPM.add(createJumpThreadingPass()); // Thread jumps.
  368. MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
  369. MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
  370. MPM.add(createInstructionCombiningPass()); // Combine silly seq's
  371. addExtensionsToPM(EP_Peephole, MPM);
  372. // HLSL Change Begins.
  373. // HLSL does not allow recursize functions.
  374. //MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
  375. // HLSL Change Ends.
  376. MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
  377. MPM.add(createReassociatePass()); // Reassociate expressions
  378. // Rotate Loop - disable header duplication at -Oz
  379. MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
  380. // HLSL Change - disable LICM in frontend for not consider register pressure.
  381. //MPM.add(createLICMPass()); // Hoist loop invariants
  382. //MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3)); // HLSL Change - may move barrier inside divergent if.
  383. MPM.add(createInstructionCombiningPass());
  384. MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
  385. // HLSL Change Begins
  386. // Don't allow loop idiom pass which may insert memset/memcpy thereby breaking the dxil
  387. //MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
  388. // HLSL Change Ends
  389. MPM.add(createLoopDeletionPass()); // Delete dead loops
  390. if (EnableLoopInterchange) {
  391. MPM.add(createLoopInterchangePass()); // Interchange loops
  392. MPM.add(createCFGSimplificationPass());
  393. }
  394. if (!DisableUnrollLoops)
  395. MPM.add(createSimpleLoopUnrollPass()); // Unroll small loops
  396. addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
  397. if (OptLevel > 1) {
  398. if (EnableMLSM)
  399. MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
  400. // HLSL Change Begins
  401. if (EnableGVN) {
  402. MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
  403. if (!HLSLResMayAlias)
  404. MPM.add(createDxilSimpleGVNHoistPass());
  405. }
  406. // HLSL Change Ends
  407. }
  408. // HLSL Change Begins.
  409. // HLSL don't allow memcpy and memset.
  410. //MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset
  411. // HLSL Change Ends.
  412. MPM.add(createSCCPPass()); // Constant prop with SCCP
  413. // Delete dead bit computations (instcombine runs after to fold away the dead
  414. // computations, and then ADCE will run later to exploit any new DCE
  415. // opportunities that creates).
  416. MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations
  417. // Run instcombine after redundancy elimination to exploit opportunities
  418. // opened up by them.
  419. MPM.add(createInstructionCombiningPass());
  420. addExtensionsToPM(EP_Peephole, MPM);
  421. // HLSL Change. MPM.add(createJumpThreadingPass()); // Thread jumps
  422. MPM.add(createCorrelatedValuePropagationPass());
  423. MPM.add(createDeadStoreEliminationPass(ScanLimit)); // Delete dead stores
  424. // HLSL Change - disable LICM in frontend for not consider register pressure.
  425. // MPM.add(createLICMPass());
  426. addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
  427. if (RerollLoops)
  428. MPM.add(createLoopRerollPass());
  429. #if HLSL_VECTORIZATION_ENABLED // HLSL Change - don't build vectorization passes
  430. if (!RunSLPAfterLoopVectorization) {
  431. if (SLPVectorize)
  432. MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
  433. if (BBVectorize) {
  434. MPM.add(createBBVectorizePass());
  435. MPM.add(createInstructionCombiningPass());
  436. addExtensionsToPM(EP_Peephole, MPM);
  437. if (OptLevel > 1 && UseGVNAfterVectorization)
  438. MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
  439. else
  440. MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
  441. // BBVectorize may have significantly shortened a loop body; unroll again.
  442. if (!DisableUnrollLoops)
  443. MPM.add(createLoopUnrollPass());
  444. }
  445. }
  446. #endif
  447. if (LoadCombine)
  448. MPM.add(createLoadCombinePass());
  449. MPM.add(createHoistConstantArrayPass()); // HLSL change
  450. MPM.add(createAggressiveDCEPass()); // Delete dead instructions
  451. MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
  452. MPM.add(createInstructionCombiningPass()); // Clean up after everything.
  453. addExtensionsToPM(EP_Peephole, MPM);
  454. // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
  455. // pass manager that we are specifically trying to avoid. To prevent this
  456. // we must insert a no-op module pass to reset the pass manager.
  457. MPM.add(createBarrierNoopPass());
  458. if (RunFloat2Int)
  459. MPM.add(createFloat2IntPass());
  460. // Re-rotate loops in all our loop nests. These may have fallout out of
  461. // rotated form due to GVN or other transformations, and the vectorizer relies
  462. // on the rotated form. Disable header duplication at -Oz.
  463. MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
  464. // Distribute loops to allow partial vectorization. I.e. isolate dependences
  465. // into separate loop that would otherwise inhibit vectorization.
  466. if (EnableLoopDistribute)
  467. MPM.add(createLoopDistributePass());
  468. #if HLSL_VECTORIZATION_ENABLED // HLSL Change - don't build vectorization passes
  469. MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize));
  470. #endif
  471. // FIXME: Because of #pragma vectorize enable, the passes below are always
  472. // inserted in the pipeline, even when the vectorizer doesn't run (ex. when
  473. // on -O1 and no #pragma is found). Would be good to have these two passes
  474. // as function calls, so that we can only pass them when the vectorizer
  475. // changed the code.
  476. MPM.add(createInstructionCombiningPass());
  477. #if HLSL_VECTORIZATION_ENABLED // HLSL Change - don't build vectorization passes
  478. if (OptLevel > 1 && ExtraVectorizerPasses) {
  479. // At higher optimization levels, try to clean up any runtime overlap and
  480. // alignment checks inserted by the vectorizer. We want to track correllated
  481. // runtime checks for two inner loops in the same outer loop, fold any
  482. // common computations, hoist loop-invariant aspects out of any outer loop,
  483. // and unswitch the runtime checks if possible. Once hoisted, we may have
  484. // dead (or speculatable) control flows or more combining opportunities.
  485. MPM.add(createEarlyCSEPass());
  486. MPM.add(createCorrelatedValuePropagationPass());
  487. MPM.add(createInstructionCombiningPass());
  488. MPM.add(createLICMPass());
  489. MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
  490. MPM.add(createCFGSimplificationPass());
  491. MPM.add(createInstructionCombiningPass());
  492. }
  493. if (RunSLPAfterLoopVectorization) {
  494. if (SLPVectorize) {
  495. MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
  496. if (OptLevel > 1 && ExtraVectorizerPasses) {
  497. MPM.add(createEarlyCSEPass());
  498. }
  499. }
  500. if (BBVectorize) {
  501. MPM.add(createBBVectorizePass());
  502. MPM.add(createInstructionCombiningPass());
  503. addExtensionsToPM(EP_Peephole, MPM);
  504. if (OptLevel > 1 && UseGVNAfterVectorization)
  505. MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
  506. else
  507. MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
  508. // BBVectorize may have significantly shortened a loop body; unroll again.
  509. if (!DisableUnrollLoops)
  510. MPM.add(createLoopUnrollPass());
  511. }
  512. }
  513. #endif // HLSL Change - don't build vectorization passes
  514. addExtensionsToPM(EP_Peephole, MPM);
  515. MPM.add(createCFGSimplificationPass());
  516. MPM.add(createDxilLoopDeletionPass()); // HLSL Change - try to delete loop again.
  517. MPM.add(createInstructionCombiningPass());
  518. if (!DisableUnrollLoops) {
  519. MPM.add(createLoopUnrollPass(/* HLSL Change begin */-1, -1, -1, -1, this->StructurizeLoopExitsForUnroll /* HLSL Change end */)); // Unroll small loops
  520. // LoopUnroll may generate some redundency to cleanup.
  521. MPM.add(createInstructionCombiningPass());
  522. // Runtime unrolling will introduce runtime check in loop prologue. If the
  523. // unrolled loop is a inner loop, then the prologue will be inside the
  524. // outer loop. LICM pass can help to promote the runtime check out if the
  525. // checked value is loop invariant.
  526. // MPM.add(createLICMPass());// HLSL Change - disable LICM in frontend for
  527. // not consider register pressure.
  528. }
  529. // After vectorization and unrolling, assume intrinsics may tell us more
  530. // about pointer alignments.
  531. MPM.add(createAlignmentFromAssumptionsPass());
  532. if (!DisableUnitAtATime) {
  533. // FIXME: We shouldn't bother with this anymore.
  534. MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes
  535. // GlobalOpt already deletes dead functions and globals, at -O2 try a
  536. // late pass of GlobalDCE. It is capable of deleting dead cycles.
  537. if (OptLevel > 1) {
  538. if (!PrepareForLTO) {
  539. // Remove avail extern fns and globals definitions if we aren't
  540. // compiling an object file for later LTO. For LTO we want to preserve
  541. // these so they are eligible for inlining at link-time. Note if they
  542. // are unreferenced they will be removed by GlobalDCE below, so
  543. // this only impacts referenced available externally globals.
  544. // Eventually they will be suppressed during codegen, but eliminating
  545. // here enables more opportunity for GlobalDCE as it may make
  546. // globals referenced by available external functions dead.
  547. MPM.add(createEliminateAvailableExternallyPass());
  548. }
  549. MPM.add(createGlobalDCEPass()); // Remove dead fns and globals.
  550. MPM.add(createConstantMergePass()); // Merge dup global constants
  551. }
  552. }
  553. if (MergeFunctions)
  554. MPM.add(createMergeFunctionsPass());
  555. // HLSL Change Begins.
  556. if (!HLSLHighLevel) {
  557. if (OptLevel > 0)
  558. MPM.add(createDxilEraseDeadRegionPass());
  559. MPM.add(createDxilConvergentClearPass());
  560. MPM.add(createDeadCodeEliminationPass()); // DCE needed after clearing convergence
  561. // annotations before CreateHandleForLib
  562. // so no unused resources get re-added to
  563. // DxilModule.
  564. MPM.add(createMultiDimArrayToOneDimArrayPass());
  565. MPM.add(createDxilRemoveDeadBlocksPass());
  566. MPM.add(createDeadCodeEliminationPass());
  567. MPM.add(createGlobalDCEPass());
  568. MPM.add(createDxilMutateResourceToHandlePass());
  569. MPM.add(createDxilLowerCreateHandleForLibPass());
  570. MPM.add(createDxilTranslateRawBuffer());
  571. // Always try to legalize sample offsets as loop unrolling
  572. // is not guaranteed for higher opt levels.
  573. MPM.add(createDxilLegalizeSampleOffsetPass());
  574. MPM.add(createDxilFinalizeModulePass());
  575. MPM.add(createComputeViewIdStatePass());
  576. MPM.add(createDxilDeadFunctionEliminationPass());
  577. MPM.add(createNoPausePassesPass());
  578. MPM.add(createDxilValidateWaveSensitivityPass());
  579. MPM.add(createDxilEmitMetadataPass());
  580. }
  581. // HLSL Change Ends.
  582. addExtensionsToPM(EP_OptimizerLast, MPM);
  583. }
  584. void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
  585. // Provide AliasAnalysis services for optimizations.
  586. addInitialAliasAnalysisPasses(PM);
  587. // Propagate constants at call sites into the functions they call. This
  588. // opens opportunities for globalopt (and inlining) by substituting function
  589. // pointers passed as arguments to direct uses of functions.
  590. PM.add(createIPSCCPPass());
  591. // Now that we internalized some globals, see if we can hack on them!
  592. PM.add(createGlobalOptimizerPass());
  593. // Linking modules together can lead to duplicated global constants, only
  594. // keep one copy of each constant.
  595. PM.add(createConstantMergePass());
  596. // Remove unused arguments from functions.
  597. PM.add(createDeadArgEliminationPass());
  598. // Reduce the code after globalopt and ipsccp. Both can open up significant
  599. // simplification opportunities, and both can propagate functions through
  600. // function pointers. When this happens, we often have to resolve varargs
  601. // calls, etc, so let instcombine do this.
  602. PM.add(createInstructionCombiningPass());
  603. addExtensionsToPM(EP_Peephole, PM);
  604. // Inline small functions
  605. bool RunInliner = Inliner;
  606. if (RunInliner) {
  607. PM.add(Inliner);
  608. Inliner = nullptr;
  609. }
  610. PM.add(createPruneEHPass()); // Remove dead EH info.
  611. // Optimize globals again if we ran the inliner.
  612. if (RunInliner)
  613. PM.add(createGlobalOptimizerPass());
  614. PM.add(createGlobalDCEPass()); // Remove dead functions.
  615. // If we didn't decide to inline a function, check to see if we can
  616. // transform it to pass arguments by value instead of by reference.
  617. PM.add(createArgumentPromotionPass());
  618. // The IPO passes may leave cruft around. Clean up after them.
  619. PM.add(createInstructionCombiningPass());
  620. addExtensionsToPM(EP_Peephole, PM);
  621. // HLSL Change. PM.add(createJumpThreadingPass());
  622. // Break up allocas
  623. if (UseNewSROA)
  624. PM.add(createSROAPass());
  625. else
  626. PM.add(createScalarReplAggregatesPass());
  627. // Run a few AA driven optimizations here and now, to cleanup the code.
  628. PM.add(createFunctionAttrsPass()); // Add nocapture.
  629. PM.add(createGlobalsModRefPass()); // IP alias analysis.
  630. // HLSL Change - disable LICM in frontend for not consider register pressure.
  631. // PM.add(createLICMPass()); // Hoist loop invariants.
  632. if (EnableMLSM)
  633. PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds.
  634. if (EnableGVN) // HLSL Change
  635. PM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
  636. PM.add(createMemCpyOptPass()); // Remove dead memcpys.
  637. // Nuke dead stores.
  638. PM.add(createDeadStoreEliminationPass(ScanLimit)); // HLSL Change - add ScanLimit
  639. // More loops are countable; try to optimize them.
  640. PM.add(createIndVarSimplifyPass());
  641. PM.add(createLoopDeletionPass());
  642. if (EnableLoopInterchange)
  643. PM.add(createLoopInterchangePass());
  644. #if HLSL_VECTORIZATION_ENABLED // HLSL Change - don't build vectorization passes
  645. PM.add(createLoopVectorizePass(true, LoopVectorize));
  646. // More scalar chains could be vectorized due to more alias information
  647. if (RunSLPAfterLoopVectorization)
  648. if (SLPVectorize)
  649. PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
  650. // After vectorization, assume intrinsics may tell us more about pointer
  651. // alignments.
  652. PM.add(createAlignmentFromAssumptionsPass());
  653. #endif
  654. if (LoadCombine)
  655. PM.add(createLoadCombinePass());
  656. // Cleanup and simplify the code after the scalar optimizations.
  657. PM.add(createInstructionCombiningPass());
  658. addExtensionsToPM(EP_Peephole, PM);
  659. // HLSL Change. PM.add(createJumpThreadingPass());
  660. }
  661. void PassManagerBuilder::addLateLTOOptimizationPasses(
  662. legacy::PassManagerBase &PM) {
  663. // Delete basic blocks, which optimization passes may have killed.
  664. PM.add(createCFGSimplificationPass());
  665. // Now that we have optimized the program, discard unreachable functions.
  666. PM.add(createGlobalDCEPass());
  667. // FIXME: this is profitable (for compiler time) to do at -O0 too, but
  668. // currently it damages debug info.
  669. if (MergeFunctions)
  670. PM.add(createMergeFunctionsPass());
  671. }
  672. void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
  673. if (LibraryInfo)
  674. PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
  675. if (VerifyInput)
  676. PM.add(createVerifierPass());
  677. if (OptLevel > 1)
  678. addLTOOptimizationPasses(PM);
  679. // Lower bit sets to globals. This pass supports Clang's control flow
  680. // integrity mechanisms (-fsanitize=cfi*) and needs to run at link time if CFI
  681. // is enabled. The pass does nothing if CFI is disabled.
  682. PM.add(createLowerBitSetsPass());
  683. if (OptLevel != 0)
  684. addLateLTOOptimizationPasses(PM);
  685. if (VerifyOutput)
  686. PM.add(createVerifierPass());
  687. }
  688. inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) {
  689. return reinterpret_cast<PassManagerBuilder*>(P);
  690. }
  691. inline LLVMPassManagerBuilderRef wrap(PassManagerBuilder *P) {
  692. return reinterpret_cast<LLVMPassManagerBuilderRef>(P);
  693. }
  694. LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() {
  695. PassManagerBuilder *PMB = new PassManagerBuilder();
  696. return wrap(PMB);
  697. }
  698. void LLVMPassManagerBuilderDispose(LLVMPassManagerBuilderRef PMB) {
  699. PassManagerBuilder *Builder = unwrap(PMB);
  700. delete Builder;
  701. }
  702. void
  703. LLVMPassManagerBuilderSetOptLevel(LLVMPassManagerBuilderRef PMB,
  704. unsigned OptLevel) {
  705. PassManagerBuilder *Builder = unwrap(PMB);
  706. Builder->OptLevel = OptLevel;
  707. }
  708. void
  709. LLVMPassManagerBuilderSetSizeLevel(LLVMPassManagerBuilderRef PMB,
  710. unsigned SizeLevel) {
  711. PassManagerBuilder *Builder = unwrap(PMB);
  712. Builder->SizeLevel = SizeLevel;
  713. }
  714. void
  715. LLVMPassManagerBuilderSetDisableUnitAtATime(LLVMPassManagerBuilderRef PMB,
  716. LLVMBool Value) {
  717. PassManagerBuilder *Builder = unwrap(PMB);
  718. Builder->DisableUnitAtATime = Value;
  719. }
  720. void
  721. LLVMPassManagerBuilderSetDisableUnrollLoops(LLVMPassManagerBuilderRef PMB,
  722. LLVMBool Value) {
  723. PassManagerBuilder *Builder = unwrap(PMB);
  724. Builder->DisableUnrollLoops = Value;
  725. }
  726. void
  727. LLVMPassManagerBuilderSetDisableSimplifyLibCalls(LLVMPassManagerBuilderRef PMB,
  728. LLVMBool Value) {
  729. // NOTE: The simplify-libcalls pass has been removed.
  730. }
  731. void
  732. LLVMPassManagerBuilderUseInlinerWithThreshold(LLVMPassManagerBuilderRef PMB,
  733. unsigned Threshold) {
  734. PassManagerBuilder *Builder = unwrap(PMB);
  735. Builder->Inliner = createFunctionInliningPass(Threshold);
  736. }
  737. void
  738. LLVMPassManagerBuilderPopulateFunctionPassManager(LLVMPassManagerBuilderRef PMB,
  739. LLVMPassManagerRef PM) {
  740. PassManagerBuilder *Builder = unwrap(PMB);
  741. legacy::FunctionPassManager *FPM = unwrap<legacy::FunctionPassManager>(PM);
  742. Builder->populateFunctionPassManager(*FPM);
  743. }
  744. void
  745. LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB,
  746. LLVMPassManagerRef PM) {
  747. PassManagerBuilder *Builder = unwrap(PMB);
  748. legacy::PassManagerBase *MPM = unwrap(PM);
  749. Builder->populateModulePassManager(*MPM);
  750. }
  751. void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
  752. LLVMPassManagerRef PM,
  753. LLVMBool Internalize,
  754. LLVMBool RunInliner) {
  755. PassManagerBuilder *Builder = unwrap(PMB);
  756. legacy::PassManagerBase *LPM = unwrap(PM);
  757. // A small backwards compatibility hack. populateLTOPassManager used to take
  758. // an RunInliner option.
  759. if (RunInliner && !Builder->Inliner)
  760. Builder->Inliner = createFunctionInliningPass();
  761. Builder->populateLTOPassManager(*LPM);
  762. }