Scalarizer.cpp

//===--- Scalarizer.cpp - Scalarize vector operations ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass converts vector operations into scalar operations, in order
// to expose optimization opportunities on the individual scalar operations.
// It is mainly intended for targets that do not have vector units, but it
// may also be useful for revectorizing code to different vector widths.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/IR/DebugInfo.h" // HLSL Change - debug info in scalarizer.
#include "llvm/IR/DIBuilder.h" // HLSL Change - debug info in scalarizer.

using namespace llvm;

#define DEBUG_TYPE "scalarizer"

namespace {

// Used to store the scattered form of a vector.
typedef SmallVector<Value *, 8> ValueVector;

// Used to map a vector Value to its scattered form. We use std::map
// because we want iterators to persist across insertion and because the
// values are relatively large.
typedef std::map<Value *, ValueVector> ScatterMap;

// Lists Instructions that have been replaced with scalar implementations,
// along with a pointer to their scattered forms.
typedef SmallVector<std::pair<Instruction *, ValueVector *>, 16> GatherList;

// Provides a very limited vector-like interface for lazily accessing one
// component of a scattered vector or vector pointer.
class Scatterer {
public:
  bool AllowFolding = false; // HLSL Change

  Scatterer() {}

  // Scatter V into Size components. If new instructions are needed,
  // insert them before BBI in BB. If Cache is nonnull, use it to cache
  // the results.
#if 0 // HLSL Change
  Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
            ValueVector *cachePtr = nullptr);
#else // HLSL Change
  Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
            bool AllowFolding, ValueVector *cachePtr = nullptr);
#endif // HLSL Change

  // Return component I, creating a new Value for it if necessary.
  Value *operator[](unsigned I);

  // Return the number of components.
  unsigned size() const { return Size; }

private:
  BasicBlock *BB;
  BasicBlock::iterator BBI;
  Value *V;
  ValueVector *CachePtr;
  PointerType *PtrTy;
  ValueVector Tmp;
  unsigned Size;
};

// FCmpSplitter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp
// called Name that compares X and Y in the same way as FCI.
struct FCmpSplitter {
  FCmpSplitter(FCmpInst &fci) : FCI(fci) {}
  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
                    const Twine &Name) const {
    Value *Cmp = Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name);
    // HLSL Change Begins - Transfer FPMath flag.
    if (Instruction *FPMath = dyn_cast<Instruction>(Cmp)) {
      FPMath->copyFastMathFlags(FCI.getFastMathFlags());
    }
    // HLSL Change Ends
    return Cmp;
  }
  FCmpInst &FCI;
};

// ICmpSplitter(ICI)(Builder, X, Y, Name) uses Builder to create an ICmp
// called Name that compares X and Y in the same way as ICI.
struct ICmpSplitter {
  ICmpSplitter(ICmpInst &ici) : ICI(ici) {}
  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
                    const Twine &Name) const {
    return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name);
  }
  ICmpInst &ICI;
};

// BinarySplitter(BO)(Builder, X, Y, Name) uses Builder to create
// a binary operator like BO called Name with operands X and Y.
struct BinarySplitter {
  BinarySplitter(BinaryOperator &bo) : BO(bo) {}
  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
                    const Twine &Name) const {
    Value *BinOp = Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name);
    // HLSL Change Begins - Transfer FPMath flag.
    if (isa<FPMathOperator>(&BO)) {
      if (Instruction *FPMath = dyn_cast<Instruction>(BinOp)) {
        FPMath->copyFastMathFlags(BO.getFastMathFlags());
      }
    }
    // HLSL Change Ends
    return BinOp;
  }
  BinaryOperator &BO;
};

// Information about a load or store that we're scalarizing.
struct VectorLayout {
  VectorLayout() : VecTy(nullptr), ElemTy(nullptr), VecAlign(0), ElemSize(0) {}

  // Return the alignment of element I.
  uint64_t getElemAlign(unsigned I) {
    return MinAlign(VecAlign, I * ElemSize);
  }

  // The type of the vector.
  VectorType *VecTy;
  // The type of each element.
  Type *ElemTy;
  // The alignment of the vector.
  uint64_t VecAlign;
  // The size of each element.
  uint64_t ElemSize;
};

class Scalarizer : public FunctionPass,
                   public InstVisitor<Scalarizer, bool> {
public:
  static char ID;

  // HLSL Change Begin
  bool AllowFolding = false;
  Scalarizer(bool AllowFolding) :
    FunctionPass(ID),
    AllowFolding(AllowFolding) {
    initializeScalarizerPass(*PassRegistry::getPassRegistry());
  }
  // HLSL Change End

  Scalarizer() :
    FunctionPass(ID) {
    initializeScalarizerPass(*PassRegistry::getPassRegistry());
  }

  bool doInitialization(Module &M) override;
  bool runOnFunction(Function &F) override;

  // InstVisitor methods. They return true if the instruction was scalarized,
  // false if nothing changed.
  bool visitInstruction(Instruction &) { return false; }
  bool visitSelectInst(SelectInst &SI);
  bool visitICmpInst(ICmpInst &);
  bool visitFCmpInst(FCmpInst &);
  bool visitBinaryOperator(BinaryOperator &);
  bool visitGetElementPtrInst(GetElementPtrInst &);
  bool visitCastInst(CastInst &);
  bool visitBitCastInst(BitCastInst &);
  bool visitShuffleVectorInst(ShuffleVectorInst &);
  bool visitPHINode(PHINode &);
  bool visitLoadInst(LoadInst &);
  bool visitStoreInst(StoreInst &);

  static void registerOptions() {
    // This is disabled by default because having separate loads and stores
    // makes it more likely that the -combiner-alias-analysis limits will be
    // reached.
    OptionRegistry::registerOption<bool, Scalarizer,
                                   &Scalarizer::ScalarizeLoadStore>(
        "scalarize-load-store",
        "Allow the scalarizer pass to scalarize loads and stores", false);
  }

private:
  Scatterer scatter(Instruction *, Value *);
  void gather(Instruction *, const ValueVector &);
  bool canTransferMetadata(unsigned Kind);
  void transferMetadata(Instruction *, const ValueVector &);
  bool getVectorLayout(Type *, unsigned, VectorLayout &, const DataLayout &);
  bool finish();

  template<typename T> bool splitBinary(Instruction &, const T &);

  ScatterMap Scattered;
  GatherList Gathered;
  unsigned ParallelLoopAccessMDKind;
  bool ScalarizeLoadStore;
};

char Scalarizer::ID = 0;

} // end anonymous namespace

INITIALIZE_PASS_WITH_OPTIONS(Scalarizer, "scalarizer",
                             "Scalarize vector operations", false, false)

#if 0 // HLSL Change
Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
                     ValueVector *cachePtr)
    : BB(bb), BBI(bbi), V(v), CachePtr(cachePtr) {
#else // HLSL Change
Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
                     bool AllowFolding, ValueVector *cachePtr)
    : AllowFolding(AllowFolding), BB(bb), BBI(bbi), V(v), CachePtr(cachePtr) {
#endif // HLSL Change
  Type *Ty = V->getType();
  PtrTy = dyn_cast<PointerType>(Ty);
  if (PtrTy)
    Ty = PtrTy->getElementType();
  Size = Ty->getVectorNumElements();
  if (!CachePtr)
    Tmp.resize(Size, nullptr);
  else if (CachePtr->empty())
    CachePtr->resize(Size, nullptr);
  else
    assert(Size == CachePtr->size() && "Inconsistent vector sizes");
}

// Return component I, creating a new Value for it if necessary.
Value *Scatterer::operator[](unsigned I) {
  ValueVector &CV = (CachePtr ? *CachePtr : Tmp);
  // Try to reuse a previous value.
  if (CV[I])
    return CV[I];
  IRBuilder<> Builder(BB, BBI);
  Builder.AllowFolding = AllowFolding; // HLSL Change
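  // If V is a pointer to a vector, component 0 is a bitcast of V to a pointer
  // to the element type and the other components are constant GEPs off that
  // pointer; otherwise components come from extractelements, or are taken
  // directly from an existing insertelement chain.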
  if (PtrTy) {
    if (!CV[0]) {
      Type *Ty =
          PointerType::get(PtrTy->getElementType()->getVectorElementType(),
                           PtrTy->getAddressSpace());
      CV[0] = Builder.CreateBitCast(V, Ty, V->getName() + ".i0");
    }
    if (I != 0)
      CV[I] = Builder.CreateConstGEP1_32(nullptr, CV[0], I,
                                         V->getName() + ".i" + Twine(I));
  } else {
    // Search through a chain of InsertElementInsts looking for element I.
    // Record other elements in the cache. The new V is still suitable
    // for all uncached indices.
    for (;;) {
      InsertElementInst *Insert = dyn_cast<InsertElementInst>(V);
      if (!Insert)
        break;
      ConstantInt *Idx = dyn_cast<ConstantInt>(Insert->getOperand(2));
      if (!Idx)
        break;
      unsigned J = Idx->getZExtValue();
      V = Insert->getOperand(0);
      if (I == J) {
        CV[J] = Insert->getOperand(1);
        return CV[J];
      } else if (!CV[J]) {
        // Only cache the first entry we find for each index we're not actively
        // searching for. This prevents us from going too far up the chain and
        // caching incorrect entries.
        CV[J] = Insert->getOperand(1);
      }
    }
    CV[I] = Builder.CreateExtractElement(V, Builder.getInt32(I),
                                         V->getName() + ".i" + Twine(I));
  }
  return CV[I];
}
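
// Cache the metadata kind ID for parallel loop accesses and read the
// scalarize-load-store option once per module.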
bool Scalarizer::doInitialization(Module &M) {
  ParallelLoopAccessMDKind =
      M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
  ScalarizeLoadStore =
      M.getContext().getOption<bool, Scalarizer,
                               &Scalarizer::ScalarizeLoadStore>();
  return false;
}

bool Scalarizer::runOnFunction(Function &F) {
  for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
    BasicBlock *BB = BBI;
    for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
      Instruction *I = II;
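      // visit() dispatches to the visitXXX handlers below. The iterator is
      // advanced before any erase so it stays valid; only void-typed results
      // (e.g. scalarized stores) are deleted here, the rest in finish().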
      bool Done = visit(I);
      ++II;
      if (Done && I->getType()->isVoidTy())
        I->eraseFromParent();
    }
  }
  return finish();
}

// Return a scattered form of V that can be accessed by Point. V must be a
// vector or a pointer to a vector.
Scatterer Scalarizer::scatter(Instruction *Point, Value *V) {
  if (Argument *VArg = dyn_cast<Argument>(V)) {
    // Put the scattered form of arguments in the entry block,
    // so that it can be used everywhere.
    Function *F = VArg->getParent();
    BasicBlock *BB = &F->getEntryBlock();
    // HLSL Change - Begin
    // return Scatterer(BB, BB->begin(), V, &Scattered[V]);
    auto InsertPoint = BB->begin();
    while (InsertPoint != BB->end() && isa<DbgInfoIntrinsic>(InsertPoint))
      InsertPoint++;
    return Scatterer(BB, InsertPoint, V, AllowFolding, &Scattered[V]);
    // HLSL Change - End
  }
  if (Instruction *VOp = dyn_cast<Instruction>(V)) {
    // Put the scattered form of an instruction directly after the
    // instruction.
    BasicBlock *BB = VOp->getParent();
#if 0 // HLSL Change
    return Scatterer(BB, std::next(BasicBlock::iterator(VOp)),
                     V, &Scattered[V]);
#else // HLSL Change
    return Scatterer(BB, std::next(BasicBlock::iterator(VOp)),
                     V, AllowFolding, &Scattered[V]);
#endif // HLSL Change
  }
  // HLSL Change - Begin
  // Allow constant folding for Constant cases, so we don't
  // put an instruction before a PHI node.
  if (Constant *C = dyn_cast<Constant>(V)) {
    if (isa<PHINode>(Point)) {
      return Scatterer(Point->getParent(), Point,
                       V, /* allowFolding */ true, &Scattered[V]);
    }
  }
  // HLSL Change - End
  // In the fallback case, just put the scattered form before Point and
  // keep the result local to Point.
  // return Scatterer(Point->getParent(), Point, V); // HLSL Change
  return Scatterer(Point->getParent(), Point, V, AllowFolding);
}

// Replace Op with the gathered form of the components in CV. Defer the
// deletion of Op and creation of the gathered form to the end of the pass,
// so that we can avoid creating the gathered form if all uses of Op are
// replaced with uses of CV.
void Scalarizer::gather(Instruction *Op, const ValueVector &CV) {
  // Since we're not deleting Op yet, stub out its operands, so that it
  // doesn't make anything live unnecessarily.
  for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I)
    Op->setOperand(I, UndefValue::get(Op->getOperand(I)->getType()));

  transferMetadata(Op, CV);

  // If we already have a scattered form of Op (created from ExtractElements
  // of Op itself), replace them with the new form.
  ValueVector &SV = Scattered[Op];
  if (!SV.empty()) {
    for (unsigned I = 0, E = SV.size(); I != E; ++I) {
      Instruction *Old = dyn_cast_or_null<Instruction>(SV[I]);
      // HLSL Change Begin - skip unused scatter elt.
      if (!Old)
        continue;
      // HLSL Change End.
      CV[I]->takeName(Old);
      Old->replaceAllUsesWith(CV[I]);
      Old->eraseFromParent();
    }
  }
  SV = CV;
  Gathered.push_back(GatherList::value_type(Op, &SV));
}

// Return true if it is safe to transfer the given metadata tag from
// vector to scalar instructions.
bool Scalarizer::canTransferMetadata(unsigned Tag) {
  return (Tag == LLVMContext::MD_tbaa
          || Tag == LLVMContext::MD_fpmath
          || Tag == LLVMContext::MD_tbaa_struct
          || Tag == LLVMContext::MD_invariant_load
          || Tag == LLVMContext::MD_alias_scope
          || Tag == LLVMContext::MD_noalias
          || Tag == ParallelLoopAccessMDKind);
}

// Transfer metadata from Op to the instructions in CV if it is known
// to be safe to do so.
void Scalarizer::transferMetadata(Instruction *Op, const ValueVector &CV) {
  SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
  Op->getAllMetadataOtherThanDebugLoc(MDs);
  for (unsigned I = 0, E = CV.size(); I != E; ++I) {
    if (Instruction *New = dyn_cast<Instruction>(CV[I])) {
      for (SmallVectorImpl<std::pair<unsigned, MDNode *>>::iterator
               MI = MDs.begin(), ME = MDs.end();
           MI != ME; ++MI)
        if (canTransferMetadata(MI->first))
          New->setMetadata(MI->first, MI->second);
      // New->setDebugLoc(Op->getDebugLoc()); // HLSL Change
    }
  }
}

// Try to fill in Layout from Ty, returning true on success. Alignment is
// the alignment of the vector, or 0 if the ABI default should be used.
bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment,
                                 VectorLayout &Layout, const DataLayout &DL) {
  // Make sure we're dealing with a vector.
  Layout.VecTy = dyn_cast<VectorType>(Ty);
  if (!Layout.VecTy)
    return false;
  // Check that we're dealing with full-byte elements.
  Layout.ElemTy = Layout.VecTy->getElementType();
  if (DL.getTypeSizeInBits(Layout.ElemTy) !=
      DL.getTypeStoreSizeInBits(Layout.ElemTy))
    return false;
  if (Alignment)
    Layout.VecAlign = Alignment;
  else
    Layout.VecAlign = DL.getABITypeAlignment(Layout.VecTy);
  Layout.ElemSize = DL.getTypeStoreSize(Layout.ElemTy);
  return true;
}

// Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
// to create an instruction like I with operands X and Y and name Name.
template<typename Splitter>
bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) {
  VectorType *VT = dyn_cast<VectorType>(I.getType());
  if (!VT)
    return false;

  unsigned NumElems = VT->getNumElements();
  IRBuilder<> Builder(I.getParent(), &I);
  Builder.AllowFolding = AllowFolding; // HLSL Change
  Scatterer Op0 = scatter(&I, I.getOperand(0));
  Scatterer Op1 = scatter(&I, I.getOperand(1));
  assert(Op0.size() == NumElems && "Mismatched binary operation");
  assert(Op1.size() == NumElems && "Mismatched binary operation");
  ValueVector Res;
  Res.resize(NumElems);
  for (unsigned Elem = 0; Elem < NumElems; ++Elem)
    Res[Elem] = Split(Builder, Op0[Elem], Op1[Elem],
                      I.getName() + ".i" + Twine(Elem));
  gather(&I, Res);
  return true;
}

bool Scalarizer::visitSelectInst(SelectInst &SI) {
  VectorType *VT = dyn_cast<VectorType>(SI.getType());
  if (!VT)
    return false;

  unsigned NumElems = VT->getNumElements();
  IRBuilder<> Builder(SI.getParent(), &SI);
  Builder.AllowFolding = this->AllowFolding; // HLSL Change
  Scatterer Op1 = scatter(&SI, SI.getOperand(1));
  Scatterer Op2 = scatter(&SI, SI.getOperand(2));
  assert(Op1.size() == NumElems && "Mismatched select");
  assert(Op2.size() == NumElems && "Mismatched select");
  ValueVector Res;
  Res.resize(NumElems);
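  // The condition may be a vector (one condition per element) or a single
  // scalar value shared by all elements; handle both forms.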
  if (SI.getOperand(0)->getType()->isVectorTy()) {
    Scatterer Op0 = scatter(&SI, SI.getOperand(0));
    assert(Op0.size() == NumElems && "Mismatched select");
    for (unsigned I = 0; I < NumElems; ++I)
      Res[I] = Builder.CreateSelect(Op0[I], Op1[I], Op2[I],
                                    SI.getName() + ".i" + Twine(I));
  } else {
    Value *Op0 = SI.getOperand(0);
    for (unsigned I = 0; I < NumElems; ++I)
      Res[I] = Builder.CreateSelect(Op0, Op1[I], Op2[I],
                                    SI.getName() + ".i" + Twine(I));
  }
  gather(&SI, Res);
  return true;
}

bool Scalarizer::visitICmpInst(ICmpInst &ICI) {
  return splitBinary(ICI, ICmpSplitter(ICI));
}

bool Scalarizer::visitFCmpInst(FCmpInst &FCI) {
  return splitBinary(FCI, FCmpSplitter(FCI));
}

bool Scalarizer::visitBinaryOperator(BinaryOperator &BO) {
  return splitBinary(BO, BinarySplitter(BO));
}

bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
  VectorType *VT = dyn_cast<VectorType>(GEPI.getType());
  if (!VT)
    return false;

  IRBuilder<> Builder(GEPI.getParent(), &GEPI);
  Builder.AllowFolding = this->AllowFolding; // HLSL Change
  unsigned NumElems = VT->getNumElements();
  unsigned NumIndices = GEPI.getNumIndices();

  Scatterer Base = scatter(&GEPI, GEPI.getOperand(0));

  SmallVector<Scatterer, 8> Ops;
  Ops.resize(NumIndices);
  for (unsigned I = 0; I < NumIndices; ++I)
    Ops[I] = scatter(&GEPI, GEPI.getOperand(I + 1));
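  // Build one scalar GEP per vector element, using component I of the base
  // pointer and of each index.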
  ValueVector Res;
  Res.resize(NumElems);
  for (unsigned I = 0; I < NumElems; ++I) {
    SmallVector<Value *, 8> Indices;
    Indices.resize(NumIndices);
    for (unsigned J = 0; J < NumIndices; ++J)
      Indices[J] = Ops[J][I];
    Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), Base[I], Indices,
                               GEPI.getName() + ".i" + Twine(I));
    if (GEPI.isInBounds())
      if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I]))
        NewGEPI->setIsInBounds();
  }
  gather(&GEPI, Res);
  return true;
}

bool Scalarizer::visitCastInst(CastInst &CI) {
  VectorType *VT = dyn_cast<VectorType>(CI.getDestTy());
  if (!VT)
    return false;

  unsigned NumElems = VT->getNumElements();
  IRBuilder<> Builder(CI.getParent(), &CI);
  Builder.AllowFolding = this->AllowFolding; // HLSL Change
  Scatterer Op0 = scatter(&CI, CI.getOperand(0));
  assert(Op0.size() == NumElems && "Mismatched cast");
  ValueVector Res;
  Res.resize(NumElems);
  for (unsigned I = 0; I < NumElems; ++I)
    Res[I] = Builder.CreateCast(CI.getOpcode(), Op0[I], VT->getElementType(),
                                CI.getName() + ".i" + Twine(I));
  gather(&CI, Res);
  return true;
}

bool Scalarizer::visitBitCastInst(BitCastInst &BCI) {
  VectorType *DstVT = dyn_cast<VectorType>(BCI.getDestTy());
  VectorType *SrcVT = dyn_cast<VectorType>(BCI.getSrcTy());
  if (!DstVT || !SrcVT)
    return false;

  unsigned DstNumElems = DstVT->getNumElements();
  unsigned SrcNumElems = SrcVT->getNumElements();
  IRBuilder<> Builder(BCI.getParent(), &BCI);
  Builder.AllowFolding = this->AllowFolding; // HLSL Change
  Scatterer Op0 = scatter(&BCI, BCI.getOperand(0));
  ValueVector Res;
  Res.resize(DstNumElems);

  if (DstNumElems == SrcNumElems) {
    for (unsigned I = 0; I < DstNumElems; ++I)
      Res[I] = Builder.CreateBitCast(Op0[I], DstVT->getElementType(),
                                     BCI.getName() + ".i" + Twine(I));
  } else if (DstNumElems > SrcNumElems) {
    // <M x t1> -> <N*M x t2>. Convert each t1 to <N x t2> and copy the
    // individual elements to the destination.
    unsigned FanOut = DstNumElems / SrcNumElems;
    Type *MidTy = VectorType::get(DstVT->getElementType(), FanOut);
    unsigned ResI = 0;
    for (unsigned Op0I = 0; Op0I < SrcNumElems; ++Op0I) {
      Value *V = Op0[Op0I];
      Instruction *VI;
      // Look through any existing bitcasts before converting to <N x t2>.
      // In the best case, the resulting conversion might be a no-op.
      while ((VI = dyn_cast<Instruction>(V)) &&
             VI->getOpcode() == Instruction::BitCast)
        V = VI->getOperand(0);
      V = Builder.CreateBitCast(V, MidTy, V->getName() + ".cast");
      Scatterer Mid = scatter(&BCI, V);
      for (unsigned MidI = 0; MidI < FanOut; ++MidI)
        Res[ResI++] = Mid[MidI];
    }
  } else {
    // <N*M x t1> -> <M x t2>. Convert each group of <N x t1> into a t2.
    unsigned FanIn = SrcNumElems / DstNumElems;
    Type *MidTy = VectorType::get(SrcVT->getElementType(), FanIn);
    unsigned Op0I = 0;
    for (unsigned ResI = 0; ResI < DstNumElems; ++ResI) {
      Value *V = UndefValue::get(MidTy);
      for (unsigned MidI = 0; MidI < FanIn; ++MidI)
        V = Builder.CreateInsertElement(V, Op0[Op0I++], Builder.getInt32(MidI),
                                        BCI.getName() + ".i" + Twine(ResI)
                                            + ".upto" + Twine(MidI));
      Res[ResI] = Builder.CreateBitCast(V, DstVT->getElementType(),
                                        BCI.getName() + ".i" + Twine(ResI));
    }
  }
  gather(&BCI, Res);
  return true;
}

bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
  VectorType *VT = dyn_cast<VectorType>(SVI.getType());
  if (!VT)
    return false;

  unsigned NumElems = VT->getNumElements();
  Scatterer Op0 = scatter(&SVI, SVI.getOperand(0));
  Scatterer Op1 = scatter(&SVI, SVI.getOperand(1));
  ValueVector Res;
  Res.resize(NumElems);

  for (unsigned I = 0; I < NumElems; ++I) {
    int Selector = SVI.getMaskValue(I);
    if (Selector < 0)
      Res[I] = UndefValue::get(VT->getElementType());
    else if (unsigned(Selector) < Op0.size())
      Res[I] = Op0[Selector];
    else
      Res[I] = Op1[Selector - Op0.size()];
    // HLSL Change Begins: (fix bug in upstream llvm)
    if (ExtractElementInst *EA = dyn_cast<ExtractElementInst>(Res[I])) {
      // Clone the extractelement here, since it is associated with another
      // instruction. Otherwise it would be added to our Gather, and after the
      // incoming instruction is processed, it would be replaced without
      // updating our Gather entry. That dead instruction would then be
      // accessed by finish(), causing an assert or crash.
      Res[I] = IRBuilder<>(SVI.getNextNode()).Insert(EA->clone());
    }
    // HLSL Change Ends
  }
  gather(&SVI, Res);
  return true;
}

bool Scalarizer::visitPHINode(PHINode &PHI) {
  VectorType *VT = dyn_cast<VectorType>(PHI.getType());
  if (!VT)
    return false;

  unsigned NumElems = VT->getNumElements();
  IRBuilder<> Builder(PHI.getParent(), &PHI);
  Builder.AllowFolding = this->AllowFolding; // HLSL Change
  ValueVector Res;
  Res.resize(NumElems);
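  // Create one scalar PHI per element first, then add the incoming value for
  // each predecessor once its operand has been scattered.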
  unsigned NumOps = PHI.getNumOperands();
  for (unsigned I = 0; I < NumElems; ++I)
    Res[I] = Builder.CreatePHI(VT->getElementType(), NumOps,
                               PHI.getName() + ".i" + Twine(I));

  for (unsigned I = 0; I < NumOps; ++I) {
    Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I));
    BasicBlock *IncomingBlock = PHI.getIncomingBlock(I);
    for (unsigned J = 0; J < NumElems; ++J)
      cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock);
  }
  gather(&PHI, Res);
  return true;
}

bool Scalarizer::visitLoadInst(LoadInst &LI) {
  if (!ScalarizeLoadStore)
    return false;
  if (!LI.isSimple())
    return false;

  VectorLayout Layout;
  if (!getVectorLayout(LI.getType(), LI.getAlignment(), Layout,
                       LI.getModule()->getDataLayout()))
    return false;

  unsigned NumElems = Layout.VecTy->getNumElements();
  IRBuilder<> Builder(LI.getParent(), &LI);
  Builder.AllowFolding = this->AllowFolding; // HLSL Change
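  // Split the vector load into one load per element, each with the alignment
  // implied by its offset within the original vector.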
  Scatterer Ptr = scatter(&LI, LI.getPointerOperand());
  ValueVector Res;
  Res.resize(NumElems);

  for (unsigned I = 0; I < NumElems; ++I)
    Res[I] = Builder.CreateAlignedLoad(Ptr[I], Layout.getElemAlign(I),
                                       LI.getName() + ".i" + Twine(I));
  gather(&LI, Res);
  return true;
}

bool Scalarizer::visitStoreInst(StoreInst &SI) {
  if (!ScalarizeLoadStore)
    return false;
  if (!SI.isSimple())
    return false;

  VectorLayout Layout;
  Value *FullValue = SI.getValueOperand();
  if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout,
                       SI.getModule()->getDataLayout()))
    return false;

  unsigned NumElems = Layout.VecTy->getNumElements();
  IRBuilder<> Builder(SI.getParent(), &SI);
  Builder.AllowFolding = this->AllowFolding; // HLSL Change
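  // Split the vector store into one store per element, mirroring the load
  // case above.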
  Scatterer Ptr = scatter(&SI, SI.getPointerOperand());
  Scatterer Val = scatter(&SI, FullValue);

  ValueVector Stores;
  Stores.resize(NumElems);
  for (unsigned I = 0; I < NumElems; ++I) {
    unsigned Align = Layout.getElemAlign(I);
    Stores[I] = Builder.CreateAlignedStore(Val[I], Ptr[I], Align);
  }
  transferMetadata(&SI, Stores);
  return true;
}

// Delete the instructions that we scalarized. If a full vector result
// is still needed, recreate it using InsertElements.
bool Scalarizer::finish() {
  if (Gathered.empty())
    return false;

  // HLSL Change Begins.
  Module &M = *Gathered.front().first->getModule();
  LLVMContext &Ctx = M.getContext();
  const DataLayout &DL = M.getDataLayout();
  bool HasDbgInfo = getDebugMetadataVersionFromModule(M) != 0;
  // Map from an extract element inst to a Value which replaced it.
  DenseMap<Instruction *, Value *> EltMap;
  // HLSL Change Ends.

  for (GatherList::iterator GMI = Gathered.begin(), GME = Gathered.end();
       GMI != GME; ++GMI) {
    Instruction *Op = GMI->first;
    ValueVector &CV = *GMI->second;

    // HLSL Change Begin - debug info in scalarizer.
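    // For each llvm.dbg.value that refers to Op, emit per-element dbg.value
    // intrinsics describing the corresponding bit pieces of the variable.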
    if (HasDbgInfo) {
      if (auto *L = LocalAsMetadata::getIfExists(Op)) {
        if (auto *DINode = MetadataAsValue::getIfExists(Ctx, L)) {
          Type *Ty = Op->getType();
          unsigned Count = Ty->getVectorNumElements();
          Type *EltTy = Ty->getVectorElementType();
          unsigned EltSizeInBits = DL.getTypeSizeInBits(EltTy);
          for (User *U : DINode->users()) {
            if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U)) {
              DIBuilder DIB(M, /*AllowUnresolved*/ false);
              auto *VarInfo = DVI->getVariable();
              DebugLoc DbgLoc = DVI->getDebugLoc();
              unsigned OffsetInBits = 0;
              if (DVI->getExpression()->isBitPiece())
                OffsetInBits = DVI->getExpression()->getBitPieceOffset();
              for (unsigned I = 0; I < Count; ++I) {
                DIExpression *EltExpr =
                    DIB.createBitPieceExpression(OffsetInBits, EltSizeInBits);
                OffsetInBits += EltSizeInBits;
                DIB.insertDbgValueIntrinsic(CV[I], 0, VarInfo, EltExpr,
                                            DbgLoc, DVI);
              }
            }
          }
        }
      }
    }
    // HLSL Change End.

    if (!Op->use_empty()) {
      // HLSL Change Begins.
      // Remove the extract element users if possible.
      for (User *UI : Op->users()) {
        if (ExtractElementInst *EEI = dyn_cast<ExtractElementInst>(UI)) {
          Value *Idx = EEI->getIndexOperand();
          if (!isa<ConstantInt>(Idx))
            continue;
          unsigned immIdx = cast<ConstantInt>(Idx)->getLimitedValue();
          if (immIdx >= CV.size())
            continue;
          Value *Elt = CV[immIdx];
          // Follow the chain of replacements for Elt, if it's in EltMap.
          while (Instruction *EltI = dyn_cast<Instruction>(Elt)) {
            if (EltMap.count(EltI)) {
              Elt = EltMap[EltI];
            } else
              break;
          }
          EEI->replaceAllUsesWith(Elt);
          EltMap[EEI] = Elt;
        }
      }
      if (Op->use_empty()) {
        Op->eraseFromParent();
        continue;
      }
      // HLSL Change Ends.

      // The value is still needed, so recreate it using a series of
      // InsertElements.
      Type *Ty = Op->getType();
      Value *Res = UndefValue::get(Ty);
      BasicBlock *BB = Op->getParent();
      unsigned Count = Ty->getVectorNumElements();
      IRBuilder<> Builder(BB, Op);
      Builder.AllowFolding = this->AllowFolding; // HLSL Change
      if (isa<PHINode>(Op))
        Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
      for (unsigned I = 0; I < Count; ++I)
        Res = Builder.CreateInsertElement(Res, CV[I], Builder.getInt32(I),
                                          Op->getName() + ".upto" + Twine(I));
      Res->takeName(Op);
      Op->replaceAllUsesWith(Res);
    }
    Op->eraseFromParent();
  }

  // HLSL Change Begins.
  for (auto It : EltMap) {
    Instruction *I = It.first;
    if (I->user_empty())
      I->eraseFromParent();
  }
  // HLSL Change Ends.

  Gathered.clear();
  Scattered.clear();
  return true;
}

// HLSL Change Begin
FunctionPass *llvm::createScalarizerPass(bool AllowFolding) {
  Scalarizer *pass = new Scalarizer(AllowFolding);
  return pass;
}
// HLSL Change End

FunctionPass *llvm::createScalarizerPass() {
  return new Scalarizer();
}