  1. //===-- SeparateConstOffsetFromGEP.cpp - ------------------------*- C++ -*-===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // Loop unrolling may create many similar GEPs for array accesses.
  11. // e.g., a 2-level loop
  12. //
  13. // float a[32][32]; // global variable
  14. //
  15. // for (int i = 0; i < 2; ++i) {
  16. // for (int j = 0; j < 2; ++j) {
  17. // ...
  18. // ... = a[x + i][y + j];
  19. // ...
  20. // }
  21. // }
  22. //
  23. // will probably be unrolled to:
  24. //
  25. // gep %a, 0, %x, %y; load
  26. // gep %a, 0, %x, %y + 1; load
  27. // gep %a, 0, %x + 1, %y; load
  28. // gep %a, 0, %x + 1, %y + 1; load
  29. //
  30. // LLVM's GVN does not use partial redundancy elimination yet, and is thus
  31. // unable to reuse (gep %a, 0, %x, %y). As a result, this missed optimization
  32. // incurs a significant slowdown on targets with limited addressing modes. For instance,
  33. // because the PTX target does not support the reg+reg addressing mode, the
  34. // NVPTX backend emits PTX code that literally computes the pointer address of
  35. // each GEP, wasting tons of registers. It emits the following PTX for the
  36. // first load and similar PTX for other loads.
  37. //
  38. // mov.u32 %r1, %x;
  39. // mov.u32 %r2, %y;
  40. // mul.wide.u32 %rl2, %r1, 128;
  41. // mov.u64 %rl3, a;
  42. // add.s64 %rl4, %rl3, %rl2;
  43. // mul.wide.u32 %rl5, %r2, 4;
  44. // add.s64 %rl6, %rl4, %rl5;
  45. // ld.global.f32 %f1, [%rl6];
  46. //
  47. // To reduce the register pressure, the optimization implemented in this file
  48. // merges the common part of a group of GEPs, so we can compute each pointer
  49. // address by adding a simple offset to the common part, saving many registers.
  50. //
  51. // It works by splitting each GEP into a variadic base and a constant offset.
  52. // The variadic base can be computed once and reused by multiple GEPs, and the
  53. // constant offsets can be nicely folded into the reg+immediate addressing mode
  54. // (supported by most targets) without using any extra register.
  55. //
  56. // For instance, we transform the four GEPs and four loads in the above example
  57. // into:
  58. //
  59. // base = gep a, 0, x, y
  60. // load base
  61. // load base + 1 * sizeof(float)
  62. // load base + 32 * sizeof(float)
  63. // load base + 33 * sizeof(float)
  64. //
  65. // Given the transformed IR, a backend that supports the reg+immediate
  66. // addressing mode can easily fold the pointer arithmetics into the loads. For
  67. // example, the NVPTX backend can easily fold the pointer arithmetics into the
  68. // ld.global.f32 instructions, and the resultant PTX uses far fewer registers.
  69. //
  70. // mov.u32 %r1, %tid.x;
  71. // mov.u32 %r2, %tid.y;
  72. // mul.wide.u32 %rl2, %r1, 128;
  73. // mov.u64 %rl3, a;
  74. // add.s64 %rl4, %rl3, %rl2;
  75. // mul.wide.u32 %rl5, %r2, 4;
  76. // add.s64 %rl6, %rl4, %rl5;
  77. // ld.global.f32 %f1, [%rl6]; // so far the same as unoptimized PTX
  78. // ld.global.f32 %f2, [%rl6+4]; // much better
  79. // ld.global.f32 %f3, [%rl6+128]; // much better
  80. // ld.global.f32 %f4, [%rl6+132]; // much better
  81. //
  82. // Another improvement enabled by the LowerGEP flag is to lower a GEP with
  83. // multiple indices to either multiple GEPs with a single index or arithmetic
  84. // operations (depending on whether the target uses alias analysis in codegen).
  85. // Such a transformation can have the following benefits:
  86. // (1) It can always extract constants from the indices of structure types.
  87. // (2) After such lowering, there are more optimization opportunities such as
  88. // CSE, LICM and CGP.
  89. //
  90. // E.g. The following GEPs have multiple indices:
  91. // BB1:
  92. // %p = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 3
  93. // load %p
  94. // ...
  95. // BB2:
  96. // %p2 = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j2, i32 2
  97. // load %p2
  98. // ...
  99. //
  100. // We cannot do CSE for the common part related to index "i64 %i". Lowering
  101. // GEPs can achieve such goals.
  102. // If the target does not use alias analysis in codegen, this pass will
  103. // lower a GEP with multiple indices into arithmetic operations:
  104. // BB1:
  105. // %1 = ptrtoint [10 x %struct]* %ptr to i64 ; CSE opportunity
  106. // %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity
  107. // %3 = add i64 %1, %2 ; CSE opportunity
  108. // %4 = mul i64 %j1, length_of_struct
  109. // %5 = add i64 %3, %4
  110. // %6 = add i64 %3, struct_field_3 ; Constant offset
  111. // %p = inttoptr i64 %6 to i32*
  112. // load %p
  113. // ...
  114. // BB2:
  115. // %7 = ptrtoint [10 x %struct]* %ptr to i64 ; CSE opportunity
  116. // %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity
  117. // %9 = add i64 %7, %8 ; CSE opportunity
  118. // %10 = mul i64 %j2, length_of_struct
  119. // %11 = add i64 %9, %10
  120. // %12 = add i64 %11, struct_field_2 ; Constant offset
  121. // %p2 = inttoptr i64 %12 to i32*
  122. // load %p2
  123. // ...
  124. //
  125. // If the target uses alias analysis in codegen, this pass will lower a GEP
  126. // with multiple indices into multiple GEPs with a single index:
  127. // BB1:
  128. // %1 = bitcast [10 x %struct]* %ptr to i8* ; CSE opportunity
  129. // %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity
  130. // %3 = getelementptr i8* %1, i64 %2 ; CSE opportunity
  131. // %4 = mul i64 %j1, length_of_struct
  132. // %5 = getelementptr i8* %3, i64 %4
  133. // %6 = getelementptr i8* %5, struct_field_3 ; Constant offset
  134. // %p = bitcast i8* %6 to i32*
  135. // load %p
  136. // ...
  137. // BB2:
  138. // %7 = bitcast [10 x %struct]* %ptr to i8* ; CSE opportunity
  139. // %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity
  140. // %9 = getelementptr i8* %7, i64 %8 ; CSE opportunity
  141. // %10 = mul i64 %j2, length_of_struct
  142. // %11 = getelementptr i8* %9, i64 %10
  143. // %12 = getelementptr i8* %11, struct_field_2 ; Constant offset
  144. // %p2 = bitcast i8* %12 to i32*
  145. // load %p2
  146. // ...
  147. //
  148. // Lowering GEPs can also benefit other passes such as LICM and CGP.
  149. // LICM (Loop Invariant Code Motion) cannot hoist/sink a GEP with multiple
  150. // indices if any of its indices is variant. If we lower such a GEP into
  151. // invariant parts and variant parts, LICM can hoist/sink the invariant parts.
  152. // CGP (CodeGen Prepare) tries to sink address calculations that match the
  153. // target's addressing modes. A GEP with multiple indices may not match and will
  154. // not be sunk. If we lower such a GEP into smaller parts, CGP may sink some of
  155. // them, so we end up with better addressing modes.
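//
// As an illustrative sketch (not verbatim from any test case), consider the
// array a[32][32] from the example above accessed in a loop where %i is
// loop-invariant and %j varies:
//
//   %p = gep [32 x [32 x float]]* %a, i64 0, i64 %i, i64 %j
//
// After lowering, the %i-related computation (mul i64 %i, 128 plus the add to
// the base) becomes a separate, loop-invariant instruction that LICM can hoist
// out of the loop, and CGP may sink the remaining cheap %j-related addressing
// next to the load it feeds.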
  156. //
  157. //===----------------------------------------------------------------------===//
  158. #include "llvm/Analysis/TargetTransformInfo.h"
  159. #include "llvm/Analysis/ValueTracking.h"
  160. #include "llvm/IR/Constants.h"
  161. #include "llvm/IR/DataLayout.h"
  162. #include "llvm/IR/Dominators.h"
  163. #include "llvm/IR/Instructions.h"
  164. #include "llvm/IR/LLVMContext.h"
  165. #include "llvm/IR/Module.h"
  166. #include "llvm/IR/Operator.h"
  167. #include "llvm/Support/CommandLine.h"
  168. #include "llvm/Support/raw_ostream.h"
  169. #include "llvm/Transforms/Scalar.h"
  170. #include "llvm/Transforms/Utils/Local.h"
  171. #include "llvm/Target/TargetMachine.h"
  172. #include "llvm/Target/TargetSubtargetInfo.h"
  173. #include "llvm/IR/IRBuilder.h"
  174. using namespace llvm;
  175. static cl::opt<bool> DisableSeparateConstOffsetFromGEP(
  176. "disable-separate-const-offset-from-gep", cl::init(false),
  177. cl::desc("Do not separate the constant offset from a GEP instruction"),
  178. cl::Hidden);
  179. // Setting this flag may emit false positives when the input module already
  180. // contains dead instructions. Therefore, we set it only in unit tests that are
  181. // free of dead code.
  182. static cl::opt<bool>
  183. VerifyNoDeadCode("reassociate-geps-verify-no-dead-code", cl::init(false),
  184. cl::desc("Verify this pass produces no dead code"),
  185. cl::Hidden);
  186. namespace {
  187. /// \brief A helper class for separating a constant offset from a GEP index.
  188. ///
  189. /// In real programs, a GEP index may be more complicated than a simple addition
  190. /// of something and a constant integer that can be trivially split. For
  191. /// example, to split ((a << 3) | 5) + b, we need to search deeper for the
  192. /// constant offset, so that we can separate the index into (a << 3) + b and 5.
  193. ///
  194. /// Therefore, this class looks into the expression that computes a given GEP
  195. /// index, and tries to find a constant integer that can be hoisted to the
  196. /// outermost level of the expression as an addition. Not every constant in an
  197. /// expression can be hoisted out; e.g., we cannot transform (b * (a + 5)) to
  198. /// (b * a + 5), nor (3 * (a + 5)) to (3 * a + 5). However, in the latter case,
  199. /// -instcombine has probably already rewritten (3 * (a + 5)) as (3 * a + 15).
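///
/// For illustration (a hypothetical IR sketch, not quoted from the tests),
/// extracting the constant from the index ((a << 3) | 5) + b:
///
///   %t   = shl i64 %a, 3
///   %t1  = or i64 %t, 5          ; 5 and %t have no common bits set
///   %idx = add i64 %t1, %b
///
/// is rebuilt as
///
///   %t      = shl i64 %a, 3
///   %newidx = add i64 %t, %b     ; the index with the 5 stripped
///
/// and the extracted 5 is later folded into the GEP's constant byte offset.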
  200. class ConstantOffsetExtractor {
  201. public:
  202. /// Extracts a constant offset from the given GEP index. It returns the
  203. /// new index representing the remainder (equal to the original index minus
  204. /// the constant offset), or nullptr if we cannot extract a constant offset.
  205. /// \p Idx The given GEP index
  206. /// \p GEP The given GEP
  207. /// \p UserChainTail Outputs the tail of UserChain so that we can
  208. /// garbage-collect unused instructions in UserChain.
  209. static Value *Extract(Value *Idx, GetElementPtrInst *GEP,
  210. User *&UserChainTail, const DominatorTree *DT);
  211. /// Looks for a constant offset from the given GEP index without extracting
  212. /// it. It returns the numeric value of the constant offset it finds (0 if
  213. /// none is found). The meanings of the arguments are the same as in Extract.
  214. static int64_t Find(Value *Idx, GetElementPtrInst *GEP,
  215. const DominatorTree *DT);
  216. private:
  217. ConstantOffsetExtractor(Instruction *InsertionPt, const DominatorTree *DT)
  218. : IP(InsertionPt), DL(InsertionPt->getModule()->getDataLayout()), DT(DT) {
  219. }
  220. /// Searches the expression that computes V for a non-zero constant C s.t.
  221. /// V can be reassociated into the form V' + C. If the search is
  222. /// successful, returns C and updates UserChain as a def-use chain from C to V;
  223. /// otherwise, UserChain is left empty.
  224. ///
  225. /// \p V The given expression
  226. /// \p SignExtended Whether V will be sign-extended in the computation of the
  227. /// GEP index
  228. /// \p ZeroExtended Whether V will be zero-extended in the computation of the
  229. /// GEP index
  230. /// \p NonNegative Whether V is guaranteed to be non-negative. For example,
  231. /// an index of an inbounds GEP is guaranteed to be
  232. /// non-negative. Leveraging this, we can better split
  233. /// inbounds GEPs.
  234. APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative);
  235. /// A helper function to look into both operands of a binary operator.
  236. APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended,
  237. bool ZeroExtended);
  238. /// After finding the constant offset C from the GEP index I, we build a new
  239. /// index I' s.t. I' + C = I. This function builds and returns the new
  240. /// index I' according to UserChain produced by function "find".
  241. ///
  242. /// The building conceptually takes two steps:
  243. /// 1) iteratively distribute s/zext towards the leaves of the expression tree
  244. /// that computes I
  245. /// 2) reassociate the expression tree to the form I' + C.
  246. ///
  247. /// For example, to extract the 5 from sext(a + (b + 5)), we first distribute
  248. /// sext to a, b and 5 so that we have
  249. /// sext(a) + (sext(b) + 5).
  250. /// Then, we reassociate it to
  251. /// (sext(a) + sext(b)) + 5.
  252. /// Given this form, we know I' is sext(a) + sext(b).
  253. Value *rebuildWithoutConstOffset();
  254. /// After the first step of rebuilding the GEP index without the constant
  255. /// offset, distribute s/zext to the operands of all operators in UserChain.
  256. /// e.g., zext(sext(a + (b + 5))) (assuming no overflow) =>
  257. /// zext(sext(a)) + (zext(sext(b)) + zext(sext(5))).
  258. ///
  259. /// The function also updates UserChain to point to new subexpressions after
  260. /// distributing s/zext. e.g., the old UserChain of the above example is
  261. /// 5 -> b + 5 -> a + (b + 5) -> sext(...) -> zext(sext(...)),
  262. /// and the new UserChain is
  263. /// zext(sext(5)) -> zext(sext(b)) + zext(sext(5)) ->
  264. /// zext(sext(a)) + (zext(sext(b)) + zext(sext(5)))
  265. ///
  266. /// \p ChainIndex The index to UserChain. ChainIndex is initially
  267. /// UserChain.size() - 1, and is decremented during
  268. /// the recursion.
  269. Value *distributeExtsAndCloneChain(unsigned ChainIndex);
  270. /// Reassociates the GEP index to the form I' + C and returns I'.
  271. Value *removeConstOffset(unsigned ChainIndex);
  272. /// A helper function to apply ExtInsts, a list of s/zext, to value V.
  273. /// e.g., if ExtInsts = [sext i32 to i64, zext i16 to i32], this function
  274. /// returns "sext i32 (zext i16 V to i32) to i64".
  275. Value *applyExts(Value *V);
  276. /// A helper function that returns whether we can trace into the operands
  277. /// of binary operator BO for a constant offset.
  278. ///
  279. /// \p SignExtended Whether BO is surrounded by sext
  280. /// \p ZeroExtended Whether BO is surrounded by zext
  281. /// \p NonNegative Whether BO is known to be non-negative, e.g., an in-bound
  282. /// array index.
  283. bool CanTraceInto(bool SignExtended, bool ZeroExtended, BinaryOperator *BO,
  284. bool NonNegative);
  285. /// The path from the constant offset to the old GEP index. e.g., if the GEP
  286. /// index is "a * b + (c + 5)". After running function find, UserChain[0] will
  287. /// be the constant 5, UserChain[1] will be the subexpression "c + 5", and
  288. /// UserChain[2] will be the entire expression "a * b + (c + 5)".
  289. ///
  290. /// This path helps to rebuild the new GEP index.
  291. SmallVector<User *, 8> UserChain;
  292. /// A data structure used in rebuildWithoutConstOffset. Contains all
  293. /// sext/zext instructions along UserChain.
  294. SmallVector<CastInst *, 16> ExtInsts;
  295. Instruction *IP; /// Insertion position of cloned instructions.
  296. const DataLayout &DL;
  297. const DominatorTree *DT;
  298. };
  299. /// \brief A pass that tries to split every GEP in the function into a variadic
  300. /// base and a constant offset. It is a FunctionPass because searching for the
  301. /// constant offset may inspect other basic blocks.
  302. class SeparateConstOffsetFromGEP : public FunctionPass {
  303. public:
  304. static char ID;
  305. SeparateConstOffsetFromGEP(const TargetMachine *TM = nullptr,
  306. bool LowerGEP = false)
  307. : FunctionPass(ID), DL(nullptr), DT(nullptr), TM(TM), LowerGEP(LowerGEP) {
  308. initializeSeparateConstOffsetFromGEPPass(*PassRegistry::getPassRegistry());
  309. }
  310. void getAnalysisUsage(AnalysisUsage &AU) const override {
  311. AU.addRequired<DominatorTreeWrapperPass>();
  312. AU.addRequired<TargetTransformInfoWrapperPass>();
  313. AU.setPreservesCFG();
  314. }
  315. bool doInitialization(Module &M) override {
  316. DL = &M.getDataLayout();
  317. return false;
  318. }
  319. bool runOnFunction(Function &F) override;
  320. private:
  321. /// Tries to split the given GEP into a variadic base and a constant offset,
  322. /// and returns true if the splitting succeeds.
  323. bool splitGEP(GetElementPtrInst *GEP);
  324. /// Lower a GEP with multiple indices into multiple GEPs with a single index.
  325. /// Function splitGEP already split the original GEP into a variadic part and
  326. /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
  327. /// variadic part into a set of GEPs with a single index and applies
  328. /// AccumulativeByteOffset to it.
  329. /// \p Variadic The variadic part of the original GEP.
  330. /// \p AccumulativeByteOffset The constant offset.
  331. void lowerToSingleIndexGEPs(GetElementPtrInst *Variadic,
  332. int64_t AccumulativeByteOffset);
  333. /// Lower a GEP with multiple indices into ptrtoint+arithmetics+inttoptr form.
  334. /// Function splitGEP already split the original GEP into a variadic part and
  335. /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
  336. /// variadic part into a set of arithmetic operations and applies
  337. /// AccumulativeByteOffset to it.
  338. /// \p Variadic The variadic part of the original GEP.
  339. /// \p AccumulativeByteOffset The constant offset.
  340. void lowerToArithmetics(GetElementPtrInst *Variadic,
  341. int64_t AccumulativeByteOffset);
  342. /// Finds the constant offset within each index and accumulates them. If
  343. /// LowerGEP is true, it searches the indices of both sequential and struct
  344. /// types; otherwise it searches only the sequential indices. The output
  345. /// NeedsExtraction indicates whether we successfully found a non-zero
  346. /// constant offset.
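///
/// For illustration (a hypothetical example, not from a test case): for
///   gep inbounds [32 x [32 x float]]* %a, i64 0, i64 %x, i64 %y
/// where Find extracts 1 from %x (say %x = add i64 %i, 1) and 3 from %y,
/// the accumulated byte offset is 1 * 128 + 3 * 4 = 140.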
  347. int64_t accumulateByteOffset(GetElementPtrInst *GEP, bool &NeedsExtraction);
  348. /// Canonicalize array indices to pointer-size integers. This helps to
  349. /// simplify the logic of splitting a GEP. For example, if a + b is a
  350. /// pointer-size integer, we have
  351. /// gep base, a + b = gep (gep base, a), b
  352. /// However, this equality may not hold if the size of a + b is smaller than
  353. /// the pointer size, because LLVM conceptually sign-extends GEP indices to
  354. /// pointer size before computing the address
  355. /// (http://llvm.org/docs/LangRef.html#id181).
  356. ///
  357. /// This canonicalization is very likely already done in clang and
  358. /// instcombine. Therefore, the program will probably remain the same.
  359. ///
  360. /// Returns true if the module changes.
  361. ///
  362. /// Verified in @i32_add in split-gep.ll
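///
/// For illustration (a hypothetical sketch): on a target with 64-bit pointers,
///   gep [32 x float]* %a, i64 0, i32 %i
/// becomes
///   %i.prom = sext i32 %i to i64
///   gep [32 x float]* %a, i64 0, i64 %i.prom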
  363. bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP);
  364. /// Verify F is free of dead code.
  365. void verifyNoDeadCode(Function &F);
  366. const DataLayout *DL;
  367. const DominatorTree *DT;
  368. const TargetMachine *TM;
  369. /// Whether to lower a GEP with multiple indices into arithmetic operations or
  370. /// multiple GEPs with a single index.
  371. bool LowerGEP;
  372. };
  373. } // anonymous namespace
  374. char SeparateConstOffsetFromGEP::ID = 0;
  375. INITIALIZE_PASS_BEGIN(
  376. SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
  377. "Split GEPs to a variadic base and a constant offset for better CSE", false,
  378. false)
  379. INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  380. INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
  381. INITIALIZE_PASS_END(
  382. SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
  383. "Split GEPs to a variadic base and a constant offset for better CSE", false,
  384. false)
  385. FunctionPass *
  386. llvm::createSeparateConstOffsetFromGEPPass(const TargetMachine *TM,
  387. bool LowerGEP) {
  388. return new SeparateConstOffsetFromGEP(TM, LowerGEP);
  389. }
  390. bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
  391. bool ZeroExtended,
  392. BinaryOperator *BO,
  393. bool NonNegative) {
  394. // We only consider ADD, SUB and OR, because a non-zero constant found in
  395. // expressions composed of these operations can be easily hoisted as a
  396. // constant offset by reassociation.
  397. if (BO->getOpcode() != Instruction::Add &&
  398. BO->getOpcode() != Instruction::Sub &&
  399. BO->getOpcode() != Instruction::Or) {
  400. return false;
  401. }
  402. Value *LHS = BO->getOperand(0), *RHS = BO->getOperand(1);
  403. // Do not trace into "or" unless it is equivalent to "add". If LHS and RHS
  404. // don't have common bits, (LHS | RHS) is equivalent to (LHS + RHS).
  405. if (BO->getOpcode() == Instruction::Or &&
  406. !haveNoCommonBitsSet(LHS, RHS, DL, nullptr, BO, DT))
  407. return false;
  408. // In addition, tracing into BO requires that its surrounding s/zext (if
  409. // any) is distributable to both operands.
  410. //
  411. // Suppose BO = A op B.
  412. // SignExtended | ZeroExtended | Distributable?
  413. // --------------+--------------+----------------------------------
  414. // 0 | 0 | true because no s/zext exists
  415. // 0 | 1 | zext(BO) == zext(A) op zext(B)
  416. // 1 | 0 | sext(BO) == sext(A) op sext(B)
  417. // 1 | 1 | zext(sext(BO)) ==
  418. // | | zext(sext(A)) op zext(sext(B))
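// For illustration (a numeric sketch added here): with i8 operands A = 250 and
// B = 10, A + B wraps to 4, so zext i8 (A + B) to i16 = 4 while
// zext(A) + zext(B) = 260. Distributing zext over the add is therefore only
// valid when the add is nuw (and sext requires nsw, or the non-negative
// special case handled below).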
  419. if (BO->getOpcode() == Instruction::Add && !ZeroExtended && NonNegative) {
  420. // If a + b >= 0 and (a >= 0 or b >= 0), then
  421. // sext(a + b) = sext(a) + sext(b)
  422. // even if the addition is not marked nsw.
  423. //
  424. // Leveraging this invariant, we can trace into an sext'ed inbounds GEP
  425. // index if the constant offset is non-negative.
  426. //
  427. // Verified in @sext_add in split-gep.ll.
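// For illustration (an added numeric sketch): with i8 values a = 100 and
// b = 100, a + b wraps to -56, so sext i8 (a + b) to i16 = -56 while
// sext(a) + sext(b) = 200; the precondition a + b >= 0 rules such cases out.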
  428. if (ConstantInt *ConstLHS = dyn_cast<ConstantInt>(LHS)) {
  429. if (!ConstLHS->isNegative())
  430. return true;
  431. }
  432. if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) {
  433. if (!ConstRHS->isNegative())
  434. return true;
  435. }
  436. }
  437. // sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B)
  438. // zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B)
  439. if (BO->getOpcode() == Instruction::Add ||
  440. BO->getOpcode() == Instruction::Sub) {
  441. if (SignExtended && !BO->hasNoSignedWrap())
  442. return false;
  443. if (ZeroExtended && !BO->hasNoUnsignedWrap())
  444. return false;
  445. }
  446. return true;
  447. }
  448. APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO,
  449. bool SignExtended,
  450. bool ZeroExtended) {
  451. // BO being non-negative does not shed light on whether its operands are
  452. // non-negative. Clear the NonNegative flag here.
  453. APInt ConstantOffset = find(BO->getOperand(0), SignExtended, ZeroExtended,
  454. /* NonNegative */ false);
  455. // If we found a constant offset in the left operand, stop and return that.
  456. // This shortcut might cause us to miss opportunities of combining the
  457. // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9.
  458. // However, such cases are probably already handled by -instcombine,
  459. // given this pass runs after the standard optimizations.
  460. if (ConstantOffset != 0) return ConstantOffset;
  461. ConstantOffset = find(BO->getOperand(1), SignExtended, ZeroExtended,
  462. /* NonNegative */ false);
  463. // If U is a sub operator, negate the constant offset found in the right
  464. // operand.
  465. if (BO->getOpcode() == Instruction::Sub)
  466. ConstantOffset = -ConstantOffset;
  467. return ConstantOffset;
  468. }
  469. APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended,
  470. bool ZeroExtended, bool NonNegative) {
  471. // TODO(jingyue): We could trace into integer/pointer casts, such as
  472. // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only
  473. // integers because it gives good enough results for our benchmarks.
  474. unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
  475. // We cannot do much with Values that are not a User, such as an Argument.
  476. User *U = dyn_cast<User>(V);
  477. if (U == nullptr) return APInt(BitWidth, 0);
  478. APInt ConstantOffset(BitWidth, 0);
  479. if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
  480. // Hooray, we found it!
  481. ConstantOffset = CI->getValue();
  482. } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) {
  483. // Trace into subexpressions for more hoisting opportunities.
  484. if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative))
  485. ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
  486. } else if (isa<SExtInst>(V)) {
  487. ConstantOffset = find(U->getOperand(0), /* SignExtended */ true,
  488. ZeroExtended, NonNegative).sext(BitWidth);
  489. } else if (isa<ZExtInst>(V)) {
  490. // As an optimization, we can clear the SignExtended flag because
  491. // sext(zext(a)) = zext(a). Verified in @sext_zext in split-gep.ll.
  492. //
  493. // Clear the NonNegative flag, because zext(a) >= 0 does not imply a >= 0.
  494. ConstantOffset =
  495. find(U->getOperand(0), /* SignExtended */ false,
  496. /* ZeroExtended */ true, /* NonNegative */ false).zext(BitWidth);
  497. }
  498. // If we found a non-zero constant offset, add it to the path for
  499. // rebuildWithoutConstOffset. Zero is a valid constant offset, but doesn't
  500. // help this optimization.
  501. if (ConstantOffset != 0)
  502. UserChain.push_back(U);
  503. return ConstantOffset;
  504. }
  505. Value *ConstantOffsetExtractor::applyExts(Value *V) {
  506. Value *Current = V;
  507. // ExtInsts is built in the use-def order. Therefore, we apply them to V
  508. // in the reversed order.
  509. for (auto I = ExtInsts.rbegin(), E = ExtInsts.rend(); I != E; ++I) {
  510. if (Constant *C = dyn_cast<Constant>(Current)) {
  511. // If Current is a constant, apply s/zext using ConstantExpr::getCast.
  512. // ConstantExpr::getCast emits a ConstantInt if C is a ConstantInt.
  513. Current = ConstantExpr::getCast((*I)->getOpcode(), C, (*I)->getType());
  514. } else {
  515. Instruction *Ext = (*I)->clone();
  516. Ext->setOperand(0, Current);
  517. Ext->insertBefore(IP);
  518. Current = Ext;
  519. }
  520. }
  521. return Current;
  522. }
  523. Value *ConstantOffsetExtractor::rebuildWithoutConstOffset() {
  524. distributeExtsAndCloneChain(UserChain.size() - 1);
  525. // Remove all nullptrs (used to be s/zext) from UserChain.
  526. unsigned NewSize = 0;
  527. for (auto I = UserChain.begin(), E = UserChain.end(); I != E; ++I) {
  528. if (*I != nullptr) {
  529. UserChain[NewSize] = *I;
  530. NewSize++;
  531. }
  532. }
  533. UserChain.resize(NewSize);
  534. return removeConstOffset(UserChain.size() - 1);
  535. }
  536. Value *
  537. ConstantOffsetExtractor::distributeExtsAndCloneChain(unsigned ChainIndex) {
  538. User *U = UserChain[ChainIndex];
  539. if (ChainIndex == 0) {
  540. assert(isa<ConstantInt>(U));
  541. // If U is a ConstantInt, applyExts will return a ConstantInt as well.
  542. return UserChain[ChainIndex] = cast<ConstantInt>(applyExts(U));
  543. }
  544. if (CastInst *Cast = dyn_cast<CastInst>(U)) {
  545. assert((isa<SExtInst>(Cast) || isa<ZExtInst>(Cast)) &&
  546. "We only traced into two types of CastInst: sext and zext");
  547. ExtInsts.push_back(Cast);
  548. UserChain[ChainIndex] = nullptr;
  549. return distributeExtsAndCloneChain(ChainIndex - 1);
  550. }
  551. // Function find only traces into BinaryOperator and CastInst.
  552. BinaryOperator *BO = cast<BinaryOperator>(U);
  553. // OpNo = which operand of BO is UserChain[ChainIndex - 1]
  554. unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
  555. Value *TheOther = applyExts(BO->getOperand(1 - OpNo));
  556. Value *NextInChain = distributeExtsAndCloneChain(ChainIndex - 1);
  557. BinaryOperator *NewBO = nullptr;
  558. if (OpNo == 0) {
  559. NewBO = BinaryOperator::Create(BO->getOpcode(), NextInChain, TheOther,
  560. BO->getName(), IP);
  561. } else {
  562. NewBO = BinaryOperator::Create(BO->getOpcode(), TheOther, NextInChain,
  563. BO->getName(), IP);
  564. }
  565. return UserChain[ChainIndex] = NewBO;
  566. }
  567. Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
  568. if (ChainIndex == 0) {
  569. assert(isa<ConstantInt>(UserChain[ChainIndex]));
  570. return ConstantInt::getNullValue(UserChain[ChainIndex]->getType());
  571. }
  572. BinaryOperator *BO = cast<BinaryOperator>(UserChain[ChainIndex]);
  573. assert(BO->getNumUses() <= 1 &&
  574. "distributeExtsAndCloneChain clones each BinaryOperator in "
  575. "UserChain, so no one should be used more than "
  576. "once");
  577. unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
  578. assert(BO->getOperand(OpNo) == UserChain[ChainIndex - 1]);
  579. Value *NextInChain = removeConstOffset(ChainIndex - 1);
  580. Value *TheOther = BO->getOperand(1 - OpNo);
  581. // If NextInChain is 0 and not the LHS of a sub, we can simplify the
  582. // sub-expression to be just TheOther.
  583. if (ConstantInt *CI = dyn_cast<ConstantInt>(NextInChain)) {
  584. if (CI->isZero() && !(BO->getOpcode() == Instruction::Sub && OpNo == 0))
  585. return TheOther;
  586. }
  587. BinaryOperator::BinaryOps NewOp = BO->getOpcode();
  588. if (BO->getOpcode() == Instruction::Or) {
  589. // Rebuild "or" as "add", because "or" may be invalid for the new
  590. // expression.
  591. //
  592. // For instance, given
  593. // a | (b + 5) where a and b + 5 have no common bits,
  594. // we can extract 5 as the constant offset.
  595. //
  596. // However, reusing the "or" in the new index would give us
  597. // (a | b) + 5
  598. // which does not equal a | (b + 5).
  599. //
  600. // Replacing the "or" with "add" is fine, because
  601. // a | (b + 5) = a + (b + 5) = (a + b) + 5
  602. NewOp = Instruction::Add;
  603. }
  604. BinaryOperator *NewBO;
  605. if (OpNo == 0) {
  606. NewBO = BinaryOperator::Create(NewOp, NextInChain, TheOther, "", IP);
  607. } else {
  608. NewBO = BinaryOperator::Create(NewOp, TheOther, NextInChain, "", IP);
  609. }
  610. NewBO->takeName(BO);
  611. return NewBO;
  612. }
  613. Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP,
  614. User *&UserChainTail,
  615. const DominatorTree *DT) {
  616. ConstantOffsetExtractor Extractor(GEP, DT);
  617. // Find a non-zero constant offset first.
  618. APInt ConstantOffset =
  619. Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
  620. GEP->isInBounds());
  621. if (ConstantOffset == 0) {
  622. UserChainTail = nullptr;
  623. return nullptr;
  624. }
  625. // Separates the constant offset from the GEP index.
  626. Value *IdxWithoutConstOffset = Extractor.rebuildWithoutConstOffset();
  627. UserChainTail = Extractor.UserChain.back();
  628. return IdxWithoutConstOffset;
  629. }
  630. int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP,
  631. const DominatorTree *DT) {
  632. // If Idx is an index of an inbounds GEP, Idx is guaranteed to be non-negative.
  633. return ConstantOffsetExtractor(GEP, DT)
  634. .find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
  635. GEP->isInBounds())
  636. .getSExtValue();
  637. }
  638. bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToPointerSize(
  639. GetElementPtrInst *GEP) {
  640. bool Changed = false;
  641. Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
  642. gep_type_iterator GTI = gep_type_begin(*GEP);
  643. for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end();
  644. I != E; ++I, ++GTI) {
  645. // Skip struct member indices which must be i32.
  646. if (isa<SequentialType>(*GTI)) {
  647. if ((*I)->getType() != IntPtrTy) {
  648. *I = CastInst::CreateIntegerCast(*I, IntPtrTy, true, "idxprom", GEP);
  649. Changed = true;
  650. }
  651. }
  652. }
  653. return Changed;
  654. }
  655. int64_t
  656. SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
  657. bool &NeedsExtraction) {
  658. NeedsExtraction = false;
  659. int64_t AccumulativeByteOffset = 0;
  660. gep_type_iterator GTI = gep_type_begin(*GEP);
  661. for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
  662. if (isa<SequentialType>(*GTI)) {
  663. // Tries to extract a constant offset from this GEP index.
  664. int64_t ConstantOffset =
  665. ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP, DT);
  666. if (ConstantOffset != 0) {
  667. NeedsExtraction = true;
  668. // A GEP may have multiple indices. We accumulate the extracted
  669. // constant offset to a byte offset, and later offset the remainder of
  670. // the original GEP with this byte offset.
  671. AccumulativeByteOffset +=
  672. ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType());
  673. }
  674. } else if (LowerGEP) {
  675. StructType *StTy = cast<StructType>(*GTI);
  676. uint64_t Field = cast<ConstantInt>(GEP->getOperand(I))->getZExtValue();
  677. // Skip field 0 as the offset is always 0.
  678. if (Field != 0) {
  679. NeedsExtraction = true;
  680. AccumulativeByteOffset +=
  681. DL->getStructLayout(StTy)->getElementOffset(Field);
  682. }
  683. }
  684. }
  685. return AccumulativeByteOffset;
  686. }
  687. void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
  688. GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset) {
  689. IRBuilder<> Builder(Variadic);
  690. Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());
  691. Type *I8PtrTy =
  692. Builder.getInt8PtrTy(Variadic->getType()->getPointerAddressSpace());
  693. Value *ResultPtr = Variadic->getOperand(0);
  694. if (ResultPtr->getType() != I8PtrTy)
  695. ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
  696. gep_type_iterator GTI = gep_type_begin(*Variadic);
  697. // Create an ugly GEP for each sequential index. We don't create GEPs for
  698. // structure indices, as they are accumulated in the constant offset index.
  699. for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
  700. if (isa<SequentialType>(*GTI)) {
  701. Value *Idx = Variadic->getOperand(I);
  702. // Skip zero indices.
  703. if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
  704. if (CI->isZero())
  705. continue;
  706. APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
  707. DL->getTypeAllocSize(GTI.getIndexedType()));
  708. // Scale the index by element size.
  709. if (ElementSize != 1) {
  710. if (ElementSize.isPowerOf2()) {
  711. Idx = Builder.CreateShl(
  712. Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2()));
  713. } else {
  714. Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize));
  715. }
  716. }
  717. // Create an ugly GEP with a single index for each index.
  718. ResultPtr =
  719. Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Idx, "uglygep");
  720. }
  721. }
  722. // Create a GEP with the constant offset index.
  723. if (AccumulativeByteOffset != 0) {
  724. Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset);
  725. ResultPtr =
  726. Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep");
  727. }
  728. if (ResultPtr->getType() != Variadic->getType())
  729. ResultPtr = Builder.CreateBitCast(ResultPtr, Variadic->getType());
  730. Variadic->replaceAllUsesWith(ResultPtr);
  731. Variadic->eraseFromParent();
  732. }
  733. void
  734. SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
  735. int64_t AccumulativeByteOffset) {
  736. IRBuilder<> Builder(Variadic);
  737. Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());
  738. Value *ResultPtr = Builder.CreatePtrToInt(Variadic->getOperand(0), IntPtrTy);
  739. gep_type_iterator GTI = gep_type_begin(*Variadic);
  740. // Create ADD/SHL/MUL arithmetic operations for each sequential index. We
  741. // don't create arithmetic for structure indices, as they are accumulated
  742. // in the constant offset index.
  743. for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
  744. if (isa<SequentialType>(*GTI)) {
  745. Value *Idx = Variadic->getOperand(I);
  746. // Skip zero indices.
  747. if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
  748. if (CI->isZero())
  749. continue;
  750. APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
  751. DL->getTypeAllocSize(GTI.getIndexedType()));
  752. // Scale the index by element size.
  753. if (ElementSize != 1) {
  754. if (ElementSize.isPowerOf2()) {
  755. Idx = Builder.CreateShl(
  756. Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2()));
  757. } else {
  758. Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize));
  759. }
  760. }
  761. // Create an ADD for each index.
  762. ResultPtr = Builder.CreateAdd(ResultPtr, Idx);
  763. }
  764. }
  765. // Create an ADD for the constant offset index.
  766. if (AccumulativeByteOffset != 0) {
  767. ResultPtr = Builder.CreateAdd(
  768. ResultPtr, ConstantInt::get(IntPtrTy, AccumulativeByteOffset));
  769. }
  770. ResultPtr = Builder.CreateIntToPtr(ResultPtr, Variadic->getType());
  771. Variadic->replaceAllUsesWith(ResultPtr);
  772. Variadic->eraseFromParent();
  773. }
  774. bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
  775. // Skip vector GEPs.
  776. if (GEP->getType()->isVectorTy())
  777. return false;
  778. // The backend can already nicely handle the case where all indices are
  779. // constant.
  780. if (GEP->hasAllConstantIndices())
  781. return false;
  782. bool Changed = canonicalizeArrayIndicesToPointerSize(GEP);
  783. bool NeedsExtraction;
  784. int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction);
  785. if (!NeedsExtraction)
  786. return Changed;
  787. // If LowerGEP is disabled, check, before really splitting the GEP, whether the
  788. // backend supports the addressing mode we are about to produce. If not, the
  789. // splitting probably won't be beneficial.
  790. // If LowerGEP is enabled, even when the extracted constant offset cannot match
  791. // the addressing mode, we can still optimize the other lowered parts of the
  792. // variable indices. Therefore, we don't check for addressing modes in that
  793. // case.
  794. if (!LowerGEP) {
  795. TargetTransformInfo &TTI =
  796. getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
  797. *GEP->getParent()->getParent());
  798. unsigned AddrSpace = GEP->getPointerAddressSpace();
  799. if (!TTI.isLegalAddressingMode(GEP->getType()->getElementType(),
  800. /*BaseGV=*/nullptr, AccumulativeByteOffset,
  801. /*HasBaseReg=*/true, /*Scale=*/0,
  802. AddrSpace)) {
  803. return Changed;
  804. }
  805. }
  806. // Remove the constant offset in each sequential index. The resultant GEP
  807. // computes the variadic base.
  808. // Notice that we don't remove struct field indices here. If LowerGEP is
  809. // disabled, a structure index is not accumulated and we still use the old
  810. // one. If LowerGEP is enabled, a structure index is accumulated in the
  811. // constant offset. lowerToSingleIndexGEPs or lowerToArithmetics will later
  812. // handle the constant offset and won't need a new structure index.
  813. gep_type_iterator GTI = gep_type_begin(*GEP);
  814. for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
  815. if (isa<SequentialType>(*GTI)) {
  816. // Splits this GEP index into a variadic part and a constant offset, and
  817. // uses the variadic part as the new index.
  818. Value *OldIdx = GEP->getOperand(I);
  819. User *UserChainTail;
  820. Value *NewIdx =
  821. ConstantOffsetExtractor::Extract(OldIdx, GEP, UserChainTail, DT);
  822. if (NewIdx != nullptr) {
  823. // Switches to the index with the constant offset removed.
  824. GEP->setOperand(I, NewIdx);
  825. // After switching to the new index, we can garbage-collect UserChain
  826. // and the old index if they are not used.
  827. RecursivelyDeleteTriviallyDeadInstructions(UserChainTail);
  828. RecursivelyDeleteTriviallyDeadInstructions(OldIdx);
  829. }
  830. }
  831. }
  832. // Clear the inbounds attribute because the new index may be off-bound.
  833. // e.g.,
  834. //
  835. // b = add i64 a, 5
  836. // addr = gep inbounds float* p, i64 b
  837. //
  838. // is transformed to:
  839. //
  840. // addr2 = gep float* p, i64 a
  841. // addr = gep float* addr2, i64 5
  842. //
  843. // If a is -4, although the old index b is in bounds, the new index a is
  844. // off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
  845. // inbounds keyword is not present, the offsets are added to the base
  846. // address with silently-wrapping two's complement arithmetic".
  847. // Therefore, the final code is still semantically equivalent.
  848. //
  849. // TODO(jingyue): do some range analysis to keep as many inbounds as
  850. // possible. GEPs with inbounds are more friendly to alias analysis.
  851. GEP->setIsInBounds(false);
  852. // Lowers a GEP to either GEPs with a single index or arithmetic operations.
  853. if (LowerGEP) {
  854. // Because BasicAA currently does not analyze ptrtoint/inttoptr, do not lower to
  855. // arithmetic operations if the target uses alias analysis in codegen.
  856. if (TM && TM->getSubtargetImpl(*GEP->getParent()->getParent())->useAA())
  857. lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset);
  858. else
  859. lowerToArithmetics(GEP, AccumulativeByteOffset);
  860. return true;
  861. }
  862. // No need to create another GEP if the accumulative byte offset is 0.
  863. if (AccumulativeByteOffset == 0)
  864. return true;
  865. // Offsets the base with the accumulative byte offset.
  866. //
  867. // %gep ; the base
  868. // ... %gep ...
  869. //
  870. // => add the offset
  871. //
  872. // %gep2 ; clone of %gep
  873. // %new.gep = gep %gep2, <offset / sizeof(*%gep)>
  874. // %gep ; will be removed
  875. // ... %gep ...
  876. //
  877. // => replace all uses of %gep with %new.gep and remove %gep
  878. //
  879. // %gep2 ; clone of %gep
  880. // %new.gep = gep %gep2, <offset / sizeof(*%gep)>
  881. // ... %new.gep ...
  882. //
  883. // If AccumulativeByteOffset is not a multiple of sizeof(*%gep), we emit an
  884. // uglygep (http://llvm.org/docs/GetElementPtr.html#what-s-an-uglygep):
  885. // bitcast %gep2 to i8*, add the offset, and bitcast the result back to the
  886. // type of %gep.
  887. //
  888. // %gep2 ; clone of %gep
  889. // %0 = bitcast %gep2 to i8*
  890. // %uglygep = gep %0, <offset>
  891. // %new.gep = bitcast %uglygep to <type of %gep>
  892. // ... %new.gep ...
  893. Instruction *NewGEP = GEP->clone();
  894. NewGEP->insertBefore(GEP);
  895. // Per ANSI C standard, signed / unsigned = unsigned and signed % unsigned =
  896. // unsigned. Therefore, we cast ElementTypeSizeOfGEP to a signed type so that the
  897. // division and remainder with the signed AccumulativeByteOffset below stay signed.
  898. int64_t ElementTypeSizeOfGEP = static_cast<int64_t>(
  899. DL->getTypeAllocSize(GEP->getType()->getElementType()));
  900. Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
  901. if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) {
  902. // Very likely. As long as %gep is naturally aligned, the byte offset we
  903. // extracted should be a multiple of sizeof(*%gep).
  904. int64_t Index = AccumulativeByteOffset / ElementTypeSizeOfGEP;
  905. NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP,
  906. ConstantInt::get(IntPtrTy, Index, true),
  907. GEP->getName(), GEP);
  908. } else {
  909. // Unlikely but possible. For example,
  910. // #pragma pack(1)
  911. // struct S {
  912. // int a[3];
  913. // int64 b[8];
  914. // };
  915. // #pragma pack()
  916. //
  917. // Suppose the gep before extraction is &s[i + 1].b[j + 3]. After
  918. // extraction, it becomes &s[i].b[j] and AccumulativeByteOffset is
  919. // sizeof(S) + 3 * sizeof(int64) = 100, which is not a multiple of
  920. // sizeof(int64).
  921. //
  922. // Emit an uglygep in this case.
  923. Type *I8PtrTy = Type::getInt8PtrTy(GEP->getContext(),
  924. GEP->getPointerAddressSpace());
  925. NewGEP = new BitCastInst(NewGEP, I8PtrTy, "", GEP);
  926. NewGEP = GetElementPtrInst::Create(
  927. Type::getInt8Ty(GEP->getContext()), NewGEP,
  928. ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), "uglygep",
  929. GEP);
  930. if (GEP->getType() != I8PtrTy)
  931. NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP);
  932. }
  933. GEP->replaceAllUsesWith(NewGEP);
  934. GEP->eraseFromParent();
  935. return true;
  936. }
  937. bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) {
  938. if (skipOptnoneFunction(F))
  939. return false;
  940. if (DisableSeparateConstOffsetFromGEP)
  941. return false;
  942. DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  943. bool Changed = false;
  944. for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) {
  945. for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ) {
  946. if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I++)) {
  947. Changed |= splitGEP(GEP);
  948. }
  949. // No need to split GEP ConstantExprs because all their indices are constant
  950. // already.
  951. }
  952. }
  953. if (VerifyNoDeadCode)
  954. verifyNoDeadCode(F);
  955. return Changed;
  956. }
  957. void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
  958. for (auto &B : F) {
  959. for (auto &I : B) {
  960. if (isInstructionTriviallyDead(&I)) {
  961. std::string ErrMessage;
  962. raw_string_ostream RSO(ErrMessage);
  963. RSO << "Dead instruction detected!\n" << I << "\n";
  964. llvm_unreachable(RSO.str().c_str());
  965. }
  966. }
  967. }
  968. }