InstCombineSimplifyDemanded.cpp 51 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249
  1. //===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file contains logic for simplifying instructions based on information
  11. // about how they are used.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "InstCombineInternal.h"
  15. #include "llvm/Analysis/ValueTracking.h"
  16. #include "llvm/IR/IntrinsicInst.h"
  17. #include "llvm/IR/PatternMatch.h"
  18. using namespace llvm;
  19. using namespace llvm::PatternMatch;
  20. #define DEBUG_TYPE "instcombine"
  21. /// ShrinkDemandedConstant - Check to see if the specified operand of the
  22. /// specified instruction is a constant integer. If so, check to see if there
  23. /// are any bits set in the constant that are not demanded. If so, shrink the
  24. /// constant and return true.
  25. static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
  26. APInt Demanded) {
  27. assert(I && "No instruction?");
  28. assert(OpNo < I->getNumOperands() && "Operand index too large");
  29. // If the operand is not a constant integer, nothing to do.
  30. ConstantInt *OpC = dyn_cast<ConstantInt>(I->getOperand(OpNo));
  31. if (!OpC) return false;
  32. // If there are no bits set that aren't demanded, nothing to do.
  33. Demanded = Demanded.zextOrTrunc(OpC->getValue().getBitWidth());
  34. if ((~Demanded & OpC->getValue()) == 0)
  35. return false;
  36. // This instruction is producing bits that are not demanded. Shrink the RHS.
  37. Demanded &= OpC->getValue();
  38. I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded));
  39. return true;
  40. }
  41. /// SimplifyDemandedInstructionBits - Inst is an integer instruction that
  42. /// SimplifyDemandedBits knows about. See if the instruction has any
  43. /// properties that allow us to simplify its operands.
  44. bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
  45. unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
  46. APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
  47. APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
  48. Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, KnownZero, KnownOne,
  49. 0, &Inst);
  50. if (!V) return false;
  51. if (V == &Inst) return true;
  52. ReplaceInstUsesWith(Inst, V);
  53. return true;
  54. }
  55. /// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the
  56. /// specified instruction operand if possible, updating it in place. It returns
  57. /// true if it made any change and false otherwise.
  58. bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
  59. APInt &KnownZero, APInt &KnownOne,
  60. unsigned Depth) {
  61. auto *UserI = dyn_cast<Instruction>(U.getUser());
  62. Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, KnownZero,
  63. KnownOne, Depth, UserI);
  64. if (!NewVal) return false;
  65. U = NewVal;
  66. return true;
  67. }
  68. /// SimplifyDemandedUseBits - This function attempts to replace V with a simpler
  69. /// value based on the demanded bits. When this function is called, it is known
  70. /// that only the bits set in DemandedMask of the result of V are ever used
  71. /// downstream. Consequently, depending on the mask and V, it may be possible
  72. /// to replace V with a constant or one of its operands. In such cases, this
  73. /// function does the replacement and returns true. In all other cases, it
  74. /// returns false after analyzing the expression and setting KnownOne and known
  75. /// to be one in the expression. KnownZero contains all the bits that are known
  76. /// to be zero in the expression. These are provided to potentially allow the
  77. /// caller (which might recursively be SimplifyDemandedBits itself) to simplify
  78. /// the expression. KnownOne and KnownZero always follow the invariant that
  79. /// KnownOne & KnownZero == 0. That is, a bit can't be both 1 and 0. Note that
  80. /// the bits in KnownOne and KnownZero may only be accurate for those bits set
  81. /// in DemandedMask. Note also that the bitwidth of V, DemandedMask, KnownZero
  82. /// and KnownOne must all be the same.
  83. ///
  84. /// This returns null if it did not change anything and it permits no
  85. /// simplification. This returns V itself if it did some simplification of V's
  86. /// operands based on the information about what bits are demanded. This returns
  87. /// some other non-null value if it found out that V is equal to another value
  88. /// in the context where the specified bits are demanded, but not for all users.
  89. Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
  90. APInt &KnownZero, APInt &KnownOne,
  91. unsigned Depth,
  92. Instruction *CxtI) {
  93. assert(V != nullptr && "Null pointer of Value???");
  94. assert(Depth <= 6 && "Limit Search Depth");
  95. uint32_t BitWidth = DemandedMask.getBitWidth();
  96. Type *VTy = V->getType();
  97. assert(
  98. (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) &&
  99. KnownZero.getBitWidth() == BitWidth &&
  100. KnownOne.getBitWidth() == BitWidth &&
  101. "Value *V, DemandedMask, KnownZero and KnownOne "
  102. "must have same BitWidth");
  103. if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
  104. // We know all of the bits for a constant!
  105. KnownOne = CI->getValue() & DemandedMask;
  106. KnownZero = ~KnownOne & DemandedMask;
  107. return nullptr;
  108. }
  109. if (isa<ConstantPointerNull>(V)) {
  110. // We know all of the bits for a constant!
  111. KnownOne.clearAllBits();
  112. KnownZero = DemandedMask;
  113. return nullptr;
  114. }
  115. KnownZero.clearAllBits();
  116. KnownOne.clearAllBits();
  117. if (DemandedMask == 0) { // Not demanding any bits from V.
  118. if (isa<UndefValue>(V))
  119. return nullptr;
  120. return UndefValue::get(VTy);
  121. }
  122. if (Depth == 6) // Limit search depth.
  123. return nullptr;
  124. APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
  125. APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
  126. Instruction *I = dyn_cast<Instruction>(V);
  127. if (!I) {
  128. computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
  129. return nullptr; // Only analyze instructions.
  130. }
  131. // If there are multiple uses of this value and we aren't at the root, then
  132. // we can't do any simplifications of the operands, because DemandedMask
  133. // only reflects the bits demanded by *one* of the users.
  134. if (Depth != 0 && !I->hasOneUse()) {
  135. // Despite the fact that we can't simplify this instruction in all User's
  136. // context, we can at least compute the knownzero/knownone bits, and we can
  137. // do simplifications that apply to *just* the one user if we know that
  138. // this instruction has a simpler value in that context.
  139. if (I->getOpcode() == Instruction::And) {
  140. // If either the LHS or the RHS are Zero, the result is zero.
  141. computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
  142. CxtI);
  143. computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
  144. CxtI);
  145. // If all of the demanded bits are known 1 on one side, return the other.
  146. // These bits cannot contribute to the result of the 'and' in this
  147. // context.
  148. if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
  149. (DemandedMask & ~LHSKnownZero))
  150. return I->getOperand(0);
  151. if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
  152. (DemandedMask & ~RHSKnownZero))
  153. return I->getOperand(1);
  154. // If all of the demanded bits in the inputs are known zeros, return zero.
  155. if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
  156. return Constant::getNullValue(VTy);
  157. } else if (I->getOpcode() == Instruction::Or) {
  158. // We can simplify (X|Y) -> X or Y in the user's context if we know that
  159. // only bits from X or Y are demanded.
  160. // If either the LHS or the RHS are One, the result is One.
  161. computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
  162. CxtI);
  163. computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
  164. CxtI);
  165. // If all of the demanded bits are known zero on one side, return the
  166. // other. These bits cannot contribute to the result of the 'or' in this
  167. // context.
  168. if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
  169. (DemandedMask & ~LHSKnownOne))
  170. return I->getOperand(0);
  171. if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
  172. (DemandedMask & ~RHSKnownOne))
  173. return I->getOperand(1);
  174. // If all of the potentially set bits on one side are known to be set on
  175. // the other side, just use the 'other' side.
  176. if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
  177. (DemandedMask & (~RHSKnownZero)))
  178. return I->getOperand(0);
  179. if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
  180. (DemandedMask & (~LHSKnownZero)))
  181. return I->getOperand(1);
  182. } else if (I->getOpcode() == Instruction::Xor) {
  183. // We can simplify (X^Y) -> X or Y in the user's context if we know that
  184. // only bits from X or Y are demanded.
  185. computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
  186. CxtI);
  187. computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
  188. CxtI);
  189. // If all of the demanded bits are known zero on one side, return the
  190. // other.
  191. if ((DemandedMask & RHSKnownZero) == DemandedMask)
  192. return I->getOperand(0);
  193. if ((DemandedMask & LHSKnownZero) == DemandedMask)
  194. return I->getOperand(1);
  195. }
  196. // Compute the KnownZero/KnownOne bits to simplify things downstream.
  197. computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI);
  198. return nullptr;
  199. }
  200. // If this is the root being simplified, allow it to have multiple uses,
  201. // just set the DemandedMask to all bits so that we can try to simplify the
  202. // operands. This allows visitTruncInst (for example) to simplify the
  203. // operand of a trunc without duplicating all the logic below.
  204. if (Depth == 0 && !V->hasOneUse())
  205. DemandedMask = APInt::getAllOnesValue(BitWidth);
  206. switch (I->getOpcode()) {
  207. default:
  208. computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI);
  209. break;
  210. case Instruction::And:
  211. // If either the LHS or the RHS are Zero, the result is zero.
  212. if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
  213. RHSKnownOne, Depth + 1) ||
  214. SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero,
  215. LHSKnownZero, LHSKnownOne, Depth + 1))
  216. return I;
  217. assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
  218. assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
  219. // If the client is only demanding bits that we know, return the known
  220. // constant.
  221. if ((DemandedMask & ((RHSKnownZero | LHSKnownZero)|
  222. (RHSKnownOne & LHSKnownOne))) == DemandedMask)
  223. return Constant::getIntegerValue(VTy, RHSKnownOne & LHSKnownOne);
  224. // If all of the demanded bits are known 1 on one side, return the other.
  225. // These bits cannot contribute to the result of the 'and'.
  226. if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
  227. (DemandedMask & ~LHSKnownZero))
  228. return I->getOperand(0);
  229. if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
  230. (DemandedMask & ~RHSKnownZero))
  231. return I->getOperand(1);
  232. // If all of the demanded bits in the inputs are known zeros, return zero.
  233. if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
  234. return Constant::getNullValue(VTy);
  235. // If the RHS is a constant, see if we can simplify it.
  236. if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero))
  237. return I;
  238. // Output known-1 bits are only known if set in both the LHS & RHS.
  239. KnownOne = RHSKnownOne & LHSKnownOne;
  240. // Output known-0 are known to be clear if zero in either the LHS | RHS.
  241. KnownZero = RHSKnownZero | LHSKnownZero;
  242. break;
  243. case Instruction::Or:
  244. // If either the LHS or the RHS are One, the result is One.
  245. if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
  246. RHSKnownOne, Depth + 1) ||
  247. SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne,
  248. LHSKnownZero, LHSKnownOne, Depth + 1))
  249. return I;
  250. assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
  251. assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
  252. // If the client is only demanding bits that we know, return the known
  253. // constant.
  254. if ((DemandedMask & ((RHSKnownZero & LHSKnownZero)|
  255. (RHSKnownOne | LHSKnownOne))) == DemandedMask)
  256. return Constant::getIntegerValue(VTy, RHSKnownOne | LHSKnownOne);
  257. // If all of the demanded bits are known zero on one side, return the other.
  258. // These bits cannot contribute to the result of the 'or'.
  259. if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
  260. (DemandedMask & ~LHSKnownOne))
  261. return I->getOperand(0);
  262. if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
  263. (DemandedMask & ~RHSKnownOne))
  264. return I->getOperand(1);
  265. // If all of the potentially set bits on one side are known to be set on
  266. // the other side, just use the 'other' side.
  267. if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
  268. (DemandedMask & (~RHSKnownZero)))
  269. return I->getOperand(0);
  270. if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
  271. (DemandedMask & (~LHSKnownZero)))
  272. return I->getOperand(1);
  273. // If the RHS is a constant, see if we can simplify it.
  274. if (ShrinkDemandedConstant(I, 1, DemandedMask))
  275. return I;
  276. // Output known-0 bits are only known if clear in both the LHS & RHS.
  277. KnownZero = RHSKnownZero & LHSKnownZero;
  278. // Output known-1 are known to be set if set in either the LHS | RHS.
  279. KnownOne = RHSKnownOne | LHSKnownOne;
  280. break;
  281. case Instruction::Xor: {
  282. if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
  283. RHSKnownOne, Depth + 1) ||
  284. SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, LHSKnownZero,
  285. LHSKnownOne, Depth + 1))
  286. return I;
  287. assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
  288. assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
  289. // Output known-0 bits are known if clear or set in both the LHS & RHS.
  290. APInt IKnownZero = (RHSKnownZero & LHSKnownZero) |
  291. (RHSKnownOne & LHSKnownOne);
  292. // Output known-1 are known to be set if set in only one of the LHS, RHS.
  293. APInt IKnownOne = (RHSKnownZero & LHSKnownOne) |
  294. (RHSKnownOne & LHSKnownZero);
  295. // If the client is only demanding bits that we know, return the known
  296. // constant.
  297. if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask)
  298. return Constant::getIntegerValue(VTy, IKnownOne);
  299. // If all of the demanded bits are known zero on one side, return the other.
  300. // These bits cannot contribute to the result of the 'xor'.
  301. if ((DemandedMask & RHSKnownZero) == DemandedMask)
  302. return I->getOperand(0);
  303. if ((DemandedMask & LHSKnownZero) == DemandedMask)
  304. return I->getOperand(1);
  305. // If all of the demanded bits are known to be zero on one side or the
  306. // other, turn this into an *inclusive* or.
  307. // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
  308. if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) {
  309. Instruction *Or =
  310. BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
  311. I->getName());
  312. return InsertNewInstWith(Or, *I);
  313. }
  314. // If all of the demanded bits on one side are known, and all of the set
  315. // bits on that side are also known to be set on the other side, turn this
  316. // into an AND, as we know the bits will be cleared.
  317. // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
  318. if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) {
  319. // all known
  320. if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
  321. Constant *AndC = Constant::getIntegerValue(VTy,
  322. ~RHSKnownOne & DemandedMask);
  323. Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
  324. return InsertNewInstWith(And, *I);
  325. }
  326. }
  327. // If the RHS is a constant, see if we can simplify it.
  328. // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
  329. if (ShrinkDemandedConstant(I, 1, DemandedMask))
  330. return I;
  331. // If our LHS is an 'and' and if it has one use, and if any of the bits we
  332. // are flipping are known to be set, then the xor is just resetting those
  333. // bits to zero. We can just knock out bits from the 'and' and the 'xor',
  334. // simplifying both of them.
  335. if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0)))
  336. if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
  337. isa<ConstantInt>(I->getOperand(1)) &&
  338. isa<ConstantInt>(LHSInst->getOperand(1)) &&
  339. (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) {
  340. ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
  341. ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
  342. APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask);
  343. Constant *AndC =
  344. ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
  345. Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
  346. InsertNewInstWith(NewAnd, *I);
  347. Constant *XorC =
  348. ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
  349. Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
  350. return InsertNewInstWith(NewXor, *I);
  351. }
  352. // Output known-0 bits are known if clear or set in both the LHS & RHS.
  353. KnownZero= (RHSKnownZero & LHSKnownZero) | (RHSKnownOne & LHSKnownOne);
  354. // Output known-1 are known to be set if set in only one of the LHS, RHS.
  355. KnownOne = (RHSKnownZero & LHSKnownOne) | (RHSKnownOne & LHSKnownZero);
  356. break;
  357. }
  358. case Instruction::Select:
  359. // If this is a select as part of a min/max pattern, don't simplify any
  360. // further in case we break the structure.
  361. Value *LHS, *RHS;
  362. if (matchSelectPattern(I, LHS, RHS) != SPF_UNKNOWN)
  363. return nullptr;
  364. if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, RHSKnownZero,
  365. RHSKnownOne, Depth + 1) ||
  366. SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, LHSKnownZero,
  367. LHSKnownOne, Depth + 1))
  368. return I;
  369. assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
  370. assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
  371. // If the operands are constants, see if we can simplify them.
  372. if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
  373. ShrinkDemandedConstant(I, 2, DemandedMask))
  374. return I;
  375. // Only known if known in both the LHS and RHS.
  376. KnownOne = RHSKnownOne & LHSKnownOne;
  377. KnownZero = RHSKnownZero & LHSKnownZero;
  378. break;
  379. case Instruction::Trunc: {
  380. unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
  381. DemandedMask = DemandedMask.zext(truncBf);
  382. KnownZero = KnownZero.zext(truncBf);
  383. KnownOne = KnownOne.zext(truncBf);
  384. if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
  385. KnownOne, Depth + 1))
  386. return I;
  387. DemandedMask = DemandedMask.trunc(BitWidth);
  388. KnownZero = KnownZero.trunc(BitWidth);
  389. KnownOne = KnownOne.trunc(BitWidth);
  390. assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
  391. break;
  392. }
  393. case Instruction::BitCast:
  394. if (!I->getOperand(0)->getType()->isIntOrIntVectorTy())
  395. return nullptr; // vector->int or fp->int?
  396. if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
  397. if (VectorType *SrcVTy =
  398. dyn_cast<VectorType>(I->getOperand(0)->getType())) {
  399. if (DstVTy->getNumElements() != SrcVTy->getNumElements())
  400. // Don't touch a bitcast between vectors of different element counts.
  401. return nullptr;
  402. } else
  403. // Don't touch a scalar-to-vector bitcast.
  404. return nullptr;
  405. } else if (I->getOperand(0)->getType()->isVectorTy())
  406. // Don't touch a vector-to-scalar bitcast.
  407. return nullptr;
  408. if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
  409. KnownOne, Depth + 1))
  410. return I;
  411. assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
  412. break;
  413. case Instruction::ZExt: {
  414. // Compute the bits in the result that are not present in the input.
  415. unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
  416. DemandedMask = DemandedMask.trunc(SrcBitWidth);
  417. KnownZero = KnownZero.trunc(SrcBitWidth);
  418. KnownOne = KnownOne.trunc(SrcBitWidth);
  419. if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
  420. KnownOne, Depth + 1))
  421. return I;
  422. DemandedMask = DemandedMask.zext(BitWidth);
  423. KnownZero = KnownZero.zext(BitWidth);
  424. KnownOne = KnownOne.zext(BitWidth);
  425. assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
  426. // The top bits are known to be zero.
  427. KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
  428. break;
  429. }
  430. case Instruction::SExt: {
  431. // Compute the bits in the result that are not present in the input.
  432. unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
  433. APInt InputDemandedBits = DemandedMask &
  434. APInt::getLowBitsSet(BitWidth, SrcBitWidth);
  435. APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth));
  436. // If any of the sign extended bits are demanded, we know that the sign
  437. // bit is demanded.
  438. if ((NewBits & DemandedMask) != 0)
  439. InputDemandedBits.setBit(SrcBitWidth-1);
  440. InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth);
  441. KnownZero = KnownZero.trunc(SrcBitWidth);
  442. KnownOne = KnownOne.trunc(SrcBitWidth);
  443. if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits, KnownZero,
  444. KnownOne, Depth + 1))
  445. return I;
  446. InputDemandedBits = InputDemandedBits.zext(BitWidth);
  447. KnownZero = KnownZero.zext(BitWidth);
  448. KnownOne = KnownOne.zext(BitWidth);
  449. assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
  450. // If the sign bit of the input is known set or clear, then we know the
  451. // top bits of the result.
  452. // If the input sign bit is known zero, or if the NewBits are not demanded
  453. // convert this into a zero extension.
  454. if (KnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) {
  455. // Convert to ZExt cast
  456. CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
  457. return InsertNewInstWith(NewCast, *I);
  458. } else if (KnownOne[SrcBitWidth-1]) { // Input sign bit known set
  459. KnownOne |= NewBits;
  460. }
  461. break;
  462. }
  463. case Instruction::Add:
  464. case Instruction::Sub: {
  465. /// If the high-bits of an ADD/SUB are not demanded, then we do not care
  466. /// about the high bits of the operands.
  467. unsigned NLZ = DemandedMask.countLeadingZeros();
  468. if (NLZ > 0) {
  469. // Right fill the mask of bits for this ADD/SUB to demand the most
  470. // significant bit and all those below it.
  471. APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
  472. if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
  473. LHSKnownZero, LHSKnownOne, Depth + 1) ||
  474. ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
  475. SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
  476. LHSKnownZero, LHSKnownOne, Depth + 1)) {
  477. // Disable the nsw and nuw flags here: We can no longer guarantee that
  478. // we won't wrap after simplification. Removing the nsw/nuw flags is
  479. // legal here because the top bit is not demanded.
  480. BinaryOperator &BinOP = *cast<BinaryOperator>(I);
  481. BinOP.setHasNoSignedWrap(false);
  482. BinOP.setHasNoUnsignedWrap(false);
  483. return I;
  484. }
  485. }
  486. // Otherwise just hand the add/sub off to computeKnownBits to fill in
  487. // the known zeros and ones.
  488. computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
  489. break;
  490. }
  491. case Instruction::Shl:
  492. if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
  493. {
  494. Value *VarX; ConstantInt *C1;
  495. if (match(I->getOperand(0), m_Shr(m_Value(VarX), m_ConstantInt(C1)))) {
  496. Instruction *Shr = cast<Instruction>(I->getOperand(0));
  497. Value *R = SimplifyShrShlDemandedBits(Shr, I, DemandedMask,
  498. KnownZero, KnownOne);
  499. if (R)
  500. return R;
  501. }
  502. }
  503. uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
  504. APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
  505. // If the shift is NUW/NSW, then it does demand the high bits.
  506. ShlOperator *IOp = cast<ShlOperator>(I);
  507. if (IOp->hasNoSignedWrap())
  508. DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
  509. else if (IOp->hasNoUnsignedWrap())
  510. DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
  511. if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
  512. KnownOne, Depth + 1))
  513. return I;
  514. assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
  515. KnownZero <<= ShiftAmt;
  516. KnownOne <<= ShiftAmt;
  517. // low bits known zero.
  518. if (ShiftAmt)
  519. KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
  520. }
  521. break;
  522. case Instruction::LShr:
  523. // For a logical shift right
  524. if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
  525. uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
  526. // Unsigned shift right.
  527. APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
  528. // If the shift is exact, then it does demand the low bits (and knows that
  529. // they are zero).
  530. if (cast<LShrOperator>(I)->isExact())
  531. DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
  532. if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
  533. KnownOne, Depth + 1))
  534. return I;
  535. assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
  536. KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
  537. KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
  538. if (ShiftAmt) {
  539. // Compute the new bits that are at the top now.
  540. APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
  541. KnownZero |= HighBits; // high bits known zero.
  542. }
  543. }
  544. break;
  545. case Instruction::AShr:
  546. // If this is an arithmetic shift right and only the low-bit is set, we can
  547. // always convert this into a logical shr, even if the shift amount is
  548. // variable. The low bit of the shift cannot be an input sign bit unless
  549. // the shift amount is >= the size of the datatype, which is undefined.
  550. if (DemandedMask == 1) {
  551. // Perform the logical shift right.
  552. Instruction *NewVal = BinaryOperator::CreateLShr(
  553. I->getOperand(0), I->getOperand(1), I->getName());
  554. return InsertNewInstWith(NewVal, *I);
  555. }
  556. // If the sign bit is the only bit demanded by this ashr, then there is no
  557. // need to do it, the shift doesn't change the high bit.
  558. if (DemandedMask.isSignBit())
  559. return I->getOperand(0);
  560. if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
  561. uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
  562. // Signed shift right.
  563. APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
  564. // If any of the "high bits" are demanded, we should set the sign bit as
  565. // demanded.
  566. if (DemandedMask.countLeadingZeros() <= ShiftAmt)
  567. DemandedMaskIn.setBit(BitWidth-1);
  568. // If the shift is exact, then it does demand the low bits (and knows that
  569. // they are zero).
  570. if (cast<AShrOperator>(I)->isExact())
  571. DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
  572. if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
  573. KnownOne, Depth + 1))
  574. return I;
  575. assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
  576. // Compute the new bits that are at the top now.
  577. APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
  578. KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
  579. KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
  580. // Handle the sign bits.
  581. APInt SignBit(APInt::getSignBit(BitWidth));
  582. // Adjust to where it is now in the mask.
  583. SignBit = APIntOps::lshr(SignBit, ShiftAmt);
  584. // If the input sign bit is known to be zero, or if none of the top bits
  585. // are demanded, turn this into an unsigned shift right.
  586. if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] ||
  587. (HighBits & ~DemandedMask) == HighBits) {
  588. // Perform the logical shift right.
  589. BinaryOperator *NewVal = BinaryOperator::CreateLShr(I->getOperand(0),
  590. SA, I->getName());
  591. NewVal->setIsExact(cast<BinaryOperator>(I)->isExact());
  592. return InsertNewInstWith(NewVal, *I);
  593. } else if ((KnownOne & SignBit) != 0) { // New bits are known one.
  594. KnownOne |= HighBits;
  595. }
  596. }
  597. break;
  598. case Instruction::SRem:
  599. if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
  600. // X % -1 demands all the bits because we don't want to introduce
  601. // INT_MIN % -1 (== undef) by accident.
  602. if (Rem->isAllOnesValue())
  603. break;
  604. APInt RA = Rem->getValue().abs();
  605. if (RA.isPowerOf2()) {
  606. if (DemandedMask.ult(RA)) // srem won't affect demanded bits
  607. return I->getOperand(0);
  608. APInt LowBits = RA - 1;
  609. APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
  610. if (SimplifyDemandedBits(I->getOperandUse(0), Mask2, LHSKnownZero,
  611. LHSKnownOne, Depth + 1))
  612. return I;
  613. // The low bits of LHS are unchanged by the srem.
  614. KnownZero = LHSKnownZero & LowBits;
  615. KnownOne = LHSKnownOne & LowBits;
  616. // If LHS is non-negative or has all low bits zero, then the upper bits
  617. // are all zero.
  618. if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits))
  619. KnownZero |= ~LowBits;
  620. // If LHS is negative and not all low bits are zero, then the upper bits
  621. // are all one.
  622. if (LHSKnownOne[BitWidth-1] && ((LHSKnownOne & LowBits) != 0))
  623. KnownOne |= ~LowBits;
  624. assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
  625. }
  626. }
  627. // The sign bit is the LHS's sign bit, except when the result of the
  628. // remainder is zero.
  629. if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
  630. APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
  631. computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
  632. CxtI);
  633. // If it's known zero, our sign bit is also zero.
  634. if (LHSKnownZero.isNegative())
  635. KnownZero.setBit(KnownZero.getBitWidth() - 1);
  636. }
  637. break;
  638. case Instruction::URem: {
  639. APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
  640. APInt AllOnes = APInt::getAllOnesValue(BitWidth);
  641. if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes, KnownZero2,
  642. KnownOne2, Depth + 1) ||
  643. SimplifyDemandedBits(I->getOperandUse(1), AllOnes, KnownZero2,
  644. KnownOne2, Depth + 1))
  645. return I;
  646. unsigned Leaders = KnownZero2.countLeadingOnes();
  647. Leaders = std::max(Leaders,
  648. KnownZero2.countLeadingOnes());
  649. KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
  650. break;
  651. }
  652. case Instruction::Call:
  653. if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
  654. switch (II->getIntrinsicID()) {
  655. default: break;
  656. case Intrinsic::bswap: {
  657. // If the only bits demanded come from one byte of the bswap result,
  658. // just shift the input byte into position to eliminate the bswap.
  659. unsigned NLZ = DemandedMask.countLeadingZeros();
  660. unsigned NTZ = DemandedMask.countTrailingZeros();
  661. // Round NTZ down to the next byte. If we have 11 trailing zeros, then
  662. // we need all the bits down to bit 8. Likewise, round NLZ. If we
  663. // have 14 leading zeros, round to 8.
  664. NLZ &= ~7;
  665. NTZ &= ~7;
  666. // If we need exactly one byte, we can do this transformation.
  667. if (BitWidth-NLZ-NTZ == 8) {
  668. unsigned ResultBit = NTZ;
  669. unsigned InputBit = BitWidth-NTZ-8;
  670. // Replace this with either a left or right shift to get the byte into
  671. // the right place.
  672. Instruction *NewVal;
  673. if (InputBit > ResultBit)
  674. NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0),
  675. ConstantInt::get(I->getType(), InputBit-ResultBit));
  676. else
  677. NewVal = BinaryOperator::CreateShl(II->getArgOperand(0),
  678. ConstantInt::get(I->getType(), ResultBit-InputBit));
  679. NewVal->takeName(I);
  680. return InsertNewInstWith(NewVal, *I);
  681. }
  682. // TODO: Could compute known zero/one bits based on the input.
  683. break;
  684. }
  685. #if 0 // HLSL Change - remove platform intrinsics
  686. case Intrinsic::x86_sse42_crc32_64_64:
  687. KnownZero = APInt::getHighBitsSet(64, 32);
  688. return nullptr;
  689. #endif // HLSL Change - remove platform intrinsics
  690. }
  691. }
  692. computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
  693. break;
  694. }
  695. // If the client is only demanding bits that we know, return the known
  696. // constant.
  697. if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
  698. return Constant::getIntegerValue(VTy, KnownOne);
  699. return nullptr;
  700. }
  701. /// Helper routine of SimplifyDemandedUseBits. It tries to simplify
  702. /// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into
  703. /// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
  704. /// of "C2-C1".
  705. ///
  706. /// Suppose E1 and E2 are generally different in bits S={bm, bm+1,
  707. /// ..., bn}, without considering the specific value X is holding.
  708. /// This transformation is legal iff one of following conditions is hold:
  709. /// 1) All the bit in S are 0, in this case E1 == E2.
  710. /// 2) We don't care those bits in S, per the input DemandedMask.
  711. /// 3) Combination of 1) and 2). Some bits in S are 0, and we don't care the
  712. /// rest bits.
  713. ///
  714. /// Currently we only test condition 2).
  715. ///
  716. /// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
  717. /// not successful.
  718. Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
  719. Instruction *Shl, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne) {
  720. const APInt &ShlOp1 = cast<ConstantInt>(Shl->getOperand(1))->getValue();
  721. const APInt &ShrOp1 = cast<ConstantInt>(Shr->getOperand(1))->getValue();
  722. if (!ShlOp1 || !ShrOp1)
  723. return nullptr; // Noop.
  724. Value *VarX = Shr->getOperand(0);
  725. Type *Ty = VarX->getType();
  726. unsigned BitWidth = Ty->getIntegerBitWidth();
  727. if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth))
  728. return nullptr; // Undef.
  729. unsigned ShlAmt = ShlOp1.getZExtValue();
  730. unsigned ShrAmt = ShrOp1.getZExtValue();
  731. KnownOne.clearAllBits();
  732. KnownZero = APInt::getBitsSet(KnownZero.getBitWidth(), 0, ShlAmt-1);
  733. KnownZero &= DemandedMask;
  734. APInt BitMask1(APInt::getAllOnesValue(BitWidth));
  735. APInt BitMask2(APInt::getAllOnesValue(BitWidth));
  736. bool isLshr = (Shr->getOpcode() == Instruction::LShr);
  737. BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
  738. (BitMask1.ashr(ShrAmt) << ShlAmt);
  739. if (ShrAmt <= ShlAmt) {
  740. BitMask2 <<= (ShlAmt - ShrAmt);
  741. } else {
  742. BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt):
  743. BitMask2.ashr(ShrAmt - ShlAmt);
  744. }
  745. // Check if condition-2 (see the comment to this function) is satified.
  746. if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) {
  747. if (ShrAmt == ShlAmt)
  748. return VarX;
  749. if (!Shr->hasOneUse())
  750. return nullptr;
  751. BinaryOperator *New;
  752. if (ShrAmt < ShlAmt) {
  753. Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt);
  754. New = BinaryOperator::CreateShl(VarX, Amt);
  755. BinaryOperator *Orig = cast<BinaryOperator>(Shl);
  756. New->setHasNoSignedWrap(Orig->hasNoSignedWrap());
  757. New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap());
  758. } else {
  759. Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt);
  760. New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) :
  761. BinaryOperator::CreateAShr(VarX, Amt);
  762. if (cast<BinaryOperator>(Shr)->isExact())
  763. New->setIsExact(true);
  764. }
  765. return InsertNewInstWith(New, *Shl);
  766. }
  767. return nullptr;
  768. }
  769. /// SimplifyDemandedVectorElts - The specified value produces a vector with
  770. /// any number of elements. DemandedElts contains the set of elements that are
  771. /// actually used by the caller. This method analyzes which elements of the
  772. /// operand are undef and returns that information in UndefElts.
  773. ///
  774. /// If the information about demanded elements can be used to simplify the
  775. /// operation, the operation is simplified, then the resultant value is
  776. /// returned. This returns null if no change was made.
  777. Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
  778. APInt &UndefElts,
  779. unsigned Depth) {
  780. unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
  781. APInt EltMask(APInt::getAllOnesValue(VWidth));
  782. assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
  783. if (isa<UndefValue>(V)) {
  784. // If the entire vector is undefined, just return this info.
  785. UndefElts = EltMask;
  786. return nullptr;
  787. }
  788. if (DemandedElts == 0) { // If nothing is demanded, provide undef.
  789. UndefElts = EltMask;
  790. return UndefValue::get(V->getType());
  791. }
  792. UndefElts = 0;
  793. // Handle ConstantAggregateZero, ConstantVector, ConstantDataSequential.
  794. if (Constant *C = dyn_cast<Constant>(V)) {
  795. // Check if this is identity. If so, return 0 since we are not simplifying
  796. // anything.
  797. if (DemandedElts.isAllOnesValue())
  798. return nullptr;
  799. Type *EltTy = cast<VectorType>(V->getType())->getElementType();
  800. Constant *Undef = UndefValue::get(EltTy);
  801. SmallVector<Constant*, 16> Elts;
  802. for (unsigned i = 0; i != VWidth; ++i) {
  803. if (!DemandedElts[i]) { // If not demanded, set to undef.
  804. Elts.push_back(Undef);
  805. UndefElts.setBit(i);
  806. continue;
  807. }
  808. Constant *Elt = C->getAggregateElement(i);
  809. if (!Elt) return nullptr;
  810. if (isa<UndefValue>(Elt)) { // Already undef.
  811. Elts.push_back(Undef);
  812. UndefElts.setBit(i);
  813. } else { // Otherwise, defined.
  814. Elts.push_back(Elt);
  815. }
  816. }
  817. // If we changed the constant, return it.
  818. Constant *NewCV = ConstantVector::get(Elts);
  819. return NewCV != C ? NewCV : nullptr;
  820. }
  821. // Limit search depth.
  822. if (Depth == 10)
  823. return nullptr;
  824. // If multiple users are using the root value, proceed with
  825. // simplification conservatively assuming that all elements
  826. // are needed.
  827. if (!V->hasOneUse()) {
  828. // Quit if we find multiple users of a non-root value though.
  829. // They'll be handled when it's their turn to be visited by
  830. // the main instcombine process.
  831. if (Depth != 0)
  832. // TODO: Just compute the UndefElts information recursively.
  833. return nullptr;
  834. // Conservatively assume that all elements are needed.
  835. DemandedElts = EltMask;
  836. }
  837. Instruction *I = dyn_cast<Instruction>(V);
  838. if (!I) return nullptr; // Only analyze instructions.
  839. bool MadeChange = false;
  840. APInt UndefElts2(VWidth, 0);
  841. Value *TmpV;
  842. switch (I->getOpcode()) {
  843. default: break;
  844. case Instruction::InsertElement: {
  845. // If this is a variable index, we don't know which element it overwrites.
  846. // demand exactly the same input as we produce.
  847. ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
  848. if (!Idx) {
  849. // Note that we can't propagate undef elt info, because we don't know
  850. // which elt is getting updated.
  851. TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
  852. UndefElts2, Depth + 1);
  853. if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
  854. break;
  855. }
  856. // If this is inserting an element that isn't demanded, remove this
  857. // insertelement.
  858. unsigned IdxNo = Idx->getZExtValue();
  859. if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
  860. Worklist.Add(I);
  861. return I->getOperand(0);
  862. }
  863. // Otherwise, the element inserted overwrites whatever was there, so the
  864. // input demanded set is simpler than the output set.
  865. APInt DemandedElts2 = DemandedElts;
  866. DemandedElts2.clearBit(IdxNo);
  867. TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
  868. UndefElts, Depth + 1);
  869. if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
  870. // The inserted element is defined.
  871. UndefElts.clearBit(IdxNo);
  872. break;
  873. }
  874. case Instruction::ShuffleVector: {
  875. ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
  876. uint64_t LHSVWidth =
  877. cast<VectorType>(Shuffle->getOperand(0)->getType())->getNumElements();
  878. APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
  879. for (unsigned i = 0; i < VWidth; i++) {
  880. if (DemandedElts[i]) {
  881. unsigned MaskVal = Shuffle->getMaskValue(i);
  882. if (MaskVal != -1u) {
  883. assert(MaskVal < LHSVWidth * 2 &&
  884. "shufflevector mask index out of range!");
  885. if (MaskVal < LHSVWidth)
  886. LeftDemanded.setBit(MaskVal);
  887. else
  888. RightDemanded.setBit(MaskVal - LHSVWidth);
  889. }
  890. }
  891. }
  892. APInt UndefElts4(LHSVWidth, 0);
  893. TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
  894. UndefElts4, Depth + 1);
  895. if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
  896. APInt UndefElts3(LHSVWidth, 0);
  897. TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
  898. UndefElts3, Depth + 1);
  899. if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
  900. bool NewUndefElts = false;
  901. for (unsigned i = 0; i < VWidth; i++) {
  902. unsigned MaskVal = Shuffle->getMaskValue(i);
  903. if (MaskVal == -1u) {
  904. UndefElts.setBit(i);
  905. } else if (!DemandedElts[i]) {
  906. NewUndefElts = true;
  907. UndefElts.setBit(i);
  908. } else if (MaskVal < LHSVWidth) {
  909. if (UndefElts4[MaskVal]) {
  910. NewUndefElts = true;
  911. UndefElts.setBit(i);
  912. }
  913. } else {
  914. if (UndefElts3[MaskVal - LHSVWidth]) {
  915. NewUndefElts = true;
  916. UndefElts.setBit(i);
  917. }
  918. }
  919. }
  920. if (NewUndefElts) {
  921. // Add additional discovered undefs.
  922. SmallVector<Constant*, 16> Elts;
  923. for (unsigned i = 0; i < VWidth; ++i) {
  924. if (UndefElts[i])
  925. Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext())));
  926. else
  927. Elts.push_back(ConstantInt::get(Type::getInt32Ty(I->getContext()),
  928. Shuffle->getMaskValue(i)));
  929. }
  930. I->setOperand(2, ConstantVector::get(Elts));
  931. MadeChange = true;
  932. }
  933. break;
  934. }
  935. case Instruction::Select: {
  936. APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts);
  937. if (ConstantVector* CV = dyn_cast<ConstantVector>(I->getOperand(0))) {
  938. for (unsigned i = 0; i < VWidth; i++) {
  939. if (CV->getAggregateElement(i)->isNullValue())
  940. LeftDemanded.clearBit(i);
  941. else
  942. RightDemanded.clearBit(i);
  943. }
  944. }
  945. TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded, UndefElts,
  946. Depth + 1);
  947. if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
  948. TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded,
  949. UndefElts2, Depth + 1);
  950. if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; }
  951. // Output elements are undefined if both are undefined.
  952. UndefElts &= UndefElts2;
  953. break;
  954. }
  955. case Instruction::BitCast: {
  956. // Vector->vector casts only.
  957. VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
  958. if (!VTy) break;
  959. unsigned InVWidth = VTy->getNumElements();
  960. APInt InputDemandedElts(InVWidth, 0);
  961. unsigned Ratio;
  962. if (VWidth == InVWidth) {
  963. // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
  964. // elements as are demanded of us.
  965. Ratio = 1;
  966. InputDemandedElts = DemandedElts;
  967. } else if (VWidth > InVWidth) {
  968. // Untested so far.
  969. break;
  970. // If there are more elements in the result than there are in the source,
  971. // then an input element is live if any of the corresponding output
  972. // elements are live.
  973. Ratio = VWidth/InVWidth;
  974. for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
  975. if (DemandedElts[OutIdx])
  976. InputDemandedElts.setBit(OutIdx/Ratio);
  977. }
  978. } else {
  979. // Untested so far.
  980. break;
  981. // If there are more elements in the source than there are in the result,
  982. // then an input element is live if the corresponding output element is
  983. // live.
  984. Ratio = InVWidth/VWidth;
  985. for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
  986. if (DemandedElts[InIdx/Ratio])
  987. InputDemandedElts.setBit(InIdx);
  988. }
  989. // div/rem demand all inputs, because they don't want divide by zero.
  990. TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
  991. UndefElts2, Depth + 1);
  992. if (TmpV) {
  993. I->setOperand(0, TmpV);
  994. MadeChange = true;
  995. }
  996. UndefElts = UndefElts2;
  997. if (VWidth > InVWidth) {
  998. llvm_unreachable("Unimp");
  999. // If there are more elements in the result than there are in the source,
  1000. // then an output element is undef if the corresponding input element is
  1001. // undef.
  1002. for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
  1003. if (UndefElts2[OutIdx/Ratio])
  1004. UndefElts.setBit(OutIdx);
  1005. } else if (VWidth < InVWidth) {
  1006. llvm_unreachable("Unimp");
  1007. // If there are more elements in the source than there are in the result,
  1008. // then a result element is undef if all of the corresponding input
  1009. // elements are undef.
  1010. UndefElts = ~0ULL >> (64-VWidth); // Start out all undef.
  1011. for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
  1012. if (!UndefElts2[InIdx]) // Not undef?
  1013. UndefElts.clearBit(InIdx/Ratio); // Clear undef bit.
  1014. }
  1015. break;
  1016. }
  1017. case Instruction::And:
  1018. case Instruction::Or:
  1019. case Instruction::Xor:
  1020. case Instruction::Add:
  1021. case Instruction::Sub:
  1022. case Instruction::Mul:
  1023. // div/rem demand all inputs, because they don't want divide by zero.
  1024. TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
  1025. Depth + 1);
  1026. if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
  1027. TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
  1028. UndefElts2, Depth + 1);
  1029. if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
  1030. // Output elements are undefined if both are undefined. Consider things
  1031. // like undef&0. The result is known zero, not undef.
  1032. UndefElts &= UndefElts2;
  1033. break;
  1034. case Instruction::FPTrunc:
  1035. case Instruction::FPExt:
  1036. TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
  1037. Depth + 1);
  1038. if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
  1039. break;
  1040. case Instruction::Call: {
  1041. IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
  1042. if (!II) break;
  1043. #if 0 // HLSL Change - remove platform intrinsics
  1044. switch (II->getIntrinsicID()) {
  1045. default: break;
  1046. // Binary vector operations that work column-wise. A dest element is a
  1047. // function of the corresponding input elements from the two inputs.
  1048. case Intrinsic::x86_sse_sub_ss:
  1049. case Intrinsic::x86_sse_mul_ss:
  1050. case Intrinsic::x86_sse_min_ss:
  1051. case Intrinsic::x86_sse_max_ss:
  1052. case Intrinsic::x86_sse2_sub_sd:
  1053. case Intrinsic::x86_sse2_mul_sd:
  1054. case Intrinsic::x86_sse2_min_sd:
  1055. case Intrinsic::x86_sse2_max_sd:
  1056. TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
  1057. UndefElts, Depth + 1);
  1058. if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
  1059. TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
  1060. UndefElts2, Depth + 1);
  1061. if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
  1062. // If only the low elt is demanded and this is a scalarizable intrinsic,
  1063. // scalarize it now.
  1064. if (DemandedElts == 1) {
  1065. switch (II->getIntrinsicID()) {
  1066. default: break;
  1067. case Intrinsic::x86_sse_sub_ss:
  1068. case Intrinsic::x86_sse_mul_ss:
  1069. case Intrinsic::x86_sse2_sub_sd:
  1070. case Intrinsic::x86_sse2_mul_sd:
  1071. // TODO: Lower MIN/MAX/ABS/etc
  1072. Value *LHS = II->getArgOperand(0);
  1073. Value *RHS = II->getArgOperand(1);
  1074. // Extract the element as scalars.
  1075. LHS = InsertNewInstWith(ExtractElementInst::Create(LHS,
  1076. ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
  1077. RHS = InsertNewInstWith(ExtractElementInst::Create(RHS,
  1078. ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
  1079. switch (II->getIntrinsicID()) {
  1080. default: llvm_unreachable("Case stmts out of sync!");
  1081. case Intrinsic::x86_sse_sub_ss:
  1082. case Intrinsic::x86_sse2_sub_sd:
  1083. TmpV = InsertNewInstWith(BinaryOperator::CreateFSub(LHS, RHS,
  1084. II->getName()), *II);
  1085. break;
  1086. case Intrinsic::x86_sse_mul_ss:
  1087. case Intrinsic::x86_sse2_mul_sd:
  1088. TmpV = InsertNewInstWith(BinaryOperator::CreateFMul(LHS, RHS,
  1089. II->getName()), *II);
  1090. break;
  1091. }
  1092. Instruction *New =
  1093. InsertElementInst::Create(
  1094. UndefValue::get(II->getType()), TmpV,
  1095. ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U, false),
  1096. II->getName());
  1097. InsertNewInstWith(New, *II);
  1098. return New;
  1099. }
  1100. }
  1101. // Output elements are undefined if both are undefined. Consider things
  1102. // like undef&0. The result is known zero, not undef.
  1103. UndefElts &= UndefElts2;
  1104. break;
  1105. }
  1106. #endif // HLSL Change - remove platform intrinsics
  1107. break;
  1108. }
  1109. }
  1110. return MadeChange ? I : nullptr;
  1111. }