BDCE.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410
  1. //===---- BDCE.cpp - Bit-tracking dead code elimination -------------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file implements the Bit-Tracking Dead Code Elimination pass. Some
  11. // instructions (shifts, some ands, ors, etc.) kill some of their input bits.
  12. // We track these dead bits and remove instructions that compute only these
  13. // dead bits.
  14. //
  15. //===----------------------------------------------------------------------===//
  16. #include "llvm/Transforms/Scalar.h"
  17. #include "llvm/ADT/DenseMap.h"
  18. #include "llvm/ADT/DepthFirstIterator.h"
  19. #include "llvm/ADT/SmallPtrSet.h"
  20. #include "llvm/ADT/SmallVector.h"
  21. #include "llvm/ADT/Statistic.h"
  22. #include "llvm/Analysis/AssumptionCache.h"
  23. #include "llvm/Analysis/ValueTracking.h"
  24. #include "llvm/IR/BasicBlock.h"
  25. #include "llvm/IR/CFG.h"
  26. #include "llvm/IR/DataLayout.h"
  27. #include "llvm/IR/Dominators.h"
  28. #include "llvm/IR/InstIterator.h"
  29. #include "llvm/IR/Instructions.h"
  30. #include "llvm/IR/IntrinsicInst.h"
  31. #include "llvm/IR/Module.h"
  32. #include "llvm/IR/Operator.h"
  33. #include "llvm/Pass.h"
  34. #include "llvm/Support/Debug.h"
  35. #include "llvm/Support/raw_ostream.h"
  36. using namespace llvm;
  37. #define DEBUG_TYPE "bdce"
  38. STATISTIC(NumRemoved, "Number of instructions removed (unused)");
  39. STATISTIC(NumSimplified, "Number of instructions trivialized (dead bits)");
  40. namespace {
  41. struct BDCE : public FunctionPass {
  42. static char ID; // Pass identification, replacement for typeid
  43. BDCE() : FunctionPass(ID) {
  44. initializeBDCEPass(*PassRegistry::getPassRegistry());
  45. }
  46. bool runOnFunction(Function& F) override;
  47. void getAnalysisUsage(AnalysisUsage& AU) const override {
  48. AU.setPreservesCFG();
  49. AU.addRequired<AssumptionCacheTracker>();
  50. AU.addRequired<DominatorTreeWrapperPass>();
  51. }
  52. void determineLiveOperandBits(const Instruction *UserI,
  53. const Instruction *I, unsigned OperandNo,
  54. const APInt &AOut, APInt &AB,
  55. APInt &KnownZero, APInt &KnownOne,
  56. APInt &KnownZero2, APInt &KnownOne2);
  57. AssumptionCache *AC;
  58. DominatorTree *DT;
  59. };
  60. }
  61. char BDCE::ID = 0;
  62. INITIALIZE_PASS_BEGIN(BDCE, "bdce", "Bit-Tracking Dead Code Elimination",
  63. false, false)
  64. INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
  65. INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  66. INITIALIZE_PASS_END(BDCE, "bdce", "Bit-Tracking Dead Code Elimination",
  67. false, false)
  68. static bool isAlwaysLive(Instruction *I) {
  69. return isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) ||
  70. isa<LandingPadInst>(I) || I->mayHaveSideEffects();
  71. }
  72. void BDCE::determineLiveOperandBits(const Instruction *UserI,
  73. const Instruction *I, unsigned OperandNo,
  74. const APInt &AOut, APInt &AB,
  75. APInt &KnownZero, APInt &KnownOne,
  76. APInt &KnownZero2, APInt &KnownOne2) {
  77. unsigned BitWidth = AB.getBitWidth();
  78. // We're called once per operand, but for some instructions, we need to
  79. // compute known bits of both operands in order to determine the live bits of
  80. // either (when both operands are instructions themselves). We don't,
  81. // however, want to do this twice, so we cache the result in APInts that live
  82. // in the caller. For the two-relevant-operands case, both operand values are
  83. // provided here.
  84. auto ComputeKnownBits =
  85. [&](unsigned BitWidth, const Value *V1, const Value *V2) {
  86. const DataLayout &DL = I->getModule()->getDataLayout();
  87. KnownZero = APInt(BitWidth, 0);
  88. KnownOne = APInt(BitWidth, 0);
  89. computeKnownBits(const_cast<Value *>(V1), KnownZero, KnownOne, DL, 0,
  90. AC, UserI, DT);
  91. if (V2) {
  92. KnownZero2 = APInt(BitWidth, 0);
  93. KnownOne2 = APInt(BitWidth, 0);
  94. computeKnownBits(const_cast<Value *>(V2), KnownZero2, KnownOne2, DL,
  95. 0, AC, UserI, DT);
  96. }
  97. };
  98. switch (UserI->getOpcode()) {
  99. default: break;
  100. case Instruction::Call:
  101. case Instruction::Invoke:
  102. if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(UserI))
  103. switch (II->getIntrinsicID()) {
  104. default: break;
  105. case Intrinsic::bswap:
  106. // The alive bits of the input are the swapped alive bits of
  107. // the output.
  108. AB = AOut.byteSwap();
  109. break;
  110. case Intrinsic::ctlz:
  111. if (OperandNo == 0) {
  112. // We need some output bits, so we need all bits of the
  113. // input to the left of, and including, the leftmost bit
  114. // known to be one.
  115. ComputeKnownBits(BitWidth, I, nullptr);
  116. AB = APInt::getHighBitsSet(BitWidth,
  117. std::min(BitWidth, KnownOne.countLeadingZeros()+1));
  118. }
  119. break;
  120. case Intrinsic::cttz:
  121. if (OperandNo == 0) {
  122. // We need some output bits, so we need all bits of the
  123. // input to the right of, and including, the rightmost bit
  124. // known to be one.
  125. ComputeKnownBits(BitWidth, I, nullptr);
  126. AB = APInt::getLowBitsSet(BitWidth,
  127. std::min(BitWidth, KnownOne.countTrailingZeros()+1));
  128. }
  129. break;
  130. }
  131. break;
  132. case Instruction::Add:
  133. case Instruction::Sub:
  134. // Find the highest live output bit. We don't need any more input
  135. // bits than that (adds, and thus subtracts, ripple only to the
  136. // left).
  137. AB = APInt::getLowBitsSet(BitWidth, AOut.getActiveBits());
  138. break;
  139. case Instruction::Shl:
  140. if (OperandNo == 0)
  141. if (ConstantInt *CI =
  142. dyn_cast<ConstantInt>(UserI->getOperand(1))) {
  143. uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
  144. AB = AOut.lshr(ShiftAmt);
  145. // If the shift is nuw/nsw, then the high bits are not dead
  146. // (because we've promised that they *must* be zero).
  147. const ShlOperator *S = cast<ShlOperator>(UserI);
  148. if (S->hasNoSignedWrap())
  149. AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
  150. else if (S->hasNoUnsignedWrap())
  151. AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
  152. }
  153. break;
  154. case Instruction::LShr:
  155. if (OperandNo == 0)
  156. if (ConstantInt *CI =
  157. dyn_cast<ConstantInt>(UserI->getOperand(1))) {
  158. uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
  159. AB = AOut.shl(ShiftAmt);
  160. // If the shift is exact, then the low bits are not dead
  161. // (they must be zero).
  162. if (cast<LShrOperator>(UserI)->isExact())
  163. AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
  164. }
  165. break;
  166. case Instruction::AShr:
  167. if (OperandNo == 0)
  168. if (ConstantInt *CI =
  169. dyn_cast<ConstantInt>(UserI->getOperand(1))) {
  170. uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
  171. AB = AOut.shl(ShiftAmt);
  172. // Because the high input bit is replicated into the
  173. // high-order bits of the result, if we need any of those
  174. // bits, then we must keep the highest input bit.
  175. if ((AOut & APInt::getHighBitsSet(BitWidth, ShiftAmt))
  176. .getBoolValue())
  177. AB.setBit(BitWidth-1);
  178. // If the shift is exact, then the low bits are not dead
  179. // (they must be zero).
  180. if (cast<AShrOperator>(UserI)->isExact())
  181. AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
  182. }
  183. break;
  184. case Instruction::And:
  185. AB = AOut;
  186. // For bits that are known zero, the corresponding bits in the
  187. // other operand are dead (unless they're both zero, in which
  188. // case they can't both be dead, so just mark the LHS bits as
  189. // dead).
  190. if (OperandNo == 0) {
  191. ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
  192. AB &= ~KnownZero2;
  193. } else {
  194. if (!isa<Instruction>(UserI->getOperand(0)))
  195. ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
  196. AB &= ~(KnownZero & ~KnownZero2);
  197. }
  198. break;
  199. case Instruction::Or:
  200. AB = AOut;
  201. // For bits that are known one, the corresponding bits in the
  202. // other operand are dead (unless they're both one, in which
  203. // case they can't both be dead, so just mark the LHS bits as
  204. // dead).
  205. if (OperandNo == 0) {
  206. ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
  207. AB &= ~KnownOne2;
  208. } else {
  209. if (!isa<Instruction>(UserI->getOperand(0)))
  210. ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
  211. AB &= ~(KnownOne & ~KnownOne2);
  212. }
  213. break;
  214. case Instruction::Xor:
  215. case Instruction::PHI:
  216. AB = AOut;
  217. break;
  218. case Instruction::Trunc:
  219. AB = AOut.zext(BitWidth);
  220. break;
  221. case Instruction::ZExt:
  222. AB = AOut.trunc(BitWidth);
  223. break;
  224. case Instruction::SExt:
  225. AB = AOut.trunc(BitWidth);
  226. // Because the high input bit is replicated into the
  227. // high-order bits of the result, if we need any of those
  228. // bits, then we must keep the highest input bit.
  229. if ((AOut & APInt::getHighBitsSet(AOut.getBitWidth(),
  230. AOut.getBitWidth() - BitWidth))
  231. .getBoolValue())
  232. AB.setBit(BitWidth-1);
  233. break;
  234. case Instruction::Select:
  235. if (OperandNo != 0)
  236. AB = AOut;
  237. break;
  238. }
  239. }
  240. bool BDCE::runOnFunction(Function& F) {
  241. if (skipOptnoneFunction(F))
  242. return false;
  243. AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  244. DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  245. DenseMap<Instruction *, APInt> AliveBits;
  246. SmallVector<Instruction*, 128> Worklist;
  247. // The set of visited instructions (non-integer-typed only).
  248. SmallPtrSet<Instruction*, 128> Visited;
  249. // Collect the set of "root" instructions that are known live.
  250. for (Instruction &I : inst_range(F)) {
  251. if (!isAlwaysLive(&I))
  252. continue;
  253. DEBUG(dbgs() << "BDCE: Root: " << I << "\n");
  254. // For integer-valued instructions, set up an initial empty set of alive
  255. // bits and add the instruction to the work list. For other instructions
  256. // add their operands to the work list (for integer values operands, mark
  257. // all bits as live).
  258. if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
  259. if (!AliveBits.count(&I)) {
  260. AliveBits[&I] = APInt(IT->getBitWidth(), 0);
  261. Worklist.push_back(&I);
  262. }
  263. continue;
  264. }
  265. // Non-integer-typed instructions...
  266. for (Use &OI : I.operands()) {
  267. if (Instruction *J = dyn_cast<Instruction>(OI)) {
  268. if (IntegerType *IT = dyn_cast<IntegerType>(J->getType()))
  269. AliveBits[J] = APInt::getAllOnesValue(IT->getBitWidth());
  270. Worklist.push_back(J);
  271. }
  272. }
  273. // To save memory, we don't add I to the Visited set here. Instead, we
  274. // check isAlwaysLive on every instruction when searching for dead
  275. // instructions later (we need to check isAlwaysLive for the
  276. // integer-typed instructions anyway).
  277. }
  278. // Propagate liveness backwards to operands.
  279. while (!Worklist.empty()) {
  280. Instruction *UserI = Worklist.pop_back_val();
  281. DEBUG(dbgs() << "BDCE: Visiting: " << *UserI);
  282. APInt AOut;
  283. if (UserI->getType()->isIntegerTy()) {
  284. AOut = AliveBits[UserI];
  285. DEBUG(dbgs() << " Alive Out: " << AOut);
  286. }
  287. DEBUG(dbgs() << "\n");
  288. if (!UserI->getType()->isIntegerTy())
  289. Visited.insert(UserI);
  290. APInt KnownZero, KnownOne, KnownZero2, KnownOne2;
  291. // Compute the set of alive bits for each operand. These are anded into the
  292. // existing set, if any, and if that changes the set of alive bits, the
  293. // operand is added to the work-list.
  294. for (Use &OI : UserI->operands()) {
  295. if (Instruction *I = dyn_cast<Instruction>(OI)) {
  296. if (IntegerType *IT = dyn_cast<IntegerType>(I->getType())) {
  297. unsigned BitWidth = IT->getBitWidth();
  298. APInt AB = APInt::getAllOnesValue(BitWidth);
  299. if (UserI->getType()->isIntegerTy() && !AOut &&
  300. !isAlwaysLive(UserI)) {
  301. AB = APInt(BitWidth, 0);
  302. } else {
  303. // If all bits of the output are dead, then all bits of the input
  304. // Bits of each operand that are used to compute alive bits of the
  305. // output are alive, all others are dead.
  306. determineLiveOperandBits(UserI, I, OI.getOperandNo(), AOut, AB,
  307. KnownZero, KnownOne,
  308. KnownZero2, KnownOne2);
  309. }
  310. // If we've added to the set of alive bits (or the operand has not
  311. // been previously visited), then re-queue the operand to be visited
  312. // again.
  313. APInt ABPrev(BitWidth, 0);
  314. auto ABI = AliveBits.find(I);
  315. if (ABI != AliveBits.end())
  316. ABPrev = ABI->second;
  317. APInt ABNew = AB | ABPrev;
  318. if (ABNew != ABPrev || ABI == AliveBits.end()) {
  319. AliveBits[I] = std::move(ABNew);
  320. Worklist.push_back(I);
  321. }
  322. } else if (!Visited.count(I)) {
  323. Worklist.push_back(I);
  324. }
  325. }
  326. }
  327. }
  328. bool Changed = false;
  329. // The inverse of the live set is the dead set. These are those instructions
  330. // which have no side effects and do not influence the control flow or return
  331. // value of the function, and may therefore be deleted safely.
  332. // NOTE: We reuse the Worklist vector here for memory efficiency.
  333. for (Instruction &I : inst_range(F)) {
  334. // For live instructions that have all dead bits, first make them dead by
  335. // replacing all uses with something else. Then, if they don't need to
  336. // remain live (because they have side effects, etc.) we can remove them.
  337. if (I.getType()->isIntegerTy()) {
  338. auto ABI = AliveBits.find(&I);
  339. if (ABI != AliveBits.end()) {
  340. if (ABI->second.getBoolValue())
  341. continue;
  342. DEBUG(dbgs() << "BDCE: Trivializing: " << I << " (all bits dead)\n");
  343. // FIXME: In theory we could substitute undef here instead of zero.
  344. // This should be reconsidered once we settle on the semantics of
  345. // undef, poison, etc.
  346. Value *Zero = ConstantInt::get(I.getType(), 0);
  347. ++NumSimplified;
  348. I.replaceAllUsesWith(Zero);
  349. Changed = true;
  350. }
  351. } else if (Visited.count(&I)) {
  352. continue;
  353. }
  354. if (isAlwaysLive(&I))
  355. continue;
  356. Worklist.push_back(&I);
  357. I.dropAllReferences();
  358. Changed = true;
  359. }
  360. for (Instruction *&I : Worklist) {
  361. ++NumRemoved;
  362. I->eraseFromParent();
  363. }
  364. return Changed;
  365. }
  366. FunctionPass *llvm::createBitTrackingDCEPass() {
  367. return new BDCE();
  368. }