BypassSlowDivision.cpp 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263
  1. //===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file contains an optimization for div and rem on architectures that
  11. // execute short instructions significantly faster than longer instructions.
  12. // For example, on Intel Atom 32-bit divides are slow enough that during
  13. // runtime it is profitable to check the value of the operands, and if they are
  14. // positive and less than 256 use an unsigned 8-bit divide.
  15. //
  16. //===----------------------------------------------------------------------===//
  17. #include "llvm/Transforms/Utils/BypassSlowDivision.h"
  18. #include "llvm/ADT/DenseMap.h"
  19. #include "llvm/IR/Function.h"
  20. #include "llvm/IR/IRBuilder.h"
  21. #include "llvm/IR/Instructions.h"
  22. using namespace llvm;
  23. #define DEBUG_TYPE "bypass-slow-division"
  24. namespace {
  25. struct DivOpInfo {
  26. bool SignedOp;
  27. Value *Dividend;
  28. Value *Divisor;
  29. DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
  30. : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
  31. };
  32. struct DivPhiNodes {
  33. PHINode *Quotient;
  34. PHINode *Remainder;
  35. DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
  36. : Quotient(InQuotient), Remainder(InRemainder) {}
  37. };
  38. }
  39. namespace llvm {
  40. template<>
  41. struct DenseMapInfo<DivOpInfo> {
  42. static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
  43. return Val1.SignedOp == Val2.SignedOp &&
  44. Val1.Dividend == Val2.Dividend &&
  45. Val1.Divisor == Val2.Divisor;
  46. }
  47. static DivOpInfo getEmptyKey() {
  48. return DivOpInfo(false, nullptr, nullptr);
  49. }
  50. static DivOpInfo getTombstoneKey() {
  51. return DivOpInfo(true, nullptr, nullptr);
  52. }
  53. static unsigned getHashValue(const DivOpInfo &Val) {
  54. return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
  55. reinterpret_cast<uintptr_t>(Val.Divisor)) ^
  56. (unsigned)Val.SignedOp;
  57. }
  58. };
  59. typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
  60. }
  61. // insertFastDiv - Substitutes the div/rem instruction with code that checks the
  62. // value of the operands and uses a shorter-faster div/rem instruction when
  63. // possible and the longer-slower div/rem instruction otherwise.
  64. static bool insertFastDiv(Function &F,
  65. Function::iterator &I,
  66. BasicBlock::iterator &J,
  67. IntegerType *BypassType,
  68. bool UseDivOp,
  69. bool UseSignedOp,
  70. DivCacheTy &PerBBDivCache) {
  71. // Get instruction operands
  72. Instruction *Instr = J;
  73. Value *Dividend = Instr->getOperand(0);
  74. Value *Divisor = Instr->getOperand(1);
  75. if (isa<ConstantInt>(Divisor) ||
  76. (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) {
  77. // Operations with immediate values should have
  78. // been solved and replaced during compile time.
  79. return false;
  80. }
  81. // Basic Block is split before divide
  82. BasicBlock *MainBB = I;
  83. BasicBlock *SuccessorBB = I->splitBasicBlock(J);
  84. ++I; //advance iterator I to successorBB
  85. // Add new basic block for slow divide operation
  86. BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "",
  87. MainBB->getParent(), SuccessorBB);
  88. SlowBB->moveBefore(SuccessorBB);
  89. IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
  90. Value *SlowQuotientV;
  91. Value *SlowRemainderV;
  92. if (UseSignedOp) {
  93. SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
  94. SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
  95. } else {
  96. SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
  97. SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
  98. }
  99. SlowBuilder.CreateBr(SuccessorBB);
  100. // Add new basic block for fast divide operation
  101. BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "",
  102. MainBB->getParent(), SuccessorBB);
  103. FastBB->moveBefore(SlowBB);
  104. IRBuilder<> FastBuilder(FastBB, FastBB->begin());
  105. Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
  106. BypassType);
  107. Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
  108. BypassType);
  109. // udiv/urem because optimization only handles positive numbers
  110. Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV,
  111. ShortDivisorV);
  112. Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
  113. ShortDivisorV);
  114. Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
  115. ShortQuotientV,
  116. Dividend->getType());
  117. Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
  118. ShortRemainderV,
  119. Dividend->getType());
  120. FastBuilder.CreateBr(SuccessorBB);
  121. // Phi nodes for result of div and rem
  122. IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
  123. PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
  124. QuoPhi->addIncoming(SlowQuotientV, SlowBB);
  125. QuoPhi->addIncoming(FastQuotientV, FastBB);
  126. PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
  127. RemPhi->addIncoming(SlowRemainderV, SlowBB);
  128. RemPhi->addIncoming(FastRemainderV, FastBB);
  129. // Replace Instr with appropriate phi node
  130. if (UseDivOp)
  131. Instr->replaceAllUsesWith(QuoPhi);
  132. else
  133. Instr->replaceAllUsesWith(RemPhi);
  134. Instr->eraseFromParent();
  135. // Combine operands into a single value with OR for value testing below
  136. MainBB->getInstList().back().eraseFromParent();
  137. IRBuilder<> MainBuilder(MainBB, MainBB->end());
  138. Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
  139. // BitMask is inverted to check if the operands are
  140. // larger than the bypass type
  141. uint64_t BitMask = ~BypassType->getBitMask();
  142. Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
  143. // Compare operand values and branch
  144. Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
  145. Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
  146. MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
  147. // point iterator J at first instruction of successorBB
  148. J = I->begin();
  149. // Cache phi nodes to be used later in place of other instances
  150. // of div or rem with the same sign, dividend, and divisor
  151. DivOpInfo Key(UseSignedOp, Dividend, Divisor);
  152. DivPhiNodes Value(QuoPhi, RemPhi);
  153. PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
  154. return true;
  155. }
  156. // reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if
  157. // operands and operation are identical. Otherwise call insertFastDiv to perform
  158. // the optimization and cache the resulting dividend and remainder.
  159. static bool reuseOrInsertFastDiv(Function &F,
  160. Function::iterator &I,
  161. BasicBlock::iterator &J,
  162. IntegerType *BypassType,
  163. bool UseDivOp,
  164. bool UseSignedOp,
  165. DivCacheTy &PerBBDivCache) {
  166. // Get instruction operands
  167. Instruction *Instr = J;
  168. DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1));
  169. DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
  170. if (CacheI == PerBBDivCache.end()) {
  171. // If previous instance does not exist, insert fast div
  172. return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp,
  173. PerBBDivCache);
  174. }
  175. // Replace operation value with previously generated phi node
  176. DivPhiNodes &Value = CacheI->second;
  177. if (UseDivOp) {
  178. // Replace all uses of div instruction with quotient phi node
  179. J->replaceAllUsesWith(Value.Quotient);
  180. } else {
  181. // Replace all uses of rem instruction with remainder phi node
  182. J->replaceAllUsesWith(Value.Remainder);
  183. }
  184. // Advance to next operation
  185. ++J;
  186. // Remove redundant operation
  187. Instr->eraseFromParent();
  188. return true;
  189. }
  190. // bypassSlowDivision - This optimization identifies DIV instructions that can
  191. // be profitably bypassed and carried out with a shorter, faster divide.
  192. bool llvm::bypassSlowDivision(Function &F,
  193. Function::iterator &I,
  194. const DenseMap<unsigned int, unsigned int> &BypassWidths) {
  195. DivCacheTy DivCache;
  196. bool MadeChange = false;
  197. for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) {
  198. // Get instruction details
  199. unsigned Opcode = J->getOpcode();
  200. bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
  201. bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
  202. bool UseSignedOp = Opcode == Instruction::SDiv ||
  203. Opcode == Instruction::SRem;
  204. // Only optimize div or rem ops
  205. if (!UseDivOp && !UseRemOp)
  206. continue;
  207. // Skip division on vector types, only optimize integer instructions
  208. if (!J->getType()->isIntegerTy())
  209. continue;
  210. // Get bitwidth of div/rem instruction
  211. IntegerType *T = cast<IntegerType>(J->getType());
  212. unsigned int bitwidth = T->getBitWidth();
  213. // Continue if bitwidth is not bypassed
  214. DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
  215. if (BI == BypassWidths.end())
  216. continue;
  217. // Get type for div/rem instruction with bypass bitwidth
  218. IntegerType *BT = IntegerType::get(J->getContext(), BI->second);
  219. MadeChange |= reuseOrInsertFastDiv(F, I, J, BT, UseDivOp,
  220. UseSignedOp, DivCache);
  221. }
  222. return MadeChange;
  223. }