//===- InstCombineCalls.cpp -----------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the visitCall and visitInvoke functions.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"

STATISTIC(NumSimplified, "Number of library calls simplified");

/// getPromotedType - Return the specified type promoted as it would be to pass
/// through a va_arg area.
static Type *getPromotedType(Type *Ty) {
  if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
    if (ITy->getBitWidth() < 32)
      return Type::getInt32Ty(Ty->getContext());
  }
  return Ty;
}

/// reduceToSingleValueType - Given an aggregate type which ultimately holds a
/// single scalar element, like {{{type}}} or [1 x type], return type.
static Type *reduceToSingleValueType(Type *T) {
  while (!T->isSingleValueType()) {
    if (StructType *STy = dyn_cast<StructType>(T)) {
      if (STy->getNumElements() == 1)
        T = STy->getElementType(0);
      else
        break;
    } else if (ArrayType *ATy = dyn_cast<ArrayType>(T)) {
      if (ATy->getNumElements() == 1)
        T = ATy->getElementType();
      else
        break;
    } else
      break;
  }

  return T;
}

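/// SimplifyMemTransfer - Fold llvm.memcpy/llvm.memmove calls: raise the
/// alignment to what can be proven about the operands, and turn a
/// constant-length 1/2/4/8-byte transfer into a single load/store pair
/// (the length is then set to zero so the intrinsic is erased on the next
/// iteration).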
Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
  unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, AC, DT);
  unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, AC, DT);
  unsigned MinAlign = std::min(DstAlign, SrcAlign);
  unsigned CopyAlign = MI->getAlignment();

  if (CopyAlign < MinAlign) {
    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
                                      MinAlign, false));
    return MI;
  }

  // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
  // load/store.
  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2));
  if (!MemOpLength) return nullptr;

  // Source and destination pointer types are always "i8*" for intrinsic. See
  // if the size is something we can handle with a single primitive load/store.
  // A single load+store correctly handles overlapping memory in the memmove
  // case.
  uint64_t Size = MemOpLength->getLimitedValue();
  assert(Size && "0-sized memory transferring should be removed already.");

  if (Size > 8 || (Size&(Size-1)))
    return nullptr;  // If not 1/2/4/8 bytes, exit.

  // Use an integer load+store unless we can find something better.
  unsigned SrcAddrSp =
    cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
  unsigned DstAddrSp =
    cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();

  IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
  Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
  Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);

  // Memcpy forces the use of i8* for the source and destination. That means
  // that if you're using memcpy to move one double around, you'll get a cast
  // from double* to i8*. We'd much rather use a double load+store rather than
  // an i64 load+store, here because this improves the odds that the source or
  // dest address will be promotable. See if we can find a better type than the
  // integer datatype.
  Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts();
  MDNode *CopyMD = nullptr;
  if (StrippedDest != MI->getArgOperand(0)) {
    Type *SrcETy = cast<PointerType>(StrippedDest->getType())
                       ->getElementType();
    if (SrcETy->isSized() && DL.getTypeStoreSize(SrcETy) == Size) {
      // The SrcETy might be something like {{{double}}} or [1 x double]. Rip
      // down through these levels if so.
      SrcETy = reduceToSingleValueType(SrcETy);

      if (SrcETy->isSingleValueType()) {
        NewSrcPtrTy = PointerType::get(SrcETy, SrcAddrSp);
        NewDstPtrTy = PointerType::get(SrcETy, DstAddrSp);

        // If the memcpy has metadata describing the members, see if we can
        // get the TBAA tag describing our copy.
        if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
          if (M->getNumOperands() == 3 && M->getOperand(0) &&
              mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
              mdconst::extract<ConstantInt>(M->getOperand(0))->isNullValue() &&
              M->getOperand(1) &&
              mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
              mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() ==
                  Size &&
              M->getOperand(2) && isa<MDNode>(M->getOperand(2)))
            CopyMD = cast<MDNode>(M->getOperand(2));
        }
      }
    }
  }

  // If the memcpy/memmove provides better alignment info than we can
  // infer, use it.
  SrcAlign = std::max(SrcAlign, CopyAlign);
  DstAlign = std::max(DstAlign, CopyAlign);

  Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
  Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
  LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile());
  L->setAlignment(SrcAlign);
  if (CopyMD)
    L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
  StoreInst *S = Builder->CreateStore(L, Dest, MI->isVolatile());
  S->setAlignment(DstAlign);
  if (CopyMD)
    S->setMetadata(LLVMContext::MD_tbaa, CopyMD);

  // Set the size of the copy to 0, it will be deleted on the next iteration.
  MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType()));
  return MI;
}

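/// SimplifyMemSet - Fold llvm.memset calls: raise the alignment to what can be
/// proven about the destination, and turn a constant-length 1/2/4/8-byte
/// memset into a single store of the byte-splatted fill value.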
Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
  unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, AC, DT);
  if (MI->getAlignment() < Alignment) {
    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
                                      Alignment, false));
    return MI;
  }

  // Extract the length and alignment and fill if they are constant.
  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
  if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
    return nullptr;
  uint64_t Len = LenC->getLimitedValue();
  Alignment = MI->getAlignment();
  assert(Len && "0-sized memory setting should be removed already.");

  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
    Type *ITy = IntegerType::get(MI->getContext(), Len*8);  // n=1 -> i8.

    Value *Dest = MI->getDest();
    unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
    Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
    Dest = Builder->CreateBitCast(Dest, NewDstPtrTy);

    // Alignment 0 is identity for alignment 1 for memset, but not store.
    if (Alignment == 0) Alignment = 1;

    // Extract the fill value and store.
    uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
    StoreInst *S = Builder->CreateStore(ConstantInt::get(ITy, Fill), Dest,
                                        MI->isVolatile());
    S->setAlignment(Alignment);

    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength(Constant::getNullValue(LenC->getType()));
    return MI;
  }

  return nullptr;
}

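/// SimplifyX86insertps - Fold an insertps intrinsic whose control byte is a
/// constant into a shufflevector (or into a zero vector when the zero mask
/// clears all four lanes).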
static Value *SimplifyX86insertps(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
  if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
    VectorType *VecTy = cast<VectorType>(II.getType());
    assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");

    // The immediate permute control byte looks like this:
    //   [3:0] - zero mask for each 32-bit lane
    //   [5:4] - select one 32-bit destination lane
    //   [7:6] - select one 32-bit source lane

    uint8_t Imm = CInt->getZExtValue();
    uint8_t ZMask = Imm & 0xf;
    uint8_t DestLane = (Imm >> 4) & 0x3;
    uint8_t SourceLane = (Imm >> 6) & 0x3;

    ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);

    // If all zero mask bits are set, this was just a weird way to
    // generate a zero vector.
    if (ZMask == 0xf)
      return ZeroVector;

    // Initialize by passing all of the first source bits through.
    int ShuffleMask[4] = { 0, 1, 2, 3 };

    // We may replace the second operand with the zero vector.
    Value *V1 = II.getArgOperand(1);

    if (ZMask) {
      // If the zero mask is being used with a single input or the zero mask
      // overrides the destination lane, this is a shuffle with the zero vector.
      if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
          (ZMask & (1 << DestLane))) {
        V1 = ZeroVector;
        // We may still move 32-bits of the first source vector from one lane
        // to another.
        ShuffleMask[DestLane] = SourceLane;
        // The zero mask may override the previous insert operation.
        for (unsigned i = 0; i < 4; ++i)
          if ((ZMask >> i) & 0x1)
            ShuffleMask[i] = i + 4;
      } else {
        // TODO: Model this case as 2 shuffles or a 'logical and' plus shuffle?
        return nullptr;
      }
    } else {
      // Replace the selected destination lane with the selected source lane.
      ShuffleMask[DestLane] = SourceLane + 4;
    }

    return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);
  }
  return nullptr;
}

/// The shuffle mask for a perm2*128 selects any two halves of two 256-bit
/// source vectors, unless a zero bit is set. If a zero bit is set,
/// then ignore that half of the mask and clear that half of the vector.
static Value *SimplifyX86vperm2(const IntrinsicInst &II,
                                InstCombiner::BuilderTy &Builder) {
  if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
    VectorType *VecTy = cast<VectorType>(II.getType());
    ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);

    // The immediate permute control byte looks like this:
    //   [1:0] - select 128 bits from sources for low half of destination
    //   [2]   - ignore
    //   [3]   - zero low half of destination
    //   [5:4] - select 128 bits from sources for high half of destination
    //   [6]   - ignore
    //   [7]   - zero high half of destination

    uint8_t Imm = CInt->getZExtValue();

    bool LowHalfZero = Imm & 0x08;
    bool HighHalfZero = Imm & 0x80;

    // If both zero mask bits are set, this was just a weird way to
    // generate a zero vector.
    if (LowHalfZero && HighHalfZero)
      return ZeroVector;

    // If 0 or 1 zero mask bits are set, this is a simple shuffle.
    unsigned NumElts = VecTy->getNumElements();
    unsigned HalfSize = NumElts / 2;
    SmallVector<int, 8> ShuffleMask(NumElts);

    // The high bit of the selection field chooses the 1st or 2nd operand.
    bool LowInputSelect = Imm & 0x02;
    bool HighInputSelect = Imm & 0x20;

    // The low bit of the selection field chooses the low or high half
    // of the selected operand.
    bool LowHalfSelect = Imm & 0x01;
    bool HighHalfSelect = Imm & 0x10;

    // Determine which operand(s) are actually in use for this instruction.
    Value *V0 = LowInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
    Value *V1 = HighInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);

    // If needed, replace operands based on zero mask.
    V0 = LowHalfZero ? ZeroVector : V0;
    V1 = HighHalfZero ? ZeroVector : V1;

    // Permute low half of result.
    unsigned StartIndex = LowHalfSelect ? HalfSize : 0;
    for (unsigned i = 0; i < HalfSize; ++i)
      ShuffleMask[i] = StartIndex + i;

    // Permute high half of result.
    StartIndex = HighHalfSelect ? HalfSize : 0;
    StartIndex += NumElts;
    for (unsigned i = 0; i < HalfSize; ++i)
      ShuffleMask[i + HalfSize] = StartIndex + i;

    return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
  }
  return nullptr;
}

/// visitCallInst - CallInst simplification. This mostly only handles folding
/// of intrinsic instructions. For normal calls, it allows visitCallSite to do
/// the heavy lifting.
///
Instruction *InstCombiner::visitCallInst(CallInst &CI) {
  auto Args = CI.arg_operands();
  if (Value *V = SimplifyCall(CI.getCalledValue(), Args.begin(), Args.end(), DL,
                              TLI, DT, AC))
    return ReplaceInstUsesWith(CI, V);

  if (isFreeCall(&CI, TLI))
    return visitFree(CI);

  // If the caller function is nounwind, mark the call as nounwind, even if the
  // callee isn't.
  if (CI.getParent()->getParent()->doesNotThrow() &&
      !CI.doesNotThrow()) {
    CI.setDoesNotThrow();
    return &CI;
  }

  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
  if (!II) return visitCallSite(&CI);

  // Intrinsics cannot occur in an invoke, so handle them here instead of in
  // visitCallSite.
  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
    bool Changed = false;

    // memmove/cpy/set of zero bytes is a noop.
    if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
      if (NumBytes->isNullValue())
        return EraseInstFromFunction(CI);

      if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
        if (CI->getZExtValue() == 1) {
          // Replace the instruction with just byte operations. We would
          // transform other cases to loads/stores, but we don't know if
          // alignment is sufficient.
        }
    }

    // No other transformations apply to volatile transfers.
    if (MI->isVolatile())
      return nullptr;

    // If we have a memmove and the source operation is a constant global,
    // then the source and dest pointers can't alias, so we can change this
    // into a call to memcpy.
    if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
      if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
        if (GVSrc->isConstant()) {
          Module *M = CI.getParent()->getParent()->getParent();
          Intrinsic::ID MemCpyID = Intrinsic::memcpy;
          Type *Tys[3] = { CI.getArgOperand(0)->getType(),
                           CI.getArgOperand(1)->getType(),
                           CI.getArgOperand(2)->getType() };
          CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
          Changed = true;
        }
    }

    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
      // memmove(x,x,size) -> noop.
      if (MTI->getSource() == MTI->getDest())
        return EraseInstFromFunction(CI);
    }

    // If we can determine a pointer alignment that is bigger than currently
    // set, update the alignment.
    if (isa<MemTransferInst>(MI)) {
      if (Instruction *I = SimplifyMemTransfer(MI))
        return I;
    } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
      if (Instruction *I = SimplifyMemSet(MSI))
        return I;
    }

    if (Changed) return II;
  }

  switch (II->getIntrinsicID()) {
  default: break;
  case Intrinsic::objectsize: {
    uint64_t Size;
    if (getObjectSize(II->getArgOperand(0), Size, DL, TLI))
      return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size));
    return nullptr;
  }
  case Intrinsic::bswap: {
    Value *IIOperand = II->getArgOperand(0);
    Value *X = nullptr;

    // bswap(bswap(x)) -> x
    if (match(IIOperand, m_BSwap(m_Value(X))))
      return ReplaceInstUsesWith(CI, X);

    // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
    if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
      unsigned C = X->getType()->getPrimitiveSizeInBits() -
                   IIOperand->getType()->getPrimitiveSizeInBits();
      Value *CV = ConstantInt::get(X->getType(), C);
      Value *V = Builder->CreateLShr(X, CV);
      return new TruncInst(V, IIOperand->getType());
    }
    break;
  }
  case Intrinsic::powi:
    if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // powi(x, 0) -> 1.0
      if (Power->isZero())
        return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
      // powi(x, 1) -> x
      if (Power->isOne())
        return ReplaceInstUsesWith(CI, II->getArgOperand(0));
      // powi(x, -1) -> 1/x
      if (Power->isAllOnesValue())
        return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
                                          II->getArgOperand(0));
    }
    break;
  case Intrinsic::cttz: {
    // If all bits below the first known one are known zero,
    // this value is constant.
    IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
    // FIXME: Try to simplify vectors of integers.
    if (!IT) break;
    uint32_t BitWidth = IT->getBitWidth();
    APInt KnownZero(BitWidth, 0);
    APInt KnownOne(BitWidth, 0);
    computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne, 0, II);
    unsigned TrailingZeros = KnownOne.countTrailingZeros();
    APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros));
    if ((Mask & KnownZero) == Mask)
      return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
                                 APInt(BitWidth, TrailingZeros)));
  }
  break;
  case Intrinsic::ctlz: {
    // If all bits above the first known one are known zero,
    // this value is constant.
    IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
    // FIXME: Try to simplify vectors of integers.
    if (!IT) break;
    uint32_t BitWidth = IT->getBitWidth();
    APInt KnownZero(BitWidth, 0);
    APInt KnownOne(BitWidth, 0);
    computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne, 0, II);
    unsigned LeadingZeros = KnownOne.countLeadingZeros();
    APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros));
    if ((Mask & KnownZero) == Mask)
      return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
                                 APInt(BitWidth, LeadingZeros)));
  }
  break;
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::umul_with_overflow:
  case Intrinsic::smul_with_overflow:
    if (isa<Constant>(II->getArgOperand(0)) &&
        !isa<Constant>(II->getArgOperand(1))) {
      // Canonicalize constants into the RHS.
      Value *LHS = II->getArgOperand(0);
      II->setArgOperand(0, II->getArgOperand(1));
      II->setArgOperand(1, LHS);
      return II;
    }
    // fall through

  case Intrinsic::usub_with_overflow:
  case Intrinsic::ssub_with_overflow: {
    OverflowCheckFlavor OCF =
        IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID());
    assert(OCF != OCF_INVALID && "unexpected!");

    Value *OperationResult = nullptr;
    Constant *OverflowResult = nullptr;
    if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1),
                              *II, OperationResult, OverflowResult))
      return CreateOverflowTuple(II, OperationResult, OverflowResult);

    break;
  }
  case Intrinsic::minnum:
  case Intrinsic::maxnum: {
    Value *Arg0 = II->getArgOperand(0);
    Value *Arg1 = II->getArgOperand(1);

    // fmin(x, x) -> x
    if (Arg0 == Arg1)
      return ReplaceInstUsesWith(CI, Arg0);

    const ConstantFP *C0 = dyn_cast<ConstantFP>(Arg0);
    const ConstantFP *C1 = dyn_cast<ConstantFP>(Arg1);

    // Canonicalize constants into the RHS.
    if (C0 && !C1) {
      II->setArgOperand(0, Arg1);
      II->setArgOperand(1, Arg0);
      return II;
    }

    // fmin(x, nan) -> x
    if (C1 && C1->isNaN())
      return ReplaceInstUsesWith(CI, Arg0);

    // This is the value because if undef were NaN, we would return the other
    // value and cannot return a NaN unless both operands are.
    //
    // fmin(undef, x) -> x
    if (isa<UndefValue>(Arg0))
      return ReplaceInstUsesWith(CI, Arg1);

    // fmin(x, undef) -> x
    if (isa<UndefValue>(Arg1))
      return ReplaceInstUsesWith(CI, Arg0);

    Value *X = nullptr;
    Value *Y = nullptr;
    if (II->getIntrinsicID() == Intrinsic::minnum) {
      // fmin(x, fmin(x, y)) -> fmin(x, y)
      // fmin(y, fmin(x, y)) -> fmin(x, y)
      if (match(Arg1, m_FMin(m_Value(X), m_Value(Y)))) {
        if (Arg0 == X || Arg0 == Y)
          return ReplaceInstUsesWith(CI, Arg1);
      }

      // fmin(fmin(x, y), x) -> fmin(x, y)
      // fmin(fmin(x, y), y) -> fmin(x, y)
      if (match(Arg0, m_FMin(m_Value(X), m_Value(Y)))) {
        if (Arg1 == X || Arg1 == Y)
          return ReplaceInstUsesWith(CI, Arg0);
      }

      // TODO: fmin(nnan x, inf) -> x
      // TODO: fmin(nnan ninf x, flt_max) -> x
      if (C1 && C1->isInfinity()) {
        // fmin(x, -inf) -> -inf
        if (C1->isNegative())
          return ReplaceInstUsesWith(CI, Arg1);
      }
    } else {
      assert(II->getIntrinsicID() == Intrinsic::maxnum);
      // fmax(x, fmax(x, y)) -> fmax(x, y)
      // fmax(y, fmax(x, y)) -> fmax(x, y)
      if (match(Arg1, m_FMax(m_Value(X), m_Value(Y)))) {
        if (Arg0 == X || Arg0 == Y)
          return ReplaceInstUsesWith(CI, Arg1);
      }

      // fmax(fmax(x, y), x) -> fmax(x, y)
      // fmax(fmax(x, y), y) -> fmax(x, y)
      if (match(Arg0, m_FMax(m_Value(X), m_Value(Y)))) {
        if (Arg1 == X || Arg1 == Y)
          return ReplaceInstUsesWith(CI, Arg0);
      }

      // TODO: fmax(nnan x, -inf) -> x
      // TODO: fmax(nnan ninf x, -flt_max) -> x
      if (C1 && C1->isInfinity()) {
        // fmax(x, inf) -> inf
        if (!C1->isNegative())
          return ReplaceInstUsesWith(CI, Arg1);
      }
    }
    break;
  }

#if 0 // HLSL Change - remove platform intrinsics
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
    // Turn PPC lvx -> load if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
        16) {
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
                                          PointerType::getUnqual(II->getType()));
      return new LoadInst(Ptr);
    }
    break;
  case Intrinsic::ppc_vsx_lxvw4x:
  case Intrinsic::ppc_vsx_lxvd2x: {
    // Turn PPC VSX loads into normal loads.
    Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
                                        PointerType::getUnqual(II->getType()));
    return new LoadInst(Ptr, Twine(""), false, 1);
  }
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
    // Turn stvx -> store if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, AC, DT) >=
        16) {
      Type *OpPtrTy =
          PointerType::getUnqual(II->getArgOperand(0)->getType());
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
      return new StoreInst(II->getArgOperand(0), Ptr);
    }
    break;
  case Intrinsic::ppc_vsx_stxvw4x:
  case Intrinsic::ppc_vsx_stxvd2x: {
    // Turn PPC VSX stores into normal stores.
    Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType());
    Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
    return new StoreInst(II->getArgOperand(0), Ptr, false, 1);
  }
  case Intrinsic::ppc_qpx_qvlfs:
    // Turn PPC QPX qvlfs -> load if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
        16) {
      Type *VTy = VectorType::get(Builder->getFloatTy(),
                                  II->getType()->getVectorNumElements());
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
                                          PointerType::getUnqual(VTy));
      Value *Load = Builder->CreateLoad(Ptr);
      return new FPExtInst(Load, II->getType());
    }
    break;
  case Intrinsic::ppc_qpx_qvlfd:
    // Turn PPC QPX qvlfd -> load if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, AC, DT) >=
        32) {
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
                                          PointerType::getUnqual(II->getType()));
      return new LoadInst(Ptr);
    }
    break;
  case Intrinsic::ppc_qpx_qvstfs:
    // Turn PPC QPX qvstfs -> store if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, AC, DT) >=
        16) {
      Type *VTy = VectorType::get(Builder->getFloatTy(),
          II->getArgOperand(0)->getType()->getVectorNumElements());
      Value *TOp = Builder->CreateFPTrunc(II->getArgOperand(0), VTy);
      Type *OpPtrTy = PointerType::getUnqual(VTy);
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
      return new StoreInst(TOp, Ptr);
    }
    break;
  case Intrinsic::ppc_qpx_qvstfd:
    // Turn PPC QPX qvstfd -> store if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, II, AC, DT) >=
        32) {
      Type *OpPtrTy =
          PointerType::getUnqual(II->getArgOperand(0)->getType());
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
      return new StoreInst(II->getArgOperand(0), Ptr);
    }
    break;
  case Intrinsic::x86_sse_storeu_ps:
  case Intrinsic::x86_sse2_storeu_pd:
  case Intrinsic::x86_sse2_storeu_dq:
    // Turn X86 storeu -> store if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
        16) {
      Type *OpPtrTy =
          PointerType::getUnqual(II->getArgOperand(1)->getType());
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy);
      return new StoreInst(II->getArgOperand(1), Ptr);
    }
    break;

  case Intrinsic::x86_sse_cvtss2si:
  case Intrinsic::x86_sse_cvtss2si64:
  case Intrinsic::x86_sse_cvttss2si:
  case Intrinsic::x86_sse_cvttss2si64:
  case Intrinsic::x86_sse2_cvtsd2si:
  case Intrinsic::x86_sse2_cvtsd2si64:
  case Intrinsic::x86_sse2_cvttsd2si:
  case Intrinsic::x86_sse2_cvttsd2si64: {
    // These intrinsics only demand the 0th element of their input vectors. If
    // we can simplify the input based on that, do so now.
    unsigned VWidth =
        cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
    APInt DemandedElts(VWidth, 1);
    APInt UndefElts(VWidth, 0);
    if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0),
                                              DemandedElts, UndefElts)) {
      II->setArgOperand(0, V);
      return II;
    }
    break;
  }

  // Constant fold <A x Bi> << Ci.
  // FIXME: We don't handle _dq because it's a shift of an i128, but is
  // represented in the IR as <2 x i64>. A per element shift is wrong.
  case Intrinsic::x86_sse2_psll_d:
  case Intrinsic::x86_sse2_psll_q:
  case Intrinsic::x86_sse2_psll_w:
  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_avx2_psll_d:
  case Intrinsic::x86_avx2_psll_q:
  case Intrinsic::x86_avx2_psll_w:
  case Intrinsic::x86_avx2_pslli_d:
  case Intrinsic::x86_avx2_pslli_q:
  case Intrinsic::x86_avx2_pslli_w:
  case Intrinsic::x86_sse2_psrl_d:
  case Intrinsic::x86_sse2_psrl_q:
  case Intrinsic::x86_sse2_psrl_w:
  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_avx2_psrl_d:
  case Intrinsic::x86_avx2_psrl_q:
  case Intrinsic::x86_avx2_psrl_w:
  case Intrinsic::x86_avx2_psrli_d:
  case Intrinsic::x86_avx2_psrli_q:
  case Intrinsic::x86_avx2_psrli_w: {
    // Simplify if count is constant. To 0 if >= BitWidth,
    // otherwise to shl/lshr.
    auto CDV = dyn_cast<ConstantDataVector>(II->getArgOperand(1));
    auto CInt = dyn_cast<ConstantInt>(II->getArgOperand(1));
    if (!CDV && !CInt)
      break;
    ConstantInt *Count;
    if (CDV)
      Count = cast<ConstantInt>(CDV->getElementAsConstant(0));
    else
      Count = CInt;

    auto Vec = II->getArgOperand(0);
    auto VT = cast<VectorType>(Vec->getType());
    if (Count->getZExtValue() >
        VT->getElementType()->getPrimitiveSizeInBits() - 1)
      return ReplaceInstUsesWith(
          CI, ConstantAggregateZero::get(Vec->getType()));

    bool isPackedShiftLeft = true;
    switch (II->getIntrinsicID()) {
    default : break;
    case Intrinsic::x86_sse2_psrl_d:
    case Intrinsic::x86_sse2_psrl_q:
    case Intrinsic::x86_sse2_psrl_w:
    case Intrinsic::x86_sse2_psrli_d:
    case Intrinsic::x86_sse2_psrli_q:
    case Intrinsic::x86_sse2_psrli_w:
    case Intrinsic::x86_avx2_psrl_d:
    case Intrinsic::x86_avx2_psrl_q:
    case Intrinsic::x86_avx2_psrl_w:
    case Intrinsic::x86_avx2_psrli_d:
    case Intrinsic::x86_avx2_psrli_q:
    case Intrinsic::x86_avx2_psrli_w: isPackedShiftLeft = false; break;
    }

    unsigned VWidth = VT->getNumElements();
    // Get a constant vector of the same type as the first operand.
    auto VTCI = ConstantInt::get(VT->getElementType(), Count->getZExtValue());
    if (isPackedShiftLeft)
      return BinaryOperator::CreateShl(Vec,
          Builder->CreateVectorSplat(VWidth, VTCI));

    return BinaryOperator::CreateLShr(Vec,
        Builder->CreateVectorSplat(VWidth, VTCI));
  }

  case Intrinsic::x86_sse41_pmovsxbw:
  case Intrinsic::x86_sse41_pmovsxwd:
  case Intrinsic::x86_sse41_pmovsxdq:
  case Intrinsic::x86_sse41_pmovzxbw:
  case Intrinsic::x86_sse41_pmovzxwd:
  case Intrinsic::x86_sse41_pmovzxdq: {
    // pmov{s|z}x ignores the upper half of their input vectors.
    unsigned VWidth =
        cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
    unsigned LowHalfElts = VWidth / 2;
    APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts));
    APInt UndefElts(VWidth, 0);
    if (Value *TmpV = SimplifyDemandedVectorElts(
            II->getArgOperand(0), InputDemandedElts, UndefElts)) {
      II->setArgOperand(0, TmpV);
      return II;
    }
    break;
  }
  case Intrinsic::x86_sse41_insertps:
    if (Value *V = SimplifyX86insertps(*II, *Builder))
      return ReplaceInstUsesWith(*II, V);
    break;

  case Intrinsic::x86_sse4a_insertqi: {
    // insertqi x, y, 64, 0 can just copy y's lower bits and leave the top
    // ones undef
    // TODO: eventually we should lower this intrinsic to IR
    if (auto CIWidth = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
      if (auto CIStart = dyn_cast<ConstantInt>(II->getArgOperand(3))) {
        unsigned Index = CIStart->getZExtValue();
        // From AMD documentation: "a value of zero in the field length is
        // defined as length of 64".
        unsigned Length = CIWidth->equalsInt(0) ? 64 : CIWidth->getZExtValue();

        // From AMD documentation: "If the sum of the bit index + length field
        // is greater than 64, the results are undefined".
        // Note that both field index and field length are 8-bit quantities.
        // Since variables 'Index' and 'Length' are unsigned values
        // obtained from zero-extending field index and field length
        // respectively, their sum should never wrap around.
        if ((Index + Length) > 64)
          return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));

        if (Length == 64 && Index == 0) {
          Value *Vec = II->getArgOperand(1);
          Value *Undef = UndefValue::get(Vec->getType());
          const uint32_t Mask[] = { 0, 2 };
          return ReplaceInstUsesWith(
              CI,
              Builder->CreateShuffleVector(
                  Vec, Undef, ConstantDataVector::get(
                                  II->getContext(), makeArrayRef(Mask))));
        } else if (auto Source =
                       dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
          if (Source->hasOneUse() &&
              Source->getArgOperand(1) == II->getArgOperand(1)) {
            // If the source of the insert has only one use and it's another
            // insert (and they're both inserting from the same vector), try to
            // bundle both together.
            auto CISourceWidth =
                dyn_cast<ConstantInt>(Source->getArgOperand(2));
            auto CISourceStart =
                dyn_cast<ConstantInt>(Source->getArgOperand(3));
            if (CISourceStart && CISourceWidth) {
              unsigned Start = CIStart->getZExtValue();
              unsigned Width = CIWidth->getZExtValue();
              unsigned End = Start + Width;
              unsigned SourceStart = CISourceStart->getZExtValue();
              unsigned SourceWidth = CISourceWidth->getZExtValue();
              unsigned SourceEnd = SourceStart + SourceWidth;
              unsigned NewStart, NewWidth;
              bool ShouldReplace = false;
              if (Start <= SourceStart && SourceStart <= End) {
                NewStart = Start;
                NewWidth = std::max(End, SourceEnd) - NewStart;
                ShouldReplace = true;
              } else if (SourceStart <= Start && Start <= SourceEnd) {
                NewStart = SourceStart;
                NewWidth = std::max(SourceEnd, End) - NewStart;
                ShouldReplace = true;
              }

              if (ShouldReplace) {
                Constant *ConstantWidth = ConstantInt::get(
                    II->getArgOperand(2)->getType(), NewWidth, false);
                Constant *ConstantStart = ConstantInt::get(
                    II->getArgOperand(3)->getType(), NewStart, false);
                Value *Args[4] = { Source->getArgOperand(0),
                                   II->getArgOperand(1), ConstantWidth,
                                   ConstantStart };
                Module *M = CI.getParent()->getParent()->getParent();
                Value *F =
                    Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
                return ReplaceInstUsesWith(CI, Builder->CreateCall(F, Args));
              }
            }
          }
        }
      }
    }
    break;
  }

  case Intrinsic::x86_sse41_pblendvb:
  case Intrinsic::x86_sse41_blendvps:
  case Intrinsic::x86_sse41_blendvpd:
  case Intrinsic::x86_avx_blendv_ps_256:
  case Intrinsic::x86_avx_blendv_pd_256:
  case Intrinsic::x86_avx2_pblendvb: {
    // Convert blendv* to vector selects if the mask is constant.
    // This optimization is convoluted because the intrinsic is defined as
    // getting a vector of floats or doubles for the ps and pd versions.
    // FIXME: That should be changed.
    Value *Mask = II->getArgOperand(2);
    if (auto C = dyn_cast<ConstantDataVector>(Mask)) {
      auto Tyi1 = Builder->getInt1Ty();
      auto SelectorType = cast<VectorType>(Mask->getType());
      auto EltTy = SelectorType->getElementType();
      unsigned Size = SelectorType->getNumElements();
      unsigned BitWidth =
          EltTy->isFloatTy()
              ? 32
              : (EltTy->isDoubleTy() ? 64 : EltTy->getIntegerBitWidth());
      assert((BitWidth == 64 || BitWidth == 32 || BitWidth == 8) &&
             "Wrong arguments for variable blend intrinsic");
      SmallVector<Constant *, 32> Selectors;
      for (unsigned I = 0; I < Size; ++I) {
        // The intrinsics only read the top bit
        uint64_t Selector;
        if (BitWidth == 8)
          Selector = C->getElementAsInteger(I);
        else
          Selector = C->getElementAsAPFloat(I).bitcastToAPInt().getZExtValue();
        Selectors.push_back(ConstantInt::get(Tyi1, Selector >> (BitWidth - 1)));
      }
      auto NewSelector = ConstantVector::get(Selectors);
      return SelectInst::Create(NewSelector, II->getArgOperand(1),
                                II->getArgOperand(0), "blendv");
    } else {
      break;
    }
  }
  case Intrinsic::x86_avx_vpermilvar_ps:
  case Intrinsic::x86_avx_vpermilvar_ps_256:
  case Intrinsic::x86_avx_vpermilvar_pd:
  case Intrinsic::x86_avx_vpermilvar_pd_256: {
    // Convert vpermil* to shufflevector if the mask is constant.
    Value *V = II->getArgOperand(1);
    unsigned Size = cast<VectorType>(V->getType())->getNumElements();
    assert(Size == 8 || Size == 4 || Size == 2);
    uint32_t Indexes[8];
    if (auto C = dyn_cast<ConstantDataVector>(V)) {
      // The intrinsics only read one or two bits, clear the rest.
      for (unsigned I = 0; I < Size; ++I) {
        uint32_t Index = C->getElementAsInteger(I) & 0x3;
        if (II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd ||
            II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256)
          Index >>= 1;
        Indexes[I] = Index;
      }
    } else if (isa<ConstantAggregateZero>(V)) {
      for (unsigned I = 0; I < Size; ++I)
        Indexes[I] = 0;
    } else {
      break;
    }
    // The _256 variants are a bit trickier since the mask bits always index
    // into the corresponding 128 half. In order to convert to a generic
    // shuffle, we have to make that explicit.
    if (II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_ps_256 ||
        II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256) {
      for (unsigned I = Size / 2; I < Size; ++I)
        Indexes[I] += Size / 2;
    }
    auto NewC =
        ConstantDataVector::get(V->getContext(), makeArrayRef(Indexes, Size));
    auto V1 = II->getArgOperand(0);
    auto V2 = UndefValue::get(V1->getType());
    auto Shuffle = Builder->CreateShuffleVector(V1, V2, NewC);
    return ReplaceInstUsesWith(CI, Shuffle);
  }

  case Intrinsic::x86_avx_vperm2f128_pd_256:
  case Intrinsic::x86_avx_vperm2f128_ps_256:
  case Intrinsic::x86_avx_vperm2f128_si_256:
  case Intrinsic::x86_avx2_vperm2i128:
    if (Value *V = SimplifyX86vperm2(*II, *Builder))
      return ReplaceInstUsesWith(*II, V);
    break;

  case Intrinsic::ppc_altivec_vperm:
    // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
    // Note that ppc_altivec_vperm has a big-endian bias, so when creating
    // a vectorshuffle for little endian, we must undo the transformation
    // performed on vec_perm in altivec.h. That is, we must complement
    // the permutation mask with respect to 31 and reverse the order of
    // V1 and V2.
    if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
      assert(Mask->getType()->getVectorNumElements() == 16 &&
             "Bad type for intrinsic!");

      // Check that all of the elements are integer constants or undefs.
      bool AllEltsOk = true;
      for (unsigned i = 0; i != 16; ++i) {
        Constant *Elt = Mask->getAggregateElement(i);
        if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
          AllEltsOk = false;
          break;
        }
      }

      if (AllEltsOk) {
        // Cast the input vectors to byte vectors.
        Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0),
                                            Mask->getType());
        Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1),
                                            Mask->getType());
        Value *Result = UndefValue::get(Op0->getType());

        // Only extract each element once.
        Value *ExtractedElts[32];
        memset(ExtractedElts, 0, sizeof(ExtractedElts));

        for (unsigned i = 0; i != 16; ++i) {
          if (isa<UndefValue>(Mask->getAggregateElement(i)))
            continue;
          unsigned Idx =
              cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
          Idx &= 31;  // Match the hardware behavior.
          if (DL.isLittleEndian())
            Idx = 31 - Idx;

          if (!ExtractedElts[Idx]) {
            Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
            Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
            ExtractedElts[Idx] =
                Builder->CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse,
                                              Builder->getInt32(Idx&15));
          }

          // Insert this value into the result vector.
          Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
                                                Builder->getInt32(i));
        }
        return CastInst::Create(Instruction::BitCast, Result, CI.getType());
      }
    }
    break;

  case Intrinsic::arm_neon_vld1:
  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane:
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL, II, AC, DT);
    unsigned AlignArg = II->getNumArgOperands() - 1;
    ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
    if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
      II->setArgOperand(AlignArg,
                        ConstantInt::get(Type::getInt32Ty(II->getContext()),
                                         MemAlign, false));
      return II;
    }
    break;
  }

  case Intrinsic::arm_neon_vmulls:
  case Intrinsic::arm_neon_vmullu:
  case Intrinsic::aarch64_neon_smull:
  case Intrinsic::aarch64_neon_umull: {
    Value *Arg0 = II->getArgOperand(0);
    Value *Arg1 = II->getArgOperand(1);

    // Handle mul by zero first:
    if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
      return ReplaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
    }

    // Check for constant LHS & RHS - in this case we just simplify.
    bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu ||
                 II->getIntrinsicID() == Intrinsic::aarch64_neon_umull);
    VectorType *NewVT = cast<VectorType>(II->getType());
    if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
      if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
        CV0 = ConstantExpr::getIntegerCast(CV0, NewVT, /*isSigned=*/!Zext);
        CV1 = ConstantExpr::getIntegerCast(CV1, NewVT, /*isSigned=*/!Zext);

        return ReplaceInstUsesWith(CI, ConstantExpr::getMul(CV0, CV1));
      }

      // Couldn't simplify - canonicalize constant to the RHS.
      std::swap(Arg0, Arg1);
    }

    // Handle mul by one:
    if (Constant *CV1 = dyn_cast<Constant>(Arg1))
      if (ConstantInt *Splat =
              dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
        if (Splat->isOne())
          return CastInst::CreateIntegerCast(Arg0, II->getType(),
                                             /*isSigned=*/!Zext);

    break;
  }

  case Intrinsic::AMDGPU_rcp: {
    if (const ConstantFP *C = dyn_cast<ConstantFP>(II->getArgOperand(0))) {
      const APFloat &ArgVal = C->getValueAPF();
      APFloat Val(ArgVal.getSemantics(), 1.0);
      APFloat::opStatus Status = Val.divide(ArgVal,
                                            APFloat::rmNearestTiesToEven);
      // Only do this if it was exact and therefore not dependent on the
      // rounding mode.
      if (Status == APFloat::opOK)
        return ReplaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val));
    }
    break;
  }
#endif // HLSL Change - remove platform intrinsics

  case Intrinsic::stackrestore: {
    // If the save is right next to the restore, remove the restore. This can
    // happen when variable allocas are DCE'd.
    if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
      if (SS->getIntrinsicID() == Intrinsic::stacksave) {
        BasicBlock::iterator BI = SS;
        if (&*++BI == II)
          return EraseInstFromFunction(CI);
      }
    }

    // Scan down this block to see if there is another stack restore in the
    // same block without an intervening call/alloca.
    BasicBlock::iterator BI = II;
    TerminatorInst *TI = II->getParent()->getTerminator();
    bool CannotRemove = false;
    for (++BI; &*BI != TI; ++BI) {
      if (isa<AllocaInst>(BI)) {
        CannotRemove = true;
        break;
      }
      if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
          // If there is a stackrestore below this one, remove this one.
          if (II->getIntrinsicID() == Intrinsic::stackrestore)
            return EraseInstFromFunction(CI);
          // Otherwise, ignore the intrinsic.
        } else {
          // If we found a non-intrinsic call, we can't remove the stack
          // restore.
          CannotRemove = true;
          break;
        }
      }
    }

    // If the stack restore is in a return, resume, or unwind block and if there
    // are no allocas or calls between the restore and the return, nuke the
    // restore.
    if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
      return EraseInstFromFunction(CI);
    break;
  }

  case Intrinsic::assume: {
    // Canonicalize assume(a && b) -> assume(a); assume(b);
    // Note: New assumption intrinsics created here are registered by
    // the InstCombineIRInserter object.
    Value *IIOperand = II->getArgOperand(0), *A, *B,
          *AssumeIntrinsic = II->getCalledValue();
    if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) {
      Builder->CreateCall(AssumeIntrinsic, A, II->getName());
      Builder->CreateCall(AssumeIntrinsic, B, II->getName());
      return EraseInstFromFunction(*II);
    }
    // assume(!(a || b)) -> assume(!a); assume(!b);
    if (match(IIOperand, m_Not(m_Or(m_Value(A), m_Value(B))))) {
      Builder->CreateCall(AssumeIntrinsic, Builder->CreateNot(A),
                          II->getName());
      Builder->CreateCall(AssumeIntrinsic, Builder->CreateNot(B),
                          II->getName());
      return EraseInstFromFunction(*II);
    }

    // assume( (load addr) != null ) -> add 'nonnull' metadata to load
    // (if assume is valid at the load)
    if (ICmpInst* ICmp = dyn_cast<ICmpInst>(IIOperand)) {
      Value *LHS = ICmp->getOperand(0);
      Value *RHS = ICmp->getOperand(1);
      if (ICmpInst::ICMP_NE == ICmp->getPredicate() &&
          isa<LoadInst>(LHS) &&
          isa<Constant>(RHS) &&
          RHS->getType()->isPointerTy() &&
          cast<Constant>(RHS)->isNullValue()) {
        LoadInst* LI = cast<LoadInst>(LHS);
        if (isValidAssumeForContext(II, LI, DT)) {
          MDNode *MD = MDNode::get(II->getContext(), None);
          LI->setMetadata(LLVMContext::MD_nonnull, MD);
          return EraseInstFromFunction(*II);
        }
      }
      // TODO: apply nonnull return attributes to calls and invokes
      // TODO: apply range metadata for range check patterns?
    }

    // If there is a dominating assume with the same condition as this one,
    // then this one is redundant, and should be removed.
    APInt KnownZero(1, 0), KnownOne(1, 0);
    computeKnownBits(IIOperand, KnownZero, KnownOne, 0, II);
    if (KnownOne.isAllOnesValue())
      return EraseInstFromFunction(*II);

    break;
  }

  case Intrinsic::experimental_gc_relocate: {
    // Translate facts known about a pointer before relocating into
    // facts about the relocate value, while being careful to
    // preserve relocation semantics.
    GCRelocateOperands Operands(II);
    Value *DerivedPtr = Operands.getDerivedPtr();
    auto *GCRelocateType = cast<PointerType>(II->getType());

    // Remove the relocation if unused, note that this check is required
    // to prevent the cases below from looping forever.
    if (II->use_empty())
      return EraseInstFromFunction(*II);

    // Undef is undef, even after relocation.
    // TODO: provide a hook for this in GCStrategy. This is clearly legal for
    // most practical collectors, but there was discussion in the review thread
    // about whether it was legal for all possible collectors.
    if (isa<UndefValue>(DerivedPtr)) {
      // gc_relocate is uncasted. Use undef of gc_relocate's type to replace it.
      return ReplaceInstUsesWith(*II, UndefValue::get(GCRelocateType));
    }

    // The relocation of null will be null for most any collector.
    // TODO: provide a hook for this in GCStrategy. There might be some weird
    // collector this property does not hold for.
    if (isa<ConstantPointerNull>(DerivedPtr)) {
      // gc_relocate is uncasted. Use null-pointer of gc_relocate's type to
      // replace it.
      return ReplaceInstUsesWith(*II, ConstantPointerNull::get(GCRelocateType));
    }

    // isKnownNonNull -> nonnull attribute
    if (isKnownNonNull(DerivedPtr))
      II->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);

    // isDereferenceablePointer -> deref attribute
    if (isDereferenceablePointer(DerivedPtr, DL)) {
      if (Argument *A = dyn_cast<Argument>(DerivedPtr)) {
        uint64_t Bytes = A->getDereferenceableBytes();
        II->addDereferenceableAttr(AttributeSet::ReturnIndex, Bytes);
      }
    }

    // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
    // Canonicalize on the type from the uses to the defs
    // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
  }
  }

  return visitCallSite(II);
}

  1130. // InvokeInst simplification
  1131. //
  1132. Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
  1133. return visitCallSite(&II);
  1134. }
  1135. /// isSafeToEliminateVarargsCast - If this cast does not affect the value
  1136. /// passed through the varargs area, we can eliminate the use of the cast.
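/// For example (illustrative, IR syntax approximate): in
///   call i32 (i8*, ...) @printf(i8* %fmt, i8* bitcast (i32* @g to i8*))
/// the bitcast of the vararg operand is lossless, so the call can simply pass
/// i32* @g instead.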
static bool isSafeToEliminateVarargsCast(const CallSite CS,
                                         const DataLayout &DL,
                                         const CastInst *const CI,
                                         const int ix) {
  if (!CI->isLosslessCast())
    return false;

  // If this is a GC intrinsic, avoid munging types.  We need types for
  // statepoint reconstruction in SelectionDAG.
  // TODO: This is probably something which should be expanded to all
  // intrinsics since the entire point of intrinsics is that
  // they are understandable by the optimizer.
  if (isStatepoint(CS) || isGCRelocate(CS) || isGCResult(CS))
    return false;

  // The size of ByVal or InAlloca arguments is derived from the type, so we
  // can't change to a type with a different size.  If the size were
  // passed explicitly we could avoid this check.
  if (!CS.isByValOrInAllocaArgument(ix))
    return true;

  Type *SrcTy =
      cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
  Type *DstTy = cast<PointerType>(CI->getType())->getElementType();
  if (!SrcTy->isSized() || !DstTy->isSized())
    return false;
  if (DL.getTypeAllocSize(SrcTy) != DL.getTypeAllocSize(DstTy))
    return false;
  return true;
}

// Try to fold some different types of calls here.
// Currently we're only working with the checking functions: memcpy_chk,
// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
// strcat_chk and strncat_chk.
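// For example (illustrative): a call like
//   __memcpy_chk(dst, src, len, object_size)
// with an object size known to be large enough can be simplified by
// LibCallSimplifier to a plain memcpy.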
Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) {
  if (!CI->getCalledFunction()) return nullptr;

  auto InstCombineRAUW = [this](Instruction *From, Value *With) {
    ReplaceInstUsesWith(*From, With);
  };
  LibCallSimplifier Simplifier(DL, TLI, InstCombineRAUW);
  if (Value *With = Simplifier.optimizeCall(CI)) {
    ++NumSimplified;
    return CI->use_empty() ? CI : ReplaceInstUsesWith(*CI, With);
  }

  return nullptr;
}

static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) {
  // Strip off at most one level of pointer casts, looking for an alloca.  This
  // is good enough in practice and simpler than handling any number of casts.
  Value *Underlying = TrampMem->stripPointerCasts();
  if (Underlying != TrampMem &&
      (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
    return nullptr;
  if (!isa<AllocaInst>(Underlying))
    return nullptr;

  IntrinsicInst *InitTrampoline = nullptr;
  for (User *U : TrampMem->users()) {
    IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
    if (!II)
      return nullptr;
    if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
      if (InitTrampoline)
        // More than one init_trampoline writes to this value.  Give up.
        return nullptr;
      InitTrampoline = II;
      continue;
    }
    if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
      // Allow any number of calls to adjust.trampoline.
      continue;
    return nullptr;
  }

  // No call to init.trampoline found.
  if (!InitTrampoline)
    return nullptr;

  // Check that the alloca is being used in the expected way.
  if (InitTrampoline->getOperand(0) != TrampMem)
    return nullptr;

  return InitTrampoline;
}

static IntrinsicInst *FindInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
                                               Value *TrampMem) {
  // Visit all the previous instructions in the basic block, and try to find an
  // init.trampoline which has a direct path to the adjust.trampoline.
  for (BasicBlock::iterator I = AdjustTramp,
       E = AdjustTramp->getParent()->begin(); I != E; ) {
    Instruction *Inst = --I;
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
      if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
          II->getOperand(0) == TrampMem)
        return II;
    if (Inst->mayWriteToMemory())
      return nullptr;
  }
  return nullptr;
}

// Given a call to llvm.adjust.trampoline, find and return the corresponding
// call to llvm.init.trampoline if the call to the trampoline can be optimized
// to a direct call to a function.  Otherwise return NULL.
//
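// The pattern being matched is roughly (illustrative IR, details elided):
//   %tramp = alloca [N x i8]
//   call void @llvm.init.trampoline(i8* %tp, i8* <func>, i8* <nest value>)
//   %adj = call i8* @llvm.adjust.trampoline(i8* %tp)
//   %fp = bitcast i8* %adj to <function pointer type>
//   call ... %fp(...)
// where %tp points at %tramp.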
static IntrinsicInst *FindInitTrampoline(Value *Callee) {
  Callee = Callee->stripPointerCasts();
  IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
  if (!AdjustTramp ||
      AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
    return nullptr;

  Value *TrampMem = AdjustTramp->getOperand(0);

  if (IntrinsicInst *IT = FindInitTrampolineFromAlloca(TrampMem))
    return IT;
  if (IntrinsicInst *IT = FindInitTrampolineFromBB(AdjustTramp, TrampMem))
    return IT;
  return nullptr;
}

// visitCallSite - Improvements for call and invoke instructions.
//
Instruction *InstCombiner::visitCallSite(CallSite CS) {
  if (isAllocLikeFn(CS.getInstruction(), TLI))
    return visitAllocSite(*CS.getInstruction());

  bool Changed = false;

  // Mark any parameters that are known to be non-null with the nonnull
  // attribute.  This is helpful for inlining calls to functions with null
  // checks on their arguments.
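  // For example (illustrative): if %buf is an alloca, then in
  //   call void @use(i8* %buf)
  // the argument can be marked 'nonnull', because allocas are known to be
  // non-null.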
  unsigned ArgNo = 0;
  for (Value *V : CS.args()) {
    if (!CS.paramHasAttr(ArgNo+1, Attribute::NonNull) &&
        isKnownNonNull(V)) {
      AttributeSet AS = CS.getAttributes();
      AS = AS.addAttribute(CS.getInstruction()->getContext(), ArgNo+1,
                           Attribute::NonNull);
      CS.setAttributes(AS);
      Changed = true;
    }
    ArgNo++;
  }
  assert(ArgNo == CS.arg_size() && "sanity check");

  // If the callee is a pointer to a function, attempt to move any casts to the
  // arguments of the call/invoke.
  Value *Callee = CS.getCalledValue();
  if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
    return nullptr;

  if (Function *CalleeF = dyn_cast<Function>(Callee))
    // If the call and callee calling conventions don't match, this call must
    // be unreachable, as the call is undefined.
    if (CalleeF->getCallingConv() != CS.getCallingConv() &&
        // Only do this for calls to a function with a body.  A prototype may
        // not actually end up matching the implementation's calling conv for a
        // variety of reasons (e.g. it may be written in assembly).
        !CalleeF->isDeclaration()) {
      Instruction *OldCall = CS.getInstruction();
      new StoreInst(ConstantInt::getTrue(Callee->getContext()),
                    UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
                    OldCall);
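      // The store created above is 'store i1 true, i1* undef'; storing through
      // an undefined pointer is undefined behaviour, which records that this
      // point is unreachable even though we cannot modify the CFG here.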
      // If OldCall does not return void then replaceAllUsesWith undef.
      // This allows ValueHandles and custom metadata to adjust themselves.
      if (!OldCall->getType()->isVoidTy())
        ReplaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
      if (isa<CallInst>(OldCall))
        return EraseInstFromFunction(*OldCall);

      // We cannot remove an invoke, because doing so would change the CFG;
      // just change the callee to a null pointer.
      cast<InvokeInst>(OldCall)->setCalledFunction(
                                    Constant::getNullValue(CalleeF->getType()));
      return nullptr;
    }

  if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
    // If CS does not return void then replaceAllUsesWith undef.
    // This allows ValueHandles and custom metadata to adjust themselves.
    if (!CS.getInstruction()->getType()->isVoidTy())
      ReplaceInstUsesWith(*CS.getInstruction(),
                          UndefValue::get(CS.getInstruction()->getType()));

    if (isa<InvokeInst>(CS.getInstruction())) {
      // Can't remove an invoke because we cannot change the CFG.
      return nullptr;
    }

    // This instruction is not reachable; just remove it.  We insert a store to
    // undef so that we know this code is not reachable, despite the fact that
    // we can't modify the CFG here.
    new StoreInst(ConstantInt::getTrue(Callee->getContext()),
                  UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
                  CS.getInstruction());

    return EraseInstFromFunction(*CS.getInstruction());
  }

  if (IntrinsicInst *II = FindInitTrampoline(Callee))
    return transformCallThroughTrampoline(CS, II);

  PointerType *PTy = cast<PointerType>(Callee->getType());
  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
  if (FTy->isVarArg()) {
    int ix = FTy->getNumParams();
    // See if we can optimize any arguments passed through the varargs area of
    // the call.
    for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(),
           E = CS.arg_end(); I != E; ++I, ++ix) {
      CastInst *CI = dyn_cast<CastInst>(*I);
      if (CI && isSafeToEliminateVarargsCast(CS, DL, CI, ix)) {
        *I = CI->getOperand(0);
        Changed = true;
      }
    }
  }

  if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
    // Inline asm calls cannot throw - mark them 'nounwind'.
    CS.setDoesNotThrow();
    Changed = true;
  }

  // Try to optimize the call if possible; we require DataLayout for most of
  // this.  None of these calls are seen as possibly dead so go ahead and
  // delete the instruction now.
  if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
    Instruction *I = tryOptimizeCall(CI);
    // If we changed something, return the result; otherwise let the
    // fallthrough checks below handle it.
    if (I) return EraseInstFromFunction(*I);
  }

  return Changed ? CS.getInstruction() : nullptr;
}

// transformConstExprCastCall - If the callee is a constexpr cast of a function,
// attempt to move the cast to the arguments of the call/invoke.
//
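// For example (illustrative, IR syntax approximate):
//   call void bitcast (void (i8*)* @f to void (i32*)*)(i32* %p)
// becomes
//   %0 = bitcast i32* %p to i8*
//   call void @f(i8* %0)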
bool InstCombiner::transformConstExprCastCall(CallSite CS) {
  Function *Callee =
    dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
  if (!Callee)
    return false;

  // The prototypes of thunks are a lie; don't try to directly call such
  // functions.
  if (Callee->hasFnAttribute("thunk"))
    return false;

  Instruction *Caller = CS.getInstruction();
  const AttributeSet &CallerPAL = CS.getAttributes();

  // Okay, this is a cast from a function to a different type.  Unless doing so
  // would cause a type conversion of one of our arguments, change this call to
  // be a direct call with arguments casted to the appropriate types.
  //
  FunctionType *FT = Callee->getFunctionType();
  Type *OldRetTy = Caller->getType();
  Type *NewRetTy = FT->getReturnType();

  // Check to see if we are changing the return type...
  if (OldRetTy != NewRetTy) {

    if (NewRetTy->isStructTy())
      return false; // TODO: Handle multiple return values.

    if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
      if (Callee->isDeclaration())
        return false;   // Cannot transform this return value.

      if (!Caller->use_empty() &&
          // void -> non-void is handled specially
          !NewRetTy->isVoidTy())
        return false;   // Cannot transform this return value.
    }

    if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
      AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);
      if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy)))
        return false;   // Attribute not compatible with transformed value.
    }

    // If the callsite is an invoke instruction, and the return value is used
    // by a PHI node in a successor, we cannot change the return type of the
    // call because there is no place to put the cast instruction (without
    // breaking the critical edge).  Bail out in this case.
    if (!Caller->use_empty())
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
        for (User *U : II->users())
          if (PHINode *PN = dyn_cast<PHINode>(U))
            if (PN->getParent() == II->getNormalDest() ||
                PN->getParent() == II->getUnwindDest())
              return false;
  }

  unsigned NumActualArgs = CS.arg_size();
  unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);

  // Prevent us turning:
  // declare void @takes_i32_inalloca(i32* inalloca)
  //  call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
  //
  // into:
  //  call void @takes_i32_inalloca(i32* null)
  //
  // Similarly, avoid folding away bitcasts of byval calls.
  if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
      Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
    return false;

  CallSite::arg_iterator AI = CS.arg_begin();
  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
    Type *ParamTy = FT->getParamType(i);
    Type *ActTy = (*AI)->getType();

    if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
      return false;   // Cannot transform this parameter value.

    if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1).
          overlaps(AttributeFuncs::typeIncompatible(ParamTy)))
      return false;   // Attribute not compatible with transformed value.

    if (CS.isInAllocaArgument(i))
      return false;   // Cannot transform to and from inalloca.

    // If the parameter is passed as a byval argument, then we have to have a
    // sized type and the sized type has to have the same size as the old type.
    if (ParamTy != ActTy &&
        CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1,
                                                         Attribute::ByVal)) {
      PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
      if (!ParamPTy || !ParamPTy->getElementType()->isSized())
        return false;

      Type *CurElTy = ActTy->getPointerElementType();
      if (DL.getTypeAllocSize(CurElTy) !=
          DL.getTypeAllocSize(ParamPTy->getElementType()))
        return false;
    }
  }

  if (Callee->isDeclaration()) {
    // Do not delete arguments unless we have a function body.
    if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
      return false;

    // If the callee is just a declaration, don't change the varargsness of the
    // call.  We don't want to introduce a varargs call where one doesn't
    // already exist.
    PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
    if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
      return false;

    // If both the callee and the cast type are varargs, we still have to make
    // sure the number of fixed parameters are the same or we have the same
    // ABI issues as if we introduce a varargs call.
    if (FT->isVarArg() &&
        cast<FunctionType>(APTy->getElementType())->isVarArg() &&
        FT->getNumParams() !=
        cast<FunctionType>(APTy->getElementType())->getNumParams())
      return false;
  }

  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
      !CallerPAL.isEmpty())
    // In this case we have more arguments than the new function type, but we
    // won't be dropping them.  Check that these extra arguments have
    // attributes that are compatible with being a vararg call argument.
    for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
      unsigned Index = CallerPAL.getSlotIndex(i - 1);
      if (Index <= FT->getNumParams())
        break;

      // Check if it has an attribute that's incompatible with varargs.
      AttributeSet PAttrs = CallerPAL.getSlotAttributes(i - 1);
      if (PAttrs.hasAttribute(Index, Attribute::StructRet))
        return false;
    }

  // Okay, we decided that this is a safe thing to do: go ahead and start
  // inserting cast instructions as necessary.
  std::vector<Value*> Args;
  Args.reserve(NumActualArgs);
  SmallVector<AttributeSet, 8> attrVec;
  attrVec.reserve(NumCommonArgs);

  // Get any return attributes.
  AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);

  // If the return value is not being used, the type may not be compatible
  // with the existing attributes.  Wipe out any problematic attributes.
  RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));

  // Add the new return attributes.
  if (RAttrs.hasAttributes())
    attrVec.push_back(AttributeSet::get(Caller->getContext(),
                                        AttributeSet::ReturnIndex, RAttrs));

  AI = CS.arg_begin();
  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
    Type *ParamTy = FT->getParamType(i);

    if ((*AI)->getType() == ParamTy) {
      Args.push_back(*AI);
    } else {
      Args.push_back(Builder->CreateBitOrPointerCast(*AI, ParamTy));
    }

    // Add any parameter attributes.
    AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1);
    if (PAttrs.hasAttributes())
      attrVec.push_back(AttributeSet::get(Caller->getContext(), i + 1,
                                          PAttrs));
  }

  // If the function takes more arguments than the call was taking, add them
  // now.
  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
    Args.push_back(Constant::getNullValue(FT->getParamType(i)));

  // If we are removing arguments to the function, emit an obnoxious warning.
  if (FT->getNumParams() < NumActualArgs) {
    // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
    if (FT->isVarArg()) {
      // Add all of the arguments in their promoted form to the arg list.
      for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
        Type *PTy = getPromotedType((*AI)->getType());
        if (PTy != (*AI)->getType()) {
          // Must promote to pass through va_arg area!
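          // (getPromotedType widens integer types narrower than i32 to i32,
          // so e.g. an i8 argument is passed through the varargs area as i32.)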
          Instruction::CastOps opcode =
            CastInst::getCastOpcode(*AI, false, PTy, false);
          Args.push_back(Builder->CreateCast(opcode, *AI, PTy));
        } else {
          Args.push_back(*AI);
        }

        // Add any parameter attributes.
        AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1);
        if (PAttrs.hasAttributes())
          attrVec.push_back(AttributeSet::get(FT->getContext(), i + 1,
                                              PAttrs));
      }
    }
  }

  AttributeSet FnAttrs = CallerPAL.getFnAttributes();
  if (CallerPAL.hasAttributes(AttributeSet::FunctionIndex))
    attrVec.push_back(AttributeSet::get(Callee->getContext(), FnAttrs));

  if (NewRetTy->isVoidTy())
    Caller->setName("");   // Void type should not have a name.

  const AttributeSet &NewCallerPAL = AttributeSet::get(Callee->getContext(),
                                                       attrVec);

  Instruction *NC;
  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
    NC = Builder->CreateInvoke(Callee, II->getNormalDest(),
                               II->getUnwindDest(), Args);
    NC->takeName(II);
    cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
    cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
  } else {
    CallInst *CI = cast<CallInst>(Caller);
    NC = Builder->CreateCall(Callee, Args);
    NC->takeName(CI);
    if (CI->isTailCall())
      cast<CallInst>(NC)->setTailCall();
    cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
    cast<CallInst>(NC)->setAttributes(NewCallerPAL);
  }

  // Insert a cast of the return type as necessary.
  Value *NV = NC;
  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
    if (!NV->getType()->isVoidTy()) {
      NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
      NC->setDebugLoc(Caller->getDebugLoc());

      // If this is an invoke instruction, we should insert it after the first
      // non-PHI instruction in the normal successor block.
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
        BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt();
        InsertNewInstBefore(NC, *I);
      } else {
        // Otherwise, it's a call; just insert the cast right after the call.
        InsertNewInstBefore(NC, *Caller);
      }
      Worklist.AddUsersToWorkList(*Caller);
    } else {
      NV = UndefValue::get(Caller->getType());
    }
  }

  if (!Caller->use_empty())
    ReplaceInstUsesWith(*Caller, NV);
  else if (Caller->hasValueHandle()) {
    if (OldRetTy == NV->getType())
      ValueHandleBase::ValueIsRAUWd(Caller, NV);
    else
      // We cannot call ValueIsRAUWd with a different type, and the
      // actual tracked value will disappear.
      ValueHandleBase::ValueIsDeleted(Caller);
  }

  EraseInstFromFunction(*Caller);
  return true;
}

// transformCallThroughTrampoline - Turn a call to a function created by the
// init_trampoline / adjust_trampoline intrinsic pair into a direct call to the
// underlying function.
//
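// For example (informal sketch): if the underlying function is
//   declare void @f(i8* nest, i32)
// and the trampoline was initialized with chain value %chain, then a call
// through the adjusted trampoline pointer such as
//   call void %fp(i32 7)
// becomes
//   call void @f(i8* nest %chain, i32 7)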
Instruction *
InstCombiner::transformCallThroughTrampoline(CallSite CS,
                                             IntrinsicInst *Tramp) {
  Value *Callee = CS.getCalledValue();
  PointerType *PTy = cast<PointerType>(Callee->getType());
  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
  const AttributeSet &Attrs = CS.getAttributes();

  // If the call already has the 'nest' attribute somewhere then give up -
  // otherwise 'nest' would occur twice after splicing in the chain.
  if (Attrs.hasAttrSomewhere(Attribute::Nest))
    return nullptr;

  assert(Tramp &&
         "transformCallThroughTrampoline called with incorrect CallSite.");

  Function *NestF = cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
  PointerType *NestFPTy = cast<PointerType>(NestF->getType());
  FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());

  const AttributeSet &NestAttrs = NestF->getAttributes();
  if (!NestAttrs.isEmpty()) {
    unsigned NestIdx = 1;
    Type *NestTy = nullptr;
    AttributeSet NestAttr;

    // Look for a parameter marked with the 'nest' attribute.
    for (FunctionType::param_iterator I = NestFTy->param_begin(),
         E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
      if (NestAttrs.hasAttribute(NestIdx, Attribute::Nest)) {
        // Record the parameter type and any other attributes.
        NestTy = *I;
        NestAttr = NestAttrs.getParamAttributes(NestIdx);
        break;
      }

    if (NestTy) {
      Instruction *Caller = CS.getInstruction();
      std::vector<Value*> NewArgs;
      NewArgs.reserve(CS.arg_size() + 1);

      SmallVector<AttributeSet, 8> NewAttrs;
      NewAttrs.reserve(Attrs.getNumSlots() + 1);

      // Insert the nest argument into the call argument list, which may
      // mean appending it.  Likewise for attributes.

      // Add any result attributes.
      if (Attrs.hasAttributes(AttributeSet::ReturnIndex))
        NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
                                             Attrs.getRetAttributes()));

      {
        unsigned Idx = 1;
        CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
        do {
          if (Idx == NestIdx) {
            // Add the chain argument and attributes.
            Value *NestVal = Tramp->getArgOperand(2);
            if (NestVal->getType() != NestTy)
              NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest");
            NewArgs.push_back(NestVal);
            NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
                                                 NestAttr));
          }

          if (I == E)
            break;

          // Add the original argument and attributes.
          NewArgs.push_back(*I);
          AttributeSet Attr = Attrs.getParamAttributes(Idx);
          if (Attr.hasAttributes(Idx)) {
            AttrBuilder B(Attr, Idx);
            NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
                                                 Idx + (Idx >= NestIdx), B));
          }

          ++Idx, ++I;
        } while (1);
      }

      // Add any function attributes.
      if (Attrs.hasAttributes(AttributeSet::FunctionIndex))
        NewAttrs.push_back(AttributeSet::get(FTy->getContext(),
                                             Attrs.getFnAttributes()));

      // The trampoline may have been bitcast to a bogus type (FTy).
      // Handle this by synthesizing a new function type, equal to FTy
      // with the chain parameter inserted.
      std::vector<Type*> NewTypes;
      NewTypes.reserve(FTy->getNumParams()+1);

      // Insert the chain's type into the list of parameter types, which may
      // mean appending it.
      {
        unsigned Idx = 1;
        FunctionType::param_iterator I = FTy->param_begin(),
          E = FTy->param_end();
        do {
          if (Idx == NestIdx)
            // Add the chain's type.
            NewTypes.push_back(NestTy);

          if (I == E)
            break;

          // Add the original type.
          NewTypes.push_back(*I);

          ++Idx, ++I;
        } while (1);
      }

      // Replace the trampoline call with a direct call.  Let the generic
      // code sort out any function type mismatches.
      FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
                                               FTy->isVarArg());
      Constant *NewCallee =
        NestF->getType() == PointerType::getUnqual(NewFTy) ?
        NestF : ConstantExpr::getBitCast(NestF,
                                         PointerType::getUnqual(NewFTy));
      const AttributeSet &NewPAL =
          AttributeSet::get(FTy->getContext(), NewAttrs);

      Instruction *NewCaller;
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
        NewCaller = InvokeInst::Create(NewCallee,
                                       II->getNormalDest(), II->getUnwindDest(),
                                       NewArgs);
        cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
        cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
      } else {
        NewCaller = CallInst::Create(NewCallee, NewArgs);
        if (cast<CallInst>(Caller)->isTailCall())
          cast<CallInst>(NewCaller)->setTailCall();
        cast<CallInst>(NewCaller)->
          setCallingConv(cast<CallInst>(Caller)->getCallingConv());
        cast<CallInst>(NewCaller)->setAttributes(NewPAL);
      }

      return NewCaller;
    }
  }

  // Replace the trampoline call with a direct call.  Since there is no 'nest'
  // parameter, there is no need to adjust the argument list.  Let the generic
  // code sort out any function type mismatches.
  Constant *NewCallee =
    NestF->getType() == PTy ? NestF :
                              ConstantExpr::getBitCast(NestF, PTy);
  CS.setCalledFunction(NewCallee);
  return CS.getInstruction();
}