InstCombineCalls.cpp

  1. //===- InstCombineCalls.cpp -----------------------------------------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file implements the visitCall and visitInvoke functions.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "InstCombineInternal.h"
  14. #include "llvm/ADT/Statistic.h"
  15. #include "llvm/Analysis/InstructionSimplify.h"
  16. #include "llvm/Analysis/MemoryBuiltins.h"
  17. #include "llvm/IR/CallSite.h"
  18. #include "llvm/IR/Dominators.h"
  19. #include "llvm/IR/PatternMatch.h"
  20. #include "llvm/IR/Statepoint.h"
  21. #include "llvm/Transforms/Utils/BuildLibCalls.h"
  22. #include "llvm/Transforms/Utils/Local.h"
  23. #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
  24. using namespace llvm;
  25. using namespace PatternMatch;
  26. #define DEBUG_TYPE "instcombine"
  27. STATISTIC(NumSimplified, "Number of library calls simplified");
  28. /// getPromotedType - Return the specified type promoted as it would be to pass
  29. /// through a va_arg area.
  30. static Type *getPromotedType(Type *Ty) {
  31. if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
  32. if (ITy->getBitWidth() < 32)
  33. return Type::getInt32Ty(Ty->getContext());
  34. }
  35. return Ty;
  36. }
  37. /// reduceToSingleValueType - Given an aggregate type which ultimately holds a
  38. /// single scalar element, like {{{type}}} or [1 x type], return type.
  39. static Type *reduceToSingleValueType(Type *T) {
  40. while (!T->isSingleValueType()) {
  41. if (StructType *STy = dyn_cast<StructType>(T)) {
  42. if (STy->getNumElements() == 1)
  43. T = STy->getElementType(0);
  44. else
  45. break;
  46. } else if (ArrayType *ATy = dyn_cast<ArrayType>(T)) {
  47. if (ATy->getNumElements() == 1)
  48. T = ATy->getElementType();
  49. else
  50. break;
  51. } else
  52. break;
  53. }
  54. return T;
  55. }
  56. Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
  57. unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, AC, DT);
  58. unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, AC, DT);
  59. unsigned MinAlign = std::min(DstAlign, SrcAlign);
  60. unsigned CopyAlign = MI->getAlignment();
  61. if (CopyAlign < MinAlign) {
  62. MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
  63. MinAlign, false));
  64. return MI;
  65. }
  66. // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
  67. // load/store.
  68. ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2));
  69. if (!MemOpLength) return nullptr;
  70. // Source and destination pointer types are always "i8*" for intrinsic. See
  71. // if the size is something we can handle with a single primitive load/store.
  72. // A single load+store correctly handles overlapping memory in the memmove
  73. // case.
  74. uint64_t Size = MemOpLength->getLimitedValue();
  75. assert(Size && "0-sized memory transferring should be removed already.");
  76. if (Size > 8 || (Size&(Size-1)))
  77. return nullptr; // If not 1/2/4/8 bytes, exit.
  78. // Use an integer load+store unless we can find something better.
  79. unsigned SrcAddrSp =
  80. cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
  81. unsigned DstAddrSp =
  82. cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();
  83. IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
  84. Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
  85. Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
  86. // Memcpy forces the use of i8* for the source and destination. That means
  87. // that if you're using memcpy to move one double around, you'll get a cast
  88. // from double* to i8*. We'd much rather use a double load+store rather than
  89. // an i64 load+store here, because this improves the odds that the source or
  90. // dest address will be promotable. See if we can find a better type than the
  91. // integer datatype.
  92. Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts();
  93. MDNode *CopyMD = nullptr;
  94. if (StrippedDest != MI->getArgOperand(0)) {
  95. Type *SrcETy = cast<PointerType>(StrippedDest->getType())
  96. ->getElementType();
  97. if (SrcETy->isSized() && DL.getTypeStoreSize(SrcETy) == Size) {
  98. // The SrcETy might be something like {{{double}}} or [1 x double]. Rip
  99. // down through these levels if so.
  100. SrcETy = reduceToSingleValueType(SrcETy);
  101. if (SrcETy->isSingleValueType()) {
  102. NewSrcPtrTy = PointerType::get(SrcETy, SrcAddrSp);
  103. NewDstPtrTy = PointerType::get(SrcETy, DstAddrSp);
  104. // If the memcpy has metadata describing the members, see if we can
  105. // get the TBAA tag describing our copy.
  106. if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
  107. if (M->getNumOperands() == 3 && M->getOperand(0) &&
  108. mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
  109. mdconst::extract<ConstantInt>(M->getOperand(0))->isNullValue() &&
  110. M->getOperand(1) &&
  111. mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
  112. mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() ==
  113. Size &&
  114. M->getOperand(2) && isa<MDNode>(M->getOperand(2)))
  115. CopyMD = cast<MDNode>(M->getOperand(2));
  116. }
  117. }
  118. }
  119. }
  120. // If the memcpy/memmove provides better alignment info than we can
  121. // infer, use it.
  122. SrcAlign = std::max(SrcAlign, CopyAlign);
  123. DstAlign = std::max(DstAlign, CopyAlign);
  124. Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
  125. Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
  126. LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile());
  127. L->setAlignment(SrcAlign);
  128. if (CopyMD)
  129. L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
  130. StoreInst *S = Builder->CreateStore(L, Dest, MI->isVolatile());
  131. S->setAlignment(DstAlign);
  132. if (CopyMD)
  133. S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
  134. // Set the size of the copy to 0; it will be deleted on the next iteration.
  135. MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType()));
  136. return MI;
  137. }
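// Rough sketch of the net effect for a constant 8-byte copy whose destination
// strips to a double* (types and size chosen only for illustration):
//   memcpy(dst, src, 8)  ->  %t = load double from src; store %t to dst
// after which the length operand is zeroed so the now-dead memcpy is erased on
// a later iteration.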
  138. Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
  139. unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, AC, DT);
  140. if (MI->getAlignment() < Alignment) {
  141. MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
  142. Alignment, false));
  143. return MI;
  144. }
  145. // Extract the length and alignment and fill if they are constant.
  146. ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
  147. ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
  148. if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
  149. return nullptr;
  150. uint64_t Len = LenC->getLimitedValue();
  151. Alignment = MI->getAlignment();
  152. assert(Len && "0-sized memory setting should be removed already.");
  153. // memset(s,c,n) -> store s, c (for n=1,2,4,8)
  154. if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
  155. Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8.
  156. Value *Dest = MI->getDest();
  157. unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
  158. Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
  159. Dest = Builder->CreateBitCast(Dest, NewDstPtrTy);
  160. // Alignment 0 is identity for alignment 1 for memset, but not store.
  161. if (Alignment == 0) Alignment = 1;
  162. // Extract the fill value and store.
  163. uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
  164. StoreInst *S = Builder->CreateStore(ConstantInt::get(ITy, Fill), Dest,
  165. MI->isVolatile());
  166. S->setAlignment(Alignment);
  167. // Set the size of the copy to 0; it will be deleted on the next iteration.
  168. MI->setLength(Constant::getNullValue(LenC->getType()));
  169. return MI;
  170. }
  171. return nullptr;
  172. }
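// Rough sketch (fill byte and length chosen only for illustration):
//   memset(p, 1, 4)  ->  store i32 0x01010101 to p
// the replicated 64-bit pattern FillC * 0x0101010101010101 being truncated to
// the width of the store by ConstantInt::get.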
  173. #if 0 // HLSL Change - remove platform intrinsics
  174. static Value *SimplifyX86insertps(const IntrinsicInst &II,
  175. InstCombiner::BuilderTy &Builder) {
  176. if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
  177. VectorType *VecTy = cast<VectorType>(II.getType());
  178. assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");
  179. // The immediate permute control byte looks like this:
  180. // [3:0] - zero mask for each 32-bit lane
  181. // [5:4] - select one 32-bit destination lane
  182. // [7:6] - select one 32-bit source lane
  183. uint8_t Imm = CInt->getZExtValue();
  184. uint8_t ZMask = Imm & 0xf;
  185. uint8_t DestLane = (Imm >> 4) & 0x3;
  186. uint8_t SourceLane = (Imm >> 6) & 0x3;
  187. ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
  188. // If all zero mask bits are set, this was just a weird way to
  189. // generate a zero vector.
  190. if (ZMask == 0xf)
  191. return ZeroVector;
  192. // Initialize by passing all of the first source bits through.
  193. int ShuffleMask[4] = { 0, 1, 2, 3 };
  194. // We may replace the second operand with the zero vector.
  195. Value *V1 = II.getArgOperand(1);
  196. if (ZMask) {
  197. // If the zero mask is being used with a single input or the zero mask
  198. // overrides the destination lane, this is a shuffle with the zero vector.
  199. if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
  200. (ZMask & (1 << DestLane))) {
  201. V1 = ZeroVector;
  202. // We may still move 32-bits of the first source vector from one lane
  203. // to another.
  204. ShuffleMask[DestLane] = SourceLane;
  205. // The zero mask may override the previous insert operation.
  206. for (unsigned i = 0; i < 4; ++i)
  207. if ((ZMask >> i) & 0x1)
  208. ShuffleMask[i] = i + 4;
  209. } else {
  210. // TODO: Model this case as 2 shuffles or a 'logical and' plus shuffle?
  211. return nullptr;
  212. }
  213. } else {
  214. // Replace the selected destination lane with the selected source lane.
  215. ShuffleMask[DestLane] = SourceLane + 4;
  216. }
  217. return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);
  218. }
  219. return nullptr;
  220. }
  221. /// The shuffle mask for a perm2*128 selects any two halves of two 256-bit
  222. /// source vectors, unless a zero bit is set. If a zero bit is set,
  223. /// then ignore that half of the mask and clear that half of the vector.
  224. static Value *SimplifyX86vperm2(const IntrinsicInst &II,
  225. InstCombiner::BuilderTy &Builder) {
  226. if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
  227. VectorType *VecTy = cast<VectorType>(II.getType());
  228. ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
  229. // The immediate permute control byte looks like this:
  230. // [1:0] - select 128 bits from sources for low half of destination
  231. // [2] - ignore
  232. // [3] - zero low half of destination
  233. // [5:4] - select 128 bits from sources for high half of destination
  234. // [6] - ignore
  235. // [7] - zero high half of destination
  236. uint8_t Imm = CInt->getZExtValue();
  237. bool LowHalfZero = Imm & 0x08;
  238. bool HighHalfZero = Imm & 0x80;
  239. // If both zero mask bits are set, this was just a weird way to
  240. // generate a zero vector.
  241. if (LowHalfZero && HighHalfZero)
  242. return ZeroVector;
  243. // If 0 or 1 zero mask bits are set, this is a simple shuffle.
  244. unsigned NumElts = VecTy->getNumElements();
  245. unsigned HalfSize = NumElts / 2;
  246. SmallVector<int, 8> ShuffleMask(NumElts);
  247. // The high bit of the selection field chooses the 1st or 2nd operand.
  248. bool LowInputSelect = Imm & 0x02;
  249. bool HighInputSelect = Imm & 0x20;
  250. // The low bit of the selection field chooses the low or high half
  251. // of the selected operand.
  252. bool LowHalfSelect = Imm & 0x01;
  253. bool HighHalfSelect = Imm & 0x10;
  254. // Determine which operand(s) are actually in use for this instruction.
  255. Value *V0 = LowInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
  256. Value *V1 = HighInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
  257. // If needed, replace operands based on zero mask.
  258. V0 = LowHalfZero ? ZeroVector : V0;
  259. V1 = HighHalfZero ? ZeroVector : V1;
  260. // Permute low half of result.
  261. unsigned StartIndex = LowHalfSelect ? HalfSize : 0;
  262. for (unsigned i = 0; i < HalfSize; ++i)
  263. ShuffleMask[i] = StartIndex + i;
  264. // Permute high half of result.
  265. StartIndex = HighHalfSelect ? HalfSize : 0;
  266. StartIndex += NumElts;
  267. for (unsigned i = 0; i < HalfSize; ++i)
  268. ShuffleMask[i + HalfSize] = StartIndex + i;
  269. return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
  270. }
  271. return nullptr;
  272. }
  273. #endif // HLSL Change - remove platform intrinsics
  274. /// visitCallInst - CallInst simplification. This mostly only handles folding
  275. /// of intrinsic instructions. For normal calls, it allows visitCallSite to do
  276. /// the heavy lifting.
  277. ///
  278. Instruction *InstCombiner::visitCallInst(CallInst &CI) {
  279. auto Args = CI.arg_operands();
  280. if (Value *V = SimplifyCall(CI.getCalledValue(), Args.begin(), Args.end(), DL,
  281. TLI, DT, AC))
  282. return ReplaceInstUsesWith(CI, V);
  283. if (isFreeCall(&CI, TLI))
  284. return visitFree(CI);
  285. // If the caller function is nounwind, mark the call as nounwind, even if the
  286. // callee isn't.
  287. if (CI.getParent()->getParent()->doesNotThrow() &&
  288. !CI.doesNotThrow()) {
  289. CI.setDoesNotThrow();
  290. return &CI;
  291. }
  292. IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
  293. if (!II) return visitCallSite(&CI);
  294. // Intrinsics cannot occur in an invoke, so handle them here instead of in
  295. // visitCallSite.
  296. if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
  297. bool Changed = false;
  298. // memmove/cpy/set of zero bytes is a noop.
  299. if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
  300. if (NumBytes->isNullValue())
  301. return EraseInstFromFunction(CI);
  302. if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
  303. if (CI->getZExtValue() == 1) {
  304. // Replace the instruction with just byte operations. We would
  305. // transform other cases to loads/stores, but we don't know if
  306. // alignment is sufficient.
  307. }
  308. }
  309. // No other transformations apply to volatile transfers.
  310. if (MI->isVolatile())
  311. return nullptr;
  312. // If we have a memmove and the source operand is a constant global,
  313. // then the source and dest pointers can't alias, so we can change this
  314. // into a call to memcpy.
  315. if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
  316. if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
  317. if (GVSrc->isConstant()) {
  318. Module *M = CI.getParent()->getParent()->getParent();
  319. Intrinsic::ID MemCpyID = Intrinsic::memcpy;
  320. Type *Tys[3] = { CI.getArgOperand(0)->getType(),
  321. CI.getArgOperand(1)->getType(),
  322. CI.getArgOperand(2)->getType() };
  323. CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
  324. Changed = true;
  325. }
  326. }
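// e.g. memmove(dst, @constant_global, n) is rewritten in place as
// memcpy(dst, @constant_global, n), since a constant global cannot alias the
// writable destination.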
  327. if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
  328. // memmove(x,x,size) -> noop.
  329. if (MTI->getSource() == MTI->getDest())
  330. return EraseInstFromFunction(CI);
  331. }
  332. // If we can determine a pointer alignment that is bigger than currently
  333. // set, update the alignment.
  334. if (isa<MemTransferInst>(MI)) {
  335. if (Instruction *I = SimplifyMemTransfer(MI))
  336. return I;
  337. } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
  338. if (Instruction *I = SimplifyMemSet(MSI))
  339. return I;
  340. }
  341. if (Changed) return II;
  342. }
  343. switch (II->getIntrinsicID()) {
  344. default: break;
  345. case Intrinsic::objectsize: {
  346. uint64_t Size;
  347. if (getObjectSize(II->getArgOperand(0), Size, DL, TLI))
  348. return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size));
  349. return nullptr;
  350. }
  351. case Intrinsic::bswap: {
  352. Value *IIOperand = II->getArgOperand(0);
  353. Value *X = nullptr;
  354. // bswap(bswap(x)) -> x
  355. if (match(IIOperand, m_BSwap(m_Value(X))))
  356. return ReplaceInstUsesWith(CI, X);
  357. // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
  358. if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
  359. unsigned C = X->getType()->getPrimitiveSizeInBits() -
  360. IIOperand->getType()->getPrimitiveSizeInBits();
  361. Value *CV = ConstantInt::get(X->getType(), C);
  362. Value *V = Builder->CreateLShr(X, CV);
  363. return new TruncInst(V, IIOperand->getType());
  364. }
  365. break;
  366. }
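// Concrete instance of the second fold for an i64 value truncated to i16:
//   bswap(trunc i16 (bswap i64 %x))  ->  trunc i16 (lshr i64 %x, 48)
// where 48 is the difference between the two bit widths.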
  367. case Intrinsic::powi:
  368. if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
  369. // powi(x, 0) -> 1.0
  370. if (Power->isZero())
  371. return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
  372. // powi(x, 1) -> x
  373. if (Power->isOne())
  374. return ReplaceInstUsesWith(CI, II->getArgOperand(0));
  375. // powi(x, -1) -> 1/x
  376. if (Power->isAllOnesValue())
  377. return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
  378. II->getArgOperand(0));
  379. }
  380. break;
  381. case Intrinsic::cttz: {
  382. // If all bits below the first known one are known zero,
  383. // this value is constant.
  384. IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
  385. // FIXME: Try to simplify vectors of integers.
  386. if (!IT) break;
  387. uint32_t BitWidth = IT->getBitWidth();
  388. APInt KnownZero(BitWidth, 0);
  389. APInt KnownOne(BitWidth, 0);
  390. computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne, 0, II);
  391. unsigned TrailingZeros = KnownOne.countTrailingZeros();
  392. APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros));
  393. if ((Mask & KnownZero) == Mask)
  394. return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
  395. APInt(BitWidth, TrailingZeros)));
  396. }
  397. break;
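// Example: for %v = or (shl %x, 3), 4 the two low bits are known zero and bit
// 2 is known one, so cttz(%v) can be folded to the constant 2.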
  398. case Intrinsic::ctlz: {
  399. // If all bits above the first known one are known zero,
  400. // this value is constant.
  401. IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
  402. // FIXME: Try to simplify vectors of integers.
  403. if (!IT) break;
  404. uint32_t BitWidth = IT->getBitWidth();
  405. APInt KnownZero(BitWidth, 0);
  406. APInt KnownOne(BitWidth, 0);
  407. computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne, 0, II);
  408. unsigned LeadingZeros = KnownOne.countLeadingZeros();
  409. APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros));
  410. if ((Mask & KnownZero) == Mask)
  411. return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
  412. APInt(BitWidth, LeadingZeros)));
  413. }
  414. break;
  415. case Intrinsic::uadd_with_overflow:
  416. case Intrinsic::sadd_with_overflow:
  417. case Intrinsic::umul_with_overflow:
  418. case Intrinsic::smul_with_overflow:
  419. if (isa<Constant>(II->getArgOperand(0)) &&
  420. !isa<Constant>(II->getArgOperand(1))) {
  421. // Canonicalize constants into the RHS.
  422. Value *LHS = II->getArgOperand(0);
  423. II->setArgOperand(0, II->getArgOperand(1));
  424. II->setArgOperand(1, LHS);
  425. return II;
  426. }
  427. // fall through
  428. case Intrinsic::usub_with_overflow:
  429. case Intrinsic::ssub_with_overflow: {
  430. OverflowCheckFlavor OCF =
  431. IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID());
  432. assert(OCF != OCF_INVALID && "unexpected!");
  433. Value *OperationResult = nullptr;
  434. Constant *OverflowResult = nullptr;
  435. if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1),
  436. *II, OperationResult, OverflowResult))
  437. return CreateOverflowTuple(II, OperationResult, OverflowResult);
  438. break;
  439. }
  440. case Intrinsic::minnum:
  441. case Intrinsic::maxnum: {
  442. Value *Arg0 = II->getArgOperand(0);
  443. Value *Arg1 = II->getArgOperand(1);
  444. // fmin(x, x) -> x
  445. if (Arg0 == Arg1)
  446. return ReplaceInstUsesWith(CI, Arg0);
  447. const ConstantFP *C0 = dyn_cast<ConstantFP>(Arg0);
  448. const ConstantFP *C1 = dyn_cast<ConstantFP>(Arg1);
  449. // Canonicalize constants into the RHS.
  450. if (C0 && !C1) {
  451. II->setArgOperand(0, Arg1);
  452. II->setArgOperand(1, Arg0);
  453. return II;
  454. }
  455. // fmin(x, nan) -> x
  456. if (C1 && C1->isNaN())
  457. return ReplaceInstUsesWith(CI, Arg0);
  458. // Returning the non-undef operand is correct because if the undef were NaN we
  459. // would return the other value anyway, and we cannot return NaN unless both operands are NaN.
  460. //
  461. // fmin(undef, x) -> x
  462. if (isa<UndefValue>(Arg0))
  463. return ReplaceInstUsesWith(CI, Arg1);
  464. // fmin(x, undef) -> x
  465. if (isa<UndefValue>(Arg1))
  466. return ReplaceInstUsesWith(CI, Arg0);
  467. Value *X = nullptr;
  468. Value *Y = nullptr;
  469. if (II->getIntrinsicID() == Intrinsic::minnum) {
  470. // fmin(x, fmin(x, y)) -> fmin(x, y)
  471. // fmin(y, fmin(x, y)) -> fmin(x, y)
  472. if (match(Arg1, m_FMin(m_Value(X), m_Value(Y)))) {
  473. if (Arg0 == X || Arg0 == Y)
  474. return ReplaceInstUsesWith(CI, Arg1);
  475. }
  476. // fmin(fmin(x, y), x) -> fmin(x, y)
  477. // fmin(fmin(x, y), y) -> fmin(x, y)
  478. if (match(Arg0, m_FMin(m_Value(X), m_Value(Y)))) {
  479. if (Arg1 == X || Arg1 == Y)
  480. return ReplaceInstUsesWith(CI, Arg0);
  481. }
  482. // TODO: fmin(nnan x, inf) -> x
  483. // TODO: fmin(nnan ninf x, flt_max) -> x
  484. if (C1 && C1->isInfinity()) {
  485. // fmin(x, -inf) -> -inf
  486. if (C1->isNegative())
  487. return ReplaceInstUsesWith(CI, Arg1);
  488. }
  489. } else {
  490. assert(II->getIntrinsicID() == Intrinsic::maxnum);
  491. // fmax(x, fmax(x, y)) -> fmax(x, y)
  492. // fmax(y, fmax(x, y)) -> fmax(x, y)
  493. if (match(Arg1, m_FMax(m_Value(X), m_Value(Y)))) {
  494. if (Arg0 == X || Arg0 == Y)
  495. return ReplaceInstUsesWith(CI, Arg1);
  496. }
  497. // fmax(fmax(x, y), x) -> fmax(x, y)
  498. // fmax(fmax(x, y), y) -> fmax(x, y)
  499. if (match(Arg0, m_FMax(m_Value(X), m_Value(Y)))) {
  500. if (Arg1 == X || Arg1 == Y)
  501. return ReplaceInstUsesWith(CI, Arg0);
  502. }
  503. // TODO: fmax(nnan x, -inf) -> x
  504. // TODO: fmax(nnan ninf x, -flt_max) -> x
  505. if (C1 && C1->isInfinity()) {
  506. // fmax(x, inf) -> inf
  507. if (!C1->isNegative())
  508. return ReplaceInstUsesWith(CI, Arg1);
  509. }
  510. }
  511. break;
  512. }
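// Rationale for the constant-NaN fold above: fminnum/fmaxnum return the
// non-NaN operand when exactly one operand is NaN, so a constant NaN argument
// can simply be dropped.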
  513. #if 0 // HLSL Change - remove platform intrinsics
  514. case Intrinsic::ppc_altivec_lvx:
  515. case Intrinsic::ppc_altivec_lvxl:
  516. // Turn PPC lvx -> load if the pointer is known aligned.
  517. if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
  518. 16) {
  519. Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
  520. PointerType::getUnqual(II->getType()));
  521. return new LoadInst(Ptr);
  522. }
  523. break;
  524. case Intrinsic::ppc_vsx_lxvw4x:
  525. case Intrinsic::ppc_vsx_lxvd2x: {
  526. // Turn PPC VSX loads into normal loads.
  527. Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
  528. PointerType::getUnqual(II->getType()));
  529. return new LoadInst(Ptr, Twine(""), false, 1);
  530. }
  531. case Intrinsic::ppc_altivec_stvx:
  532. case Intrinsic::ppc_altivec_stvxl:
  533. // Turn stvx -> store if the pointer is known aligned.
  534. if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, AC, DT) >=
  535. 16) {
  536. Type *OpPtrTy =
  537. PointerType::getUnqual(II->getArgOperand(0)->getType());
  538. Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
  539. return new StoreInst(II->getArgOperand(0), Ptr);
  540. }
  541. break;
  542. case Intrinsic::ppc_vsx_stxvw4x:
  543. case Intrinsic::ppc_vsx_stxvd2x: {
  544. // Turn PPC VSX stores into normal stores.
  545. Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType());
  546. Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
  547. return new StoreInst(II->getArgOperand(0), Ptr, false, 1);
  548. }
  549. case Intrinsic::ppc_qpx_qvlfs:
  550. // Turn PPC QPX qvlfs -> load if the pointer is known aligned.
  551. if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
  552. 16) {
  553. Type *VTy = VectorType::get(Builder->getFloatTy(),
  554. II->getType()->getVectorNumElements());
  555. Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
  556. PointerType::getUnqual(VTy));
  557. Value *Load = Builder->CreateLoad(Ptr);
  558. return new FPExtInst(Load, II->getType());
  559. }
  560. break;
  561. case Intrinsic::ppc_qpx_qvlfd:
  562. // Turn PPC QPX qvlfd -> load if the pointer is known aligned.
  563. if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, AC, DT) >=
  564. 32) {
  565. Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
  566. PointerType::getUnqual(II->getType()));
  567. return new LoadInst(Ptr);
  568. }
  569. break;
  570. case Intrinsic::ppc_qpx_qvstfs:
  571. // Turn PPC QPX qvstfs -> store if the pointer is known aligned.
  572. if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, AC, DT) >=
  573. 16) {
  574. Type *VTy = VectorType::get(Builder->getFloatTy(),
  575. II->getArgOperand(0)->getType()->getVectorNumElements());
  576. Value *TOp = Builder->CreateFPTrunc(II->getArgOperand(0), VTy);
  577. Type *OpPtrTy = PointerType::getUnqual(VTy);
  578. Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
  579. return new StoreInst(TOp, Ptr);
  580. }
  581. break;
  582. case Intrinsic::ppc_qpx_qvstfd:
  583. // Turn PPC QPX qvstfd -> store if the pointer is known aligned.
  584. if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, II, AC, DT) >=
  585. 32) {
  586. Type *OpPtrTy =
  587. PointerType::getUnqual(II->getArgOperand(0)->getType());
  588. Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
  589. return new StoreInst(II->getArgOperand(0), Ptr);
  590. }
  591. break;
  592. case Intrinsic::x86_sse_storeu_ps:
  593. case Intrinsic::x86_sse2_storeu_pd:
  594. case Intrinsic::x86_sse2_storeu_dq:
  595. // Turn X86 storeu -> store if the pointer is known aligned.
  596. if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
  597. 16) {
  598. Type *OpPtrTy =
  599. PointerType::getUnqual(II->getArgOperand(1)->getType());
  600. Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy);
  601. return new StoreInst(II->getArgOperand(1), Ptr);
  602. }
  603. break;
  604. case Intrinsic::x86_sse_cvtss2si:
  605. case Intrinsic::x86_sse_cvtss2si64:
  606. case Intrinsic::x86_sse_cvttss2si:
  607. case Intrinsic::x86_sse_cvttss2si64:
  608. case Intrinsic::x86_sse2_cvtsd2si:
  609. case Intrinsic::x86_sse2_cvtsd2si64:
  610. case Intrinsic::x86_sse2_cvttsd2si:
  611. case Intrinsic::x86_sse2_cvttsd2si64: {
  612. // These intrinsics only demand the 0th element of their input vectors. If
  613. // we can simplify the input based on that, do so now.
  614. unsigned VWidth =
  615. cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
  616. APInt DemandedElts(VWidth, 1);
  617. APInt UndefElts(VWidth, 0);
  618. if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0),
  619. DemandedElts, UndefElts)) {
  620. II->setArgOperand(0, V);
  621. return II;
  622. }
  623. break;
  624. }
  625. // Constant fold <A x Bi> << Ci.
  626. // FIXME: We don't handle _dq because it's a shift of an i128, but is
  627. // represented in the IR as <2 x i64>. A per element shift is wrong.
  628. case Intrinsic::x86_sse2_psll_d:
  629. case Intrinsic::x86_sse2_psll_q:
  630. case Intrinsic::x86_sse2_psll_w:
  631. case Intrinsic::x86_sse2_pslli_d:
  632. case Intrinsic::x86_sse2_pslli_q:
  633. case Intrinsic::x86_sse2_pslli_w:
  634. case Intrinsic::x86_avx2_psll_d:
  635. case Intrinsic::x86_avx2_psll_q:
  636. case Intrinsic::x86_avx2_psll_w:
  637. case Intrinsic::x86_avx2_pslli_d:
  638. case Intrinsic::x86_avx2_pslli_q:
  639. case Intrinsic::x86_avx2_pslli_w:
  640. case Intrinsic::x86_sse2_psrl_d:
  641. case Intrinsic::x86_sse2_psrl_q:
  642. case Intrinsic::x86_sse2_psrl_w:
  643. case Intrinsic::x86_sse2_psrli_d:
  644. case Intrinsic::x86_sse2_psrli_q:
  645. case Intrinsic::x86_sse2_psrli_w:
  646. case Intrinsic::x86_avx2_psrl_d:
  647. case Intrinsic::x86_avx2_psrl_q:
  648. case Intrinsic::x86_avx2_psrl_w:
  649. case Intrinsic::x86_avx2_psrli_d:
  650. case Intrinsic::x86_avx2_psrli_q:
  651. case Intrinsic::x86_avx2_psrli_w: {
  652. // If the shift count is constant, simplify to zero when the count is >= the
  653. // element bit width, and to a plain shl/lshr otherwise.
  654. auto CDV = dyn_cast<ConstantDataVector>(II->getArgOperand(1));
  655. auto CInt = dyn_cast<ConstantInt>(II->getArgOperand(1));
  656. if (!CDV && !CInt)
  657. break;
  658. ConstantInt *Count;
  659. if (CDV)
  660. Count = cast<ConstantInt>(CDV->getElementAsConstant(0));
  661. else
  662. Count = CInt;
  663. auto Vec = II->getArgOperand(0);
  664. auto VT = cast<VectorType>(Vec->getType());
  665. if (Count->getZExtValue() >
  666. VT->getElementType()->getPrimitiveSizeInBits() - 1)
  667. return ReplaceInstUsesWith(
  668. CI, ConstantAggregateZero::get(Vec->getType()));
  669. bool isPackedShiftLeft = true;
  670. switch (II->getIntrinsicID()) {
  671. default : break;
  672. case Intrinsic::x86_sse2_psrl_d:
  673. case Intrinsic::x86_sse2_psrl_q:
  674. case Intrinsic::x86_sse2_psrl_w:
  675. case Intrinsic::x86_sse2_psrli_d:
  676. case Intrinsic::x86_sse2_psrli_q:
  677. case Intrinsic::x86_sse2_psrli_w:
  678. case Intrinsic::x86_avx2_psrl_d:
  679. case Intrinsic::x86_avx2_psrl_q:
  680. case Intrinsic::x86_avx2_psrl_w:
  681. case Intrinsic::x86_avx2_psrli_d:
  682. case Intrinsic::x86_avx2_psrli_q:
  683. case Intrinsic::x86_avx2_psrli_w: isPackedShiftLeft = false; break;
  684. }
  685. unsigned VWidth = VT->getNumElements();
  686. // Get a constant vector of the same type as the first operand.
  687. auto VTCI = ConstantInt::get(VT->getElementType(), Count->getZExtValue());
  688. if (isPackedShiftLeft)
  689. return BinaryOperator::CreateShl(Vec,
  690. Builder->CreateVectorSplat(VWidth, VTCI));
  691. return BinaryOperator::CreateLShr(Vec,
  692. Builder->CreateVectorSplat(VWidth, VTCI));
  693. }
  694. case Intrinsic::x86_sse41_pmovsxbw:
  695. case Intrinsic::x86_sse41_pmovsxwd:
  696. case Intrinsic::x86_sse41_pmovsxdq:
  697. case Intrinsic::x86_sse41_pmovzxbw:
  698. case Intrinsic::x86_sse41_pmovzxwd:
  699. case Intrinsic::x86_sse41_pmovzxdq: {
  700. // pmov{s|z}x intrinsics ignore the upper half of their input vectors.
  701. unsigned VWidth =
  702. cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
  703. unsigned LowHalfElts = VWidth / 2;
  704. APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts));
  705. APInt UndefElts(VWidth, 0);
  706. if (Value *TmpV = SimplifyDemandedVectorElts(
  707. II->getArgOperand(0), InputDemandedElts, UndefElts)) {
  708. II->setArgOperand(0, TmpV);
  709. return II;
  710. }
  711. break;
  712. }
  713. case Intrinsic::x86_sse41_insertps:
  714. if (Value *V = SimplifyX86insertps(*II, *Builder))
  715. return ReplaceInstUsesWith(*II, V);
  716. break;
  717. case Intrinsic::x86_sse4a_insertqi: {
  718. // insertqi x, y, 64, 0 can just copy y's lower bits and leave the top
  719. // ones undef
  720. // TODO: eventually we should lower this intrinsic to IR
  721. if (auto CIWidth = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
  722. if (auto CIStart = dyn_cast<ConstantInt>(II->getArgOperand(3))) {
  723. unsigned Index = CIStart->getZExtValue();
  724. // From AMD documentation: "a value of zero in the field length is
  725. // defined as length of 64".
  726. unsigned Length = CIWidth->equalsInt(0) ? 64 : CIWidth->getZExtValue();
  727. // From AMD documentation: "If the sum of the bit index + length field
  728. // is greater than 64, the results are undefined".
  729. // Note that both field index and field length are 8-bit quantities.
  730. // Since variables 'Index' and 'Length' are unsigned values
  731. // obtained from zero-extending field index and field length
  732. // respectively, their sum should never wrap around.
  733. if ((Index + Length) > 64)
  734. return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
  735. if (Length == 64 && Index == 0) {
  736. Value *Vec = II->getArgOperand(1);
  737. Value *Undef = UndefValue::get(Vec->getType());
  738. const uint32_t Mask[] = { 0, 2 };
  739. return ReplaceInstUsesWith(
  740. CI,
  741. Builder->CreateShuffleVector(
  742. Vec, Undef, ConstantDataVector::get(
  743. II->getContext(), makeArrayRef(Mask))));
  744. } else if (auto Source =
  745. dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
  746. if (Source->hasOneUse() &&
  747. Source->getArgOperand(1) == II->getArgOperand(1)) {
  748. // If the source of the insert has only one use and it's another
  749. // insert (and they're both inserting from the same vector), try to
  750. // bundle both together.
  751. auto CISourceWidth =
  752. dyn_cast<ConstantInt>(Source->getArgOperand(2));
  753. auto CISourceStart =
  754. dyn_cast<ConstantInt>(Source->getArgOperand(3));
  755. if (CISourceStart && CISourceWidth) {
  756. unsigned Start = CIStart->getZExtValue();
  757. unsigned Width = CIWidth->getZExtValue();
  758. unsigned End = Start + Width;
  759. unsigned SourceStart = CISourceStart->getZExtValue();
  760. unsigned SourceWidth = CISourceWidth->getZExtValue();
  761. unsigned SourceEnd = SourceStart + SourceWidth;
  762. unsigned NewStart, NewWidth;
  763. bool ShouldReplace = false;
  764. if (Start <= SourceStart && SourceStart <= End) {
  765. NewStart = Start;
  766. NewWidth = std::max(End, SourceEnd) - NewStart;
  767. ShouldReplace = true;
  768. } else if (SourceStart <= Start && Start <= SourceEnd) {
  769. NewStart = SourceStart;
  770. NewWidth = std::max(SourceEnd, End) - NewStart;
  771. ShouldReplace = true;
  772. }
  773. if (ShouldReplace) {
  774. Constant *ConstantWidth = ConstantInt::get(
  775. II->getArgOperand(2)->getType(), NewWidth, false);
  776. Constant *ConstantStart = ConstantInt::get(
  777. II->getArgOperand(3)->getType(), NewStart, false);
  778. Value *Args[4] = { Source->getArgOperand(0),
  779. II->getArgOperand(1), ConstantWidth,
  780. ConstantStart };
  781. Module *M = CI.getParent()->getParent()->getParent();
  782. Value *F =
  783. Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
  784. return ReplaceInstUsesWith(CI, Builder->CreateCall(F, Args));
  785. }
  786. }
  787. }
  788. }
  789. }
  790. }
  791. break;
  792. }
  793. case Intrinsic::x86_sse41_pblendvb:
  794. case Intrinsic::x86_sse41_blendvps:
  795. case Intrinsic::x86_sse41_blendvpd:
  796. case Intrinsic::x86_avx_blendv_ps_256:
  797. case Intrinsic::x86_avx_blendv_pd_256:
  798. case Intrinsic::x86_avx2_pblendvb: {
  799. // Convert blendv* to vector selects if the mask is constant.
  800. // This optimization is convoluted because the intrinsic is defined as
  801. // getting a vector of floats or doubles for the ps and pd versions.
  802. // FIXME: That should be changed.
  803. Value *Mask = II->getArgOperand(2);
  804. if (auto C = dyn_cast<ConstantDataVector>(Mask)) {
  805. auto Tyi1 = Builder->getInt1Ty();
  806. auto SelectorType = cast<VectorType>(Mask->getType());
  807. auto EltTy = SelectorType->getElementType();
  808. unsigned Size = SelectorType->getNumElements();
  809. unsigned BitWidth =
  810. EltTy->isFloatTy()
  811. ? 32
  812. : (EltTy->isDoubleTy() ? 64 : EltTy->getIntegerBitWidth());
  813. assert((BitWidth == 64 || BitWidth == 32 || BitWidth == 8) &&
  814. "Wrong arguments for variable blend intrinsic");
  815. SmallVector<Constant *, 32> Selectors;
  816. for (unsigned I = 0; I < Size; ++I) {
  817. // The intrinsics only read the top bit
  818. uint64_t Selector;
  819. if (BitWidth == 8)
  820. Selector = C->getElementAsInteger(I);
  821. else
  822. Selector = C->getElementAsAPFloat(I).bitcastToAPInt().getZExtValue();
  823. Selectors.push_back(ConstantInt::get(Tyi1, Selector >> (BitWidth - 1)));
  824. }
  825. auto NewSelector = ConstantVector::get(Selectors);
  826. return SelectInst::Create(NewSelector, II->getArgOperand(1),
  827. II->getArgOperand(0), "blendv");
  828. } else {
  829. break;
  830. }
  831. }
  832. case Intrinsic::x86_avx_vpermilvar_ps:
  833. case Intrinsic::x86_avx_vpermilvar_ps_256:
  834. case Intrinsic::x86_avx_vpermilvar_pd:
  835. case Intrinsic::x86_avx_vpermilvar_pd_256: {
  836. // Convert vpermil* to shufflevector if the mask is constant.
  837. Value *V = II->getArgOperand(1);
  838. unsigned Size = cast<VectorType>(V->getType())->getNumElements();
  839. assert(Size == 8 || Size == 4 || Size == 2);
  840. uint32_t Indexes[8];
  841. if (auto C = dyn_cast<ConstantDataVector>(V)) {
  842. // The intrinsics only read one or two bits, clear the rest.
  843. for (unsigned I = 0; I < Size; ++I) {
  844. uint32_t Index = C->getElementAsInteger(I) & 0x3;
  845. if (II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd ||
  846. II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256)
  847. Index >>= 1;
  848. Indexes[I] = Index;
  849. }
  850. } else if (isa<ConstantAggregateZero>(V)) {
  851. for (unsigned I = 0; I < Size; ++I)
  852. Indexes[I] = 0;
  853. } else {
  854. break;
  855. }
  856. // The _256 variants are a bit trickier since the mask bits always index
  857. // into the corresponding 128-bit half. In order to convert to a generic
  858. // shuffle, we have to make that explicit.
  859. if (II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_ps_256 ||
  860. II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256) {
  861. for (unsigned I = Size / 2; I < Size; ++I)
  862. Indexes[I] += Size / 2;
  863. }
  864. auto NewC =
  865. ConstantDataVector::get(V->getContext(), makeArrayRef(Indexes, Size));
  866. auto V1 = II->getArgOperand(0);
  867. auto V2 = UndefValue::get(V1->getType());
  868. auto Shuffle = Builder->CreateShuffleVector(V1, V2, NewC);
  869. return ReplaceInstUsesWith(CI, Shuffle);
  870. }
  871. case Intrinsic::x86_avx_vperm2f128_pd_256:
  872. case Intrinsic::x86_avx_vperm2f128_ps_256:
  873. case Intrinsic::x86_avx_vperm2f128_si_256:
  874. case Intrinsic::x86_avx2_vperm2i128:
  875. if (Value *V = SimplifyX86vperm2(*II, *Builder))
  876. return ReplaceInstUsesWith(*II, V);
  877. break;
  878. case Intrinsic::ppc_altivec_vperm:
  879. // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
  880. // Note that ppc_altivec_vperm has a big-endian bias, so when creating
  881. // a vectorshuffle for little endian, we must undo the transformation
  882. // performed on vec_perm in altivec.h. That is, we must complement
  883. // the permutation mask with respect to 31 and reverse the order of
  884. // V1 and V2.
  885. if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
  886. assert(Mask->getType()->getVectorNumElements() == 16 &&
  887. "Bad type for intrinsic!");
  888. // Check that all of the elements are integer constants or undefs.
  889. bool AllEltsOk = true;
  890. for (unsigned i = 0; i != 16; ++i) {
  891. Constant *Elt = Mask->getAggregateElement(i);
  892. if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
  893. AllEltsOk = false;
  894. break;
  895. }
  896. }
  897. if (AllEltsOk) {
  898. // Cast the input vectors to byte vectors.
  899. Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0),
  900. Mask->getType());
  901. Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1),
  902. Mask->getType());
  903. Value *Result = UndefValue::get(Op0->getType());
  904. // Only extract each element once.
  905. Value *ExtractedElts[32];
  906. memset(ExtractedElts, 0, sizeof(ExtractedElts));
  907. for (unsigned i = 0; i != 16; ++i) {
  908. if (isa<UndefValue>(Mask->getAggregateElement(i)))
  909. continue;
  910. unsigned Idx =
  911. cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
  912. Idx &= 31; // Match the hardware behavior.
  913. if (DL.isLittleEndian())
  914. Idx = 31 - Idx;
  915. if (!ExtractedElts[Idx]) {
  916. Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
  917. Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
  918. ExtractedElts[Idx] =
  919. Builder->CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse,
  920. Builder->getInt32(Idx&15));
  921. }
  922. // Insert this value into the result vector.
  923. Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
  924. Builder->getInt32(i));
  925. }
  926. return CastInst::Create(Instruction::BitCast, Result, CI.getType());
  927. }
  928. }
  929. break;
  930. case Intrinsic::arm_neon_vld1:
  931. case Intrinsic::arm_neon_vld2:
  932. case Intrinsic::arm_neon_vld3:
  933. case Intrinsic::arm_neon_vld4:
  934. case Intrinsic::arm_neon_vld2lane:
  935. case Intrinsic::arm_neon_vld3lane:
  936. case Intrinsic::arm_neon_vld4lane:
  937. case Intrinsic::arm_neon_vst1:
  938. case Intrinsic::arm_neon_vst2:
  939. case Intrinsic::arm_neon_vst3:
  940. case Intrinsic::arm_neon_vst4:
  941. case Intrinsic::arm_neon_vst2lane:
  942. case Intrinsic::arm_neon_vst3lane:
  943. case Intrinsic::arm_neon_vst4lane: {
  944. unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL, II, AC, DT);
  945. unsigned AlignArg = II->getNumArgOperands() - 1;
  946. ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
  947. if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
  948. II->setArgOperand(AlignArg,
  949. ConstantInt::get(Type::getInt32Ty(II->getContext()),
  950. MemAlign, false));
  951. return II;
  952. }
  953. break;
  954. }
  955. case Intrinsic::arm_neon_vmulls:
  956. case Intrinsic::arm_neon_vmullu:
  957. case Intrinsic::aarch64_neon_smull:
  958. case Intrinsic::aarch64_neon_umull: {
  959. Value *Arg0 = II->getArgOperand(0);
  960. Value *Arg1 = II->getArgOperand(1);
  961. // Handle mul by zero first:
  962. if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
  963. return ReplaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
  964. }
  965. // Check for constant LHS & RHS - in this case we just simplify.
  966. bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu ||
  967. II->getIntrinsicID() == Intrinsic::aarch64_neon_umull);
  968. VectorType *NewVT = cast<VectorType>(II->getType());
  969. if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
  970. if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
  971. CV0 = ConstantExpr::getIntegerCast(CV0, NewVT, /*isSigned=*/!Zext);
  972. CV1 = ConstantExpr::getIntegerCast(CV1, NewVT, /*isSigned=*/!Zext);
  973. return ReplaceInstUsesWith(CI, ConstantExpr::getMul(CV0, CV1));
  974. }
  975. // Couldn't simplify - canonicalize constant to the RHS.
  976. std::swap(Arg0, Arg1);
  977. }
  978. // Handle mul by one:
  979. if (Constant *CV1 = dyn_cast<Constant>(Arg1))
  980. if (ConstantInt *Splat =
  981. dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
  982. if (Splat->isOne())
  983. return CastInst::CreateIntegerCast(Arg0, II->getType(),
  984. /*isSigned=*/!Zext);
  985. break;
  986. }
  987. case Intrinsic::AMDGPU_rcp: {
  988. if (const ConstantFP *C = dyn_cast<ConstantFP>(II->getArgOperand(0))) {
  989. const APFloat &ArgVal = C->getValueAPF();
  990. APFloat Val(ArgVal.getSemantics(), 1.0);
  991. APFloat::opStatus Status = Val.divide(ArgVal,
  992. APFloat::rmNearestTiesToEven);
  993. // Only do this if it was exact and therefore not dependent on the
  994. // rounding mode.
  995. if (Status == APFloat::opOK)
  996. return ReplaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val));
  997. }
  998. break;
  999. }
  1000. #endif // HLSL Change - remove platform intrinsics
  1001. case Intrinsic::stackrestore: {
  1002. // If the save is right next to the restore, remove the restore. This can
  1003. // happen when variable allocas are DCE'd.
  1004. if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
  1005. if (SS->getIntrinsicID() == Intrinsic::stacksave) {
  1006. BasicBlock::iterator BI = SS;
  1007. if (&*++BI == II)
  1008. return EraseInstFromFunction(CI);
  1009. }
  1010. }
  1011. // Scan down this block to see if there is another stack restore in the
  1012. // same block without an intervening call/alloca.
  1013. BasicBlock::iterator BI = II;
  1014. TerminatorInst *TI = II->getParent()->getTerminator();
  1015. bool CannotRemove = false;
  1016. for (++BI; &*BI != TI; ++BI) {
  1017. if (isa<AllocaInst>(BI)) {
  1018. CannotRemove = true;
  1019. break;
  1020. }
  1021. if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
  1022. if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
  1023. // If there is a stackrestore below this one, remove this one.
  1024. if (II->getIntrinsicID() == Intrinsic::stackrestore)
  1025. return EraseInstFromFunction(CI);
  1026. // Otherwise, ignore the intrinsic.
  1027. } else {
  1028. // If we found a non-intrinsic call, we can't remove the stack
  1029. // restore.
  1030. CannotRemove = true;
  1031. break;
  1032. }
  1033. }
  1034. }
  1035. // If the stack restore is in a return, resume, or unwind block and if there
  1036. // are no allocas or calls between the restore and the return, nuke the
  1037. // restore.
  1038. if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
  1039. return EraseInstFromFunction(CI);
  1040. break;
  1041. }
  1042. case Intrinsic::assume: {
  1043. // Canonicalize assume(a && b) -> assume(a); assume(b);
  1044. // Note: New assumption intrinsics created here are registered by
  1045. // the InstCombineIRInserter object.
  1046. Value *IIOperand = II->getArgOperand(0), *A, *B,
  1047. *AssumeIntrinsic = II->getCalledValue();
  1048. if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) {
  1049. Builder->CreateCall(AssumeIntrinsic, A, II->getName());
  1050. Builder->CreateCall(AssumeIntrinsic, B, II->getName());
  1051. return EraseInstFromFunction(*II);
  1052. }
  1053. // assume(!(a || b)) -> assume(!a); assume(!b);
  1054. if (match(IIOperand, m_Not(m_Or(m_Value(A), m_Value(B))))) {
  1055. Builder->CreateCall(AssumeIntrinsic, Builder->CreateNot(A),
  1056. II->getName());
  1057. Builder->CreateCall(AssumeIntrinsic, Builder->CreateNot(B),
  1058. II->getName());
  1059. return EraseInstFromFunction(*II);
  1060. }
  1061. // assume( (load addr) != null ) -> add 'nonnull' metadata to load
  1062. // (if assume is valid at the load)
  1063. if (ICmpInst* ICmp = dyn_cast<ICmpInst>(IIOperand)) {
  1064. Value *LHS = ICmp->getOperand(0);
  1065. Value *RHS = ICmp->getOperand(1);
  1066. if (ICmpInst::ICMP_NE == ICmp->getPredicate() &&
  1067. isa<LoadInst>(LHS) &&
  1068. isa<Constant>(RHS) &&
  1069. RHS->getType()->isPointerTy() &&
  1070. cast<Constant>(RHS)->isNullValue()) {
  1071. LoadInst* LI = cast<LoadInst>(LHS);
  1072. if (isValidAssumeForContext(II, LI, DT)) {
  1073. MDNode *MD = MDNode::get(II->getContext(), None);
  1074. LI->setMetadata(LLVMContext::MD_nonnull, MD);
  1075. return EraseInstFromFunction(*II);
  1076. }
  1077. }
  1078. // TODO: apply nonnull return attributes to calls and invokes
  1079. // TODO: apply range metadata for range check patterns?
  1080. }
  1081. // If there is a dominating assume with the same condition as this one,
  1082. // then this one is redundant, and should be removed.
  1083. APInt KnownZero(1, 0), KnownOne(1, 0);
  1084. computeKnownBits(IIOperand, KnownZero, KnownOne, 0, II);
  1085. if (KnownOne.isAllOnesValue())
  1086. return EraseInstFromFunction(*II);
  1087. break;
  1088. }
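// For example, a second assume(%cond) that is dominated by an identical
// assume folds away here: computeKnownBits sees the earlier assumption and
// reports %cond as known-one.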
  1089. case Intrinsic::experimental_gc_relocate: {
  1090. // Translate facts known about a pointer before relocating into
  1091. // facts about the relocate value, while being careful to
  1092. // preserve relocation semantics.
  1093. GCRelocateOperands Operands(II);
  1094. Value *DerivedPtr = Operands.getDerivedPtr();
  1095. auto *GCRelocateType = cast<PointerType>(II->getType());
  1096. // Remove the relocation if unused; note that this check is required
  1097. // to prevent the cases below from looping forever.
  1098. if (II->use_empty())
  1099. return EraseInstFromFunction(*II);
  1100. // Undef is undef, even after relocation.
  1101. // TODO: provide a hook for this in GCStrategy. This is clearly legal for
  1102. // most practical collectors, but there was discussion in the review thread
  1103. // about whether it was legal for all possible collectors.
  1104. if (isa<UndefValue>(DerivedPtr)) {
  1105. // gc_relocate is uncasted. Use undef of gc_relocate's type to replace it.
  1106. return ReplaceInstUsesWith(*II, UndefValue::get(GCRelocateType));
  1107. }
  1108. // The relocation of null will be null for most any collector.
  1109. // TODO: provide a hook for this in GCStrategy. There might be some weird
  1110. // collector this property does not hold for.
  1111. if (isa<ConstantPointerNull>(DerivedPtr)) {
  1112. // gc_relocate is uncasted. Use null-pointer of gc_relocate's type to replace it.
  1113. return ReplaceInstUsesWith(*II, ConstantPointerNull::get(GCRelocateType));
  1114. }
  1115. // isKnownNonNull -> nonnull attribute
  1116. if (isKnownNonNull(DerivedPtr))
  1117. II->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);

    // isDereferenceablePointer -> deref attribute
    if (isDereferenceablePointer(DerivedPtr, DL)) {
      if (Argument *A = dyn_cast<Argument>(DerivedPtr)) {
        uint64_t Bytes = A->getDereferenceableBytes();
        II->addDereferenceableAttr(AttributeSet::ReturnIndex, Bytes);
      }
    }

    // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
    // Canonicalize on the type from the uses to the defs

    // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
  }
  }

  return visitCallSite(II);
}

// InvokeInst simplification
//
Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
  return visitCallSite(&II);
}

/// isSafeToEliminateVarargsCast - If this cast does not affect the value
/// passed through the varargs area, we can eliminate the use of the cast.
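/// For example (illustrative): a lossless bitcast of an i32* argument to i8*
/// that is only passed through the varargs area can be dropped and the
/// original i32* passed instead, since the bits that reach the callee are
/// unchanged.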
static bool isSafeToEliminateVarargsCast(const CallSite CS,
                                         const DataLayout &DL,
                                         const CastInst *const CI,
                                         const int ix) {
  if (!CI->isLosslessCast())
    return false;

  // If this is a GC intrinsic, avoid munging types. We need types for
  // statepoint reconstruction in SelectionDAG.
  // TODO: This is probably something which should be expanded to all
  // intrinsics since the entire point of intrinsics is that
  // they are understandable by the optimizer.
  if (isStatepoint(CS) || isGCRelocate(CS) || isGCResult(CS))
    return false;

  // The size of ByVal or InAlloca arguments is derived from the type, so we
  // can't change to a type with a different size. If the size were
  // passed explicitly we could avoid this check.
  if (!CS.isByValOrInAllocaArgument(ix))
    return true;

  Type* SrcTy =
      cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
  Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
  if (!SrcTy->isSized() || !DstTy->isSized())
    return false;
  if (DL.getTypeAllocSize(SrcTy) != DL.getTypeAllocSize(DstTy))
    return false;

  return true;
}

// Try to fold some different types of calls here.
// Currently we're only working with the checking functions, memcpy_chk,
// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
// strcat_chk and strncat_chk.
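//
// For instance (illustrative, assuming the usual fortified-libcall
// semantics): a call to __memcpy_chk(dst, src, len, objsize) whose object
// size is known to be large enough may be simplified by the LibCallSimplifier
// into a plain memcpy.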
Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) {
  if (!CI->getCalledFunction()) return nullptr;

  auto InstCombineRAUW = [this](Instruction *From, Value *With) {
    ReplaceInstUsesWith(*From, With);
  };
  LibCallSimplifier Simplifier(DL, TLI, InstCombineRAUW);
  if (Value *With = Simplifier.optimizeCall(CI)) {
    ++NumSimplified;
    return CI->use_empty() ? CI : ReplaceInstUsesWith(*CI, With);
  }

  return nullptr;
}
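
// FindInitTrampolineFromAlloca - Given the memory backing a trampoline
// (expected to be an alloca, possibly behind a single pointer cast), return
// the unique llvm.init.trampoline call that writes to it, or null if the
// users of that memory don't match the expected pattern.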
static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) {
  // Strip off at most one level of pointer casts, looking for an alloca. This
  // is good enough in practice and simpler than handling any number of casts.
  Value *Underlying = TrampMem->stripPointerCasts();
  if (Underlying != TrampMem &&
      (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
    return nullptr;
  if (!isa<AllocaInst>(Underlying))
    return nullptr;

  IntrinsicInst *InitTrampoline = nullptr;
  for (User *U : TrampMem->users()) {
    IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
    if (!II)
      return nullptr;
    if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
      if (InitTrampoline)
        // More than one init_trampoline writes to this value. Give up.
        return nullptr;
      InitTrampoline = II;
      continue;
    }
    if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
      // Allow any number of calls to adjust.trampoline.
      continue;
    return nullptr;
  }

  // No call to init.trampoline found.
  if (!InitTrampoline)
    return nullptr;

  // Check that the alloca is being used in the expected way.
  if (InitTrampoline->getOperand(0) != TrampMem)
    return nullptr;

  return InitTrampoline;
}
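
// FindInitTrampolineFromBB - Scan backwards from AdjustTramp within its basic
// block for the llvm.init.trampoline call that initializes TrampMem, giving
// up at the first instruction that may write to memory.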
static IntrinsicInst *FindInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
                                               Value *TrampMem) {
  // Visit all the previous instructions in the basic block, and try to find an
  // init.trampoline which has a direct path to the adjust.trampoline.
  for (BasicBlock::iterator I = AdjustTramp,
       E = AdjustTramp->getParent()->begin(); I != E; ) {
    Instruction *Inst = --I;
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
      if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
          II->getOperand(0) == TrampMem)
        return II;
    if (Inst->mayWriteToMemory())
      return nullptr;
  }
  return nullptr;
}

// Given a call to llvm.adjust.trampoline, find and return the corresponding
// call to llvm.init.trampoline if the call to the trampoline can be optimized
// to a direct call to a function. Otherwise return NULL.
//
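// The pattern looked for is, roughly (illustrative; casts omitted, trampoline
// size is target-dependent):
//   %tramp = alloca [N x i8]
//   call void @llvm.init.trampoline(i8* %tramp, i8* @f, i8* %nest_val)
//   %adj = call i8* @llvm.adjust.trampoline(i8* %tramp)
//   call ... %adj(...)       ; Callee examined here
//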
static IntrinsicInst *FindInitTrampoline(Value *Callee) {
  Callee = Callee->stripPointerCasts();
  IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
  if (!AdjustTramp ||
      AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
    return nullptr;

  Value *TrampMem = AdjustTramp->getOperand(0);

  if (IntrinsicInst *IT = FindInitTrampolineFromAlloca(TrampMem))
    return IT;
  if (IntrinsicInst *IT = FindInitTrampolineFromBB(AdjustTramp, TrampMem))
    return IT;
  return nullptr;
}

// visitCallSite - Improvements for call and invoke instructions.
//
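// Among other things, this marks known-non-null arguments with the nonnull
// attribute, turns calls through mismatched or null callees into unreachable
// code, rewrites trampoline calls into direct calls, drops lossless casts of
// varargs arguments, and hands library calls to the LibCallSimplifier.
//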
Instruction *InstCombiner::visitCallSite(CallSite CS) {
  if (isAllocLikeFn(CS.getInstruction(), TLI))
    return visitAllocSite(*CS.getInstruction());

  bool Changed = false;

  // Mark any parameters that are known to be non-null with the nonnull
  // attribute. This is helpful for inlining calls to functions with null
  // checks on their arguments.
  unsigned ArgNo = 0;
  for (Value *V : CS.args()) {
    if (!CS.paramHasAttr(ArgNo+1, Attribute::NonNull) &&
        isKnownNonNull(V)) {
      AttributeSet AS = CS.getAttributes();
      AS = AS.addAttribute(CS.getInstruction()->getContext(), ArgNo+1,
                           Attribute::NonNull);
      CS.setAttributes(AS);
      Changed = true;
    }
    ArgNo++;
  }
  assert(ArgNo == CS.arg_size() && "sanity check");

  // If the callee is a pointer to a function, attempt to move any casts to the
  // arguments of the call/invoke.
  Value *Callee = CS.getCalledValue();
  if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
    return nullptr;

  if (Function *CalleeF = dyn_cast<Function>(Callee))
    // If the call and callee calling conventions don't match, this call must
    // be unreachable, as the call is undefined.
    if (CalleeF->getCallingConv() != CS.getCallingConv() &&
        // Only do this for calls to a function with a body. A prototype may
        // not actually end up matching the implementation's calling conv for a
        // variety of reasons (e.g. it may be written in assembly).
        !CalleeF->isDeclaration()) {
      Instruction *OldCall = CS.getInstruction();
      new StoreInst(ConstantInt::getTrue(Callee->getContext()),
                    UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
                    OldCall);
      // If OldCall does not return void then replaceAllUsesWith undef.
      // This allows ValueHandlers and custom metadata to adjust themselves.
      if (!OldCall->getType()->isVoidTy())
        ReplaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
      if (isa<CallInst>(OldCall))
        return EraseInstFromFunction(*OldCall);

      // We cannot remove an invoke, because it would change the CFG, just
      // change the callee to a null pointer.
      cast<InvokeInst>(OldCall)->setCalledFunction(
                                    Constant::getNullValue(CalleeF->getType()));
      return nullptr;
    }

  if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
    // If CS does not return void then replaceAllUsesWith undef.
    // This allows ValueHandlers and custom metadata to adjust themselves.
    if (!CS.getInstruction()->getType()->isVoidTy())
      ReplaceInstUsesWith(*CS.getInstruction(),
                          UndefValue::get(CS.getInstruction()->getType()));

    if (isa<InvokeInst>(CS.getInstruction())) {
      // Can't remove an invoke because we cannot change the CFG.
      return nullptr;
    }

    // This instruction is not reachable, just remove it. We insert a store to
    // undef so that we know that this code is not reachable, despite the fact
    // that we can't modify the CFG here.
    new StoreInst(ConstantInt::getTrue(Callee->getContext()),
                  UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
                  CS.getInstruction());

    return EraseInstFromFunction(*CS.getInstruction());
  }

  if (IntrinsicInst *II = FindInitTrampoline(Callee))
    return transformCallThroughTrampoline(CS, II);

  PointerType *PTy = cast<PointerType>(Callee->getType());
  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
  if (FTy->isVarArg()) {
    int ix = FTy->getNumParams();
    // See if we can optimize any arguments passed through the varargs area of
    // the call.
    for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(),
           E = CS.arg_end(); I != E; ++I, ++ix) {
      CastInst *CI = dyn_cast<CastInst>(*I);
      if (CI && isSafeToEliminateVarargsCast(CS, DL, CI, ix)) {
        *I = CI->getOperand(0);
        Changed = true;
      }
    }
  }

  if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
    // Inline asm calls cannot throw - mark them 'nounwind'.
    CS.setDoesNotThrow();
    Changed = true;
  }

  // Try to optimize the call if possible; we require DataLayout for most of
  // this. None of these calls are seen as possibly dead so go ahead and
  // delete the instruction now.
  if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
    Instruction *I = tryOptimizeCall(CI);
    // If we changed something, return the result; otherwise fall through to
    // the code below.
    if (I) return EraseInstFromFunction(*I);
  }

  return Changed ? CS.getInstruction() : nullptr;
}

// transformConstExprCastCall - If the callee is a constexpr cast of a function,
// attempt to move the cast to the arguments of the call/invoke.
//
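// For example (illustrative), given
//   declare i8* @f(i8*)
//   %r = call i32* bitcast (i8* (i8*)* @f to i32* (i32*)*)(i32* %p)
// the call becomes a direct call to @f with %p bitcast to i8* and the result
// bitcast back to i32*, provided all attributes remain compatible.
//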
bool InstCombiner::transformConstExprCastCall(CallSite CS) {
  Function *Callee =
    dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
  if (!Callee)
    return false;

  // The prototype of a thunk is a lie; don't try to directly call such a
  // function.
  if (Callee->hasFnAttribute("thunk"))
    return false;

  Instruction *Caller = CS.getInstruction();
  const AttributeSet &CallerPAL = CS.getAttributes();

  // Okay, this is a cast from a function to a different type. Unless doing so
  // would cause a type conversion of one of our arguments, change this call to
  // be a direct call with arguments casted to the appropriate types.
  //
  FunctionType *FT = Callee->getFunctionType();
  Type *OldRetTy = Caller->getType();
  Type *NewRetTy = FT->getReturnType();

  // Check to see if we are changing the return type...
  if (OldRetTy != NewRetTy) {

    if (NewRetTy->isStructTy())
      return false; // TODO: Handle multiple return values.

    if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
      if (Callee->isDeclaration())
        return false;   // Cannot transform this return value.

      if (!Caller->use_empty() &&
          // void -> non-void is handled specially
          !NewRetTy->isVoidTy())
        return false;   // Cannot transform this return value.
    }

    if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
      AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);
      if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy)))
        return false;   // Attribute not compatible with transformed value.
    }

    // If the callsite is an invoke instruction, and the return value is used by
    // a PHI node in a successor, we cannot change the return type of the call
    // because there is no place to put the cast instruction (without breaking
    // the critical edge). Bail out in this case.
    if (!Caller->use_empty())
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
        for (User *U : II->users())
          if (PHINode *PN = dyn_cast<PHINode>(U))
            if (PN->getParent() == II->getNormalDest() ||
                PN->getParent() == II->getUnwindDest())
              return false;
  }

  unsigned NumActualArgs = CS.arg_size();
  unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);

  // Prevent us turning:
  //   declare void @takes_i32_inalloca(i32* inalloca)
  //   call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
  //
  // into:
  //   call void @takes_i32_inalloca(i32* null)
  //
  // Similarly, avoid folding away bitcasts of byval calls.
  if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
      Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
    return false;

  CallSite::arg_iterator AI = CS.arg_begin();
  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
    Type *ParamTy = FT->getParamType(i);
    Type *ActTy = (*AI)->getType();

    if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
      return false;   // Cannot transform this parameter value.

    if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1).
          overlaps(AttributeFuncs::typeIncompatible(ParamTy)))
      return false;   // Attribute not compatible with transformed value.

    if (CS.isInAllocaArgument(i))
      return false;   // Cannot transform to and from inalloca.

    // If the parameter is passed as a byval argument, then we have to have a
    // sized type and the sized type has to have the same size as the old type.
    if (ParamTy != ActTy &&
        CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1,
                                                         Attribute::ByVal)) {
      PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
      if (!ParamPTy || !ParamPTy->getElementType()->isSized())
        return false;

      Type *CurElTy = ActTy->getPointerElementType();
      if (DL.getTypeAllocSize(CurElTy) !=
          DL.getTypeAllocSize(ParamPTy->getElementType()))
        return false;
    }
  }

  if (Callee->isDeclaration()) {
    // Do not delete arguments unless we have a function body.
    if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
      return false;

    // If the callee is just a declaration, don't change the varargsness of the
    // call. We don't want to introduce a varargs call where one doesn't
    // already exist.
    PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
    if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
      return false;

    // If both the callee and the cast type are varargs, we still have to make
    // sure the number of fixed parameters are the same or we have the same
    // ABI issues as if we introduce a varargs call.
    if (FT->isVarArg() &&
        cast<FunctionType>(APTy->getElementType())->isVarArg() &&
        FT->getNumParams() !=
        cast<FunctionType>(APTy->getElementType())->getNumParams())
      return false;
  }

  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
      !CallerPAL.isEmpty())
    // In this case we have more arguments than the new function type, but we
    // won't be dropping them. Check that these extra arguments have attributes
    // that are compatible with being a vararg call argument.
    for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
      unsigned Index = CallerPAL.getSlotIndex(i - 1);
      if (Index <= FT->getNumParams())
        break;

      // Check if it has an attribute that's incompatible with varargs.
      AttributeSet PAttrs = CallerPAL.getSlotAttributes(i - 1);
      if (PAttrs.hasAttribute(Index, Attribute::StructRet))
        return false;
    }

  // Okay, we decided that this is a safe thing to do: go ahead and start
  // inserting cast instructions as necessary.
  std::vector<Value*> Args;
  Args.reserve(NumActualArgs);
  SmallVector<AttributeSet, 8> attrVec;
  attrVec.reserve(NumCommonArgs);

  // Get any return attributes.
  AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);

  // If the return value is not being used, the type may not be compatible
  // with the existing attributes. Wipe out any problematic attributes.
  RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));

  // Add the new return attributes.
  if (RAttrs.hasAttributes())
    attrVec.push_back(AttributeSet::get(Caller->getContext(),
                                        AttributeSet::ReturnIndex, RAttrs));

  AI = CS.arg_begin();
  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
    Type *ParamTy = FT->getParamType(i);

    if ((*AI)->getType() == ParamTy) {
      Args.push_back(*AI);
    } else {
      Args.push_back(Builder->CreateBitOrPointerCast(*AI, ParamTy));
    }

    // Add any parameter attributes.
    AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1);
    if (PAttrs.hasAttributes())
      attrVec.push_back(AttributeSet::get(Caller->getContext(), i + 1,
                                          PAttrs));
  }

  // If the function takes more arguments than the call was taking, add them
  // now.
  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
    Args.push_back(Constant::getNullValue(FT->getParamType(i)));

  // If we are removing arguments to the function, emit an obnoxious warning.
  if (FT->getNumParams() < NumActualArgs) {
    // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
    if (FT->isVarArg()) {
      // Add all of the arguments in their promoted form to the arg list.
      for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
        Type *PTy = getPromotedType((*AI)->getType());
        if (PTy != (*AI)->getType()) {
          // Must promote to pass through va_arg area!
          Instruction::CastOps opcode =
            CastInst::getCastOpcode(*AI, false, PTy, false);
          Args.push_back(Builder->CreateCast(opcode, *AI, PTy));
        } else {
          Args.push_back(*AI);
        }

        // Add any parameter attributes.
        AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1);
        if (PAttrs.hasAttributes())
          attrVec.push_back(AttributeSet::get(FT->getContext(), i + 1,
                                              PAttrs));
      }
    }
  }

  AttributeSet FnAttrs = CallerPAL.getFnAttributes();
  if (CallerPAL.hasAttributes(AttributeSet::FunctionIndex))
    attrVec.push_back(AttributeSet::get(Callee->getContext(), FnAttrs));

  if (NewRetTy->isVoidTy())
    Caller->setName("");   // Void type should not have a name.

  const AttributeSet &NewCallerPAL = AttributeSet::get(Callee->getContext(),
                                                       attrVec);

  Instruction *NC;
  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
    NC = Builder->CreateInvoke(Callee, II->getNormalDest(),
                               II->getUnwindDest(), Args);
    NC->takeName(II);
    cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
    cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
  } else {
    CallInst *CI = cast<CallInst>(Caller);
    NC = Builder->CreateCall(Callee, Args);
    NC->takeName(CI);
    if (CI->isTailCall())
      cast<CallInst>(NC)->setTailCall();
    cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
    cast<CallInst>(NC)->setAttributes(NewCallerPAL);
  }

  // Insert a cast of the return type as necessary.
  Value *NV = NC;
  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
    if (!NV->getType()->isVoidTy()) {
      NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
      NC->setDebugLoc(Caller->getDebugLoc());

      // If this is an invoke instruction, we should insert it after the first
      // non-phi instruction in the normal successor block.
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
        BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt();
        InsertNewInstBefore(NC, *I);
      } else {
        // Otherwise, it's a call: just insert the cast right after the call.
        InsertNewInstBefore(NC, *Caller);
      }
      Worklist.AddUsersToWorkList(*Caller);
    } else {
      NV = UndefValue::get(Caller->getType());
    }
  }

  if (!Caller->use_empty())
    ReplaceInstUsesWith(*Caller, NV);
  else if (Caller->hasValueHandle()) {
    if (OldRetTy == NV->getType())
      ValueHandleBase::ValueIsRAUWd(Caller, NV);
    else
      // We cannot call ValueIsRAUWd with a different type, and the
      // actual tracked value will disappear.
      ValueHandleBase::ValueIsDeleted(Caller);
  }

  EraseInstFromFunction(*Caller);
  return true;
}

// transformCallThroughTrampoline - Turn a call to a function created by the
// init_trampoline / adjust_trampoline intrinsic pair into a direct call to the
// underlying function.
//
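// For example (illustrative): if the trampoline was initialized with
//   call void @llvm.init.trampoline(i8* %tramp, i8* @f, i8* %nest_val)
// where @f takes an i8* 'nest' parameter followed by an i32, then a call
//   call void %adjusted_tramp(i32 7)
// becomes
//   call void @f(i8* nest %nest_val, i32 7)
//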
Instruction *
InstCombiner::transformCallThroughTrampoline(CallSite CS,
                                             IntrinsicInst *Tramp) {
  Value *Callee = CS.getCalledValue();
  PointerType *PTy = cast<PointerType>(Callee->getType());
  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
  const AttributeSet &Attrs = CS.getAttributes();

  // If the call already has the 'nest' attribute somewhere then give up -
  // otherwise 'nest' would occur twice after splicing in the chain.
  if (Attrs.hasAttrSomewhere(Attribute::Nest))
    return nullptr;

  assert(Tramp &&
         "transformCallThroughTrampoline called with incorrect CallSite.");

  Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
  PointerType *NestFPTy = cast<PointerType>(NestF->getType());
  FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());

  const AttributeSet &NestAttrs = NestF->getAttributes();
  if (!NestAttrs.isEmpty()) {
    unsigned NestIdx = 1;
    Type *NestTy = nullptr;
    AttributeSet NestAttr;

    // Look for a parameter marked with the 'nest' attribute.
    for (FunctionType::param_iterator I = NestFTy->param_begin(),
         E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
      if (NestAttrs.hasAttribute(NestIdx, Attribute::Nest)) {
        // Record the parameter type and any other attributes.
        NestTy = *I;
        NestAttr = NestAttrs.getParamAttributes(NestIdx);
        break;
      }

    if (NestTy) {
      Instruction *Caller = CS.getInstruction();
      std::vector<Value*> NewArgs;
      NewArgs.reserve(CS.arg_size() + 1);

      SmallVector<AttributeSet, 8> NewAttrs;
      NewAttrs.reserve(Attrs.getNumSlots() + 1);

      // Insert the nest argument into the call argument list, which may
      // mean appending it. Likewise for attributes.

      // Add any result attributes.
      if (Attrs.hasAttributes(AttributeSet::ReturnIndex))
        NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
                                             Attrs.getRetAttributes()));

      {
        unsigned Idx = 1;
        CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
        do {
          if (Idx == NestIdx) {
            // Add the chain argument and attributes.
            Value *NestVal = Tramp->getArgOperand(2);
            if (NestVal->getType() != NestTy)
              NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest");
            NewArgs.push_back(NestVal);
            NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
                                                 NestAttr));
          }

          if (I == E)
            break;

          // Add the original argument and attributes.
          NewArgs.push_back(*I);
          AttributeSet Attr = Attrs.getParamAttributes(Idx);
          if (Attr.hasAttributes(Idx)) {
            AttrBuilder B(Attr, Idx);
            NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
                                                 Idx + (Idx >= NestIdx), B));
          }

          ++Idx, ++I;
        } while (1);
      }

      // Add any function attributes.
      if (Attrs.hasAttributes(AttributeSet::FunctionIndex))
        NewAttrs.push_back(AttributeSet::get(FTy->getContext(),
                                             Attrs.getFnAttributes()));

      // The trampoline may have been bitcast to a bogus type (FTy).
      // Handle this by synthesizing a new function type, equal to FTy
      // with the chain parameter inserted.
      std::vector<Type*> NewTypes;
      NewTypes.reserve(FTy->getNumParams()+1);

      // Insert the chain's type into the list of parameter types, which may
      // mean appending it.
      {
        unsigned Idx = 1;
        FunctionType::param_iterator I = FTy->param_begin(),
          E = FTy->param_end();
        do {
          if (Idx == NestIdx)
            // Add the chain's type.
            NewTypes.push_back(NestTy);

          if (I == E)
            break;

          // Add the original type.
          NewTypes.push_back(*I);

          ++Idx, ++I;
        } while (1);
      }

      // Replace the trampoline call with a direct call. Let the generic
      // code sort out any function type mismatches.
      FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
                                               FTy->isVarArg());
      Constant *NewCallee =
        NestF->getType() == PointerType::getUnqual(NewFTy) ?
        NestF : ConstantExpr::getBitCast(NestF,
                                         PointerType::getUnqual(NewFTy));
      const AttributeSet &NewPAL =
          AttributeSet::get(FTy->getContext(), NewAttrs);

      Instruction *NewCaller;
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
        NewCaller = InvokeInst::Create(NewCallee,
                                       II->getNormalDest(), II->getUnwindDest(),
                                       NewArgs);
        cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
        cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
      } else {
        NewCaller = CallInst::Create(NewCallee, NewArgs);
        if (cast<CallInst>(Caller)->isTailCall())
          cast<CallInst>(NewCaller)->setTailCall();
        cast<CallInst>(NewCaller)->
          setCallingConv(cast<CallInst>(Caller)->getCallingConv());
        cast<CallInst>(NewCaller)->setAttributes(NewPAL);
      }

      return NewCaller;
    }
  }

  // Replace the trampoline call with a direct call. Since there is no 'nest'
  // parameter, there is no need to adjust the argument list. Let the generic
  // code sort out any function type mismatches.
  Constant *NewCallee =
    NestF->getType() == PTy ? NestF :
                              ConstantExpr::getBitCast(NestF, PTy);
  CS.setCalledFunction(NewCallee);
  return CS.getInstruction();
}