  1. //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file implements the auto-upgrade helper functions.
  11. // This is where deprecated IR intrinsics and other IR features are updated to
  12. // current specifications.
  13. //
  14. //===----------------------------------------------------------------------===//
  15. #include "llvm/IR/AutoUpgrade.h"
  16. #include "llvm/IR/CFG.h"
  17. #include "llvm/IR/CallSite.h"
  18. #include "llvm/IR/Constants.h"
  19. #include "llvm/IR/DIBuilder.h"
  20. #include "llvm/IR/DebugInfo.h"
  21. #include "llvm/IR/DiagnosticInfo.h"
  22. #include "llvm/IR/Function.h"
  23. #include "llvm/IR/IRBuilder.h"
  24. #include "llvm/IR/Instruction.h"
  25. #include "llvm/IR/IntrinsicInst.h"
  26. #include "llvm/IR/LLVMContext.h"
  27. #include "llvm/IR/Module.h"
  28. #include "llvm/Support/ErrorHandling.h"
  29. #include <cstring>
  30. using namespace llvm;
  31. #if 0 // HLSL Change - remove platform intrinsics
  32. // Upgrade the declarations of the SSE4.1 functions whose arguments have
  33. // changed their type from v4f32 to v2i64.
  34. static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
  35. Function *&NewFn) {
  36. // Check whether this is an old version of the function, which received
  37. // v4f32 arguments.
  38. Type *Arg0Type = F->getFunctionType()->getParamType(0);
  39. if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
  40. return false;
  41. // Yes, it's old, replace it with new version.
  42. F->setName(F->getName() + ".old");
  43. NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  44. return true;
  45. }
  46. // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
  47. // arguments have changed their type from i32 to i8.
  48. static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
  49. Function *&NewFn) {
  50. // Check that the last argument is an i32.
  51. Type *LastArgType = F->getFunctionType()->getParamType(
  52. F->getFunctionType()->getNumParams() - 1);
  53. if (!LastArgType->isIntegerTy(32))
  54. return false;
  55. // Move this function aside and map down.
  56. F->setName(F->getName() + ".old");
  57. NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  58. return true;
  59. }
  60. #endif // HLSL Change - remove platform intrinsics
// Examine a single intrinsic declaration and decide whether it needs
// upgrading.  Returns true when the declaration is stale; on return, NewFn
// is either the replacement declaration, or stays null when only the call
// sites (not the declaration itself) must be rewritten.
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  // Quickly eliminate it, if it's not a candidate.
  StringRef Name = F->getName();
  if (Name.size() <= 8 || !Name.startswith("llvm."))
    return false;
  Name = Name.substr(5); // Strip off "llvm."

  // Dispatch on the first character after "llvm." to keep the prefix
  // comparisons below cheap.
  switch (Name[0]) {
  default: break;
  case 'a': {
    if (Name.startswith("arm.neon.vclz")) {
      Type* args[2] = {
        F->arg_begin()->getType(),
        Type::getInt1Ty(F->getContext())
      };
      // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
      // the end of the name. Change name from llvm.arm.neon.vclz.* to
      // llvm.ctlz.*  (substr(14) drops the "arm.neon.vclz." prefix).
      FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
      NewFn = Function::Create(fType, F->getLinkage(),
                               "llvm.ctlz." + Name.substr(14), F->getParent());
      return true;
    }
    if (Name.startswith("arm.neon.vcnt")) {
      // vcnt maps directly onto the generic ctpop intrinsic.
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                        F->arg_begin()->getType());
      return true;
    }
    break;
  }
  case 'c': {
    // One-argument ctlz/cttz predate the "is_zero_undef" flag; upgrade to
    // the two-argument form.  Note Name has already had "llvm." stripped,
    // so the renamed stale declaration loses that prefix.
    if (Name.startswith("ctlz.") && F->arg_size() == 1) {
      F->setName(Name + ".old");
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("cttz.") && F->arg_size() == 1) {
      F->setName(Name + ".old");
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
                                        F->arg_begin()->getType());
      return true;
    }
    break;
  }
  case 'o':
    // We only need to change the name to match the mangling including the
    // address space.
    if (F->arg_size() == 2 && Name.startswith("objectsize.")) {
      Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
        F->setName(Name + ".old");
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::objectsize, Tys);
        return true;
      }
    }
    break;
#if 0 // HLSL Change - remove platform intrinsics
  case 'x': {
    // For these intrinsics the declaration is unchanged but every call must
    // be rewritten, so NewFn is left null (see UpgradeIntrinsicCall).
    if (Name.startswith("x86.sse2.pcmpeq.") ||
        Name.startswith("x86.sse2.pcmpgt.") ||
        Name.startswith("x86.avx2.pcmpeq.") ||
        Name.startswith("x86.avx2.pcmpgt.") ||
        Name.startswith("x86.avx.vpermil.") ||
        Name == "x86.avx.vinsertf128.pd.256" ||
        Name == "x86.avx.vinsertf128.ps.256" ||
        Name == "x86.avx.vinsertf128.si.256" ||
        Name == "x86.avx2.vinserti128" ||
        Name == "x86.avx.vextractf128.pd.256" ||
        Name == "x86.avx.vextractf128.ps.256" ||
        Name == "x86.avx.vextractf128.si.256" ||
        Name == "x86.avx2.vextracti128" ||
        Name == "x86.avx.movnt.dq.256" ||
        Name == "x86.avx.movnt.pd.256" ||
        Name == "x86.avx.movnt.ps.256" ||
        Name == "x86.sse42.crc32.64.8" ||
        Name == "x86.avx.vbroadcast.ss" ||
        Name == "x86.avx.vbroadcast.ss.256" ||
        Name == "x86.avx.vbroadcast.sd.256" ||
        Name == "x86.sse2.psll.dq" ||
        Name == "x86.sse2.psrl.dq" ||
        Name == "x86.avx2.psll.dq" ||
        Name == "x86.avx2.psrl.dq" ||
        Name == "x86.sse2.psll.dq.bs" ||
        Name == "x86.sse2.psrl.dq.bs" ||
        Name == "x86.avx2.psll.dq.bs" ||
        Name == "x86.avx2.psrl.dq.bs" ||
        Name == "x86.sse41.pblendw" ||
        Name == "x86.sse41.blendpd" ||
        Name == "x86.sse41.blendps" ||
        Name == "x86.avx.blend.pd.256" ||
        Name == "x86.avx.blend.ps.256" ||
        Name == "x86.avx2.pblendw" ||
        Name == "x86.avx2.pblendd.128" ||
        Name == "x86.avx2.pblendd.256" ||
        Name == "x86.avx2.vbroadcasti128" ||
        (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
      NewFn = nullptr;
      return true;
    }
    // SSE4.1 ptest functions may have an old signature.
    if (Name.startswith("x86.sse41.ptest")) {
      if (Name == "x86.sse41.ptestc")
        return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
      if (Name == "x86.sse41.ptestz")
        return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
      if (Name == "x86.sse41.ptestnzc")
        return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
    }
    // Several blend and other instructions with masks used the wrong number of
    // bits.
    if (Name == "x86.sse41.insertps")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
                                              NewFn);
    if (Name == "x86.sse41.dppd")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
                                              NewFn);
    if (Name == "x86.sse41.dpps")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
                                              NewFn);
    if (Name == "x86.sse41.mpsadbw")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
                                              NewFn);
    if (Name == "x86.avx.dp.ps.256")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
                                              NewFn);
    if (Name == "x86.avx2.mpsadbw")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
                                              NewFn);

    // frcz.ss/sd may need to have an argument dropped
    if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {
      F->setName(Name + ".old");
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::x86_xop_vfrcz_ss);
      return true;
    }
    if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) {
      F->setName(Name + ".old");
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::x86_xop_vfrcz_sd);
      return true;
    }
    // Fix the FMA4 intrinsics to remove the 4
    if (Name.startswith("x86.fma4.")) {
      F->setName("llvm.x86.fma" + Name.substr(8));
      NewFn = F;
      return true;
    }
    break;
  }
#endif // HLSL Change - remove platform intrinsics
  }

  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
  return false;
}
  220. bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
  221. NewFn = nullptr;
  222. bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
  223. assert(F != NewFn && "Intrinsic function upgraded to the same function");
  224. // Upgrade intrinsic attributes. This does not change the function.
  225. if (NewFn)
  226. F = NewFn;
  227. if (Intrinsic::ID id = F->getIntrinsicID())
  228. F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
  229. return Upgraded;
  230. }
  231. bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  232. // Nothing to do yet.
  233. return false;
  234. }
  235. #if 0 // HLSL Change - remove platform intrinsics
// Handles upgrading SSE2 and AVX2 PSLLDQ intrinsics by converting them
// to byte shuffles.
//
// Builder  - IRBuilder already positioned at the call being replaced.
// C        - context used to build the intermediate vector types.
// Op       - the vector operand being shifted (64-bit elements).
// NumLanes - number of 128-bit lanes (1 = SSE2, 2 = AVX2).
// Shift    - left-shift amount in bytes, applied independently per lane.
// Returns the shifted value, bitcast back to <2*NumLanes x i64>.
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
                                         Value *Op, unsigned NumLanes,
                                         unsigned Shift) {
  // Each lane is 16 bytes.
  unsigned NumElts = NumLanes * 16;

  // Bitcast from a 64-bit element type to a byte element type.
  Op = Builder.CreateBitCast(Op,
                             VectorType::get(Type::getInt8Ty(C), NumElts),
                             "cast");
  // We'll be shuffling in zeroes.
  Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector (hardware semantics: shifting a whole
  // lane or more leaves nothing behind).
  if (Shift < 16) {
    SmallVector<Constant*, 32> Idxs;
    // 256-bit version is split into two 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        // Indices start in the zero vector (first shuffle operand) and
        // wrap into Op (second operand) once past the shift amount.
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs.push_back(Builder.getInt32(Idx + l));
      }

    Res = Builder.CreateShuffleVector(Res, Op, ConstantVector::get(Idxs));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res,
                               VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
                               "cast");
}
// Handles upgrading SSE2 and AVX2 PSRLDQ intrinsics by converting them
// to byte shuffles.
//
// Builder  - IRBuilder already positioned at the call being replaced.
// C        - context used to build the intermediate vector types.
// Op       - the vector operand being shifted (64-bit elements).
// NumLanes - number of 128-bit lanes (1 = SSE2, 2 = AVX2).
// Shift    - right-shift amount in bytes, applied independently per lane.
// Returns the shifted value, bitcast back to <2*NumLanes x i64>.
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
                                         Value *Op, unsigned NumLanes,
                                         unsigned Shift) {
  // Each lane is 16 bytes.
  unsigned NumElts = NumLanes * 16;

  // Bitcast from a 64-bit element type to a byte element type.
  Op = Builder.CreateBitCast(Op,
                             VectorType::get(Type::getInt8Ty(C), NumElts),
                             "cast");
  // We'll be shuffling in zeroes.
  Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector (hardware semantics: shifting a whole
  // lane or more leaves nothing behind).
  if (Shift < 16) {
    SmallVector<Constant*, 32> Idxs;
    // 256-bit version is split into two 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        // Indices start in Op (first shuffle operand) and wrap into the
        // zero vector (second operand) once past the end of the lane.
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs.push_back(Builder.getInt32(Idx + l));
      }

    Res = Builder.CreateShuffleVector(Op, Res, ConstantVector::get(Idxs));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res,
                               VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
                               "cast");
}
  300. #endif // HLSL Change - remove platform intrinsics
// UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the
// upgraded intrinsic. All argument and return casting must be provided in
// order to seamlessly integrate with existing context.
//
// CI    - the stale call; it is erased and replaced in place.
// NewFn - the replacement declaration produced by UpgradeIntrinsicFunction,
//         or null when the call must be rewritten against the original
//         declaration (that path is compiled out in this HLSL fork and
//         reaching it is a fatal error).
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
  Function *F = CI->getCalledFunction();
  LLVMContext &C = CI->getContext();
  IRBuilder<> Builder(C);
  Builder.SetInsertPoint(CI->getParent(), CI);

  assert(F && "Intrinsic call is not direct?");

  if (!NewFn) {
    (void)F; // HLSL Change - unused local variable
#if 0 // HLSL Change - remove platform intrinsics
    // Get the Function's name.
    StringRef Name = F->getName();

    Value *Rep;
    // Upgrade packed integer vector compares intrinsics to compare instructions
    if (Name.startswith("llvm.x86.sse2.pcmpeq.") ||
        Name.startswith("llvm.x86.avx2.pcmpeq.")) {
      Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
                                 "pcmpeq");
      // need to sign extend since icmp returns vector of i1
      Rep = Builder.CreateSExt(Rep, CI->getType(), "");
    } else if (Name.startswith("llvm.x86.sse2.pcmpgt.") ||
               Name.startswith("llvm.x86.avx2.pcmpgt.")) {
      Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
                                  "pcmpgt");
      // need to sign extend since icmp returns vector of i1
      Rep = Builder.CreateSExt(Rep, CI->getType(), "");
    } else if (Name == "llvm.x86.avx.movnt.dq.256" ||
               Name == "llvm.x86.avx.movnt.ps.256" ||
               Name == "llvm.x86.avx.movnt.pd.256") {
      // Non-temporal stores: lowered to an ordinary store carrying
      // !nontemporal metadata.  NOTE(review): this inner Builder shadows the
      // outer one; both are positioned at CI, so behavior is unchanged.
      IRBuilder<> Builder(C);
      Builder.SetInsertPoint(CI->getParent(), CI);

      Module *M = F->getParent();
      SmallVector<Metadata *, 1> Elts;
      Elts.push_back(
          ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
      MDNode *Node = MDNode::get(C, Elts);

      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      // Convert the type of the pointer to a pointer to the stored type.
      Value *BC = Builder.CreateBitCast(Arg0,
                                        PointerType::getUnqual(Arg1->getType()),
                                        "cast");
      StoreInst *SI = Builder.CreateStore(Arg1, BC);
      SI->setMetadata(M->getMDKindID("nontemporal"), Node);
      SI->setAlignment(16);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    } else if (Name.startswith("llvm.x86.xop.vpcom")) {
      // XOP compares: the element-type suffix selects the intrinsic, the
      // condition suffix becomes an immediate comparison code.
      Intrinsic::ID intID;
      if (Name.endswith("ub"))
        intID = Intrinsic::x86_xop_vpcomub;
      else if (Name.endswith("uw"))
        intID = Intrinsic::x86_xop_vpcomuw;
      else if (Name.endswith("ud"))
        intID = Intrinsic::x86_xop_vpcomud;
      else if (Name.endswith("uq"))
        intID = Intrinsic::x86_xop_vpcomuq;
      else if (Name.endswith("b"))
        intID = Intrinsic::x86_xop_vpcomb;
      else if (Name.endswith("w"))
        intID = Intrinsic::x86_xop_vpcomw;
      else if (Name.endswith("d"))
        intID = Intrinsic::x86_xop_vpcomd;
      else if (Name.endswith("q"))
        intID = Intrinsic::x86_xop_vpcomq;
      else
        llvm_unreachable("Unknown suffix");

      Name = Name.substr(18); // strip off "llvm.x86.xop.vpcom"
      unsigned Imm;
      if (Name.startswith("lt"))
        Imm = 0;
      else if (Name.startswith("le"))
        Imm = 1;
      else if (Name.startswith("gt"))
        Imm = 2;
      else if (Name.startswith("ge"))
        Imm = 3;
      else if (Name.startswith("eq"))
        Imm = 4;
      else if (Name.startswith("ne"))
        Imm = 5;
      else if (Name.startswith("false"))
        Imm = 6;
      else if (Name.startswith("true"))
        Imm = 7;
      else
        llvm_unreachable("Unknown condition");

      Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
      Rep =
          Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
                                     Builder.getInt8(Imm)});
    } else if (Name == "llvm.x86.sse42.crc32.64.8") {
      // The 64-bit variant is equivalent to the 32-bit one with the first
      // operand truncated and the result zero-extended.
      Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
                                                  Intrinsic::x86_sse42_crc32_32_8);
      Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
      Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
      Rep = Builder.CreateZExt(Rep, CI->getType(), "");
    } else if (Name.startswith("llvm.x86.avx.vbroadcast")) {
      // Replace broadcasts with a series of insertelements.
      Type *VecTy = CI->getType();
      Type *EltTy = VecTy->getVectorElementType();
      unsigned EltNum = VecTy->getVectorNumElements();
      Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
                                          EltTy->getPointerTo());
      Value *Load = Builder.CreateLoad(EltTy, Cast);
      Type *I32Ty = Type::getInt32Ty(C);
      Rep = UndefValue::get(VecTy);
      for (unsigned I = 0; I < EltNum; ++I)
        Rep = Builder.CreateInsertElement(Rep, Load,
                                          ConstantInt::get(I32Ty, I));
    } else if (Name == "llvm.x86.avx2.vbroadcasti128") {
      // Replace vbroadcasts with a vector shuffle.
      Type *VT = VectorType::get(Type::getInt64Ty(C), 2);
      Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
                                            PointerType::getUnqual(VT));
      Value *Load = Builder.CreateLoad(VT, Op);
      const int Idxs[4] = { 0, 1, 0, 1 };
      Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
                                        Idxs);
    } else if (Name == "llvm.x86.sse2.psll.dq") {
      // 128-bit shift left specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
                                       Shift / 8); // Shift is in bits.
    } else if (Name == "llvm.x86.sse2.psrl.dq") {
      // 128-bit shift right specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
                                       Shift / 8); // Shift is in bits.
    } else if (Name == "llvm.x86.avx2.psll.dq") {
      // 256-bit shift left specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
                                       Shift / 8); // Shift is in bits.
    } else if (Name == "llvm.x86.avx2.psrl.dq") {
      // 256-bit shift right specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
                                       Shift / 8); // Shift is in bits.
    } else if (Name == "llvm.x86.sse2.psll.dq.bs") {
      // 128-bit shift left specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
                                       Shift);
    } else if (Name == "llvm.x86.sse2.psrl.dq.bs") {
      // 128-bit shift right specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
                                       Shift);
    } else if (Name == "llvm.x86.avx2.psll.dq.bs") {
      // 256-bit shift left specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
                                       Shift);
    } else if (Name == "llvm.x86.avx2.psrl.dq.bs") {
      // 256-bit shift right specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
                                       Shift);
    } else if (Name == "llvm.x86.sse41.pblendw" ||
               Name == "llvm.x86.sse41.blendpd" ||
               Name == "llvm.x86.sse41.blendps" ||
               Name == "llvm.x86.avx.blend.pd.256" ||
               Name == "llvm.x86.avx.blend.ps.256" ||
               Name == "llvm.x86.avx2.pblendw" ||
               Name == "llvm.x86.avx2.pblendd.128" ||
               Name == "llvm.x86.avx2.pblendd.256") {
      // Blends become shuffles: mask bit i selects element i from Op1
      // (index i + NumElts) or Op0 (index i).  The i%8 keeps the 8-bit
      // immediate repeating across wider vectors.
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      SmallVector<Constant*, 16> Idxs;
      for (unsigned i = 0; i != NumElts; ++i) {
        unsigned Idx = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
        Idxs.push_back(Builder.getInt32(Idx));
      }

      Rep = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
    } else if (Name == "llvm.x86.avx.vinsertf128.pd.256" ||
               Name == "llvm.x86.avx.vinsertf128.ps.256" ||
               Name == "llvm.x86.avx.vinsertf128.si.256" ||
               Name == "llvm.x86.avx2.vinserti128") {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      // Mask off the high bits of the immediate value; hardware ignores those.
      Imm = Imm & 1;

      // Extend the second operand into a vector that is twice as big.
      Value *UndefV = UndefValue::get(Op1->getType());
      SmallVector<Constant*, 8> Idxs;
      for (unsigned i = 0; i != NumElts; ++i) {
        Idxs.push_back(Builder.getInt32(i));
      }
      Rep = Builder.CreateShuffleVector(Op1, UndefV, ConstantVector::get(Idxs));

      // Insert the second operand into the first operand.
      //
      // Note that there is no guarantee that instruction lowering will actually
      // produce a vinsertf128 instruction for the created shuffles. In
      // particular, the 0 immediate case involves no lane changes, so it can
      // be handled as a blend.
      //
      // Example of shuffle mask for 32-bit elements:
      // Imm = 1  <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
      // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
      SmallVector<Constant*, 8> Idxs2;
      // The low half of the result is either the low half of the 1st operand
      // or the low half of the 2nd operand (the inserted vector).
      for (unsigned i = 0; i != NumElts / 2; ++i) {
        unsigned Idx = Imm ? i : (i + NumElts);
        Idxs2.push_back(Builder.getInt32(Idx));
      }
      // The high half of the result is either the low half of the 2nd operand
      // (the inserted vector) or the high half of the 1st operand.
      for (unsigned i = NumElts / 2; i != NumElts; ++i) {
        unsigned Idx = Imm ? (i + NumElts / 2) : i;
        Idxs2.push_back(Builder.getInt32(Idx));
      }
      Rep = Builder.CreateShuffleVector(Op0, Rep, ConstantVector::get(Idxs2));
    } else if (Name == "llvm.x86.avx.vextractf128.pd.256" ||
               Name == "llvm.x86.avx.vextractf128.ps.256" ||
               Name == "llvm.x86.avx.vextractf128.si.256" ||
               Name == "llvm.x86.avx2.vextracti128") {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      // Mask off the high bits of the immediate value; hardware ignores those.
      Imm = Imm & 1;

      // Get indexes for either the high half or low half of the input vector.
      SmallVector<Constant*, 4> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i) {
        unsigned Idx = Imm ? (i + NumElts) : i;
        Idxs[i] = Builder.getInt32(Idx);
      }

      Value *UndefV = UndefValue::get(Op0->getType());
      Rep = Builder.CreateShuffleVector(Op0, UndefV, ConstantVector::get(Idxs));
    } else {
      // vpermil variants: the immediate encodes a per-element (or per-lane)
      // source index, lowered to a single-operand shuffle.
      bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
      if (Name == "llvm.x86.avx.vpermil.pd.256")
        PD256 = true;
      else if (Name == "llvm.x86.avx.vpermil.pd")
        PD128 = true;
      else if (Name == "llvm.x86.avx.vpermil.ps.256")
        PS256 = true;
      else if (Name == "llvm.x86.avx.vpermil.ps")
        PS128 = true;
      if (PD256 || PD128 || PS256 || PS128) {
        Value *Op0 = CI->getArgOperand(0);
        unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
        SmallVector<Constant*, 8> Idxs;

        if (PD128)
          for (unsigned i = 0; i != 2; ++i)
            Idxs.push_back(Builder.getInt32((Imm >> i) & 0x1));
        else if (PD256)
          for (unsigned l = 0; l != 4; l+=2)
            for (unsigned i = 0; i != 2; ++i)
              Idxs.push_back(Builder.getInt32(((Imm >> (l+i)) & 0x1) + l));
        else if (PS128)
          for (unsigned i = 0; i != 4; ++i)
            Idxs.push_back(Builder.getInt32((Imm >> (2 * i)) & 0x3));
        else if (PS256)
          for (unsigned l = 0; l != 8; l+=4)
            for (unsigned i = 0; i != 4; ++i)
              Idxs.push_back(Builder.getInt32(((Imm >> (2 * i)) & 0x3) + l));
        else
          llvm_unreachable("Unexpected function");

        Rep = Builder.CreateShuffleVector(Op0, Op0, ConstantVector::get(Idxs));
      } else {
        llvm_unreachable("Unknown function for CallInst upgrade.");
      }
    }

    CI->replaceAllUsesWith(Rep);
    CI->eraseFromParent();
#endif // HLSL Change - remove platform intrinsics
    llvm_unreachable("HLSL - should not be upgrading platform intrinsics."); // HLSL Change - remove platform intrinsics
    return;
  }

  // Preserve the call's name on the replacement, parking the old call under
  // a ".old" suffix until it is erased.
  std::string Name = CI->getName();
  if (!Name.empty())
    CI->setName(Name + ".old");

  switch (NewFn->getIntrinsicID()) {
  default:
    llvm_unreachable("Unknown function for CallInst upgrade.");

  case Intrinsic::ctlz:
  case Intrinsic::cttz:
    // The upgraded intrinsics take a second "is_zero_undef" flag; the legacy
    // single-argument form corresponds to false (zero input is defined).
    assert(CI->getNumArgOperands() == 1 &&
           "Mismatch between function args and call args");
    CI->replaceAllUsesWith(Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), Builder.getFalse()}, Name));
    CI->eraseFromParent();
    return;

  case Intrinsic::objectsize:
    // Same arguments; only the declaration's mangled name changed.
    CI->replaceAllUsesWith(Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)}, Name));
    CI->eraseFromParent();
    return;

  case Intrinsic::ctpop: {
    CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)}));
    CI->eraseFromParent();
    return;
  }

#if 0 // HLSL Change - remove platform intrinsics
  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    // The upgraded form drops the first argument.
    CI->replaceAllUsesWith(
        Builder.CreateCall(NewFn, {CI->getArgOperand(1)}, Name));
    CI->eraseFromParent();
    return;

  case Intrinsic::x86_sse41_ptestc:
  case Intrinsic::x86_sse41_ptestz:
  case Intrinsic::x86_sse41_ptestnzc: {
    // The arguments for these intrinsics used to be v4f32, and changed
    // to v2i64. This is purely a nop, since those are bitwise intrinsics.
    // So, the only thing required is a bitcast for both arguments.
    // First, check the arguments have the old type.
    Value *Arg0 = CI->getArgOperand(0);
    if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
      return;

    // Old intrinsic, add bitcasts
    Value *Arg1 = CI->getArgOperand(1);

    Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);

    Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
    Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");

    CallInst *NewCall = Builder.CreateCall(NewFn, {BC0, BC1}, Name);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_sse41_insertps:
  case Intrinsic::x86_sse41_dppd:
  case Intrinsic::x86_sse41_dpps:
  case Intrinsic::x86_sse41_mpsadbw:
  case Intrinsic::x86_avx_dp_ps_256:
  case Intrinsic::x86_avx2_mpsadbw: {
    // Need to truncate the last argument from i32 to i8 -- this argument models
    // an inherently 8-bit immediate operand to these x86 instructions.
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());

    // Replace the last argument with a trunc.
    Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");

    CallInst *NewCall = Builder.CreateCall(NewFn, Args);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;
  }
#endif // HLSL Change - remove platform intrinsics
  }
}
  652. // This tests each Function to determine if it needs upgrading. When we find
  653. // one we are interested in, we then upgrade all calls to reflect the new
  654. // function.
  655. void llvm::UpgradeCallsToIntrinsic(Function* F) {
  656. assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
  657. // Upgrade the function and check if it is a totaly new function.
  658. Function *NewFn;
  659. if (UpgradeIntrinsicFunction(F, NewFn)) {
  660. // Replace all uses to the old function with the new one if necessary.
  661. for (Value::user_iterator UI = F->user_begin(), UE = F->user_end();
  662. UI != UE;) {
  663. if (CallInst *CI = dyn_cast<CallInst>(*UI++))
  664. UpgradeIntrinsicCall(CI, NewFn);
  665. }
  666. // Remove old function, no longer used, from the module.
  667. F->eraseFromParent();
  668. }
  669. }
// Upgrade the !tbaa metadata on an instruction from the legacy scalar format
// to the struct-path aware format <base type, access type, offset
// [, is-constant]>.  No-op when the tag is already struct-path aware.
void llvm::UpgradeInstWithTBAATag(Instruction *I) {
  MDNode *MD = I->getMetadata(LLVMContext::MD_tbaa);
  assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
  // Check if the tag uses struct-path aware TBAA format: a first operand
  // that is itself an MDNode together with at least three operands.
  if (isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3)
    return;

  if (MD->getNumOperands() == 3) {
    // Legacy <name, parent, is-constant>: wrap <name, parent> into a scalar
    // type node, then reuse the third operand as the "const" flag.
    Metadata *Elts[] = {MD->getOperand(0), MD->getOperand(1)};
    MDNode *ScalarType = MDNode::get(I->getContext(), Elts);
    // Create a MDNode <ScalarType, ScalarType, offset 0, const>
    Metadata *Elts2[] = {ScalarType, ScalarType,
                         ConstantAsMetadata::get(Constant::getNullValue(
                             Type::getInt64Ty(I->getContext()))),
                         MD->getOperand(2)};
    I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts2));
  } else {
    // Create a MDNode <MD, MD, offset 0>
    Metadata *Elts[] = {MD, MD, ConstantAsMetadata::get(Constant::getNullValue(
                            Type::getInt64Ty(I->getContext())))};
    I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts));
  }
}
  692. Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
  693. Instruction *&Temp) {
  694. if (Opc != Instruction::BitCast)
  695. return nullptr;
  696. Temp = nullptr;
  697. Type *SrcTy = V->getType();
  698. if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
  699. SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
  700. LLVMContext &Context = V->getContext();
  701. // We have no information about target data layout, so we assume that
  702. // the maximum pointer size is 64bit.
  703. Type *MidTy = Type::getInt64Ty(Context);
  704. Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
  705. return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
  706. }
  707. return nullptr;
  708. }
  709. Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
  710. if (Opc != Instruction::BitCast)
  711. return nullptr;
  712. Type *SrcTy = C->getType();
  713. if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
  714. SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
  715. LLVMContext &Context = C->getContext();
  716. // We have no information about target data layout, so we assume that
  717. // the maximum pointer size is 64bit.
  718. Type *MidTy = Type::getInt64Ty(Context);
  719. return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
  720. DestTy);
  721. }
  722. return nullptr;
  723. }
  724. /// Check the debug info version number, if it is out-dated, drop the debug
  725. /// info. Return true if module is modified.
  726. bool llvm::UpgradeDebugInfo(Module &M) {
  727. unsigned Version = getDebugMetadataVersionFromModule(M);
  728. if (Version == DEBUG_METADATA_VERSION)
  729. return false;
  730. bool RetCode = StripDebugInfo(M);
  731. if (RetCode) {
  732. DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
  733. M.getContext().diagnose(DiagVersion);
  734. }
  735. return RetCode;
  736. }
  737. void llvm::UpgradeMDStringConstant(std::string &String) {
  738. const std::string OldPrefix = "llvm.vectorizer.";
  739. if (String == "llvm.vectorizer.unroll") {
  740. String = "llvm.loop.interleave.count";
  741. } else if (String.find(OldPrefix) == 0) {
  742. String.replace(0, OldPrefix.size(), "llvm.loop.vectorize.");
  743. }
  744. }