NeonEmitter.cpp 72 KB


  1. //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This tablegen backend is responsible for emitting arm_neon.h, which includes
  11. // a declaration and definition of each function specified by the ARM NEON
  12. // compiler interface. See ARM document DUI0348B.
  13. //
  14. // Each NEON instruction is implemented in terms of 1 or more functions which
  15. // are suffixed with the element type of the input vectors. Functions may be
  16. // implemented in terms of generic vector operations such as +, *, -, etc. or
  17. // by calling a __builtin_-prefixed function which will be handled by clang's
  18. // CodeGen library.
  19. //
  20. // Additional validation code can be generated by this file when runHeader() is
  21. // called, rather than the normal run() entry point.
  22. //
  23. // See also the documentation in include/clang/Basic/arm_neon.td.
  24. //
  25. //===----------------------------------------------------------------------===//
  26. #include "llvm/ADT/DenseMap.h"
  27. #include "llvm/ADT/STLExtras.h"
  28. #include "llvm/ADT/SmallString.h"
  29. #include "llvm/ADT/SmallVector.h"
  30. #include "llvm/ADT/StringExtras.h"
  31. #include "llvm/ADT/StringMap.h"
  32. #include "llvm/Support/ErrorHandling.h"
  33. #include "llvm/TableGen/Error.h"
  34. #include "llvm/TableGen/Record.h"
  35. #include "llvm/TableGen/SetTheory.h"
  36. #include "llvm/TableGen/TableGenBackend.h"
  37. #include <algorithm>
  38. #include <map>
  39. #include <sstream>
  40. #include <string>
  41. #include <vector>
  42. using namespace llvm;
  43. namespace {
  44. // While globals are generally bad, this one allows us to perform assertions
  45. // liberally and somehow still trace them back to the def they indirectly
  46. // came from.
  47. static Record *CurrentRecord = nullptr;
  48. static void assert_with_loc(bool Assertion, const std::string &Str) {
  49. if (!Assertion) {
  50. if (CurrentRecord)
  51. PrintFatalError(CurrentRecord->getLoc(), Str);
  52. else
  53. PrintFatalError(Str);
  54. }
  55. }
  /// The instruction class of an intrinsic. The class determines what kind of
  /// type suffix the intrinsic carries.
  enum ClassKind {
    ClassNone,
    ClassI,     ///< Generic integer instruction, e.g., "i8" suffix.
    ClassS,     ///< Signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix.
    ClassW,     ///< Width-specific instruction, e.g., "8" suffix.
    ClassB,     ///< Bitcast arguments, with an enum argument to specify type.
    ClassL,     ///< Logical instructions which are op instructions, but for
                ///< which we must not emit any suffix in our tests.
    ClassNoTest ///< Instructions which we do not test since they are not
                ///< TRUE instructions.
  };
  /// NeonTypeFlags - Flags used to identify the types for overloaded Neon
  /// builtins. These must be kept in sync with the flags in
  /// include/clang/Basic/TargetBuiltins.h.
  namespace NeonTypeFlags {
  /// Bit layout of an encoded type value.
  enum {
    EltTypeMask = 0xf,
    UnsignedFlag = 0x10,
    QuadFlag = 0x20
  };

  /// Element type codes; the enumerator order defines the numeric encoding.
  enum EltType {
    Int8,
    Int16,
    Int32,
    Int64,
    Poly8,
    Poly16,
    Poly64,
    Poly128,
    Float16,
    Float32,
    Float64
  };
  } // end namespace NeonTypeFlags
  87. class Intrinsic;
  88. class NeonEmitter;
  89. class Type;
  90. class Variable;
  91. //===----------------------------------------------------------------------===//
  92. // TypeSpec
  93. //===----------------------------------------------------------------------===//
  94. /// A TypeSpec is just a simple wrapper around a string, but gets its own type
  95. /// for strong typing purposes.
  96. ///
  97. /// A TypeSpec can be used to create a type.
  98. class TypeSpec : public std::string {
  99. public:
  100. static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) {
  101. std::vector<TypeSpec> Ret;
  102. TypeSpec Acc;
  103. for (char I : Str.str()) {
  104. if (islower(I)) {
  105. Acc.push_back(I);
  106. Ret.push_back(TypeSpec(Acc));
  107. Acc.clear();
  108. } else {
  109. Acc.push_back(I);
  110. }
  111. }
  112. return Ret;
  113. }
  114. };
  115. //===----------------------------------------------------------------------===//
  116. // Type
  117. //===----------------------------------------------------------------------===//
  118. /// A Type. Not much more to say here.
  119. class Type {
  120. private:
  121. TypeSpec TS;
  122. bool Float, Signed, Immediate, Void, Poly, Constant, Pointer;
  123. // ScalarForMangling and NoManglingQ are really not suited to live here as
  124. // they are not related to the type. But they live in the TypeSpec (not the
  125. // prototype), so this is really the only place to store them.
  126. bool ScalarForMangling, NoManglingQ;
  127. unsigned Bitwidth, ElementBitwidth, NumVectors;
  128. public:
  129. Type()
  130. : Float(false), Signed(false), Immediate(false), Void(true), Poly(false),
  131. Constant(false), Pointer(false), ScalarForMangling(false),
  132. NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) {}
  133. Type(TypeSpec TS, char CharMod)
  134. : TS(TS), Float(false), Signed(false), Immediate(false), Void(false),
  135. Poly(false), Constant(false), Pointer(false), ScalarForMangling(false),
  136. NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) {
  137. applyModifier(CharMod);
  138. }
  139. /// Returns a type representing "void".
  140. static Type getVoid() { return Type(); }
  141. bool operator==(const Type &Other) const { return str() == Other.str(); }
  142. bool operator!=(const Type &Other) const { return !operator==(Other); }
  143. //
  144. // Query functions
  145. //
  146. bool isScalarForMangling() const { return ScalarForMangling; }
  147. bool noManglingQ() const { return NoManglingQ; }
  148. bool isPointer() const { return Pointer; }
  149. bool isFloating() const { return Float; }
  150. bool isInteger() const { return !Float && !Poly; }
  151. bool isSigned() const { return Signed; }
  152. bool isImmediate() const { return Immediate; }
  153. bool isScalar() const { return NumVectors == 0; }
  154. bool isVector() const { return NumVectors > 0; }
  155. bool isFloat() const { return Float && ElementBitwidth == 32; }
  156. bool isDouble() const { return Float && ElementBitwidth == 64; }
  157. bool isHalf() const { return Float && ElementBitwidth == 16; }
  158. bool isPoly() const { return Poly; }
  159. bool isChar() const { return ElementBitwidth == 8; }
  160. bool isShort() const { return !Float && ElementBitwidth == 16; }
  161. bool isInt() const { return !Float && ElementBitwidth == 32; }
  162. bool isLong() const { return !Float && ElementBitwidth == 64; }
  163. bool isVoid() const { return Void; }
  164. unsigned getNumElements() const { return Bitwidth / ElementBitwidth; }
  165. unsigned getSizeInBits() const { return Bitwidth; }
  166. unsigned getElementSizeInBits() const { return ElementBitwidth; }
  167. unsigned getNumVectors() const { return NumVectors; }
  168. //
  169. // Mutator functions
  170. //
  171. void makeUnsigned() { Signed = false; }
  172. void makeSigned() { Signed = true; }
  173. void makeInteger(unsigned ElemWidth, bool Sign) {
  174. Float = false;
  175. Poly = false;
  176. Signed = Sign;
  177. Immediate = false;
  178. ElementBitwidth = ElemWidth;
  179. }
  180. void makeImmediate(unsigned ElemWidth) {
  181. Float = false;
  182. Poly = false;
  183. Signed = true;
  184. Immediate = true;
  185. ElementBitwidth = ElemWidth;
  186. }
  187. void makeScalar() {
  188. Bitwidth = ElementBitwidth;
  189. NumVectors = 0;
  190. }
  191. void makeOneVector() {
  192. assert(isVector());
  193. NumVectors = 1;
  194. }
  195. void doubleLanes() {
  196. assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!");
  197. Bitwidth = 128;
  198. }
  199. void halveLanes() {
  200. assert_with_loc(Bitwidth != 64, "Can't get smaller than 64!");
  201. Bitwidth = 64;
  202. }
  203. /// Return the C string representation of a type, which is the typename
  204. /// defined in stdint.h or arm_neon.h.
  205. std::string str() const;
  206. /// Return the string representation of a type, which is an encoded
  207. /// string for passing to the BUILTIN() macro in Builtins.def.
  208. std::string builtin_str() const;
  209. /// Return the value in NeonTypeFlags for this type.
  210. unsigned getNeonEnum() const;
  211. /// Parse a type from a stdint.h or arm_neon.h typedef name,
  212. /// for example uint32x2_t or int64_t.
  213. static Type fromTypedefName(StringRef Name);
  214. private:
  215. /// Creates the type based on the typespec string in TS.
  216. /// Sets "Quad" to true if the "Q" or "H" modifiers were
  217. /// seen. This is needed by applyModifier as some modifiers
  218. /// only take effect if the type size was changed by "Q" or "H".
  219. void applyTypespec(bool &Quad);
  220. /// Applies a prototype modifier to the type.
  221. void applyModifier(char Mod);
  222. };
  223. //===----------------------------------------------------------------------===//
  224. // Variable
  225. //===----------------------------------------------------------------------===//
  226. /// A variable is a simple class that just has a type and a name.
  227. class Variable {
  228. Type T;
  229. std::string N;
  230. public:
  231. Variable() : T(Type::getVoid()), N("") {}
  232. Variable(Type T, std::string N) : T(T), N(N) {}
  233. Type getType() const { return T; }
  234. std::string getName() const { return "__" + N; }
  235. };
  236. //===----------------------------------------------------------------------===//
  237. // Intrinsic
  238. //===----------------------------------------------------------------------===//
  239. /// The main grunt class. This represents an instantiation of an intrinsic with
  240. /// a particular typespec and prototype.
  241. class Intrinsic {
  242. friend class DagEmitter;
  243. /// The Record this intrinsic was created from.
  244. Record *R;
  245. /// The unmangled name and prototype.
  246. std::string Name, Proto;
  247. /// The input and output typespecs. InTS == OutTS except when
  248. /// CartesianProductOfTypes is 1 - this is the case for vreinterpret.
  249. TypeSpec OutTS, InTS;
  250. /// The base class kind. Most intrinsics use ClassS, which has full type
  251. /// info for integers (s32/u32). Some use ClassI, which doesn't care about
  252. /// signedness (i32), while some (ClassB) have no type at all, only a width
  253. /// (32).
  254. ClassKind CK;
  255. /// The list of DAGs for the body. May be empty, in which case we should
  256. /// emit a builtin call.
  257. ListInit *Body;
  258. /// The architectural #ifdef guard.
  259. std::string Guard;
  260. /// Set if the Unvailable bit is 1. This means we don't generate a body,
  261. /// just an "unavailable" attribute on a declaration.
  262. bool IsUnavailable;
  263. /// Is this intrinsic safe for big-endian? or does it need its arguments
  264. /// reversing?
  265. bool BigEndianSafe;
  266. /// The types of return value [0] and parameters [1..].
  267. std::vector<Type> Types;
  268. /// The local variables defined.
  269. std::map<std::string, Variable> Variables;
  270. /// NeededEarly - set if any other intrinsic depends on this intrinsic.
  271. bool NeededEarly;
  272. /// UseMacro - set if we should implement using a macro or unset for a
  273. /// function.
  274. bool UseMacro;
  275. /// The set of intrinsics that this intrinsic uses/requires.
  276. std::set<Intrinsic *> Dependencies;
  277. /// The "base type", which is Type('d', OutTS). InBaseType is only
  278. /// different if CartesianProductOfTypes = 1 (for vreinterpret).
  279. Type BaseType, InBaseType;
  280. /// The return variable.
  281. Variable RetVar;
  282. /// A postfix to apply to every variable. Defaults to "".
  283. std::string VariablePostfix;
  284. NeonEmitter &Emitter;
  285. std::stringstream OS;
  286. public:
  287. Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS,
  288. TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter,
  289. StringRef Guard, bool IsUnavailable, bool BigEndianSafe)
  290. : R(R), Name(Name.str()), Proto(Proto.str()), OutTS(OutTS), InTS(InTS),
  291. CK(CK), Body(Body), Guard(Guard.str()), IsUnavailable(IsUnavailable),
  292. BigEndianSafe(BigEndianSafe), NeededEarly(false), UseMacro(false),
  293. BaseType(OutTS, 'd'), InBaseType(InTS, 'd'), Emitter(Emitter) {
  294. // If this builtin takes an immediate argument, we need to #define it rather
  295. // than use a standard declaration, so that SemaChecking can range check
  296. // the immediate passed by the user.
  297. if (Proto.find('i') != std::string::npos)
  298. UseMacro = true;
  299. // Pointer arguments need to use macros to avoid hiding aligned attributes
  300. // from the pointer type.
  301. if (Proto.find('p') != std::string::npos ||
  302. Proto.find('c') != std::string::npos)
  303. UseMacro = true;
  304. // It is not permitted to pass or return an __fp16 by value, so intrinsics
  305. // taking a scalar float16_t must be implemented as macros.
  306. if (OutTS.find('h') != std::string::npos &&
  307. Proto.find('s') != std::string::npos)
  308. UseMacro = true;
  309. // Modify the TypeSpec per-argument to get a concrete Type, and create
  310. // known variables for each.
  311. // Types[0] is the return value.
  312. Types.emplace_back(OutTS, Proto[0]);
  313. for (unsigned I = 1; I < Proto.size(); ++I)
  314. Types.emplace_back(InTS, Proto[I]);
  315. }
  316. /// Get the Record that this intrinsic is based off.
  317. Record *getRecord() const { return R; }
  318. /// Get the set of Intrinsics that this intrinsic calls.
  319. /// this is the set of immediate dependencies, NOT the
  320. /// transitive closure.
  321. const std::set<Intrinsic *> &getDependencies() const { return Dependencies; }
  322. /// Get the architectural guard string (#ifdef).
  323. std::string getGuard() const { return Guard; }
  324. /// Get the non-mangled name.
  325. std::string getName() const { return Name; }
  326. /// Return true if the intrinsic takes an immediate operand.
  327. bool hasImmediate() const {
  328. return Proto.find('i') != std::string::npos;
  329. }
  330. /// Return the parameter index of the immediate operand.
  331. unsigned getImmediateIdx() const {
  332. assert(hasImmediate());
  333. unsigned Idx = Proto.find('i');
  334. assert(Idx > 0 && "Can't return an immediate!");
  335. return Idx - 1;
  336. }
  337. /// Return true if the intrinsic takes an splat operand.
  338. bool hasSplat() const { return Proto.find('a') != std::string::npos; }
  339. /// Return the parameter index of the splat operand.
  340. unsigned getSplatIdx() const {
  341. assert(hasSplat());
  342. unsigned Idx = Proto.find('a');
  343. assert(Idx > 0 && "Can't return a splat!");
  344. return Idx - 1;
  345. }
  346. unsigned getNumParams() const { return Proto.size() - 1; }
  347. Type getReturnType() const { return Types[0]; }
  348. Type getParamType(unsigned I) const { return Types[I + 1]; }
  349. Type getBaseType() const { return BaseType; }
  350. /// Return the raw prototype string.
  351. std::string getProto() const { return Proto; }
  352. /// Return true if the prototype has a scalar argument.
  353. /// This does not return true for the "splat" code ('a').
  354. bool protoHasScalar();
  355. /// Return the index that parameter PIndex will sit at
  356. /// in a generated function call. This is often just PIndex,
  357. /// but may not be as things such as multiple-vector operands
  358. /// and sret parameters need to be taken into accont.
  359. unsigned getGeneratedParamIdx(unsigned PIndex) {
  360. unsigned Idx = 0;
  361. if (getReturnType().getNumVectors() > 1)
  362. // Multiple vectors are passed as sret.
  363. ++Idx;
  364. for (unsigned I = 0; I < PIndex; ++I)
  365. Idx += std::max(1U, getParamType(I).getNumVectors());
  366. return Idx;
  367. }
  368. bool hasBody() const { return Body && Body->getValues().size() > 0; }
  369. void setNeededEarly() { NeededEarly = true; }
  370. bool operator<(const Intrinsic &Other) const {
  371. // Sort lexicographically on a two-tuple (Guard, Name)
  372. if (Guard != Other.Guard)
  373. return Guard < Other.Guard;
  374. return Name < Other.Name;
  375. }
  376. ClassKind getClassKind(bool UseClassBIfScalar = false) {
  377. if (UseClassBIfScalar && !protoHasScalar())
  378. return ClassB;
  379. return CK;
  380. }
  381. /// Return the name, mangled with type information.
  382. /// If ForceClassS is true, use ClassS (u32/s32) instead
  383. /// of the intrinsic's own type class.
  384. std::string getMangledName(bool ForceClassS = false);
  385. /// Return the type code for a builtin function call.
  386. std::string getInstTypeCode(Type T, ClassKind CK);
  387. /// Return the type string for a BUILTIN() macro in Builtins.def.
  388. std::string getBuiltinTypeStr();
  389. /// Generate the intrinsic, returning code.
  390. std::string generate();
  391. /// Perform type checking and populate the dependency graph, but
  392. /// don't generate code yet.
  393. void indexBody();
  394. private:
  395. std::string mangleName(std::string Name, ClassKind CK);
  396. void initVariables();
  397. std::string replaceParamsIn(std::string S);
  398. void emitBodyAsBuiltinCall();
  399. void generateImpl(bool ReverseArguments,
  400. StringRef NamePrefix, StringRef CallPrefix);
  401. void emitReturn();
  402. void emitBody(StringRef CallPrefix);
  403. void emitShadowedArgs();
  404. void emitArgumentReversal();
  405. void emitReturnReversal();
  406. void emitReverseVariable(Variable &Dest, Variable &Src);
  407. void emitNewLine();
  408. void emitClosingBrace();
  409. void emitOpeningBrace();
  410. void emitPrototype(StringRef NamePrefix);
  411. class DagEmitter {
  412. Intrinsic &Intr;
  413. StringRef CallPrefix;
  414. public:
  415. DagEmitter(Intrinsic &Intr, StringRef CallPrefix) :
  416. Intr(Intr), CallPrefix(CallPrefix) {
  417. }
  418. std::pair<Type, std::string> emitDagArg(Init *Arg, std::string ArgName);
  419. std::pair<Type, std::string> emitDagSaveTemp(DagInit *DI);
  420. std::pair<Type, std::string> emitDagSplat(DagInit *DI);
  421. std::pair<Type, std::string> emitDagDup(DagInit *DI);
  422. std::pair<Type, std::string> emitDagShuffle(DagInit *DI);
  423. std::pair<Type, std::string> emitDagCast(DagInit *DI, bool IsBitCast);
  424. std::pair<Type, std::string> emitDagCall(DagInit *DI);
  425. std::pair<Type, std::string> emitDagNameReplace(DagInit *DI);
  426. std::pair<Type, std::string> emitDagLiteral(DagInit *DI);
  427. std::pair<Type, std::string> emitDagOp(DagInit *DI);
  428. std::pair<Type, std::string> emitDag(DagInit *DI);
  429. };
  430. };
  431. //===----------------------------------------------------------------------===//
  432. // NeonEmitter
  433. //===----------------------------------------------------------------------===//
  434. class NeonEmitter {
  435. RecordKeeper &Records;
  436. DenseMap<Record *, ClassKind> ClassMap;
  437. std::map<std::string, std::vector<Intrinsic *>> IntrinsicMap;
  438. unsigned UniqueNumber;
  439. void createIntrinsic(Record *R, SmallVectorImpl<Intrinsic *> &Out);
  440. void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs);
  441. void genOverloadTypeCheckCode(raw_ostream &OS,
  442. SmallVectorImpl<Intrinsic *> &Defs);
  443. void genIntrinsicRangeCheckCode(raw_ostream &OS,
  444. SmallVectorImpl<Intrinsic *> &Defs);
  445. public:
  446. /// Called by Intrinsic - this attempts to get an intrinsic that takes
  447. /// the given types as arguments.
  448. Intrinsic *getIntrinsic(StringRef Name, ArrayRef<Type> Types);
  449. /// Called by Intrinsic - returns a globally-unique number.
  450. unsigned getUniqueNumber() { return UniqueNumber++; }
  451. NeonEmitter(RecordKeeper &R) : Records(R), UniqueNumber(0) {
  452. Record *SI = R.getClass("SInst");
  453. Record *II = R.getClass("IInst");
  454. Record *WI = R.getClass("WInst");
  455. Record *SOpI = R.getClass("SOpInst");
  456. Record *IOpI = R.getClass("IOpInst");
  457. Record *WOpI = R.getClass("WOpInst");
  458. Record *LOpI = R.getClass("LOpInst");
  459. Record *NoTestOpI = R.getClass("NoTestOpInst");
  460. ClassMap[SI] = ClassS;
  461. ClassMap[II] = ClassI;
  462. ClassMap[WI] = ClassW;
  463. ClassMap[SOpI] = ClassS;
  464. ClassMap[IOpI] = ClassI;
  465. ClassMap[WOpI] = ClassW;
  466. ClassMap[LOpI] = ClassL;
  467. ClassMap[NoTestOpI] = ClassNoTest;
  468. }
  469. // run - Emit arm_neon.h.inc
  470. void run(raw_ostream &o);
  471. // runHeader - Emit all the __builtin prototypes used in arm_neon.h
  472. void runHeader(raw_ostream &o);
  473. // runTests - Emit tests for all the Neon intrinsics.
  474. void runTests(raw_ostream &o);
  475. };
  476. } // end anonymous namespace
  477. //===----------------------------------------------------------------------===//
  478. // Type implementation
  479. //===----------------------------------------------------------------------===//
  480. std::string Type::str() const {
  481. if (Void)
  482. return "void";
  483. std::string S;
  484. if (!Signed && isInteger())
  485. S += "u";
  486. if (Poly)
  487. S += "poly";
  488. else if (Float)
  489. S += "float";
  490. else
  491. S += "int";
  492. S += utostr(ElementBitwidth);
  493. if (isVector())
  494. S += "x" + utostr(getNumElements());
  495. if (NumVectors > 1)
  496. S += "x" + utostr(NumVectors);
  497. S += "_t";
  498. if (Constant)
  499. S += " const";
  500. if (Pointer)
  501. S += " *";
  502. return S;
  503. }
  504. std::string Type::builtin_str() const {
  505. std::string S;
  506. if (isVoid())
  507. return "v";
  508. if (Pointer)
  509. // All pointers are void pointers.
  510. S += "v";
  511. else if (isInteger())
  512. switch (ElementBitwidth) {
  513. case 8: S += "c"; break;
  514. case 16: S += "s"; break;
  515. case 32: S += "i"; break;
  516. case 64: S += "Wi"; break;
  517. case 128: S += "LLLi"; break;
  518. default: llvm_unreachable("Unhandled case!");
  519. }
  520. else
  521. switch (ElementBitwidth) {
  522. case 16: S += "h"; break;
  523. case 32: S += "f"; break;
  524. case 64: S += "d"; break;
  525. default: llvm_unreachable("Unhandled case!");
  526. }
  527. if (isChar() && !Pointer)
  528. // Make chars explicitly signed.
  529. S = "S" + S;
  530. else if (isInteger() && !Pointer && !Signed)
  531. S = "U" + S;
  532. // Constant indices are "int", but have the "constant expression" modifier.
  533. if (isImmediate()) {
  534. assert(isInteger() && isSigned());
  535. S = "I" + S;
  536. }
  537. if (isScalar()) {
  538. if (Constant) S += "C";
  539. if (Pointer) S += "*";
  540. return S;
  541. }
  542. std::string Ret;
  543. for (unsigned I = 0; I < NumVectors; ++I)
  544. Ret += "V" + utostr(getNumElements()) + S;
  545. return Ret;
  546. }
  547. unsigned Type::getNeonEnum() const {
  548. unsigned Addend;
  549. switch (ElementBitwidth) {
  550. case 8: Addend = 0; break;
  551. case 16: Addend = 1; break;
  552. case 32: Addend = 2; break;
  553. case 64: Addend = 3; break;
  554. case 128: Addend = 4; break;
  555. default: llvm_unreachable("Unhandled element bitwidth!");
  556. }
  557. unsigned Base = (unsigned)NeonTypeFlags::Int8 + Addend;
  558. if (Poly) {
  559. // Adjustment needed because Poly32 doesn't exist.
  560. if (Addend >= 2)
  561. --Addend;
  562. Base = (unsigned)NeonTypeFlags::Poly8 + Addend;
  563. }
  564. if (Float) {
  565. assert(Addend != 0 && "Float8 doesn't exist!");
  566. Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1);
  567. }
  568. if (Bitwidth == 128)
  569. Base |= (unsigned)NeonTypeFlags::QuadFlag;
  570. if (isInteger() && !Signed)
  571. Base |= (unsigned)NeonTypeFlags::UnsignedFlag;
  572. return Base;
  573. }
  574. Type Type::fromTypedefName(StringRef Name) {
  575. Type T;
  576. T.Void = false;
  577. T.Float = false;
  578. T.Poly = false;
  579. if (Name.front() == 'u') {
  580. T.Signed = false;
  581. Name = Name.drop_front();
  582. } else {
  583. T.Signed = true;
  584. }
  585. if (Name.startswith("float")) {
  586. T.Float = true;
  587. Name = Name.drop_front(5);
  588. } else if (Name.startswith("poly")) {
  589. T.Poly = true;
  590. Name = Name.drop_front(4);
  591. } else {
  592. assert(Name.startswith("int"));
  593. Name = Name.drop_front(3);
  594. }
  595. unsigned I = 0;
  596. for (I = 0; I < Name.size(); ++I) {
  597. if (!isdigit(Name[I]))
  598. break;
  599. }
  600. Name.substr(0, I).getAsInteger(10, T.ElementBitwidth);
  601. Name = Name.drop_front(I);
  602. T.Bitwidth = T.ElementBitwidth;
  603. T.NumVectors = 1;
  604. if (Name.front() == 'x') {
  605. Name = Name.drop_front();
  606. unsigned I = 0;
  607. for (I = 0; I < Name.size(); ++I) {
  608. if (!isdigit(Name[I]))
  609. break;
  610. }
  611. unsigned NumLanes;
  612. Name.substr(0, I).getAsInteger(10, NumLanes);
  613. Name = Name.drop_front(I);
  614. T.Bitwidth = T.ElementBitwidth * NumLanes;
  615. } else {
  616. // Was scalar.
  617. T.NumVectors = 0;
  618. }
  619. if (Name.front() == 'x') {
  620. Name = Name.drop_front();
  621. unsigned I = 0;
  622. for (I = 0; I < Name.size(); ++I) {
  623. if (!isdigit(Name[I]))
  624. break;
  625. }
  626. Name.substr(0, I).getAsInteger(10, T.NumVectors);
  627. Name = Name.drop_front(I);
  628. }
  629. assert(Name.startswith("_t") && "Malformed typedef!");
  630. return T;
  631. }
  632. void Type::applyTypespec(bool &Quad) {
  633. std::string S = TS;
  634. ScalarForMangling = false;
  635. Void = false;
  636. Poly = Float = false;
  637. ElementBitwidth = ~0U;
  638. Signed = true;
  639. NumVectors = 1;
  640. for (char I : S) {
  641. switch (I) {
  642. case 'S':
  643. ScalarForMangling = true;
  644. break;
  645. case 'H':
  646. NoManglingQ = true;
  647. Quad = true;
  648. break;
  649. case 'Q':
  650. Quad = true;
  651. break;
  652. case 'P':
  653. Poly = true;
  654. break;
  655. case 'U':
  656. Signed = false;
  657. break;
  658. case 'c':
  659. ElementBitwidth = 8;
  660. break;
  661. case 'h':
  662. Float = true;
  663. // Fall through
  664. case 's':
  665. ElementBitwidth = 16;
  666. break;
  667. case 'f':
  668. Float = true;
  669. // Fall through
  670. case 'i':
  671. ElementBitwidth = 32;
  672. break;
  673. case 'd':
  674. Float = true;
  675. // Fall through
  676. case 'l':
  677. ElementBitwidth = 64;
  678. break;
  679. case 'k':
  680. ElementBitwidth = 128;
  681. // Poly doesn't have a 128x1 type.
  682. if (Poly)
  683. NumVectors = 0;
  684. break;
  685. default:
  686. llvm_unreachable("Unhandled type code!");
  687. }
  688. }
  689. assert(ElementBitwidth != ~0U && "Bad element bitwidth!");
  690. Bitwidth = Quad ? 128 : 64;
  691. }
  692. void Type::applyModifier(char Mod) {
  693. bool AppliedQuad = false;
  694. applyTypespec(AppliedQuad);
  695. switch (Mod) {
  696. case 'v':
  697. Void = true;
  698. break;
  699. case 't':
  700. if (Poly) {
  701. Poly = false;
  702. Signed = false;
  703. }
  704. break;
  705. case 'b':
  706. Signed = false;
  707. Float = false;
  708. Poly = false;
  709. NumVectors = 0;
  710. Bitwidth = ElementBitwidth;
  711. break;
  712. case '$':
  713. Signed = true;
  714. Float = false;
  715. Poly = false;
  716. NumVectors = 0;
  717. Bitwidth = ElementBitwidth;
  718. break;
  719. case 'u':
  720. Signed = false;
  721. Poly = false;
  722. Float = false;
  723. break;
  724. case 'x':
  725. Signed = true;
  726. assert(!Poly && "'u' can't be used with poly types!");
  727. Float = false;
  728. break;
  729. case 'o':
  730. Bitwidth = ElementBitwidth = 64;
  731. NumVectors = 0;
  732. Float = true;
  733. break;
  734. case 'y':
  735. Bitwidth = ElementBitwidth = 32;
  736. NumVectors = 0;
  737. Float = true;
  738. break;
  739. case 'f':
  740. // Special case - if we're half-precision, a floating
  741. // point argument needs to be 128-bits (double size).
  742. if (isHalf())
  743. Bitwidth = 128;
  744. Float = true;
  745. ElementBitwidth = 32;
  746. break;
  747. case 'F':
  748. Float = true;
  749. ElementBitwidth = 64;
  750. break;
  751. case 'g':
  752. if (AppliedQuad)
  753. Bitwidth /= 2;
  754. break;
  755. case 'j':
  756. if (!AppliedQuad)
  757. Bitwidth *= 2;
  758. break;
  759. case 'w':
  760. ElementBitwidth *= 2;
  761. Bitwidth *= 2;
  762. break;
  763. case 'n':
  764. ElementBitwidth *= 2;
  765. break;
  766. case 'i':
  767. Float = false;
  768. Poly = false;
  769. ElementBitwidth = Bitwidth = 32;
  770. NumVectors = 0;
  771. Signed = true;
  772. Immediate = true;
  773. break;
  774. case 'l':
  775. Float = false;
  776. Poly = false;
  777. ElementBitwidth = Bitwidth = 64;
  778. NumVectors = 0;
  779. Signed = false;
  780. Immediate = true;
  781. break;
  782. case 'z':
  783. ElementBitwidth /= 2;
  784. Bitwidth = ElementBitwidth;
  785. NumVectors = 0;
  786. break;
  787. case 'r':
  788. ElementBitwidth *= 2;
  789. Bitwidth = ElementBitwidth;
  790. NumVectors = 0;
  791. break;
  792. case 's':
  793. case 'a':
  794. Bitwidth = ElementBitwidth;
  795. NumVectors = 0;
  796. break;
  797. case 'k':
  798. Bitwidth *= 2;
  799. break;
  800. case 'c':
  801. Constant = true;
  802. // Fall through
  803. case 'p':
  804. Pointer = true;
  805. Bitwidth = ElementBitwidth;
  806. NumVectors = 0;
  807. break;
  808. case 'h':
  809. ElementBitwidth /= 2;
  810. break;
  811. case 'q':
  812. ElementBitwidth /= 2;
  813. Bitwidth *= 2;
  814. break;
  815. case 'e':
  816. ElementBitwidth /= 2;
  817. Signed = false;
  818. break;
  819. case 'm':
  820. ElementBitwidth /= 2;
  821. Bitwidth /= 2;
  822. break;
  823. case 'd':
  824. break;
  825. case '2':
  826. NumVectors = 2;
  827. break;
  828. case '3':
  829. NumVectors = 3;
  830. break;
  831. case '4':
  832. NumVectors = 4;
  833. break;
  834. case 'B':
  835. NumVectors = 2;
  836. if (!AppliedQuad)
  837. Bitwidth *= 2;
  838. break;
  839. case 'C':
  840. NumVectors = 3;
  841. if (!AppliedQuad)
  842. Bitwidth *= 2;
  843. break;
  844. case 'D':
  845. NumVectors = 4;
  846. if (!AppliedQuad)
  847. Bitwidth *= 2;
  848. break;
  849. default:
  850. llvm_unreachable("Unhandled character!");
  851. }
  852. }
  853. //===----------------------------------------------------------------------===//
  854. // Intrinsic implementation
  855. //===----------------------------------------------------------------------===//
  856. std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) {
  857. char typeCode = '\0';
  858. bool printNumber = true;
  859. if (CK == ClassB)
  860. return "";
  861. if (T.isPoly())
  862. typeCode = 'p';
  863. else if (T.isInteger())
  864. typeCode = T.isSigned() ? 's' : 'u';
  865. else
  866. typeCode = 'f';
  867. if (CK == ClassI) {
  868. switch (typeCode) {
  869. default:
  870. break;
  871. case 's':
  872. case 'u':
  873. case 'p':
  874. typeCode = 'i';
  875. break;
  876. }
  877. }
  878. if (CK == ClassB) {
  879. typeCode = '\0';
  880. }
  881. std::string S;
  882. if (typeCode != '\0')
  883. S.push_back(typeCode);
  884. if (printNumber)
  885. S += utostr(T.getElementSizeInBits());
  886. return S;
  887. }
  888. std::string Intrinsic::getBuiltinTypeStr() {
  889. ClassKind LocalCK = getClassKind(true);
  890. std::string S;
  891. Type RetT = getReturnType();
  892. if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() &&
  893. !RetT.isFloating())
  894. RetT.makeInteger(RetT.getElementSizeInBits(), false);
  895. // Since the return value must be one type, return a vector type of the
  896. // appropriate width which we will bitcast. An exception is made for
  897. // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
  898. // fashion, storing them to a pointer arg.
  899. if (RetT.getNumVectors() > 1) {
  900. S += "vv*"; // void result with void* first argument
  901. } else {
  902. if (RetT.isPoly())
  903. RetT.makeInteger(RetT.getElementSizeInBits(), false);
  904. if (!RetT.isScalar() && !RetT.isSigned())
  905. RetT.makeSigned();
  906. bool ForcedVectorFloatingType = Proto[0] == 'F' || Proto[0] == 'f';
  907. if (LocalCK == ClassB && !RetT.isScalar() && !ForcedVectorFloatingType)
  908. // Cast to vector of 8-bit elements.
  909. RetT.makeInteger(8, true);
  910. S += RetT.builtin_str();
  911. }
  912. for (unsigned I = 0; I < getNumParams(); ++I) {
  913. Type T = getParamType(I);
  914. if (T.isPoly())
  915. T.makeInteger(T.getElementSizeInBits(), false);
  916. bool ForcedFloatingType = Proto[I + 1] == 'F' || Proto[I + 1] == 'f';
  917. if (LocalCK == ClassB && !T.isScalar() && !ForcedFloatingType)
  918. T.makeInteger(8, true);
  919. // Halves always get converted to 8-bit elements.
  920. if (T.isHalf() && T.isVector() && !T.isScalarForMangling())
  921. T.makeInteger(8, true);
  922. if (LocalCK == ClassI)
  923. T.makeSigned();
  924. if (hasImmediate() && getImmediateIdx() == I)
  925. T.makeImmediate(32);
  926. S += T.builtin_str();
  927. }
  928. // Extra constant integer to hold type class enum for this function, e.g. s8
  929. if (LocalCK == ClassB)
  930. S += "i";
  931. return S;
  932. }
  933. std::string Intrinsic::getMangledName(bool ForceClassS) {
  934. // Check if the prototype has a scalar operand with the type of the vector
  935. // elements. If not, bitcasting the args will take care of arg checking.
  936. // The actual signedness etc. will be taken care of with special enums.
  937. ClassKind LocalCK = CK;
  938. if (!protoHasScalar())
  939. LocalCK = ClassB;
  940. return mangleName(Name, ForceClassS ? ClassS : LocalCK);
  941. }
  942. std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) {
  943. std::string typeCode = getInstTypeCode(BaseType, LocalCK);
  944. std::string S = Name;
  945. if (Name == "vcvt_f32_f16" || Name == "vcvt_f32_f64" ||
  946. Name == "vcvt_f64_f32")
  947. return Name;
  948. if (typeCode.size() > 0) {
  949. // If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN.
  950. if (Name.size() >= 3 && isdigit(Name.back()) &&
  951. Name[Name.length() - 2] == 'x' && Name[Name.length() - 3] == '_')
  952. S.insert(S.length() - 3, "_" + typeCode);
  953. else
  954. S += "_" + typeCode;
  955. }
  956. if (BaseType != InBaseType) {
  957. // A reinterpret - out the input base type at the end.
  958. S += "_" + getInstTypeCode(InBaseType, LocalCK);
  959. }
  960. if (LocalCK == ClassB)
  961. S += "_v";
  962. // Insert a 'q' before the first '_' character so that it ends up before
  963. // _lane or _n on vector-scalar operations.
  964. if (BaseType.getSizeInBits() == 128 && !BaseType.noManglingQ()) {
  965. size_t Pos = S.find('_');
  966. S.insert(Pos, "q");
  967. }
  968. char Suffix = '\0';
  969. if (BaseType.isScalarForMangling()) {
  970. switch (BaseType.getElementSizeInBits()) {
  971. case 8: Suffix = 'b'; break;
  972. case 16: Suffix = 'h'; break;
  973. case 32: Suffix = 's'; break;
  974. case 64: Suffix = 'd'; break;
  975. default: llvm_unreachable("Bad suffix!");
  976. }
  977. }
  978. if (Suffix != '\0') {
  979. size_t Pos = S.find('_');
  980. S.insert(Pos, &Suffix, 1);
  981. }
  982. return S;
  983. }
  984. std::string Intrinsic::replaceParamsIn(std::string S) {
  985. while (S.find('$') != std::string::npos) {
  986. size_t Pos = S.find('$');
  987. size_t End = Pos + 1;
  988. while (isalpha(S[End]))
  989. ++End;
  990. std::string VarName = S.substr(Pos + 1, End - Pos - 1);
  991. assert_with_loc(Variables.find(VarName) != Variables.end(),
  992. "Variable not defined!");
  993. S.replace(Pos, End - Pos, Variables.find(VarName)->second.getName());
  994. }
  995. return S;
  996. }
  997. void Intrinsic::initVariables() {
  998. Variables.clear();
  999. // Modify the TypeSpec per-argument to get a concrete Type, and create
  1000. // known variables for each.
  1001. for (unsigned I = 1; I < Proto.size(); ++I) {
  1002. char NameC = '0' + (I - 1);
  1003. std::string Name = "p";
  1004. Name.push_back(NameC);
  1005. Variables[Name] = Variable(Types[I], Name + VariablePostfix);
  1006. }
  1007. RetVar = Variable(Types[0], "ret" + VariablePostfix);
  1008. }
  1009. void Intrinsic::emitPrototype(StringRef NamePrefix) {
  1010. if (UseMacro)
  1011. OS << "#define ";
  1012. else
  1013. OS << "__ai " << Types[0].str() << " ";
  1014. OS << NamePrefix.str() << mangleName(Name, ClassS) << "(";
  1015. for (unsigned I = 0; I < getNumParams(); ++I) {
  1016. if (I != 0)
  1017. OS << ", ";
  1018. char NameC = '0' + I;
  1019. std::string Name = "p";
  1020. Name.push_back(NameC);
  1021. assert(Variables.find(Name) != Variables.end());
  1022. Variable &V = Variables[Name];
  1023. if (!UseMacro)
  1024. OS << V.getType().str() << " ";
  1025. OS << V.getName();
  1026. }
  1027. OS << ")";
  1028. }
  1029. void Intrinsic::emitOpeningBrace() {
  1030. if (UseMacro)
  1031. OS << " __extension__ ({";
  1032. else
  1033. OS << " {";
  1034. emitNewLine();
  1035. }
  1036. void Intrinsic::emitClosingBrace() {
  1037. if (UseMacro)
  1038. OS << "})";
  1039. else
  1040. OS << "}";
  1041. }
  1042. void Intrinsic::emitNewLine() {
  1043. if (UseMacro)
  1044. OS << " \\\n";
  1045. else
  1046. OS << "\n";
  1047. }
  1048. void Intrinsic::emitReverseVariable(Variable &Dest, Variable &Src) {
  1049. if (Dest.getType().getNumVectors() > 1) {
  1050. emitNewLine();
  1051. for (unsigned K = 0; K < Dest.getType().getNumVectors(); ++K) {
  1052. OS << " " << Dest.getName() << ".val[" << utostr(K) << "] = "
  1053. << "__builtin_shufflevector("
  1054. << Src.getName() << ".val[" << utostr(K) << "], "
  1055. << Src.getName() << ".val[" << utostr(K) << "]";
  1056. for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J)
  1057. OS << ", " << utostr(J);
  1058. OS << ");";
  1059. emitNewLine();
  1060. }
  1061. } else {
  1062. OS << " " << Dest.getName()
  1063. << " = __builtin_shufflevector(" << Src.getName() << ", " << Src.getName();
  1064. for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J)
  1065. OS << ", " << utostr(J);
  1066. OS << ");";
  1067. emitNewLine();
  1068. }
  1069. }
  1070. void Intrinsic::emitArgumentReversal() {
  1071. if (BigEndianSafe)
  1072. return;
  1073. // Reverse all vector arguments.
  1074. for (unsigned I = 0; I < getNumParams(); ++I) {
  1075. std::string Name = "p" + utostr(I);
  1076. std::string NewName = "rev" + utostr(I);
  1077. Variable &V = Variables[Name];
  1078. Variable NewV(V.getType(), NewName + VariablePostfix);
  1079. if (!NewV.getType().isVector() || NewV.getType().getNumElements() == 1)
  1080. continue;
  1081. OS << " " << NewV.getType().str() << " " << NewV.getName() << ";";
  1082. emitReverseVariable(NewV, V);
  1083. V = NewV;
  1084. }
  1085. }
  1086. void Intrinsic::emitReturnReversal() {
  1087. if (BigEndianSafe)
  1088. return;
  1089. if (!getReturnType().isVector() || getReturnType().isVoid() ||
  1090. getReturnType().getNumElements() == 1)
  1091. return;
  1092. emitReverseVariable(RetVar, RetVar);
  1093. }
  1094. void Intrinsic::emitShadowedArgs() {
  1095. // Macro arguments are not type-checked like inline function arguments,
  1096. // so assign them to local temporaries to get the right type checking.
  1097. if (!UseMacro)
  1098. return;
  1099. for (unsigned I = 0; I < getNumParams(); ++I) {
  1100. // Do not create a temporary for an immediate argument.
  1101. // That would defeat the whole point of using a macro!
  1102. if (hasImmediate() && Proto[I+1] == 'i')
  1103. continue;
  1104. // Do not create a temporary for pointer arguments. The input
  1105. // pointer may have an alignment hint.
  1106. if (getParamType(I).isPointer())
  1107. continue;
  1108. std::string Name = "p" + utostr(I);
  1109. assert(Variables.find(Name) != Variables.end());
  1110. Variable &V = Variables[Name];
  1111. std::string NewName = "s" + utostr(I);
  1112. Variable V2(V.getType(), NewName + VariablePostfix);
  1113. OS << " " << V2.getType().str() << " " << V2.getName() << " = "
  1114. << V.getName() << ";";
  1115. emitNewLine();
  1116. V = V2;
  1117. }
  1118. }
  1119. // We don't check 'a' in this function, because for builtin function the
  1120. // argument matching to 'a' uses a vector type splatted from a scalar type.
  1121. bool Intrinsic::protoHasScalar() {
  1122. return (Proto.find('s') != std::string::npos ||
  1123. Proto.find('z') != std::string::npos ||
  1124. Proto.find('r') != std::string::npos ||
  1125. Proto.find('b') != std::string::npos ||
  1126. Proto.find('$') != std::string::npos ||
  1127. Proto.find('y') != std::string::npos ||
  1128. Proto.find('o') != std::string::npos);
  1129. }
  1130. void Intrinsic::emitBodyAsBuiltinCall() {
  1131. std::string S;
  1132. // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
  1133. // sret-like argument.
  1134. bool SRet = getReturnType().getNumVectors() >= 2;
  1135. StringRef N = Name;
  1136. if (hasSplat()) {
  1137. // Call the non-splat builtin: chop off the "_n" suffix from the name.
  1138. assert(N.endswith("_n"));
  1139. N = N.drop_back(2);
  1140. }
  1141. ClassKind LocalCK = CK;
  1142. if (!protoHasScalar())
  1143. LocalCK = ClassB;
  1144. if (!getReturnType().isVoid() && !SRet)
  1145. S += "(" + RetVar.getType().str() + ") ";
  1146. S += "__builtin_neon_" + mangleName(N, LocalCK) + "(";
  1147. if (SRet)
  1148. S += "&" + RetVar.getName() + ", ";
  1149. for (unsigned I = 0; I < getNumParams(); ++I) {
  1150. Variable &V = Variables["p" + utostr(I)];
  1151. Type T = V.getType();
  1152. // Handle multiple-vector values specially, emitting each subvector as an
  1153. // argument to the builtin.
  1154. if (T.getNumVectors() > 1) {
  1155. // Check if an explicit cast is needed.
  1156. std::string Cast;
  1157. if (T.isChar() || T.isPoly() || !T.isSigned()) {
  1158. Type T2 = T;
  1159. T2.makeOneVector();
  1160. T2.makeInteger(8, /*Signed=*/true);
  1161. Cast = "(" + T2.str() + ")";
  1162. }
  1163. for (unsigned J = 0; J < T.getNumVectors(); ++J)
  1164. S += Cast + V.getName() + ".val[" + utostr(J) + "], ";
  1165. continue;
  1166. }
  1167. std::string Arg;
  1168. Type CastToType = T;
  1169. if (hasSplat() && I == getSplatIdx()) {
  1170. Arg = "(" + BaseType.str() + ") {";
  1171. for (unsigned J = 0; J < BaseType.getNumElements(); ++J) {
  1172. if (J != 0)
  1173. Arg += ", ";
  1174. Arg += V.getName();
  1175. }
  1176. Arg += "}";
  1177. CastToType = BaseType;
  1178. } else {
  1179. Arg = V.getName();
  1180. }
  1181. // Check if an explicit cast is needed.
  1182. if (CastToType.isVector()) {
  1183. CastToType.makeInteger(8, true);
  1184. Arg = "(" + CastToType.str() + ")" + Arg;
  1185. }
  1186. S += Arg + ", ";
  1187. }
  1188. // Extra constant integer to hold type class enum for this function, e.g. s8
  1189. if (getClassKind(true) == ClassB) {
  1190. Type ThisTy = getReturnType();
  1191. if (Proto[0] == 'v' || Proto[0] == 'f' || Proto[0] == 'F')
  1192. ThisTy = getParamType(0);
  1193. if (ThisTy.isPointer())
  1194. ThisTy = getParamType(1);
  1195. S += utostr(ThisTy.getNeonEnum());
  1196. } else {
  1197. // Remove extraneous ", ".
  1198. S.pop_back();
  1199. S.pop_back();
  1200. }
  1201. S += ");";
  1202. std::string RetExpr;
  1203. if (!SRet && !RetVar.getType().isVoid())
  1204. RetExpr = RetVar.getName() + " = ";
  1205. OS << " " << RetExpr << S;
  1206. emitNewLine();
  1207. }
  1208. void Intrinsic::emitBody(StringRef CallPrefix) {
  1209. std::vector<std::string> Lines;
  1210. assert(RetVar.getType() == Types[0]);
  1211. // Create a return variable, if we're not void.
  1212. if (!RetVar.getType().isVoid()) {
  1213. OS << " " << RetVar.getType().str() << " " << RetVar.getName() << ";";
  1214. emitNewLine();
  1215. }
  1216. if (!Body || Body->getValues().size() == 0) {
  1217. // Nothing specific to output - must output a builtin.
  1218. emitBodyAsBuiltinCall();
  1219. return;
  1220. }
  1221. // We have a list of "things to output". The last should be returned.
  1222. for (auto *I : Body->getValues()) {
  1223. if (StringInit *SI = dyn_cast<StringInit>(I)) {
  1224. Lines.push_back(replaceParamsIn(SI->getAsString()));
  1225. } else if (DagInit *DI = dyn_cast<DagInit>(I)) {
  1226. DagEmitter DE(*this, CallPrefix);
  1227. Lines.push_back(DE.emitDag(DI).second + ";");
  1228. }
  1229. }
  1230. assert(!Lines.empty() && "Empty def?");
  1231. if (!RetVar.getType().isVoid())
  1232. Lines.back().insert(0, RetVar.getName() + " = ");
  1233. for (auto &L : Lines) {
  1234. OS << " " << L;
  1235. emitNewLine();
  1236. }
  1237. }
  1238. void Intrinsic::emitReturn() {
  1239. if (RetVar.getType().isVoid())
  1240. return;
  1241. if (UseMacro)
  1242. OS << " " << RetVar.getName() << ";";
  1243. else
  1244. OS << " return " << RetVar.getName() << ";";
  1245. emitNewLine();
  1246. }
  1247. std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(DagInit *DI) {
  1248. // At this point we should only be seeing a def.
  1249. DefInit *DefI = cast<DefInit>(DI->getOperator());
  1250. std::string Op = DefI->getAsString();
  1251. if (Op == "cast" || Op == "bitcast")
  1252. return emitDagCast(DI, Op == "bitcast");
  1253. if (Op == "shuffle")
  1254. return emitDagShuffle(DI);
  1255. if (Op == "dup")
  1256. return emitDagDup(DI);
  1257. if (Op == "splat")
  1258. return emitDagSplat(DI);
  1259. if (Op == "save_temp")
  1260. return emitDagSaveTemp(DI);
  1261. if (Op == "op")
  1262. return emitDagOp(DI);
  1263. if (Op == "call")
  1264. return emitDagCall(DI);
  1265. if (Op == "name_replace")
  1266. return emitDagNameReplace(DI);
  1267. if (Op == "literal")
  1268. return emitDagLiteral(DI);
  1269. assert_with_loc(false, "Unknown operation!");
  1270. return std::make_pair(Type::getVoid(), "");
  1271. }
  1272. std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagOp(DagInit *DI) {
  1273. std::string Op = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
  1274. if (DI->getNumArgs() == 2) {
  1275. // Unary op.
  1276. std::pair<Type, std::string> R =
  1277. emitDagArg(DI->getArg(1), DI->getArgName(1));
  1278. return std::make_pair(R.first, Op + R.second);
  1279. } else {
  1280. assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!");
  1281. std::pair<Type, std::string> R1 =
  1282. emitDagArg(DI->getArg(1), DI->getArgName(1));
  1283. std::pair<Type, std::string> R2 =
  1284. emitDagArg(DI->getArg(2), DI->getArgName(2));
  1285. assert_with_loc(R1.first == R2.first, "Argument type mismatch!");
  1286. return std::make_pair(R1.first, R1.second + " " + Op + " " + R2.second);
  1287. }
  1288. }
  1289. std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCall(DagInit *DI) {
  1290. std::vector<Type> Types;
  1291. std::vector<std::string> Values;
  1292. for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
  1293. std::pair<Type, std::string> R =
  1294. emitDagArg(DI->getArg(I + 1), DI->getArgName(I + 1));
  1295. Types.push_back(R.first);
  1296. Values.push_back(R.second);
  1297. }
  1298. // Look up the called intrinsic.
  1299. std::string N;
  1300. if (StringInit *SI = dyn_cast<StringInit>(DI->getArg(0)))
  1301. N = SI->getAsUnquotedString();
  1302. else
  1303. N = emitDagArg(DI->getArg(0), "").second;
  1304. Intrinsic *Callee = Intr.Emitter.getIntrinsic(N, Types);
  1305. assert(Callee && "getIntrinsic should not return us nullptr!");
  1306. // Make sure the callee is known as an early def.
  1307. Callee->setNeededEarly();
  1308. Intr.Dependencies.insert(Callee);
  1309. // Now create the call itself.
  1310. std::string S = CallPrefix.str() + Callee->getMangledName(true) + "(";
  1311. for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
  1312. if (I != 0)
  1313. S += ", ";
  1314. S += Values[I];
  1315. }
  1316. S += ")";
  1317. return std::make_pair(Callee->getReturnType(), S);
  1318. }
  1319. std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCast(DagInit *DI,
  1320. bool IsBitCast){
  1321. // (cast MOD* VAL) -> cast VAL to type given by MOD.
  1322. std::pair<Type, std::string> R = emitDagArg(
  1323. DI->getArg(DI->getNumArgs() - 1), DI->getArgName(DI->getNumArgs() - 1));
  1324. Type castToType = R.first;
  1325. for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) {
  1326. // MOD can take several forms:
  1327. // 1. $X - take the type of parameter / variable X.
  1328. // 2. The value "R" - take the type of the return type.
  1329. // 3. a type string
  1330. // 4. The value "U" or "S" to switch the signedness.
  1331. // 5. The value "H" or "D" to half or double the bitwidth.
  1332. // 6. The value "8" to convert to 8-bit (signed) integer lanes.
  1333. if (DI->getArgName(ArgIdx).size()) {
  1334. assert_with_loc(Intr.Variables.find(DI->getArgName(ArgIdx)) !=
  1335. Intr.Variables.end(),
  1336. "Variable not found");
  1337. castToType = Intr.Variables[DI->getArgName(ArgIdx)].getType();
  1338. } else {
  1339. StringInit *SI = dyn_cast<StringInit>(DI->getArg(ArgIdx));
  1340. assert_with_loc(SI, "Expected string type or $Name for cast type");
  1341. if (SI->getAsUnquotedString() == "R") {
  1342. castToType = Intr.getReturnType();
  1343. } else if (SI->getAsUnquotedString() == "U") {
  1344. castToType.makeUnsigned();
  1345. } else if (SI->getAsUnquotedString() == "S") {
  1346. castToType.makeSigned();
  1347. } else if (SI->getAsUnquotedString() == "H") {
  1348. castToType.halveLanes();
  1349. } else if (SI->getAsUnquotedString() == "D") {
  1350. castToType.doubleLanes();
  1351. } else if (SI->getAsUnquotedString() == "8") {
  1352. castToType.makeInteger(8, true);
  1353. } else {
  1354. castToType = Type::fromTypedefName(SI->getAsUnquotedString());
  1355. assert_with_loc(!castToType.isVoid(), "Unknown typedef");
  1356. }
  1357. }
  1358. }
  1359. std::string S;
  1360. if (IsBitCast) {
  1361. // Emit a reinterpret cast. The second operand must be an lvalue, so create
  1362. // a temporary.
  1363. std::string N = "reint";
  1364. unsigned I = 0;
  1365. while (Intr.Variables.find(N) != Intr.Variables.end())
  1366. N = "reint" + utostr(++I);
  1367. Intr.Variables[N] = Variable(R.first, N + Intr.VariablePostfix);
  1368. Intr.OS << R.first.str() << " " << Intr.Variables[N].getName() << " = "
  1369. << R.second << ";";
  1370. Intr.emitNewLine();
  1371. S = "*(" + castToType.str() + " *) &" + Intr.Variables[N].getName() + "";
  1372. } else {
  1373. // Emit a normal (static) cast.
  1374. S = "(" + castToType.str() + ")(" + R.second + ")";
  1375. }
  1376. return std::make_pair(castToType, S);
  1377. }
  1378. std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagShuffle(DagInit *DI){
  1379. // See the documentation in arm_neon.td for a description of these operators.
  1380. class LowHalf : public SetTheory::Operator {
  1381. public:
  1382. void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
  1383. ArrayRef<SMLoc> Loc) override {
  1384. SetTheory::RecSet Elts2;
  1385. ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc);
  1386. Elts.insert(Elts2.begin(), Elts2.begin() + (Elts2.size() / 2));
  1387. }
  1388. };
  1389. class HighHalf : public SetTheory::Operator {
  1390. public:
  1391. void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
  1392. ArrayRef<SMLoc> Loc) override {
  1393. SetTheory::RecSet Elts2;
  1394. ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc);
  1395. Elts.insert(Elts2.begin() + (Elts2.size() / 2), Elts2.end());
  1396. }
  1397. };
  1398. class Rev : public SetTheory::Operator {
  1399. unsigned ElementSize;
  1400. public:
  1401. Rev(unsigned ElementSize) : ElementSize(ElementSize) {}
  1402. void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
  1403. ArrayRef<SMLoc> Loc) override {
  1404. SetTheory::RecSet Elts2;
  1405. ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Elts2, Loc);
  1406. int64_t VectorSize = cast<IntInit>(Expr->getArg(0))->getValue();
  1407. VectorSize /= ElementSize;
  1408. std::vector<Record *> Revved;
  1409. for (unsigned VI = 0; VI < Elts2.size(); VI += VectorSize) {
  1410. for (int LI = VectorSize - 1; LI >= 0; --LI) {
  1411. Revved.push_back(Elts2[VI + LI]);
  1412. }
  1413. }
  1414. Elts.insert(Revved.begin(), Revved.end());
  1415. }
  1416. };
  1417. class MaskExpander : public SetTheory::Expander {
  1418. unsigned N;
  1419. public:
  1420. MaskExpander(unsigned N) : N(N) {}
  1421. void expand(SetTheory &ST, Record *R, SetTheory::RecSet &Elts) override {
  1422. unsigned Addend = 0;
  1423. if (R->getName() == "mask0")
  1424. Addend = 0;
  1425. else if (R->getName() == "mask1")
  1426. Addend = N;
  1427. else
  1428. return;
  1429. for (unsigned I = 0; I < N; ++I)
  1430. Elts.insert(R->getRecords().getDef("sv" + utostr(I + Addend)));
  1431. }
  1432. };
  1433. // (shuffle arg1, arg2, sequence)
  1434. std::pair<Type, std::string> Arg1 =
  1435. emitDagArg(DI->getArg(0), DI->getArgName(0));
  1436. std::pair<Type, std::string> Arg2 =
  1437. emitDagArg(DI->getArg(1), DI->getArgName(1));
  1438. assert_with_loc(Arg1.first == Arg2.first,
  1439. "Different types in arguments to shuffle!");
  1440. SetTheory ST;
  1441. SetTheory::RecSet Elts;
  1442. ST.addOperator("lowhalf", llvm::make_unique<LowHalf>());
  1443. ST.addOperator("highhalf", llvm::make_unique<HighHalf>());
  1444. ST.addOperator("rev",
  1445. llvm::make_unique<Rev>(Arg1.first.getElementSizeInBits()));
  1446. ST.addExpander("MaskExpand",
  1447. llvm::make_unique<MaskExpander>(Arg1.first.getNumElements()));
  1448. ST.evaluate(DI->getArg(2), Elts, None);
  1449. std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second;
  1450. for (auto &E : Elts) {
  1451. StringRef Name = E->getName();
  1452. assert_with_loc(Name.startswith("sv"),
  1453. "Incorrect element kind in shuffle mask!");
  1454. S += ", " + Name.drop_front(2).str();
  1455. }
  1456. S += ")";
  1457. // Recalculate the return type - the shuffle may have halved or doubled it.
  1458. Type T(Arg1.first);
  1459. if (Elts.size() > T.getNumElements()) {
  1460. assert_with_loc(
  1461. Elts.size() == T.getNumElements() * 2,
  1462. "Can only double or half the number of elements in a shuffle!");
  1463. T.doubleLanes();
  1464. } else if (Elts.size() < T.getNumElements()) {
  1465. assert_with_loc(
  1466. Elts.size() == T.getNumElements() / 2,
  1467. "Can only double or half the number of elements in a shuffle!");
  1468. T.halveLanes();
  1469. }
  1470. return std::make_pair(T, S);
  1471. }
  1472. std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDup(DagInit *DI) {
  1473. assert_with_loc(DI->getNumArgs() == 1, "dup() expects one argument");
  1474. std::pair<Type, std::string> A = emitDagArg(DI->getArg(0), DI->getArgName(0));
  1475. assert_with_loc(A.first.isScalar(), "dup() expects a scalar argument");
  1476. Type T = Intr.getBaseType();
  1477. assert_with_loc(T.isVector(), "dup() used but default type is scalar!");
  1478. std::string S = "(" + T.str() + ") {";
  1479. for (unsigned I = 0; I < T.getNumElements(); ++I) {
  1480. if (I != 0)
  1481. S += ", ";
  1482. S += A.second;
  1483. }
  1484. S += "}";
  1485. return std::make_pair(T, S);
  1486. }
  1487. std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSplat(DagInit *DI) {
  1488. assert_with_loc(DI->getNumArgs() == 2, "splat() expects two arguments");
  1489. std::pair<Type, std::string> A = emitDagArg(DI->getArg(0), DI->getArgName(0));
  1490. std::pair<Type, std::string> B = emitDagArg(DI->getArg(1), DI->getArgName(1));
  1491. assert_with_loc(B.first.isScalar(),
  1492. "splat() requires a scalar int as the second argument");
  1493. std::string S = "__builtin_shufflevector(" + A.second + ", " + A.second;
  1494. for (unsigned I = 0; I < Intr.getBaseType().getNumElements(); ++I) {
  1495. S += ", " + B.second;
  1496. }
  1497. S += ")";
  1498. return std::make_pair(Intr.getBaseType(), S);
  1499. }
  1500. std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSaveTemp(DagInit *DI) {
  1501. assert_with_loc(DI->getNumArgs() == 2, "save_temp() expects two arguments");
  1502. std::pair<Type, std::string> A = emitDagArg(DI->getArg(1), DI->getArgName(1));
  1503. assert_with_loc(!A.first.isVoid(),
  1504. "Argument to save_temp() must have non-void type!");
  1505. std::string N = DI->getArgName(0);
  1506. assert_with_loc(N.size(), "save_temp() expects a name as the first argument");
  1507. assert_with_loc(Intr.Variables.find(N) == Intr.Variables.end(),
  1508. "Variable already defined!");
  1509. Intr.Variables[N] = Variable(A.first, N + Intr.VariablePostfix);
  1510. std::string S =
  1511. A.first.str() + " " + Intr.Variables[N].getName() + " = " + A.second;
  1512. return std::make_pair(Type::getVoid(), S);
  1513. }
  1514. std::pair<Type, std::string>
  1515. Intrinsic::DagEmitter::emitDagNameReplace(DagInit *DI) {
  1516. std::string S = Intr.Name;
  1517. assert_with_loc(DI->getNumArgs() == 2, "name_replace requires 2 arguments!");
  1518. std::string ToReplace = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
  1519. std::string ReplaceWith = cast<StringInit>(DI->getArg(1))->getAsUnquotedString();
  1520. size_t Idx = S.find(ToReplace);
  1521. assert_with_loc(Idx != std::string::npos, "name should contain '" + ToReplace + "'!");
  1522. S.replace(Idx, ToReplace.size(), ReplaceWith);
  1523. return std::make_pair(Type::getVoid(), S);
  1524. }
  1525. std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagLiteral(DagInit *DI){
  1526. std::string Ty = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
  1527. std::string Value = cast<StringInit>(DI->getArg(1))->getAsUnquotedString();
  1528. return std::make_pair(Type::fromTypedefName(Ty), Value);
  1529. }
  1530. std::pair<Type, std::string>
  1531. Intrinsic::DagEmitter::emitDagArg(Init *Arg, std::string ArgName) {
  1532. if (ArgName.size()) {
  1533. assert_with_loc(!Arg->isComplete(),
  1534. "Arguments must either be DAGs or names, not both!");
  1535. assert_with_loc(Intr.Variables.find(ArgName) != Intr.Variables.end(),
  1536. "Variable not defined!");
  1537. Variable &V = Intr.Variables[ArgName];
  1538. return std::make_pair(V.getType(), V.getName());
  1539. }
  1540. assert(Arg && "Neither ArgName nor Arg?!");
  1541. DagInit *DI = dyn_cast<DagInit>(Arg);
  1542. assert_with_loc(DI, "Arguments must either be DAGs or names!");
  1543. return emitDag(DI);
  1544. }
  1545. std::string Intrinsic::generate() {
  1546. // Little endian intrinsics are simple and don't require any argument
  1547. // swapping.
  1548. OS << "#ifdef __LITTLE_ENDIAN__\n";
  1549. generateImpl(false, "", "");
  1550. OS << "#else\n";
  1551. // Big endian intrinsics are more complex. The user intended these
  1552. // intrinsics to operate on a vector "as-if" loaded by (V)LDR,
  1553. // but we load as-if (V)LD1. So we should swap all arguments and
  1554. // swap the return value too.
  1555. //
  1556. // If we call sub-intrinsics, we should call a version that does
  1557. // not re-swap the arguments!
  1558. generateImpl(true, "", "__noswap_");
  1559. // If we're needed early, create a non-swapping variant for
  1560. // big-endian.
  1561. if (NeededEarly) {
  1562. generateImpl(false, "__noswap_", "__noswap_");
  1563. }
  1564. OS << "#endif\n\n";
  1565. return OS.str();
  1566. }
  1567. void Intrinsic::generateImpl(bool ReverseArguments,
  1568. StringRef NamePrefix, StringRef CallPrefix) {
  1569. CurrentRecord = R;
  1570. // If we call a macro, our local variables may be corrupted due to
  1571. // lack of proper lexical scoping. So, add a globally unique postfix
  1572. // to every variable.
  1573. //
  1574. // indexBody() should have set up the Dependencies set by now.
  1575. for (auto *I : Dependencies)
  1576. if (I->UseMacro) {
  1577. VariablePostfix = "_" + utostr(Emitter.getUniqueNumber());
  1578. break;
  1579. }
  1580. initVariables();
  1581. emitPrototype(NamePrefix);
  1582. if (IsUnavailable) {
  1583. OS << " __attribute__((unavailable));";
  1584. } else {
  1585. emitOpeningBrace();
  1586. emitShadowedArgs();
  1587. if (ReverseArguments)
  1588. emitArgumentReversal();
  1589. emitBody(CallPrefix);
  1590. if (ReverseArguments)
  1591. emitReturnReversal();
  1592. emitReturn();
  1593. emitClosingBrace();
  1594. }
  1595. OS << "\n";
  1596. CurrentRecord = nullptr;
  1597. }
  1598. void Intrinsic::indexBody() {
  1599. CurrentRecord = R;
  1600. initVariables();
  1601. emitBody("");
  1602. OS.str("");
  1603. CurrentRecord = nullptr;
  1604. }
  1605. //===----------------------------------------------------------------------===//
  1606. // NeonEmitter implementation
  1607. //===----------------------------------------------------------------------===//
  1608. Intrinsic *NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types) {
  1609. // First, look up the name in the intrinsic map.
  1610. assert_with_loc(IntrinsicMap.find(Name.str()) != IntrinsicMap.end(),
  1611. ("Intrinsic '" + Name + "' not found!").str());
  1612. std::vector<Intrinsic *> &V = IntrinsicMap[Name.str()];
  1613. std::vector<Intrinsic *> GoodVec;
  1614. // Create a string to print if we end up failing.
  1615. std::string ErrMsg = "looking up intrinsic '" + Name.str() + "(";
  1616. for (unsigned I = 0; I < Types.size(); ++I) {
  1617. if (I != 0)
  1618. ErrMsg += ", ";
  1619. ErrMsg += Types[I].str();
  1620. }
  1621. ErrMsg += ")'\n";
  1622. ErrMsg += "Available overloads:\n";
  1623. // Now, look through each intrinsic implementation and see if the types are
  1624. // compatible.
  1625. for (auto *I : V) {
  1626. ErrMsg += " - " + I->getReturnType().str() + " " + I->getMangledName();
  1627. ErrMsg += "(";
  1628. for (unsigned A = 0; A < I->getNumParams(); ++A) {
  1629. if (A != 0)
  1630. ErrMsg += ", ";
  1631. ErrMsg += I->getParamType(A).str();
  1632. }
  1633. ErrMsg += ")\n";
  1634. if (I->getNumParams() != Types.size())
  1635. continue;
  1636. bool Good = true;
  1637. for (unsigned Arg = 0; Arg < Types.size(); ++Arg) {
  1638. if (I->getParamType(Arg) != Types[Arg]) {
  1639. Good = false;
  1640. break;
  1641. }
  1642. }
  1643. if (Good)
  1644. GoodVec.push_back(I);
  1645. }
  1646. assert_with_loc(GoodVec.size() > 0,
  1647. "No compatible intrinsic found - " + ErrMsg);
  1648. assert_with_loc(GoodVec.size() == 1, "Multiple overloads found - " + ErrMsg);
  1649. return GoodVec.front();
  1650. }
  1651. void NeonEmitter::createIntrinsic(Record *R,
  1652. SmallVectorImpl<Intrinsic *> &Out) {
  1653. std::string Name = R->getValueAsString("Name");
  1654. std::string Proto = R->getValueAsString("Prototype");
  1655. std::string Types = R->getValueAsString("Types");
  1656. Record *OperationRec = R->getValueAsDef("Operation");
  1657. bool CartesianProductOfTypes = R->getValueAsBit("CartesianProductOfTypes");
  1658. bool BigEndianSafe = R->getValueAsBit("BigEndianSafe");
  1659. std::string Guard = R->getValueAsString("ArchGuard");
  1660. bool IsUnavailable = OperationRec->getValueAsBit("Unavailable");
  1661. // Set the global current record. This allows assert_with_loc to produce
  1662. // decent location information even when highly nested.
  1663. CurrentRecord = R;
  1664. ListInit *Body = OperationRec->getValueAsListInit("Ops");
  1665. std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Types);
  1666. ClassKind CK = ClassNone;
  1667. if (R->getSuperClasses().size() >= 2)
  1668. CK = ClassMap[R->getSuperClasses()[1]];
  1669. std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs;
  1670. for (auto TS : TypeSpecs) {
  1671. if (CartesianProductOfTypes) {
  1672. Type DefaultT(TS, 'd');
  1673. for (auto SrcTS : TypeSpecs) {
  1674. Type DefaultSrcT(SrcTS, 'd');
  1675. if (TS == SrcTS ||
  1676. DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits())
  1677. continue;
  1678. NewTypeSpecs.push_back(std::make_pair(TS, SrcTS));
  1679. }
  1680. } else {
  1681. NewTypeSpecs.push_back(std::make_pair(TS, TS));
  1682. }
  1683. }
  1684. std::sort(NewTypeSpecs.begin(), NewTypeSpecs.end());
  1685. NewTypeSpecs.erase(std::unique(NewTypeSpecs.begin(), NewTypeSpecs.end()),
  1686. NewTypeSpecs.end());
  1687. for (auto &I : NewTypeSpecs) {
  1688. Intrinsic *IT = new Intrinsic(R, Name, Proto, I.first, I.second, CK, Body,
  1689. *this, Guard, IsUnavailable, BigEndianSafe);
  1690. IntrinsicMap[Name].push_back(IT);
  1691. Out.push_back(IT);
  1692. }
  1693. CurrentRecord = nullptr;
  1694. }
  1695. /// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def
  1696. /// declaration of builtins, checking for unique builtin declarations.
  1697. void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
  1698. SmallVectorImpl<Intrinsic *> &Defs) {
  1699. OS << "#ifdef GET_NEON_BUILTINS\n";
  1700. // We only want to emit a builtin once, and we want to emit them in
  1701. // alphabetical order, so use a std::set.
  1702. std::set<std::string> Builtins;
  1703. for (auto *Def : Defs) {
  1704. if (Def->hasBody())
  1705. continue;
  1706. // Functions with 'a' (the splat code) in the type prototype should not get
  1707. // their own builtin as they use the non-splat variant.
  1708. if (Def->hasSplat())
  1709. continue;
  1710. std::string S = "BUILTIN(__builtin_neon_" + Def->getMangledName() + ", \"";
  1711. S += Def->getBuiltinTypeStr();
  1712. S += "\", \"n\")";
  1713. Builtins.insert(S);
  1714. }
  1715. for (auto &S : Builtins)
  1716. OS << S << "\n";
  1717. OS << "#endif\n\n";
  1718. }
  1719. /// Generate the ARM and AArch64 overloaded type checking code for
  1720. /// SemaChecking.cpp, checking for unique builtin declarations.
  1721. void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
  1722. SmallVectorImpl<Intrinsic *> &Defs) {
  1723. OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
  1724. // We record each overload check line before emitting because subsequent Inst
  1725. // definitions may extend the number of permitted types (i.e. augment the
  1726. // Mask). Use std::map to avoid sorting the table by hash number.
  1727. struct OverloadInfo {
  1728. uint64_t Mask;
  1729. int PtrArgNum;
  1730. bool HasConstPtr;
  1731. OverloadInfo() : Mask(0ULL), PtrArgNum(0), HasConstPtr(false) {}
  1732. };
  1733. std::map<std::string, OverloadInfo> OverloadMap;
  1734. for (auto *Def : Defs) {
  1735. // If the def has a body (that is, it has Operation DAGs), it won't call
  1736. // __builtin_neon_* so we don't need to generate a definition for it.
  1737. if (Def->hasBody())
  1738. continue;
  1739. // Functions with 'a' (the splat code) in the type prototype should not get
  1740. // their own builtin as they use the non-splat variant.
  1741. if (Def->hasSplat())
  1742. continue;
  1743. // Functions which have a scalar argument cannot be overloaded, no need to
  1744. // check them if we are emitting the type checking code.
  1745. if (Def->protoHasScalar())
  1746. continue;
  1747. uint64_t Mask = 0ULL;
  1748. Type Ty = Def->getReturnType();
  1749. if (Def->getProto()[0] == 'v' || Def->getProto()[0] == 'f' ||
  1750. Def->getProto()[0] == 'F')
  1751. Ty = Def->getParamType(0);
  1752. if (Ty.isPointer())
  1753. Ty = Def->getParamType(1);
  1754. Mask |= 1ULL << Ty.getNeonEnum();
  1755. // Check if the function has a pointer or const pointer argument.
  1756. std::string Proto = Def->getProto();
  1757. int PtrArgNum = -1;
  1758. bool HasConstPtr = false;
  1759. for (unsigned I = 0; I < Def->getNumParams(); ++I) {
  1760. char ArgType = Proto[I + 1];
  1761. if (ArgType == 'c') {
  1762. HasConstPtr = true;
  1763. PtrArgNum = I;
  1764. break;
  1765. }
  1766. if (ArgType == 'p') {
  1767. PtrArgNum = I;
  1768. break;
  1769. }
  1770. }
  1771. // For sret builtins, adjust the pointer argument index.
  1772. if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1)
  1773. PtrArgNum += 1;
  1774. std::string Name = Def->getName();
  1775. // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
  1776. // and vst1_lane intrinsics. Using a pointer to the vector element
  1777. // type with one of those operations causes codegen to select an aligned
  1778. // load/store instruction. If you want an unaligned operation,
  1779. // the pointer argument needs to have less alignment than element type,
  1780. // so just accept any pointer type.
  1781. if (Name == "vld1_lane" || Name == "vld1_dup" || Name == "vst1_lane") {
  1782. PtrArgNum = -1;
  1783. HasConstPtr = false;
  1784. }
  1785. if (Mask) {
  1786. std::string Name = Def->getMangledName();
  1787. OverloadMap.insert(std::make_pair(Name, OverloadInfo()));
  1788. OverloadInfo &OI = OverloadMap[Name];
  1789. OI.Mask |= Mask;
  1790. OI.PtrArgNum |= PtrArgNum;
  1791. OI.HasConstPtr = HasConstPtr;
  1792. }
  1793. }
  1794. for (auto &I : OverloadMap) {
  1795. OverloadInfo &OI = I.second;
  1796. OS << "case NEON::BI__builtin_neon_" << I.first << ": ";
  1797. OS << "mask = 0x" << utohexstr(OI.Mask) << "ULL";
  1798. if (OI.PtrArgNum >= 0)
  1799. OS << "; PtrArgNum = " << OI.PtrArgNum;
  1800. if (OI.HasConstPtr)
  1801. OS << "; HasConstPtr = true";
  1802. OS << "; break;\n";
  1803. }
  1804. OS << "#endif\n\n";
  1805. }
  1806. void
  1807. NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
  1808. SmallVectorImpl<Intrinsic *> &Defs) {
  1809. OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
  1810. std::set<std::string> Emitted;
  1811. for (auto *Def : Defs) {
  1812. if (Def->hasBody())
  1813. continue;
  1814. // Functions with 'a' (the splat code) in the type prototype should not get
  1815. // their own builtin as they use the non-splat variant.
  1816. if (Def->hasSplat())
  1817. continue;
  1818. // Functions which do not have an immediate do not need to have range
  1819. // checking code emitted.
  1820. if (!Def->hasImmediate())
  1821. continue;
  1822. if (Emitted.find(Def->getMangledName()) != Emitted.end())
  1823. continue;
  1824. std::string LowerBound, UpperBound;
  1825. Record *R = Def->getRecord();
  1826. if (R->getValueAsBit("isVCVT_N")) {
  1827. // VCVT between floating- and fixed-point values takes an immediate
  1828. // in the range [1, 32) for f32 or [1, 64) for f64.
  1829. LowerBound = "1";
  1830. if (Def->getBaseType().getElementSizeInBits() == 32)
  1831. UpperBound = "31";
  1832. else
  1833. UpperBound = "63";
  1834. } else if (R->getValueAsBit("isScalarShift")) {
  1835. // Right shifts have an 'r' in the name, left shifts do not. Convert
  1836. // instructions have the same bounds and right shifts.
  1837. if (Def->getName().find('r') != std::string::npos ||
  1838. Def->getName().find("cvt") != std::string::npos)
  1839. LowerBound = "1";
  1840. UpperBound = utostr(Def->getReturnType().getElementSizeInBits() - 1);
  1841. } else if (R->getValueAsBit("isShift")) {
  1842. // Builtins which are overloaded by type will need to have their upper
  1843. // bound computed at Sema time based on the type constant.
  1844. // Right shifts have an 'r' in the name, left shifts do not.
  1845. if (Def->getName().find('r') != std::string::npos)
  1846. LowerBound = "1";
  1847. UpperBound = "RFT(TV, true)";
  1848. } else if (Def->getClassKind(true) == ClassB) {
  1849. // ClassB intrinsics have a type (and hence lane number) that is only
  1850. // known at runtime.
  1851. if (R->getValueAsBit("isLaneQ"))
  1852. UpperBound = "RFT(TV, false, true)";
  1853. else
  1854. UpperBound = "RFT(TV, false, false)";
  1855. } else {
  1856. // The immediate generally refers to a lane in the preceding argument.
  1857. assert(Def->getImmediateIdx() > 0);
  1858. Type T = Def->getParamType(Def->getImmediateIdx() - 1);
  1859. UpperBound = utostr(T.getNumElements() - 1);
  1860. }
  1861. // Calculate the index of the immediate that should be range checked.
  1862. unsigned Idx = Def->getNumParams();
  1863. if (Def->hasImmediate())
  1864. Idx = Def->getGeneratedParamIdx(Def->getImmediateIdx());
  1865. OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ": "
  1866. << "i = " << Idx << ";";
  1867. if (LowerBound.size())
  1868. OS << " l = " << LowerBound << ";";
  1869. if (UpperBound.size())
  1870. OS << " u = " << UpperBound << ";";
  1871. OS << " break;\n";
  1872. Emitted.insert(Def->getMangledName());
  1873. }
  1874. OS << "#endif\n\n";
  1875. }
  1876. /// runHeader - Emit a file with sections defining:
  1877. /// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
  1878. /// 2. the SemaChecking code for the type overload checking.
  1879. /// 3. the SemaChecking code for validation of intrinsic immediate arguments.
  1880. void NeonEmitter::runHeader(raw_ostream &OS) {
  1881. std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  1882. SmallVector<Intrinsic *, 128> Defs;
  1883. for (auto *R : RV)
  1884. createIntrinsic(R, Defs);
  1885. // Generate shared BuiltinsXXX.def
  1886. genBuiltinsDef(OS, Defs);
  1887. // Generate ARM overloaded type checking code for SemaChecking.cpp
  1888. genOverloadTypeCheckCode(OS, Defs);
  1889. // Generate ARM range checking code for shift/lane immediates.
  1890. genIntrinsicRangeCheckCode(OS, Defs);
  1891. }
  1892. /// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h
  1893. /// is comprised of type definitions and function declarations.
  1894. void NeonEmitter::run(raw_ostream &OS) {
  1895. OS << "/*===---- arm_neon.h - ARM Neon intrinsics "
  1896. "------------------------------"
  1897. "---===\n"
  1898. " *\n"
  1899. " * Permission is hereby granted, free of charge, to any person "
  1900. "obtaining "
  1901. "a copy\n"
  1902. " * of this software and associated documentation files (the "
  1903. "\"Software\"),"
  1904. " to deal\n"
  1905. " * in the Software without restriction, including without limitation "
  1906. "the "
  1907. "rights\n"
  1908. " * to use, copy, modify, merge, publish, distribute, sublicense, "
  1909. "and/or sell\n"
  1910. " * copies of the Software, and to permit persons to whom the Software "
  1911. "is\n"
  1912. " * furnished to do so, subject to the following conditions:\n"
  1913. " *\n"
  1914. " * The above copyright notice and this permission notice shall be "
  1915. "included in\n"
  1916. " * all copies or substantial portions of the Software.\n"
  1917. " *\n"
  1918. " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
  1919. "EXPRESS OR\n"
  1920. " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
  1921. "MERCHANTABILITY,\n"
  1922. " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
  1923. "SHALL THE\n"
  1924. " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
  1925. "OTHER\n"
  1926. " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
  1927. "ARISING FROM,\n"
  1928. " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
  1929. "DEALINGS IN\n"
  1930. " * THE SOFTWARE.\n"
  1931. " *\n"
  1932. " *===-----------------------------------------------------------------"
  1933. "---"
  1934. "---===\n"
  1935. " */\n\n";
  1936. OS << "#ifndef __ARM_NEON_H\n";
  1937. OS << "#define __ARM_NEON_H\n\n";
  1938. OS << "#if !defined(__ARM_NEON)\n";
  1939. OS << "#error \"NEON support not enabled\"\n";
  1940. OS << "#endif\n\n";
  1941. OS << "#include <stdint.h>\n\n";
  1942. // Emit NEON-specific scalar typedefs.
  1943. OS << "typedef float float32_t;\n";
  1944. OS << "typedef __fp16 float16_t;\n";
  1945. OS << "#ifdef __aarch64__\n";
  1946. OS << "typedef double float64_t;\n";
  1947. OS << "#endif\n\n";
  1948. // For now, signedness of polynomial types depends on target
  1949. OS << "#ifdef __aarch64__\n";
  1950. OS << "typedef uint8_t poly8_t;\n";
  1951. OS << "typedef uint16_t poly16_t;\n";
  1952. OS << "typedef uint64_t poly64_t;\n";
  1953. OS << "typedef __uint128_t poly128_t;\n";
  1954. OS << "#else\n";
  1955. OS << "typedef int8_t poly8_t;\n";
  1956. OS << "typedef int16_t poly16_t;\n";
  1957. OS << "#endif\n";
  1958. // Emit Neon vector typedefs.
  1959. std::string TypedefTypes(
  1960. "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl");
  1961. std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes);
  1962. // Emit vector typedefs.
  1963. bool InIfdef = false;
  1964. for (auto &TS : TDTypeVec) {
  1965. bool IsA64 = false;
  1966. Type T(TS, 'd');
  1967. if (T.isDouble() || (T.isPoly() && T.isLong()))
  1968. IsA64 = true;
  1969. if (InIfdef && !IsA64) {
  1970. OS << "#endif\n";
  1971. InIfdef = false;
  1972. }
  1973. if (!InIfdef && IsA64) {
  1974. OS << "#ifdef __aarch64__\n";
  1975. InIfdef = true;
  1976. }
  1977. if (T.isPoly())
  1978. OS << "typedef __attribute__((neon_polyvector_type(";
  1979. else
  1980. OS << "typedef __attribute__((neon_vector_type(";
  1981. Type T2 = T;
  1982. T2.makeScalar();
  1983. OS << utostr(T.getNumElements()) << "))) ";
  1984. OS << T2.str();
  1985. OS << " " << T.str() << ";\n";
  1986. }
  1987. if (InIfdef)
  1988. OS << "#endif\n";
  1989. OS << "\n";
  1990. // Emit struct typedefs.
  1991. InIfdef = false;
  1992. for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) {
  1993. for (auto &TS : TDTypeVec) {
  1994. bool IsA64 = false;
  1995. Type T(TS, 'd');
  1996. if (T.isDouble() || (T.isPoly() && T.isLong()))
  1997. IsA64 = true;
  1998. if (InIfdef && !IsA64) {
  1999. OS << "#endif\n";
  2000. InIfdef = false;
  2001. }
  2002. if (!InIfdef && IsA64) {
  2003. OS << "#ifdef __aarch64__\n";
  2004. InIfdef = true;
  2005. }
  2006. char M = '2' + (NumMembers - 2);
  2007. Type VT(TS, M);
  2008. OS << "typedef struct " << VT.str() << " {\n";
  2009. OS << " " << T.str() << " val";
  2010. OS << "[" << utostr(NumMembers) << "]";
  2011. OS << ";\n} ";
  2012. OS << VT.str() << ";\n";
  2013. OS << "\n";
  2014. }
  2015. }
  2016. if (InIfdef)
  2017. OS << "#endif\n";
  2018. OS << "\n";
  2019. OS << "#define __ai static inline __attribute__((__always_inline__, "
  2020. "__nodebug__))\n\n";
  2021. SmallVector<Intrinsic *, 128> Defs;
  2022. std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  2023. for (auto *R : RV)
  2024. createIntrinsic(R, Defs);
  2025. for (auto *I : Defs)
  2026. I->indexBody();
  2027. std::stable_sort(
  2028. Defs.begin(), Defs.end(),
  2029. [](const Intrinsic *A, const Intrinsic *B) { return *A < *B; });
  2030. // Only emit a def when its requirements have been met.
  2031. // FIXME: This loop could be made faster, but it's fast enough for now.
  2032. bool MadeProgress = true;
  2033. std::string InGuard = "";
  2034. while (!Defs.empty() && MadeProgress) {
  2035. MadeProgress = false;
  2036. for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
  2037. I != Defs.end(); /*No step*/) {
  2038. bool DependenciesSatisfied = true;
  2039. for (auto *II : (*I)->getDependencies()) {
  2040. if (std::find(Defs.begin(), Defs.end(), II) != Defs.end())
  2041. DependenciesSatisfied = false;
  2042. }
  2043. if (!DependenciesSatisfied) {
  2044. // Try the next one.
  2045. ++I;
  2046. continue;
  2047. }
  2048. // Emit #endif/#if pair if needed.
  2049. if ((*I)->getGuard() != InGuard) {
  2050. if (!InGuard.empty())
  2051. OS << "#endif\n";
  2052. InGuard = (*I)->getGuard();
  2053. if (!InGuard.empty())
  2054. OS << "#if " << InGuard << "\n";
  2055. }
  2056. // Actually generate the intrinsic code.
  2057. OS << (*I)->generate();
  2058. MadeProgress = true;
  2059. I = Defs.erase(I);
  2060. }
  2061. }
  2062. assert(Defs.empty() && "Some requirements were not satisfied!");
  2063. if (!InGuard.empty())
  2064. OS << "#endif\n";
  2065. OS << "\n";
  2066. OS << "#undef __ai\n\n";
  2067. OS << "#endif /* __ARM_NEON_H */\n";
  2068. }
  2069. namespace clang {
  2070. void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
  2071. NeonEmitter(Records).run(OS);
  2072. }
  2073. void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
  2074. NeonEmitter(Records).runHeader(OS);
  2075. }
  2076. void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
  2077. llvm_unreachable("Neon test generation no longer implemented!");
  2078. }
  2079. } // End namespace clang