//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This pass exposes codegen information to IR-level passes. Every
/// transformation that uses codegen information is broken into three parts:
/// 1. The IR-level analysis pass.
/// 2. The IR-level transformation interface which provides the needed
///    information.
/// 3. Codegen-level implementation which uses target-specific hooks.
///
/// This file defines #2, which is the interface that IR-level transformations
/// use for querying the codegen.
///
//===----------------------------------------------------------------------===//
  21. #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
  22. #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
  23. #include "llvm/ADT/Optional.h"
  24. #include "llvm/IR/IntrinsicInst.h"
  25. #include "llvm/IR/Intrinsics.h"
  26. #include "llvm/Pass.h"
  27. #include "llvm/Support/DataTypes.h"
  28. #include <functional>
  29. namespace llvm {
  30. class Function;
  31. class GlobalValue;
  32. class Loop;
  33. class PreservedAnalyses;
  34. class Type;
  35. class User;
  36. class Value;
  37. /// \brief Information about a load/store intrinsic defined by the target.
  38. struct MemIntrinsicInfo {
  39. MemIntrinsicInfo()
  40. : ReadMem(false), WriteMem(false), Vol(false), MatchingId(0),
  41. NumMemRefs(0), PtrVal(nullptr) {}
  42. bool ReadMem;
  43. bool WriteMem;
  44. bool Vol;
  45. // Same Id is set by the target for corresponding load/store intrinsics.
  46. unsigned short MatchingId;
  47. int NumMemRefs;
  48. Value *PtrVal;
  49. };
/// \brief This pass provides access to the codegen interfaces that are needed
/// for IR-level transformations.
class TargetTransformInfo {
public:
  /// \brief Construct a TTI object using a type implementing the \c Concept
  /// API below.
  ///
  /// This is used by targets to construct a TTI wrapping their target-specific
  /// implementation that encodes appropriate costs for their target.
  template <typename T> TargetTransformInfo(T Impl);

  /// \brief Construct a baseline TTI object using a minimal implementation of
  /// the \c Concept API below.
  ///
  /// The TTI implementation will reflect the information in the DataLayout
  /// provided if non-null.
  explicit TargetTransformInfo(const DataLayout &DL);

  // Provide move semantics.
  TargetTransformInfo(TargetTransformInfo &&Arg);
  TargetTransformInfo &operator=(TargetTransformInfo &&RHS);

  // We need to define the destructor out-of-line to define our sub-classes
  // out-of-line.
  ~TargetTransformInfo();

  /// \brief Handle the invalidation of this information.
  ///
  /// When used as a result of \c TargetIRAnalysis this method will be called
  /// when the function this was computed for changes. When it returns false,
  /// the information is preserved across those changes.
  bool invalidate(Function &, const PreservedAnalyses &) {
    // FIXME: We should probably in some way ensure that the subtarget
    // information for a function hasn't changed.
    return false;
  }

  /// \name Generic Target Information
  /// @{

  /// \brief Underlying constants for 'cost' values in this interface.
  ///
  /// Many APIs in this interface return a cost. This enum defines the
  /// fundamental values that should be used to interpret (and produce) those
  /// costs. The costs are returned as an unsigned rather than a member of this
  /// enumeration because it is expected that the cost of one IR instruction
  /// may have a multiplicative factor to it or otherwise won't fit directly
  /// into the enum. Moreover, it is common to sum or average costs which works
  /// better as simple integral values. Thus this enum only provides constants.
  ///
  /// Note that these costs should usually reflect the intersection of code-size
  /// cost and execution cost. A free instruction is typically one that folds
  /// into another instruction. For example, reg-to-reg moves can often be
  /// skipped by renaming the registers in the CPU, but they still are encoded
  /// and thus wouldn't be considered 'free' here.
  enum TargetCostConstants {
    TCC_Free = 0,     ///< Expected to fold away in lowering.
    TCC_Basic = 1,    ///< The cost of a typical 'add' instruction.
    TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
  };

  /// \brief Estimate the cost of a specific operation when lowered.
  ///
  /// Note that this is designed to work on an arbitrary synthetic opcode, and
  /// thus work for hypothetical queries before an instruction has even been
  /// formed. However, this does *not* work for GEPs, and must not be called
  /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as
  /// analyzing a GEP's cost requires more information.
  ///
  /// Typically only the result type is required, and the operand type can be
  /// omitted. However, if the opcode is one of the cast instructions, the
  /// operand type is required.
  ///
  /// The returned cost is defined in terms of \c TargetCostConstants, see its
  /// comments for a detailed explanation of the cost values.
  unsigned getOperationCost(unsigned Opcode, Type *Ty,
                            Type *OpTy = nullptr) const;

  /// \brief Estimate the cost of a GEP operation when lowered.
  ///
  /// The contract for this function is the same as \c getOperationCost except
  /// that it supports an interface that provides extra information specific to
  /// the GEP operation.
  unsigned getGEPCost(const Value *Ptr, ArrayRef<const Value *> Operands) const;

  /// \brief Estimate the cost of a function call when lowered.
  ///
  /// The contract for this is the same as \c getOperationCost except that it
  /// supports an interface that provides extra information specific to call
  /// instructions.
  ///
  /// This is the most basic query for estimating call cost: it only knows the
  /// function type and (potentially) the number of arguments at the call site.
  /// The latter is only interesting for varargs function types.
  unsigned getCallCost(FunctionType *FTy, int NumArgs = -1) const;

  /// \brief Estimate the cost of calling a specific function when lowered.
  ///
  /// This overload adds the ability to reason about the particular function
  /// being called in the event it is a library call with special lowering.
  unsigned getCallCost(const Function *F, int NumArgs = -1) const;

  /// \brief Estimate the cost of calling a specific function when lowered.
  ///
  /// This overload allows specifying a set of candidate argument values.
  unsigned getCallCost(const Function *F,
                       ArrayRef<const Value *> Arguments) const;

  /// \brief Estimate the cost of an intrinsic when lowered.
  ///
  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<Type *> ParamTys) const;

  /// \brief Estimate the cost of an intrinsic when lowered.
  ///
  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
  /// This overload allows specifying a set of candidate argument values.
  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<const Value *> Arguments) const;

  /// \brief Estimate the cost of a given IR user when lowered.
  ///
  /// This can estimate the cost of either a ConstantExpr or Instruction when
  /// lowered. It has two primary advantages over the \c getOperationCost and
  /// \c getGEPCost above, and one significant disadvantage: it can only be
  /// used when the IR construct has already been formed.
  ///
  /// The advantages are that it can inspect the SSA use graph to reason more
  /// accurately about the cost. For example, all-constant-GEPs can often be
  /// folded into a load or other instruction, but if they are used in some
  /// other context they may not be folded. This routine can distinguish such
  /// cases.
  ///
  /// The returned cost is defined in terms of \c TargetCostConstants, see its
  /// comments for a detailed explanation of the cost values.
  unsigned getUserCost(const User *U) const;

  /// \brief Return true if branch divergence exists.
  ///
  /// Branch divergence has a significantly negative impact on GPU performance
  /// when threads in the same wavefront take different paths due to conditional
  /// branches.
  bool hasBranchDivergence() const;

  /// \brief Returns whether V is a source of divergence.
  ///
  /// This function provides the target-dependent information for
  /// the target-independent DivergenceAnalysis. DivergenceAnalysis first
  /// builds the dependency graph, and then runs the reachability algorithm
  /// starting with the sources of divergence.
  bool isSourceOfDivergence(const Value *V) const;

  /// \brief Test whether calls to a function lower to actual program function
  /// calls.
  ///
  /// The idea is to test whether the program is likely to require a 'call'
  /// instruction or equivalent in order to call the given function.
  ///
  /// FIXME: It's not clear that this is a good or useful query API. Clients
  /// should probably move to simpler cost metrics using the above.
  /// Alternatively, we could split the cost interface into distinct code-size
  /// and execution-speed costs. This would allow modelling the core of this
  /// query more accurately as a call is a single small instruction, but
  /// incurs significant execution cost.
  bool isLoweredToCall(const Function *F) const;

  /// Parameters that control the generic loop unrolling transformation.
  struct UnrollingPreferences {
    /// The cost threshold for the unrolled loop. Should be relative to the
    /// getUserCost values returned by this API, and the expectation is that
    /// the unrolled loop's instructions when run through that interface should
    /// not exceed this cost. However, this is only an estimate. Also, specific
    /// loops may be unrolled even with a cost above this threshold if deemed
    /// profitable. Set this to UINT_MAX to disable the loop body cost
    /// restriction.
    unsigned Threshold;
    /// If complete unrolling will reduce the cost of the loop below its
    /// expected dynamic cost while rolled by this percentage, apply a discount
    /// (below) to its unrolled cost.
    unsigned PercentDynamicCostSavedThreshold;
    /// The discount applied to the unrolled cost when the *dynamic* cost
    /// savings of unrolling exceed the \c PercentDynamicCostSavedThreshold.
    unsigned DynamicCostSavingsDiscount;
    /// The cost threshold for the unrolled loop when optimizing for size (set
    /// to UINT_MAX to disable).
    unsigned OptSizeThreshold;
    /// The cost threshold for the unrolled loop, like Threshold, but used
    /// for partial/runtime unrolling (set to UINT_MAX to disable).
    unsigned PartialThreshold;
    /// The cost threshold for the unrolled loop when optimizing for size, like
    /// OptSizeThreshold, but used for partial/runtime unrolling (set to
    /// UINT_MAX to disable).
    unsigned PartialOptSizeThreshold;
    /// A forced unrolling factor (the number of concatenated bodies of the
    /// original loop in the unrolled loop body). When set to 0, the unrolling
    /// transformation will select an unrolling factor based on the current cost
    /// threshold and other factors.
    unsigned Count;
    // Set the maximum unrolling factor. The unrolling factor may be selected
    // using the appropriate cost threshold, but may not exceed this number
    // (set to UINT_MAX to disable). This does not apply in cases where the
    // loop is being fully unrolled.
    unsigned MaxCount;
    /// Allow partial unrolling (unrolling of loops to expand the size of the
    /// loop body, not only to eliminate small constant-trip-count loops).
    bool Partial;
    /// Allow runtime unrolling (unrolling of loops to expand the size of the
    /// loop body even when the number of loop iterations is not known at
    /// compile time).
    bool Runtime;
    /// Allow emitting expensive instructions (such as divisions) when computing
    /// the trip count of a loop for runtime unrolling.
    bool AllowExpensiveTripCount;
  };

  /// \brief Get target-customized preferences for the generic loop unrolling
  /// transformation. The caller will initialize UP with the current
  /// target-independent defaults.
  void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;

  /// @}

  /// \name Scalar Target Information
  /// @{

  /// \brief Flags indicating the kind of support for population count.
  ///
  /// Compared to the SW implementation, HW support is supposed to
  /// significantly boost the performance when the population is dense, and it
  /// may or may not degrade performance if the population is sparse. A HW
  /// support is considered as "Fast" if it can outperform, or is on a par
  /// with, SW implementation when the population is sparse; otherwise, it is
  /// considered as "Slow".
  enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };

  /// \brief Return true if the specified immediate is legal add immediate, that
  /// is the target has add instructions which can add a register with the
  /// immediate without having to materialize the immediate into a register.
  bool isLegalAddImmediate(int64_t Imm) const;

  /// \brief Return true if the specified immediate is legal icmp immediate,
  /// that is the target has icmp instructions which can compare a register
  /// against the immediate without having to materialize the immediate into a
  /// register.
  bool isLegalICmpImmediate(int64_t Imm) const;

  /// \brief Return true if the addressing mode represented by AM is legal for
  /// this target, for a load/store of the specified type.
  /// The type may be VoidTy, in which case only return true if the addressing
  /// mode is legal for a load/store of any legal type.
  /// TODO: Handle pre/postinc as well.
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace = 0) const;

  /// \brief Return true if the target works with masked instruction
  /// AVX2 allows masks for consecutive load and store for i32 and i64 elements.
  /// AVX-512 architecture will also allow masks for non-consecutive memory
  /// accesses.
  bool isLegalMaskedStore(Type *DataType, int Consecutive) const;
  bool isLegalMaskedLoad(Type *DataType, int Consecutive) const;

  /// \brief Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  /// TODO: Handle pre/postinc as well.
  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale,
                           unsigned AddrSpace = 0) const;

  /// \brief Return true if it's free to truncate a value of type Ty1 to type
  /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
  /// by referencing its sub-register AX.
  bool isTruncateFree(Type *Ty1, Type *Ty2) const;

  /// \brief Return true if it is profitable to hoist instruction in the
  /// then/else to before if.
  bool isProfitableToHoist(Instruction *I) const;

  /// \brief Return true if this type is legal.
  bool isTypeLegal(Type *Ty) const;

  /// \brief Returns the target's jmp_buf alignment in bytes.
  unsigned getJumpBufAlignment() const;

  /// \brief Returns the target's jmp_buf size in bytes.
  unsigned getJumpBufSize() const;

  /// \brief Return true if switches should be turned into lookup tables for the
  /// target.
  bool shouldBuildLookupTables() const;

  /// \brief Don't restrict interleaved unrolling to small loops.
  bool enableAggressiveInterleaving(bool LoopHasReductions) const;

  /// \brief Return hardware support for population count.
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;

  /// \brief Return true if the hardware has a fast square-root instruction.
  bool haveFastSqrt(Type *Ty) const;

  /// \brief Return the expected cost of supporting the floating point operation
  /// of the specified type.
  unsigned getFPOpCost(Type *Ty) const;

  /// \brief Return the expected cost of materializing for the given integer
  /// immediate of the specified type.
  unsigned getIntImmCost(const APInt &Imm, Type *Ty) const;

  /// \brief Return the expected cost of materialization for the given integer
  /// immediate of the specified type for a given instruction. The cost can be
  /// zero if the immediate can be folded into the specified instruction.
  unsigned getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                         Type *Ty) const;
  // Same as above, but for an immediate used as operand \p Idx of the
  // intrinsic call \p IID rather than of a plain instruction opcode.
  unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                         Type *Ty) const;

  /// @}

  /// \name Vector Target Information
  /// @{

  /// \brief The various kinds of shuffle patterns for vector queries.
  enum ShuffleKind {
    SK_Broadcast,       ///< Broadcast element 0 to all other elements.
    SK_Reverse,         ///< Reverse the order of the vector.
    SK_Alternate,       ///< Choose alternate elements from vector.
    SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
    SK_ExtractSubvector ///< ExtractSubvector. Index indicates start offset.
  };

  /// \brief Additional information about an operand's possible values.
  enum OperandValueKind {
    OK_AnyValue,               // Operand can have any value.
    OK_UniformValue,           // Operand is uniform (splat of a value).
    OK_UniformConstantValue,   // Operand is uniform constant.
    OK_NonUniformConstantValue // Operand is a non uniform constant value.
  };

  /// \brief Additional properties of an operand's values.
  enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };

  /// \return The number of scalar or vector registers that the target has.
  /// If 'Vectors' is true, it returns the number of vector registers. If it is
  /// set to false, it returns the number of scalar registers.
  unsigned getNumberOfRegisters(bool Vector) const;

  /// \return The width of the largest scalar or vector register type.
  unsigned getRegisterBitWidth(bool Vector) const;

  /// \return The maximum interleave factor that any transform should try to
  /// perform for this target. This number depends on the level of parallelism
  /// and the number of execution units in the CPU.
  unsigned getMaxInterleaveFactor(unsigned VF) const;

  /// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc.
  unsigned
  getArithmeticInstrCost(unsigned Opcode, Type *Ty,
                         OperandValueKind Opd1Info = OK_AnyValue,
                         OperandValueKind Opd2Info = OK_AnyValue,
                         OperandValueProperties Opd1PropInfo = OP_None,
                         OperandValueProperties Opd2PropInfo = OP_None) const;

  /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
  /// The index and subtype parameters are used by the subvector insertion and
  /// extraction shuffle kinds.
  unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
                          Type *SubTp = nullptr) const;

  /// \return The expected cost of cast instructions, such as bitcast, trunc,
  /// zext, etc.
  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const;

  /// \return The expected cost of control-flow related instructions such as
  /// Phi, Ret, Br.
  unsigned getCFInstrCost(unsigned Opcode) const;

  /// \returns The expected cost of compare and select instructions.
  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                              Type *CondTy = nullptr) const;

  /// \return The expected cost of vector Insert and Extract.
  /// Use -1 to indicate that there is no information on the index value.
  unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
                              unsigned Index = -1) const;

  /// \return The cost of Load and Store instructions.
  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                           unsigned AddressSpace) const;

  /// \return The cost of masked Load and Store instructions.
  unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                                 unsigned AddressSpace) const;

  /// \return The cost of the interleaved memory operation.
  /// \p Opcode is the memory operation code
  /// \p VecTy is the vector type of the interleaved access.
  /// \p Factor is the interleave factor
  /// \p Indices is the indices for interleaved load members (as interleaved
  ///    load allows gaps)
  /// \p Alignment is the alignment of the memory operation
  /// \p AddressSpace is address space of the pointer.
  unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                      unsigned Factor,
                                      ArrayRef<unsigned> Indices,
                                      unsigned Alignment,
                                      unsigned AddressSpace) const;

  /// \brief Calculate the cost of performing a vector reduction.
  ///
  /// This is the cost of reducing the vector value of type \p Ty to a scalar
  /// value using the operation denoted by \p Opcode. The form of the reduction
  /// can either be a pairwise reduction or a reduction that splits the vector
  /// at every reduction level.
  ///
  /// Pairwise:
  ///  (v0, v1, v2, v3)
  ///  ((v0+v1), (v2+v3), undef, undef)
  /// Split:
  ///  (v0, v1, v2, v3)
  ///  ((v0+v2), (v1+v3), undef, undef)
  unsigned getReductionCost(unsigned Opcode, Type *Ty,
                            bool IsPairwiseForm) const;

  /// \returns The cost of Intrinsic instructions.
  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                 ArrayRef<Type *> Tys) const;

  /// \returns The cost of Call instructions.
  unsigned getCallInstrCost(Function *F, Type *RetTy,
                            ArrayRef<Type *> Tys) const;

  /// \returns The number of pieces into which the provided type must be
  /// split during legalization. Zero is returned when the answer is unknown.
  unsigned getNumberOfParts(Type *Tp) const;

  /// \returns The cost of the address computation. For most targets this can be
  /// merged into the instruction indexing mode. Some targets might want to
  /// distinguish between address computation for memory operations on vector
  /// types and scalar types. Such targets should override this function.
  /// The 'IsComplex' parameter is a hint that the address computation is likely
  /// to involve multiple instructions and as such unlikely to be merged into
  /// the address indexing mode.
  unsigned getAddressComputationCost(Type *Ty, bool IsComplex = false) const;

  /// \returns The cost, if any, of keeping values of the given types alive
  /// over a callsite.
  ///
  /// Some types may require the use of register classes that do not have
  /// any callee-saved registers, so would require a spill and fill.
  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;

  /// \returns True if the intrinsic is a supported memory intrinsic. Info
  /// will contain additional information - whether the intrinsic may write
  /// or read to memory, volatility and the pointer. Info is undefined
  /// if false is returned.
  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  /// \returns A value which is the result of the given memory intrinsic. New
  /// instructions may be created to extract the result from the given intrinsic
  /// memory operation. Returns nullptr if the target cannot create a result
  /// from the given intrinsic.
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) const;

  /// \returns True if the two functions have compatible attributes for inlining
  /// purposes.
  bool hasCompatibleFunctionAttributes(const Function *Caller,
                                       const Function *Callee) const;

  /// @}

private:
  /// \brief The abstract base class used to type erase specific TTI
  /// implementations.
  class Concept;

  /// \brief The template model for the base class which wraps a concrete
  /// implementation in a type erased interface.
  template <typename T> class Model;

  // Owning pointer to the type-erased target implementation.
  std::unique_ptr<Concept> TTIImpl;
};
/// \brief Abstract interface used to type-erase the concrete target TTI
/// implementations.
///
/// Each pure virtual method here mirrors, one-to-one, a public member of
/// \c TargetTransformInfo above; see those members for documentation. The
/// \c Model template below forwards each call to the wrapped implementation.
class TargetTransformInfo::Concept {
public:
  virtual ~Concept() = 0;
  virtual const DataLayout &getDataLayout() const = 0;
  virtual unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
  virtual unsigned getGEPCost(const Value *Ptr,
                              ArrayRef<const Value *> Operands) = 0;
  virtual unsigned getCallCost(FunctionType *FTy, int NumArgs) = 0;
  virtual unsigned getCallCost(const Function *F, int NumArgs) = 0;
  virtual unsigned getCallCost(const Function *F,
                               ArrayRef<const Value *> Arguments) = 0;
  virtual unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                                    ArrayRef<Type *> ParamTys) = 0;
  virtual unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                                    ArrayRef<const Value *> Arguments) = 0;
  virtual unsigned getUserCost(const User *U) = 0;
  virtual bool hasBranchDivergence() = 0;
  virtual bool isSourceOfDivergence(const Value *V) = 0;
  virtual bool isLoweredToCall(const Function *F) = 0;
  virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) = 0;
  virtual bool isLegalAddImmediate(int64_t Imm) = 0;
  virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
  virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                     int64_t BaseOffset, bool HasBaseReg,
                                     int64_t Scale,
                                     unsigned AddrSpace) = 0;
  virtual bool isLegalMaskedStore(Type *DataType, int Consecutive) = 0;
  virtual bool isLegalMaskedLoad(Type *DataType, int Consecutive) = 0;
  virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                   int64_t BaseOffset, bool HasBaseReg,
                                   int64_t Scale, unsigned AddrSpace) = 0;
  virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
  virtual bool isProfitableToHoist(Instruction *I) = 0;
  virtual bool isTypeLegal(Type *Ty) = 0;
  virtual unsigned getJumpBufAlignment() = 0;
  virtual unsigned getJumpBufSize() = 0;
  virtual bool shouldBuildLookupTables() = 0;
  virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
  virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
  virtual bool haveFastSqrt(Type *Ty) = 0;
  virtual unsigned getFPOpCost(Type *Ty) = 0;
  virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) = 0;
  virtual unsigned getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                                 Type *Ty) = 0;
  virtual unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx,
                                 const APInt &Imm, Type *Ty) = 0;
  virtual unsigned getNumberOfRegisters(bool Vector) = 0;
  virtual unsigned getRegisterBitWidth(bool Vector) = 0;
  virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
  virtual unsigned
  getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
                         OperandValueKind Opd2Info,
                         OperandValueProperties Opd1PropInfo,
                         OperandValueProperties Opd2PropInfo) = 0;
  virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
                                  Type *SubTp) = 0;
  virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) = 0;
  virtual unsigned getCFInstrCost(unsigned Opcode) = 0;
  virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                      Type *CondTy) = 0;
  virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
                                      unsigned Index) = 0;
  virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
                                   unsigned Alignment,
                                   unsigned AddressSpace) = 0;
  virtual unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                         unsigned Alignment,
                                         unsigned AddressSpace) = 0;
  virtual unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                              unsigned Factor,
                                              ArrayRef<unsigned> Indices,
                                              unsigned Alignment,
                                              unsigned AddressSpace) = 0;
  virtual unsigned getReductionCost(unsigned Opcode, Type *Ty,
                                    bool IsPairwiseForm) = 0;
  virtual unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                         ArrayRef<Type *> Tys) = 0;
  virtual unsigned getCallInstrCost(Function *F, Type *RetTy,
                                    ArrayRef<Type *> Tys) = 0;
  virtual unsigned getNumberOfParts(Type *Tp) = 0;
  virtual unsigned getAddressComputationCost(Type *Ty, bool IsComplex) = 0;
  virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
  virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                                  MemIntrinsicInfo &Info) = 0;
  virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                                   Type *ExpectedType) = 0;
  virtual bool hasCompatibleFunctionAttributes(const Function *Caller,
                                               const Function *Callee) const = 0;
};
  555. template <typename T>
  556. class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
  557. T Impl;
  558. public:
  559. Model(T Impl) : Impl(std::move(Impl)) {}
  560. ~Model() override {}
  561. const DataLayout &getDataLayout() const override {
  562. return Impl.getDataLayout();
  563. }
  564. unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
  565. return Impl.getOperationCost(Opcode, Ty, OpTy);
  566. }
  567. unsigned getGEPCost(const Value *Ptr,
  568. ArrayRef<const Value *> Operands) override {
  569. return Impl.getGEPCost(Ptr, Operands);
  570. }
  571. unsigned getCallCost(FunctionType *FTy, int NumArgs) override {
  572. return Impl.getCallCost(FTy, NumArgs);
  573. }
  574. unsigned getCallCost(const Function *F, int NumArgs) override {
  575. return Impl.getCallCost(F, NumArgs);
  576. }
  577. unsigned getCallCost(const Function *F,
  578. ArrayRef<const Value *> Arguments) override {
  579. return Impl.getCallCost(F, Arguments);
  580. }
  581. unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
  582. ArrayRef<Type *> ParamTys) override {
  583. return Impl.getIntrinsicCost(IID, RetTy, ParamTys);
  584. }
  585. unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
  586. ArrayRef<const Value *> Arguments) override {
  587. return Impl.getIntrinsicCost(IID, RetTy, Arguments);
  588. }
  589. unsigned getUserCost(const User *U) override { return Impl.getUserCost(U); }
  590. bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
  591. bool isSourceOfDivergence(const Value *V) override {
  592. return Impl.isSourceOfDivergence(V);
  593. }
  594. bool isLoweredToCall(const Function *F) override {
  595. return Impl.isLoweredToCall(F);
  596. }
  597. void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) override {
  598. return Impl.getUnrollingPreferences(L, UP);
  599. }
  600. bool isLegalAddImmediate(int64_t Imm) override {
  601. return Impl.isLegalAddImmediate(Imm);
  602. }
  603. bool isLegalICmpImmediate(int64_t Imm) override {
  604. return Impl.isLegalICmpImmediate(Imm);
  605. }
  606. bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
  607. bool HasBaseReg, int64_t Scale,
  608. unsigned AddrSpace) override {
  609. return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
  610. Scale, AddrSpace);
  611. }
  612. bool isLegalMaskedStore(Type *DataType, int Consecutive) override {
  613. return Impl.isLegalMaskedStore(DataType, Consecutive);
  614. }
  615. bool isLegalMaskedLoad(Type *DataType, int Consecutive) override {
  616. return Impl.isLegalMaskedLoad(DataType, Consecutive);
  617. }
  618. int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
  619. bool HasBaseReg, int64_t Scale,
  620. unsigned AddrSpace) override {
  621. return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
  622. Scale, AddrSpace);
  623. }
  624. bool isTruncateFree(Type *Ty1, Type *Ty2) override {
  625. return Impl.isTruncateFree(Ty1, Ty2);
  626. }
  627. bool isProfitableToHoist(Instruction *I) override {
  628. return Impl.isProfitableToHoist(I);
  629. }
  630. bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
  631. unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); }
  632. unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); }
  633. bool shouldBuildLookupTables() override {
  634. return Impl.shouldBuildLookupTables();
  635. }
  636. bool enableAggressiveInterleaving(bool LoopHasReductions) override {
  637. return Impl.enableAggressiveInterleaving(LoopHasReductions);
  638. }
  639. PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
  640. return Impl.getPopcntSupport(IntTyWidthInBit);
  641. }
  642. bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
  643. unsigned getFPOpCost(Type *Ty) override {
  644. return Impl.getFPOpCost(Ty);
  645. }
  646. unsigned getIntImmCost(const APInt &Imm, Type *Ty) override {
  647. return Impl.getIntImmCost(Imm, Ty);
  648. }
  649. unsigned getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
  650. Type *Ty) override {
  651. return Impl.getIntImmCost(Opc, Idx, Imm, Ty);
  652. }
  653. unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
  654. Type *Ty) override {
  655. return Impl.getIntImmCost(IID, Idx, Imm, Ty);
  656. }
  657. unsigned getNumberOfRegisters(bool Vector) override {
  658. return Impl.getNumberOfRegisters(Vector);
  659. }
  660. unsigned getRegisterBitWidth(bool Vector) override {
  661. return Impl.getRegisterBitWidth(Vector);
  662. }
  663. unsigned getMaxInterleaveFactor(unsigned VF) override {
  664. return Impl.getMaxInterleaveFactor(VF);
  665. }
  666. unsigned
  667. getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
  668. OperandValueKind Opd2Info,
  669. OperandValueProperties Opd1PropInfo,
  670. OperandValueProperties Opd2PropInfo) override {
  671. return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
  672. Opd1PropInfo, Opd2PropInfo);
  673. }
  674. unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
  675. Type *SubTp) override {
  676. return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
  677. }
  678. unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) override {
  679. return Impl.getCastInstrCost(Opcode, Dst, Src);
  680. }
  681. unsigned getCFInstrCost(unsigned Opcode) override {
  682. return Impl.getCFInstrCost(Opcode);
  683. }
  684. unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
  685. Type *CondTy) override {
  686. return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy);
  687. }
  688. unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
  689. unsigned Index) override {
  690. return Impl.getVectorInstrCost(Opcode, Val, Index);
  691. }
  692. unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
  693. unsigned AddressSpace) override {
  694. return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
  695. }
  696. unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
  697. unsigned AddressSpace) override {
  698. return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
  699. }
  700. unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
  701. unsigned Factor,
  702. ArrayRef<unsigned> Indices,
  703. unsigned Alignment,
  704. unsigned AddressSpace) override {
  705. return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
  706. Alignment, AddressSpace);
  707. }
  708. unsigned getReductionCost(unsigned Opcode, Type *Ty,
  709. bool IsPairwiseForm) override {
  710. return Impl.getReductionCost(Opcode, Ty, IsPairwiseForm);
  711. }
  712. unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
  713. ArrayRef<Type *> Tys) override {
  714. return Impl.getIntrinsicInstrCost(ID, RetTy, Tys);
  715. }
  716. unsigned getCallInstrCost(Function *F, Type *RetTy,
  717. ArrayRef<Type *> Tys) override {
  718. return Impl.getCallInstrCost(F, RetTy, Tys);
  719. }
  720. unsigned getNumberOfParts(Type *Tp) override {
  721. return Impl.getNumberOfParts(Tp);
  722. }
  723. unsigned getAddressComputationCost(Type *Ty, bool IsComplex) override {
  724. return Impl.getAddressComputationCost(Ty, IsComplex);
  725. }
  726. unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
  727. return Impl.getCostOfKeepingLiveOverCall(Tys);
  728. }
  729. bool getTgtMemIntrinsic(IntrinsicInst *Inst,
  730. MemIntrinsicInfo &Info) override {
  731. return Impl.getTgtMemIntrinsic(Inst, Info);
  732. }
  733. Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
  734. Type *ExpectedType) override {
  735. return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
  736. }
  737. bool hasCompatibleFunctionAttributes(const Function *Caller,
  738. const Function *Callee) const override {
  739. return Impl.hasCompatibleFunctionAttributes(Caller, Callee);
  740. }
  741. };
  742. template <typename T>
  743. TargetTransformInfo::TargetTransformInfo(T Impl)
  744. : TTIImpl(new Model<T>(Impl)) {}
/// \brief Analysis pass providing the \c TargetTransformInfo.
///
/// The core idea of the TargetIRAnalysis is to expose an interface through
/// which LLVM targets can analyze and provide information about the middle
/// end's target-independent IR. This supports use cases such as target-aware
/// cost modeling of IR constructs.
///
/// This is a function analysis because much of the cost modeling for targets
/// is done in a subtarget specific way and LLVM supports compiling different
/// functions targeting different subtargets in order to support runtime
/// dispatch according to the observed subtarget.
class TargetIRAnalysis {
public:
  typedef TargetTransformInfo Result;

  /// \brief Opaque, unique identifier for this analysis pass.
  static void *ID() { return (void *)&PassID; }

  /// \brief Provide access to a name for this pass for debugging purposes.
  static StringRef name() { return "TargetIRAnalysis"; }

  /// \brief Default construct a target IR analysis.
  ///
  /// This will use the module's datalayout to construct a baseline
  /// conservative TTI result.
  TargetIRAnalysis();

  /// \brief Construct an IR analysis pass around a target-provided callback.
  ///
  /// The callback will be called with a particular function for which the TTI
  /// is needed and must return a TTI object for that function.
  TargetIRAnalysis(std::function<Result(Function &)> TTICallback);

  // Value semantics. We spell out the constructors for MSVC.
  TargetIRAnalysis(const TargetIRAnalysis &Arg)
      : TTICallback(Arg.TTICallback) {}
  TargetIRAnalysis(TargetIRAnalysis &&Arg)
      : TTICallback(std::move(Arg.TTICallback)) {}
  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
    TTICallback = RHS.TTICallback;
    return *this;
  }
  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
    TTICallback = std::move(RHS.TTICallback);
    return *this;
  }

  /// \brief Produce this analysis' result (a TargetTransformInfo) for \p F.
  /// Defined out of line.
  Result run(Function &F);

private:
  static char PassID;

  /// \brief The callback used to produce a result.
  ///
  /// We use a completely opaque callback so that targets can provide whatever
  /// mechanism they desire for constructing the TTI for a given function.
  ///
  /// FIXME: Should we really use std::function? It's relatively inefficient.
  /// It might be possible to arrange for even stateful callbacks to outlive
  /// the analysis and thus use a function_ref which would be lighter weight.
  /// This may also be less error prone as the callback is likely to reference
  /// the external TargetMachine, and that reference needs to never dangle.
  std::function<Result(Function &)> TTICallback;

  /// \brief Helper function used as the callback in the default constructor.
  static Result getDefaultTTI(Function &F);
};
/// \brief Wrapper pass for TargetTransformInfo.
///
/// This pass can be constructed from a TTI object which it stores internally
/// and is queried by passes.
class TargetTransformInfoWrapperPass : public ImmutablePass {
  // The analysis used to produce TTI results for functions.
  TargetIRAnalysis TIRA;
  // NOTE(review): presumably caches the most recent result of TIRA.run();
  // confirm against the out-of-line getTTI definition.
  Optional<TargetTransformInfo> TTI;

  // Anchor the vtable to a single translation unit.
  virtual void anchor();

public:
  static char ID;

  /// \brief We must provide a default constructor for the pass but it should
  /// never be used.
  ///
  /// Use the constructor below or call one of the creation routines.
  TargetTransformInfoWrapperPass();

  /// \brief Construct the pass around a specific target IR analysis.
  explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);

  /// \brief Return the TTI for \p F. Defined out of line.
  TargetTransformInfo &getTTI(Function &F);
};
/// \brief Create an analysis pass wrapper around a TTI object.
///
/// This analysis pass just holds the TTI instance and makes it available to
/// clients. \p TIRA is taken by value; the returned pass is heap-allocated
/// and ownership passes to the caller (typically a PassManager).
ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
  826. } // End llvm namespace
  827. #endif