DxilCounters.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // //
  3. // DxilCounters.cpp //
  4. // Copyright (C) Microsoft Corporation. All rights reserved. //
  5. // This file is distributed under the University of Illinois Open Source //
  6. // License. See LICENSE.TXT for details. //
  7. // //
  8. ///////////////////////////////////////////////////////////////////////////////
  9. #include "dxc/DXIL/DxilCounters.h"
  10. #include "dxc/Support/Global.h"
  11. #include "llvm/IR/LLVMContext.h"
  12. #include "llvm/IR/Operator.h"
  13. #include "llvm/IR/Module.h"
  14. #include "llvm/ADT/DenseMap.h"
  15. #include "dxc/DXIL/DxilOperations.h"
  16. #include "dxc/DXIL/DxilInstructions.h"
  17. using namespace llvm;
  18. using namespace hlsl;
  19. using namespace hlsl::DXIL;
  20. namespace hlsl {
  21. namespace {
  // Classification of a pointer value: which kind of memory it refers to and
  // whether that memory is an array. Both fields are packed into bit-fields.
  struct PointerInfo {
    // Kind of memory behind a pointer. Unknown is the default for anything
    // that could not be classified.
    enum class MemType : unsigned {
      Unknown = 0,
      Global_Static = 1,
      Global_TGSM = 2,
      Alloca = 3
    };

    MemType memType : 2; // two bits cover the four enumerators above
    bool isArray : 1;    // true when the pointee type is an array

    // A fresh PointerInfo is unclassified and not an array.
    PointerInfo() : memType(MemType::Unknown), isArray(false) {}
  };
  36. typedef DenseMap<Value*, PointerInfo> PointerInfoMap;
  37. PointerInfo GetPointerInfo(Value* V, PointerInfoMap &ptrInfoMap) {
  38. auto it = ptrInfoMap.find(V);
  39. if (it != ptrInfoMap.end())
  40. return it->second;
  41. Type *Ty = V->getType()->getPointerElementType();
  42. ptrInfoMap[V].isArray = Ty->isArrayTy();
  43. if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
  44. if (GV->getType()->getPointerAddressSpace() == DXIL::kTGSMAddrSpace)
  45. ptrInfoMap[V].memType = PointerInfo::MemType::Global_TGSM;
  46. else if (!GV->isConstant() &&
  47. GV->getLinkage() == GlobalVariable::LinkageTypes::InternalLinkage &&
  48. GV->getType()->getPointerAddressSpace() == DXIL::kDefaultAddrSpace)
  49. ptrInfoMap[V].memType = PointerInfo::MemType::Global_Static;
  50. } else if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
  51. ptrInfoMap[V].memType = PointerInfo::MemType::Alloca;
  52. } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
  53. ptrInfoMap[V] = GetPointerInfo(GEP->getPointerOperand(), ptrInfoMap);
  54. } else if (BitCastOperator *BC = dyn_cast<BitCastOperator>(V)) {
  55. ptrInfoMap[V] = GetPointerInfo(BC->getOperand(0), ptrInfoMap);
  56. } else if (AddrSpaceCastInst *AC = dyn_cast<AddrSpaceCastInst>(V)) {
  57. ptrInfoMap[V] = GetPointerInfo(AC->getOperand(0), ptrInfoMap);
  58. } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
  59. if (CE->getOpcode() == LLVMAddrSpaceCast)
  60. ptrInfoMap[V] = GetPointerInfo(AC->getOperand(0), ptrInfoMap);
  61. //} else if (PHINode *PN = dyn_cast<PHINode>(V)) {
  62. // for (auto it = PN->value_op_begin(), e = PN->value_op_end(); it != e; ++it) {
  63. // PI = GetPointerInfo(*it, ptrInfoMap);
  64. // if (PI.memType != PointerInfo::MemType::Unknown)
  65. // break;
  66. // }
  67. }
  68. return ptrInfoMap[V];
  69. };
  // Two boolean properties of a value, packed into single-bit fields.
  struct ValueInfo {
    bool isCbuffer : 1;
    bool isConstant : 1;

    // Both flags start out false.
    ValueInfo() : isCbuffer(false), isConstant(false) {}

    // Returns a ValueInfo in which each flag is set only when it is set in
    // both *this and other (a per-flag logical AND).
    ValueInfo Combine(const ValueInfo &other) const {
      ValueInfo merged; // flags default to false
      if (isCbuffer && other.isCbuffer)
        merged.isCbuffer = true;
      if (isConstant && other.isConstant)
        merged.isConstant = true;
      return merged;
    }
  };
  84. /*<py>
  85. def tab_lines(text):
  86. return [' ' + line for line in text.splitlines()]
  87. def gen_count_dxil_op(counter):
  88. return (['bool CountDxilOp_%s(unsigned op) {' % counter] +
  89. tab_lines(
  90. hctdb_instrhelp.get_instrs_pred("op", hctdb_instrhelp.counter_pred(counter, True))) +
  91. ['}'])
  92. def gen_count_llvm_op(counter):
  93. return (['bool CountLlvmOp_%s(unsigned op) {' % counter] +
  94. tab_lines(
  95. hctdb_instrhelp.get_instrs_pred("op", hctdb_instrhelp.counter_pred(counter, False), 'llvm_id')) +
  96. ['}'])
  97. def gen_counter_functions():
  98. lines = ['// Counter functions for Dxil ops:']
  99. for counter in hctdb_instrhelp.get_dxil_op_counters():
  100. lines += gen_count_dxil_op(counter)
  101. lines.append('// Counter functions for llvm ops:')
  102. for counter in hctdb_instrhelp.get_llvm_op_counters():
  103. lines += gen_count_llvm_op(counter)
  104. return lines
  105. </py>*/
  106. // <py::lines('OPCODE-COUNTERS')>gen_counter_functions()</py>
  107. // OPCODE-COUNTERS:BEGIN
  108. // Counter functions for Dxil ops:
  // True when DXIL opcode `op` is counted as an atomic operation.
  bool CountDxilOp_atomic(unsigned op) {
    switch (op) {
    case 70: // BufferUpdateCounter
    case 78: // AtomicBinOp
    case 79: // AtomicCompareExchange
      return true;
    default:
      return false;
    }
  }
  // True when DXIL opcode `op` is counted as a barrier.
  bool CountDxilOp_barrier(unsigned op) {
    switch (op) {
    case 80: // Barrier
      return true;
    default:
      return false;
    }
  }
  // True when DXIL opcode `op` is counted as a floating-point operation.
  bool CountDxilOp_floats(unsigned op) {
    // FAbs=6, Saturate=7, IsNaN=8, IsInf=9, IsFinite=10, IsNormal=11, Cos=12,
    // Sin=13, Tan=14, Acos=15, Asin=16, Atan=17, Hcos=18, Hsin=19, Htan=20,
    // Exp=21, Frc=22, Log=23, Sqrt=24, Rsqrt=25, Round_ne=26, Round_ni=27,
    // Round_pi=28, Round_z=29
    if (op >= 6 && op <= 29)
      return true;
    // FMax=35, FMin=36
    if (op == 35 || op == 36)
      return true;
    // Dot2=54, Dot3=55, Dot4=56
    if (op >= 54 && op <= 56)
      return true;
    // Fma=47, Dot2AddHalf=162
    return op == 47 || op == 162;
  }
  // True when DXIL opcode `op` is counted as a geometry-shader stream cut.
  bool CountDxilOp_gs_cut(unsigned op) {
    switch (op) {
    case 98: // CutStream
    case 99: // EmitThenCutStream
      return true;
    default:
      return false;
    }
  }
  // True when DXIL opcode `op` is counted as a geometry-shader stream emit.
  bool CountDxilOp_gs_emit(unsigned op) {
    switch (op) {
    case 97: // EmitStream
    case 99: // EmitThenCutStream
      return true;
    default:
      return false;
    }
  }
  // True when DXIL opcode `op` is counted as a signed-integer operation.
  bool CountDxilOp_ints(unsigned op) {
    switch (op) {
    case 37:  // IMax
    case 38:  // IMin
    case 41:  // IMul
    case 48:  // IMad
    case 51:  // Ibfe
    case 163: // Dot4AddI8Packed
      return true;
    default:
      return false;
    }
  }
  // True when DXIL opcode `op` is counted as a signature load.
  bool CountDxilOp_sig_ld(unsigned op) {
    switch (op) {
    case 4:   // LoadInput
    case 103: // LoadOutputControlPoint
    case 104: // LoadPatchConstant
      return true;
    default:
      return false;
    }
  }
  // True when DXIL opcode `op` is counted as a signature store.
  bool CountDxilOp_sig_st(unsigned op) {
    switch (op) {
    case 5:   // StoreOutput
    case 106: // StorePatchConstant
    case 171: // StoreVertexOutput
    case 172: // StorePrimitiveOutput
      return true;
    default:
      return false;
    }
  }
  // True when DXIL opcode `op` is counted as a biased texture sample.
  bool CountDxilOp_tex_bias(unsigned op) {
    switch (op) {
    case 61: // SampleBias
      return true;
    default:
      return false;
    }
  }
  // True when DXIL opcode `op` is counted as a comparison texture operation.
  bool CountDxilOp_tex_cmp(unsigned op) {
    switch (op) {
    case 64:  // SampleCmp
    case 65:  // SampleCmpLevelZero
    case 74:  // TextureGatherCmp
    case 223: // TextureGatherCmpImm
      return true;
    default:
      return false;
    }
  }
  // True when DXIL opcode `op` is counted as a gradient texture sample.
  bool CountDxilOp_tex_grad(unsigned op) {
    switch (op) {
    case 63: // SampleGrad
      return true;
    default:
      return false;
    }
  }
  // True when DXIL opcode `op` is counted as a texture/buffer load.
  bool CountDxilOp_tex_load(unsigned op) {
    switch (op) {
    case 66:  // TextureLoad
    case 68:  // BufferLoad
    case 139: // RawBufferLoad
      return true;
    default:
      return false;
    }
  }
  // True when DXIL opcode `op` is counted as a normal texture sample/gather.
  bool CountDxilOp_tex_norm(unsigned op) {
    switch (op) {
    case 60:  // Sample
    case 62:  // SampleLevel
    case 73:  // TextureGather
    case 222: // TextureGatherImm
      return true;
    default:
      return false;
    }
  }
  // True when DXIL opcode `op` is counted as a texture/buffer store.
  bool CountDxilOp_tex_store(unsigned op) {
    // WriteSamplerFeedback=174, WriteSamplerFeedbackBias=175,
    // WriteSamplerFeedbackLevel=176, WriteSamplerFeedbackGrad=177
    if (op >= 174 && op <= 177)
      return true;
    switch (op) {
    case 67:  // TextureStore
    case 69:  // BufferStore
    case 140: // RawBufferStore
      return true;
    default:
      return false;
    }
  }
  // True when DXIL opcode `op` is counted as an unsigned-integer operation.
  bool CountDxilOp_uints(unsigned op) {
    // Bfrev=30, Countbits=31, FirstbitLo=32, FirstbitHi=33, FirstbitSHi=34
    if (op >= 30 && op <= 34)
      return true;
    // UMul=42, UDiv=43, UAddc=44, USubb=45
    if (op >= 42 && op <= 45)
      return true;
    switch (op) {
    case 39:  // UMax
    case 40:  // UMin
    case 49:  // UMad
    case 50:  // Msad
    case 52:  // Ubfe
    case 53:  // Bfi
    case 164: // Dot4AddU8Packed
      return true;
    default:
      return false;
    }
  }
  182. // Counter functions for llvm ops:
  // True when LLVM instruction opcode `op` is counted as an atomic operation.
  bool CountLlvmOp_atomic(unsigned op) {
    switch (op) {
    case 31: // AtomicCmpXchg
    case 32: // AtomicRMW
      return true;
    default:
      return false;
    }
  }
  // True when LLVM instruction opcode `op` is counted as a fence.
  bool CountLlvmOp_fence(unsigned op) {
    switch (op) {
    case 30: // Fence
      return true;
    default:
      return false;
    }
  }
  // True when LLVM instruction opcode `op` is counted as a floating-point
  // operation.
  bool CountLlvmOp_floats(unsigned op) {
    // FPToUI=36, FPToSI=37, UIToFP=38, SIToFP=39, FPTrunc=40, FPExt=41
    if (op >= 36 && op <= 41)
      return true;
    switch (op) {
    case 9:  // FAdd
    case 11: // FSub
    case 13: // FMul
    case 16: // FDiv
    case 19: // FRem
    case 47: // FCmp
      return true;
    default:
      return false;
    }
  }
  // True when LLVM instruction opcode `op` is counted as a signed-integer
  // operation.
  bool CountLlvmOp_ints(unsigned op) {
    switch (op) {
    case 8:  // Add
    case 10: // Sub
    case 12: // Mul
    case 15: // SDiv
    case 18: // SRem
    case 22: // AShr
    case 33: // Trunc
    case 35: // SExt
    case 46: // ICmp
      return true;
    default:
      return false;
    }
  }
  // True when LLVM instruction opcode `op` is counted as an unsigned-integer
  // operation.
  bool CountLlvmOp_uints(unsigned op) {
    switch (op) {
    case 14: // UDiv
    case 17: // URem
    case 20: // Shl
    case 21: // LShr
    case 23: // And
    case 24: // Or
    case 25: // Xor
    case 34: // ZExt
      return true;
    default:
      return false;
    }
  }
  206. // OPCODE-COUNTERS:END
  207. void CountDxilOp(unsigned op, DxilCounters &counters) {
  208. // <py::lines('COUNT-DXIL-OPS')>['if (CountDxilOp_%s(op)) ++counters.%s;' % (c,c) for c in hctdb_instrhelp.get_dxil_op_counters()]</py>
  209. // COUNT-DXIL-OPS:BEGIN
  210. if (CountDxilOp_atomic(op)) ++counters.atomic;
  211. if (CountDxilOp_barrier(op)) ++counters.barrier;
  212. if (CountDxilOp_floats(op)) ++counters.floats;
  213. if (CountDxilOp_gs_cut(op)) ++counters.gs_cut;
  214. if (CountDxilOp_gs_emit(op)) ++counters.gs_emit;
  215. if (CountDxilOp_ints(op)) ++counters.ints;
  216. if (CountDxilOp_sig_ld(op)) ++counters.sig_ld;
  217. if (CountDxilOp_sig_st(op)) ++counters.sig_st;
  218. if (CountDxilOp_tex_bias(op)) ++counters.tex_bias;
  219. if (CountDxilOp_tex_cmp(op)) ++counters.tex_cmp;
  220. if (CountDxilOp_tex_grad(op)) ++counters.tex_grad;
  221. if (CountDxilOp_tex_load(op)) ++counters.tex_load;
  222. if (CountDxilOp_tex_norm(op)) ++counters.tex_norm;
  223. if (CountDxilOp_tex_store(op)) ++counters.tex_store;
  224. if (CountDxilOp_uints(op)) ++counters.uints;
  225. // COUNT-DXIL-OPS:END
  226. }
  227. void CountLlvmOp(unsigned op, DxilCounters &counters) {
  228. // <py::lines('COUNT-LLVM-OPS')>['if (CountLlvmOp_%s(op)) ++counters.%s;' % (c,c) for c in hctdb_instrhelp.get_llvm_op_counters()]</py>
  229. // COUNT-LLVM-OPS:BEGIN
  230. if (CountLlvmOp_atomic(op)) ++counters.atomic;
  231. if (CountLlvmOp_fence(op)) ++counters.fence;
  232. if (CountLlvmOp_floats(op)) ++counters.floats;
  233. if (CountLlvmOp_ints(op)) ++counters.ints;
  234. if (CountLlvmOp_uints(op)) ++counters.uints;
  235. // COUNT-LLVM-OPS:END
  236. }
  237. } // namespace
  238. void CountInstructions(llvm::Module &M, DxilCounters& counters) {
  239. const DataLayout &DL = M.getDataLayout();
  240. PointerInfoMap ptrInfoMap;
  241. for (auto &GV : M.globals()) {
  242. PointerInfo PI = GetPointerInfo(&GV, ptrInfoMap);
  243. if (PI.isArray) {
  244. // Count number of bytes used in global arrays.
  245. Type *pTy = GV.getType()->getPointerElementType();
  246. uint32_t size = DL.getTypeAllocSize(pTy);
  247. switch (PI.memType) {
  248. case PointerInfo::MemType::Global_Static: counters.array_static_bytes += size; break;
  249. case PointerInfo::MemType::Global_TGSM: counters.array_tgsm_bytes += size; break;
  250. default: break;
  251. }
  252. }
  253. }
  254. for (auto &F : M.functions()) {
  255. if (F.isDeclaration())
  256. continue;
  257. for (auto itBlock = F.begin(), endBlock = F.end(); itBlock != endBlock; ++itBlock) {
  258. for (auto itInst = itBlock->begin(), endInst = itBlock->end(); itInst != endInst; ++itInst) {
  259. Instruction* I = itInst;
  260. ++counters.insts;
  261. if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
  262. Type *pTy = AI->getType()->getPointerElementType();
  263. // Count number of bytes used in alloca arrays.
  264. if (pTy->isArrayTy()) {
  265. counters.array_local_bytes += DL.getTypeAllocSize(pTy);
  266. }
  267. } else if (CallInst *CI = dyn_cast<CallInst>(I)) {
  268. if (hlsl::OP::IsDxilOpFuncCallInst(CI)) {
  269. unsigned opcode = (unsigned)llvm::cast<llvm::ConstantInt>(I->getOperand(0))->getZExtValue();
  270. CountDxilOp(opcode, counters);
  271. }
  272. } else if (isa<LoadInst>(I) || isa<StoreInst>(I)) {
  273. LoadInst *LI = dyn_cast<LoadInst>(I);
  274. StoreInst *SI = dyn_cast<StoreInst>(I);
  275. Value *PtrOp = LI ? LI->getPointerOperand() : SI->getPointerOperand();
  276. PointerInfo PI = GetPointerInfo(PtrOp, ptrInfoMap);
  277. // Count load/store on array elements.
  278. if (PI.isArray) {
  279. switch (PI.memType) {
  280. case PointerInfo::MemType::Alloca: ++counters.array_local_ldst; break;
  281. case PointerInfo::MemType::Global_Static: ++counters.array_static_ldst; break;
  282. case PointerInfo::MemType::Global_TGSM: ++counters.array_tgsm_ldst; break;
  283. default: break;
  284. }
  285. }
  286. } else if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
  287. if (BI->getNumSuccessors() > 1) {
  288. // TODO: More sophisticated analysis to separate dynamic from static branching?
  289. ++counters.branches;
  290. }
  291. } else {
  292. // Count llvm ops:
  293. CountLlvmOp(I->getOpcode(), counters);
  294. }
  295. }
  296. }
  297. }
  298. }
  299. struct CounterOffsetByName {
  300. StringRef name;
  301. uint32_t DxilCounters::*ptr;
  302. };
  303. // Must be sorted case-sensitive:
  304. static const CounterOffsetByName CountersByName[] = {
  305. // <py::lines('COUNTER-MEMBER-PTRS')>['{ "%s", &DxilCounters::%s },' % (c,c) for c in hctdb_instrhelp.get_counters()]</py>
  306. // COUNTER-MEMBER-PTRS:BEGIN
  307. { "array_local_bytes", &DxilCounters::array_local_bytes },
  308. { "array_local_ldst", &DxilCounters::array_local_ldst },
  309. { "array_static_bytes", &DxilCounters::array_static_bytes },
  310. { "array_static_ldst", &DxilCounters::array_static_ldst },
  311. { "array_tgsm_bytes", &DxilCounters::array_tgsm_bytes },
  312. { "array_tgsm_ldst", &DxilCounters::array_tgsm_ldst },
  313. { "atomic", &DxilCounters::atomic },
  314. { "barrier", &DxilCounters::barrier },
  315. { "branches", &DxilCounters::branches },
  316. { "fence", &DxilCounters::fence },
  317. { "floats", &DxilCounters::floats },
  318. { "gs_cut", &DxilCounters::gs_cut },
  319. { "gs_emit", &DxilCounters::gs_emit },
  320. { "insts", &DxilCounters::insts },
  321. { "ints", &DxilCounters::ints },
  322. { "sig_ld", &DxilCounters::sig_ld },
  323. { "sig_st", &DxilCounters::sig_st },
  324. { "tex_bias", &DxilCounters::tex_bias },
  325. { "tex_cmp", &DxilCounters::tex_cmp },
  326. { "tex_grad", &DxilCounters::tex_grad },
  327. { "tex_load", &DxilCounters::tex_load },
  328. { "tex_norm", &DxilCounters::tex_norm },
  329. { "tex_store", &DxilCounters::tex_store },
  330. { "uints", &DxilCounters::uints },
  331. // COUNTER-MEMBER-PTRS:END
  332. };
  333. static int CounterOffsetByNameLess(const CounterOffsetByName &a, const CounterOffsetByName &b) {
  334. return a.name < b.name;
  335. }
  336. uint32_t *LookupByName(llvm::StringRef name, DxilCounters& counters) {
  337. CounterOffsetByName key = {name, nullptr};
  338. static const CounterOffsetByName *CounterEnd = CountersByName +_countof(CountersByName);
  339. auto result = std::lower_bound(CountersByName, CounterEnd, key, CounterOffsetByNameLess);
  340. if (result != CounterEnd && result->name == key.name)
  341. return &(counters.*(result->ptr));
  342. return nullptr;
  343. }
  344. } // namespace hlsl