hctdb.py 220 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
7277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990299129922993299429952996299729982999300030013002300330043005300630073008300930103011301230133014301530163017301830193020302130223023302430253026302730283029303030313032303330343035303630373038303930403041304230433044304530463047304830493050305130523053305430553056305730583059306030613062306330643065306630673068306930703071307230733074307530763077307830793080308130823083308430853086308730883089309030913092309330943095309630973098309931003101310231033104310531063107310831093110311131123113311431153116311731183119312031213122312331243125312631273128312931303131313231333134313531363137313831393140314131423143314431453146314731483149315031513152315331543155315631573158315931603161316231633164316531663167316831693170317131723173
  1. # Copyright (C) Microsoft Corporation. All rights reserved.
  2. # This file is distributed under the University of Illinois Open Source License. See LICENSE.TXT for details.
  3. ###############################################################################
  4. # DXIL information. #
  5. ###############################################################################
  6. import os
  7. all_stages = (
  8. 'vertex',
  9. 'pixel',
  10. 'geometry',
  11. 'compute',
  12. 'hull',
  13. 'domain',
  14. 'library',
  15. 'raygeneration',
  16. 'intersection',
  17. 'anyhit',
  18. 'closesthit',
  19. 'miss',
  20. 'callable',
  21. 'mesh',
  22. 'amplification',
  23. )
  24. # These counters aren't collected directly from instructions,
  25. # so they need to be added manually so they can be accessed
  26. # with custom code in DxilCounters.cpp.
  27. extra_counters = [
  28. 'insts',
  29. 'branches',
  30. 'array_tgsm_bytes',
  31. 'array_static_bytes',
  32. 'array_local_bytes',
  33. 'array_tgsm_ldst',
  34. 'array_static_ldst',
  35. 'array_local_ldst',
  36. ]
  37. class db_dxil_enum_value(object):
  38. "A representation for a value in an enumeration type"
  39. def __init__(self, name, value, doc):
  40. self.name = name # Name (identifier)
  41. self.value = value # Numeric value
  42. self.doc = doc # Documentation string
  43. self.category = None
  44. class db_dxil_enum(object):
  45. "A representation for an enumeration type"
  46. def __init__(self, name, doc, valNameDocTuples=()):
  47. self.name = name
  48. self.doc = doc
  49. self.values = [db_dxil_enum_value(n,v,d) for v,n,d in valNameDocTuples] # Note transmutation
  50. self.is_internal = False # whether this is never serialized
  51. def value_names(self):
  52. return [i.name for i in self.values]
  53. class db_dxil_inst(object):
  54. "A representation for a DXIL instruction"
  55. def __init__(self, name, **kwargs):
  56. self.name = name # short, unique name
  57. self.llvm_id = 0 # ID of LLVM instruction
  58. self.llvm_name = "" # name of LLVM instruction type
  59. self.is_bb_terminator = False # whether this is a basic block terminator
  60. self.is_binary = False # whether this is an arithmetic binary/logical operator
  61. self.is_memory = False # whether this is a memory manipulator operator
  62. self.is_cast = False # whether this is a casting operator
  63. self.is_dxil_op = False # whether this is a call into a built-in DXIL function
  64. self.dxil_op = "" # name of DXIL operation
  65. self.dxil_opid = 0 # ID of DXIL operation
  66. self.dxil_class = "" # name of the opcode class
  67. self.category = "" # classification for this instruction
  68. self.doc = "" # the documentation description of this instruction
  69. self.remarks = "" # long-form remarks on this instruction
  70. self.ops = [] # the operands that this instruction takes
  71. self.is_allowed = True # whether this instruction is allowed in a DXIL program
  72. self.oload_types = "" # overload types if applicable
  73. self.fn_attr = "" # attribute shorthands: rn=does not access memory,ro=only reads from memory,
  74. self.is_deriv = False # whether this is some kind of derivative
  75. self.is_gradient = False # whether this requires a gradient calculation
  76. self.is_feedback = False # whether this is a sampler feedback op
  77. self.is_wave = False # whether this requires in-wave, cross-lane functionality
  78. self.requires_uniform_inputs = False # whether this operation requires that all of its inputs are uniform across the wave
  79. self.shader_stages = () # shader stages to which this applies, empty for all.
  80. self.shader_model = 6,0 # minimum shader model required
  81. self.inst_helper_prefix = None
  82. self.fully_qualified_name_prefix = "hlsl::OP::OpCode"
  83. for k,v in list(kwargs.items()):
  84. setattr(self, k, v)
  85. self.is_dxil_op = self.dxil_op != "" # whether this is a DXIL operation
  86. self.is_reserved = self.dxil_class == "Reserved"
  87. self.shader_model_translated = () # minimum shader model required with translation by linker
  88. self.props = {} # extra properties
  89. def __str__(self):
  90. return self.name
  91. def fully_qualified_name(self):
  92. return "{}::{}".format(self.fully_qualified_name_prefix, self.name)
  93. class db_dxil_metadata(object):
  94. "A representation for a metadata record"
  95. def __init__(self, name, doc, **kwargs):
  96. self.name = name # named metadata, possibly empty
  97. self.doc = doc # the documentation description of this record
  98. for k,v in list(kwargs.items()):
  99. setattr(self, k, v)
  100. class db_dxil_param(object):
  101. "The parameter description for a DXIL instruction"
  102. def __init__(self, pos, llvm_type, name, doc, **kwargs):
  103. self.pos = pos # position in parameter list
  104. self.llvm_type = llvm_type # llvm type name, $o for overload, $r for resource type, $cb for legacy cbuffer, $u4 for u4 struct
  105. self.name = name # short, unique name
  106. self.doc = doc # the documentation description of this parameter
  107. self.is_const = False # whether this argument requires a constant value in the IR
  108. self.enum_name = "" # the name of the enum type if applicable
  109. self.max_value = None # the maximum value for this parameter if applicable
  110. for k,v in kwargs.items():
  111. setattr(self, k, v)
  112. class db_dxil_pass(object):
  113. "The description for a DXIL optimization pass"
  114. def __init__(self, name, **kwargs):
  115. self.name = name # name for the option, typically the command-line switch name
  116. self.args = [] # modifiers for the option
  117. self.type_name = "" # name of the class that implements the pass
  118. self.doc = "" # documentation for the pass
  119. self.category_lib = "" # lib which pass belongs to
  120. for k,v in kwargs.items():
  121. setattr(self, k, v)
  122. class db_dxil_pass_arg(object):
  123. "An argument to a DXIL optimization pass"
  124. def __init__(self, name, **kwargs):
  125. self.name = name # name for the option, typically the command-line switch name
  126. self.ident = "" # identifier for a parameter or global switch
  127. self.is_ctor_param = False # whether this is a constructor parameter
  128. for k,v in kwargs.items():
  129. setattr(self, k, v)
  130. if self.is_ctor_param:
  131. self.is_ctor_param = True
  132. class db_dxil_valrule(object):
  133. "The description of a validation rule."
  134. def __init__(self, name, id, **kwargs):
  135. self.name = name.upper() # short, unique name, eg META.KNOWN
  136. self.rule_id = id # unique identifier
  137. self.enum_name = name.replace(".", "") # remove period for enum name
  138. self.group_name = self.name[:self.name.index(".")] # group name, eg META
  139. self.rule_name = self.name[self.name.index(".")+1:] # rule name, eg KNOWN
  140. self.definition = "Check" + self.group_name + self.rule_name # function name that defines this constraint
  141. self.is_disabled = False # True if the validation rule does not apply
  142. self.err_msg = "" # error message associated with rule
  143. self.category = "" # classification for this rule
  144. self.doc = "" # the documentation description of this rule
  145. self.shader_stages = () # shader stages to which this applies, empty for all.
  146. self.shader_model = 6,0 # minimum shader model required
  147. for k,v in list(kwargs.items()):
  148. setattr(self, k, v)
  149. def __str__(self):
  150. return self.name
  151. class db_dxil(object):
  152. "A database of DXIL instruction data"
  153. def __init__(self):
  154. self.instr = [] # DXIL instructions
  155. self.enums = [] # enumeration types
  156. self.val_rules = [] # validation rules
  157. self.metadata = [] # named metadata (db_dxil_metadata)
  158. self.passes = [] # inventory of available passes (db_dxil_pass)
  159. self.name_idx = {} # DXIL instructions by name
  160. self.enum_idx = {} # enumerations by name
  161. self.dxil_version_info = {}
  162. # list of counters for instructions and dxil ops,
  163. # starting with extra ones specified here
  164. self.counters = extra_counters
  165. self.populate_llvm_instructions()
  166. self.call_instr = self.get_instr_by_llvm_name("CallInst")
  167. self.populate_dxil_operations()
  168. self.build_indices()
  169. self.populate_extended_docs()
  170. self.populate_categories_and_models()
  171. self.build_opcode_enum()
  172. self.mark_disallowed_operations()
  173. self.populate_metadata()
  174. self.populate_passes()
  175. self.build_valrules()
  176. self.build_semantics()
  177. self.build_indices()
  178. self.populate_counters()
  179. def __str__(self):
  180. return '\n'.join(str(i) for i in self.instr)
  181. def add_enum_type(self, name, doc, valNameDocTuples):
  182. "Adds a new enumeration type with name/value/doc tuples"
  183. self.enums.append(db_dxil_enum(name, doc, valNameDocTuples))
  184. def build_indices(self):
  185. "Build a name_idx dictionary with instructions and an enum_idx dictionary with enumeration types"
  186. self.name_idx = {}
  187. for i in self.instr:
  188. self.name_idx[i.name] = i
  189. self.enum_idx = {}
  190. for i in self.enums:
  191. self.enum_idx[i.name] = i
  192. def build_opcode_enum(self):
  193. # Build enumeration from instructions
  194. OpCodeEnum = db_dxil_enum("OpCode", "Enumeration for operations specified by DXIL")
  195. class_dict = {}
  196. class_dict["LlvmInst"] = "LLVM Instructions"
  197. for i in self.instr:
  198. if i.is_dxil_op:
  199. v = db_dxil_enum_value(i.dxil_op, i.dxil_opid, i.doc)
  200. v.category = i.category
  201. class_dict[i.dxil_class] = i.category
  202. OpCodeEnum.values.append(v)
  203. self.enums.append(OpCodeEnum);
  204. OpCodeClass = db_dxil_enum("OpCodeClass", "Groups for DXIL operations with equivalent function templates")
  205. OpCodeClass.is_internal = True
  206. for (k, v) in iter(class_dict.items()):
  207. ev = db_dxil_enum_value(k, 0, None)
  208. ev.category = v
  209. OpCodeClass.values.append(ev)
  210. self.enums.append(OpCodeClass);
  211. def mark_disallowed_operations(self):
  212. # Disallow indirect branching, unreachable instructions and support for exception unwinding.
  213. for i in "IndirectBr,Invoke,Resume,LandingPad,Unreachable".split(","):
  214. self.name_idx[i].is_allowed = False
  215. for i in "UserOp1,UserOp2,VAArg".split(","):
  216. self.name_idx[i].is_allowed = False
  217. # Disallow conversions used for pointer math; GEP is used exclusively in the current model.
  218. for i in "PtrToInt,IntToPtr".split(","):
  219. self.name_idx[i].is_allowed = False
  220. # Barrier supersedes Fence.
  221. self.name_idx["Fence"].is_allowed = False
  222. def verify_dense(self, it, pred, name_proj):
  223. val = None
  224. for i in it:
  225. i_val = pred(i)
  226. if not val is None:
  227. assert val + 1 == i_val, "values in predicate are not sequential and dense, %d follows %d for %s" % (i_val, val, name_proj(i))
  228. val = i_val
  229. def set_op_count_for_version(self, major, minor, op_count):
  230. info = self.dxil_version_info.setdefault((major, minor), dict())
  231. info['NumOpCodes'] = op_count
  232. info['NumOpClasses'] = len(set([op.dxil_class for op in self.instr]))
  233. def populate_categories_and_models(self):
  234. "Populate the category and shader_stages member of instructions."
  235. for i in "TempRegLoad,TempRegStore,MinPrecXRegLoad,MinPrecXRegStore,LoadInput,StoreOutput".split(","):
  236. self.name_idx[i].category = "Temporary, indexable, input, output registers"
  237. for i in "FAbs,Saturate,IsNaN,IsInf,IsFinite,IsNormal,Cos,Sin,Tan,Acos,Asin,Atan,Hcos,Hsin,Htan,Exp,Frc,Log,Sqrt,Rsqrt".split(","):
  238. self.name_idx[i].category = "Unary float"
  239. for i in "Round_ne,Round_ni,Round_pi,Round_z".split(","):
  240. self.name_idx[i].category = "Unary float - rounding"
  241. for i in "Bfrev,Countbits,FirstbitLo,FirstbitSHi".split(","):
  242. self.name_idx[i].category = "Unary int"
  243. for i in "FirstbitHi".split(","):
  244. self.name_idx[i].category = "Unary uint"
  245. for i in "FMax,FMin".split(","):
  246. self.name_idx[i].category = "Binary float"
  247. for i in "IMax,IMin,Add,Sub,Mul,SDiv,SRem,And,Or,Xor,AShr,LShr,Shl".split(","):
  248. self.name_idx[i].category = "Binary int"
  249. for i in "UMax,UMin,UMul,UDiv,URem".split(","):
  250. self.name_idx[i].category = "Binary uint"
  251. for i in "IMul".split(","):
  252. self.name_idx[i].category = "Binary int with two outputs"
  253. for i in "UMul,UDiv".split(","): # Rename this UDiv OpCode to UDivMod
  254. self.name_idx[i].category = "Binary uint with two outputs"
  255. for i in "UAddc,USubb".split(","):
  256. self.name_idx[i].category = "Binary uint with carry or borrow"
  257. for i in "FMad,Fma".split(","):
  258. self.name_idx[i].category = "Tertiary float"
  259. for i in "IMad,Msad,Ibfe".split(","):
  260. self.name_idx[i].category = "Tertiary int"
  261. for i in "UMad,Ubfe".split(","):
  262. self.name_idx[i].category = "Tertiary uint"
  263. for i in "Bfi".split(","):
  264. self.name_idx[i].category = "Quaternary"
  265. for i in "Dot2,Dot3,Dot4".split(","):
  266. self.name_idx[i].category = "Dot"
  267. for i in "CreateHandle,CBufferLoad,CBufferLoadLegacy,TextureLoad,TextureStore,BufferLoad,BufferStore,BufferUpdateCounter,CheckAccessFullyMapped,GetDimensions,RawBufferLoad,RawBufferStore".split(","):
  268. self.name_idx[i].category = "Resources"
  269. for i in "Sample,SampleBias,SampleLevel,SampleGrad,SampleCmp,SampleCmpLevelZero,Texture2DMSGetSamplePosition,RenderTargetGetSamplePosition,RenderTargetGetSampleCount".split(","):
  270. self.name_idx[i].category = "Resources - sample"
  271. for i in "Sample,SampleBias,SampleCmp".split(","):
  272. self.name_idx[i].shader_stages = ("library", "pixel", "compute", "amplification", "mesh")
  273. for i in "RenderTargetGetSamplePosition,RenderTargetGetSampleCount".split(","):
  274. self.name_idx[i].shader_stages = ("pixel",)
  275. for i in "TextureGather,TextureGatherCmp".split(","):
  276. self.name_idx[i].category = "Resources - gather"
  277. for i in "AtomicBinOp,AtomicCompareExchange,Barrier".split(","):
  278. self.name_idx[i].category = "Synchronization"
  279. for i in "CalculateLOD,DerivCoarseX,DerivCoarseY,DerivFineX,DerivFineY".split(","):
  280. self.name_idx[i].category = "Derivatives"
  281. self.name_idx[i].shader_stages = ("library", "pixel", "compute", "amplification", "mesh")
  282. for i in "Discard,EvalSnapped,EvalSampleIndex,EvalCentroid,SampleIndex,Coverage,InnerCoverage,AttributeAtVertex".split(","):
  283. self.name_idx[i].category = "Pixel shader"
  284. self.name_idx[i].shader_stages = ("pixel",)
  285. for i in "ThreadId,GroupId,ThreadIdInGroup,FlattenedThreadIdInGroup".split(","):
  286. self.name_idx[i].category = "Compute/Mesh/Amplification shader"
  287. self.name_idx[i].shader_stages = ("compute", "mesh", "amplification")
  288. for i in "EmitStream,CutStream,EmitThenCutStream,GSInstanceID".split(","):
  289. self.name_idx[i].category = "Geometry shader"
  290. self.name_idx[i].shader_stages = ("geometry",)
  291. for i in "LoadOutputControlPoint,LoadPatchConstant".split(","):
  292. self.name_idx[i].category = "Domain and hull shader"
  293. self.name_idx[i].shader_stages = ("domain", "hull")
  294. for i in "DomainLocation".split(","):
  295. self.name_idx[i].category = "Domain shader"
  296. self.name_idx[i].shader_stages = ("domain",)
  297. for i in "StorePatchConstant,OutputControlPointID".split(","):
  298. self.name_idx[i].category = "Hull shader"
  299. self.name_idx[i].shader_stages = ("hull",)
  300. for i in "PrimitiveID".split(","):
  301. self.name_idx[i].category = "Hull, Domain and Geometry shaders"
  302. self.name_idx[i].shader_stages = ("geometry", "domain", "hull")
  303. for i in "ViewID".split(","):
  304. self.name_idx[i].category = "Graphics shader"
  305. self.name_idx[i].shader_stages = ("vertex", "hull", "domain", "geometry", "pixel", "mesh")
  306. for i in "MakeDouble,SplitDouble,LegacyDoubleToFloat,LegacyDoubleToSInt32,LegacyDoubleToUInt32".split(","):
  307. self.name_idx[i].category = "Double precision"
  308. for i in "CycleCounterLegacy".split(","):
  309. self.name_idx[i].category = "Other"
  310. for i in "LegacyF32ToF16,LegacyF16ToF32".split(","):
  311. self.name_idx[i].category = "Legacy floating-point"
  312. for i in self.instr:
  313. if i.name.startswith("Wave"):
  314. i.category = "Wave"
  315. i.is_wave = True
  316. i.shader_stages = (
  317. "library", "compute", "amplification", "mesh",
  318. "pixel", "vertex", "hull", "domain", "geometry",
  319. "raygeneration", "intersection", "anyhit", "closesthit", "miss", "callable")
  320. elif i.name.startswith("Quad"):
  321. i.category = "Quad Wave Ops"
  322. i.is_wave = True
  323. i.shader_stages = ("library", "compute", "amplification", "mesh", "pixel")
  324. elif i.name.startswith("Bitcast"):
  325. i.category = "Bitcasts with different sizes"
  326. for i in "ViewID,AttributeAtVertex".split(","):
  327. self.name_idx[i].shader_model = 6,1
  328. for i in "RawBufferLoad,RawBufferStore".split(","):
  329. self.name_idx[i].shader_model = 6,2
  330. self.name_idx[i].shader_model_translated = 6,0
  331. for i in "DispatchRaysIndex,DispatchRaysDimensions".split(","):
  332. self.name_idx[i].category = "Ray Dispatch Arguments"
  333. self.name_idx[i].shader_model = 6,3
  334. self.name_idx[i].shader_stages = ("library", "raygeneration","intersection","anyhit", "closesthit","miss","callable")
  335. for i in "InstanceID,InstanceIndex,PrimitiveIndex".split(","):
  336. self.name_idx[i].category = "Raytracing object space uint System Values"
  337. self.name_idx[i].shader_model = 6,3
  338. self.name_idx[i].shader_stages = ("library","intersection","anyhit","closesthit")
  339. for i in "GeometryIndex".split(","):
  340. self.name_idx[i].category = "Raytracing object space uint System Values, raytracing tier 1.1"
  341. self.name_idx[i].shader_model = 6,5
  342. self.name_idx[i].shader_stages = ("library","intersection","anyhit","closesthit")
  343. for i in "HitKind".split(","):
  344. self.name_idx[i].category = "Raytracing hit uint System Values"
  345. self.name_idx[i].shader_model = 6,3
  346. self.name_idx[i].shader_stages = ("library","intersection","anyhit","closesthit",)
  347. for i in "RayFlags".split(","):
  348. self.name_idx[i].category = "Raytracing uint System Values"
  349. self.name_idx[i].shader_model = 6,3
  350. self.name_idx[i].shader_stages = ("library","intersection","anyhit","closesthit","miss")
  351. for i in "WorldRayOrigin,WorldRayDirection".split(","):
  352. self.name_idx[i].category = "Ray Vectors"
  353. self.name_idx[i].shader_model = 6,3
  354. self.name_idx[i].shader_stages = ("library","intersection","anyhit","closesthit","miss")
  355. for i in "ObjectRayOrigin,ObjectRayDirection".split(","):
  356. self.name_idx[i].category = "Ray object space Vectors"
  357. self.name_idx[i].shader_model = 6,3
  358. self.name_idx[i].shader_stages = ("library","intersection","anyhit","closesthit")
  359. for i in "ObjectToWorld,WorldToObject".split(","):
  360. self.name_idx[i].category = "Ray Transforms"
  361. self.name_idx[i].shader_model = 6,3
  362. self.name_idx[i].shader_stages = ("library","intersection","anyhit","closesthit")
  363. for i in "RayTMin,RayTCurrent".split(","):
  364. self.name_idx[i].category = "RayT"
  365. self.name_idx[i].shader_model = 6,3
  366. self.name_idx[i].shader_stages = ("library","intersection","anyhit","closesthit", "miss")
  367. for i in "IgnoreHit,AcceptHitAndEndSearch".split(","):
  368. self.name_idx[i].category = "AnyHit Terminals"
  369. self.name_idx[i].shader_model = 6,3
  370. self.name_idx[i].shader_stages = ("anyhit",)
  371. for i in "CallShader".split(","):
  372. self.name_idx[i].category = "Indirect Shader Invocation"
  373. self.name_idx[i].shader_model = 6,3
  374. self.name_idx[i].shader_stages = ("library", "closesthit","raygeneration","miss","callable")
  375. for i in "TraceRay".split(","):
  376. self.name_idx[i].category = "Indirect Shader Invocation"
  377. self.name_idx[i].shader_model = 6,3
  378. self.name_idx[i].shader_stages = ("library", "raygeneration","closesthit","miss")
  379. for i in "ReportHit".split(","):
  380. self.name_idx[i].category = "Indirect Shader Invocation"
  381. self.name_idx[i].shader_model = 6,3
  382. self.name_idx[i].shader_stages = ("library", "intersection")
  383. for i in "CreateHandleForLib".split(","):
  384. self.name_idx[i].category = "Library create handle from resource struct (like HL intrinsic)"
  385. self.name_idx[i].shader_model = 6,3
  386. self.name_idx[i].shader_model_translated = 6,0
  387. for i in "AnnotateHandle,CreateHandleFromBinding,CreateHandleFromHeap".split(","):
  388. self.name_idx[i].category = "Get handle from heap"
  389. self.name_idx[i].shader_model = 6,6
  390. for i in "Dot4AddU8Packed,Dot4AddI8Packed,Dot2AddHalf".split(","):
  391. self.name_idx[i].category = "Dot product with accumulate"
  392. self.name_idx[i].shader_model = 6,4
  393. for i in "WaveMatch,WaveMultiPrefixOp,WaveMultiPrefixBitCount".split(","):
  394. self.name_idx[i].category = "Wave"
  395. self.name_idx[i].shader_model = 6,5
  396. for i in "SetMeshOutputCounts,EmitIndices,GetMeshPayload,StoreVertexOutput,StorePrimitiveOutput".split(","):
  397. self.name_idx[i].category = "Mesh shader instructions"
  398. self.name_idx[i].shader_stages = ("mesh",)
  399. self.name_idx[i].shader_model = 6,5
  400. for i in "DispatchMesh".split(","):
  401. self.name_idx[i].category = "Amplification shader instructions"
  402. self.name_idx[i].shader_stages = ("amplification",)
  403. self.name_idx[i].shader_model = 6,5
  404. for i in "WriteSamplerFeedback,WriteSamplerFeedbackBias".split(","):
  405. self.name_idx[i].category = "Sampler Feedback"
  406. self.name_idx[i].is_feedback = True
  407. self.name_idx[i].is_gradient = True
  408. self.name_idx[i].shader_model = 6,5
  409. self.name_idx[i].shader_stages = ("library", "pixel",)
  410. for i in "WriteSamplerFeedbackLevel,WriteSamplerFeedbackGrad".split(","):
  411. self.name_idx[i].category = "Sampler Feedback"
  412. self.name_idx[i].is_feedback = True
  413. self.name_idx[i].shader_model = 6,5
  414. for i in ("AllocateRayQuery,RayQuery_TraceRayInline,RayQuery_Proceed,RayQuery_Abort,RayQuery_CommitNonOpaqueTriangleHit,RayQuery_CommitProceduralPrimitiveHit,RayQuery_RayFlags,RayQuery_WorldRayOrigin,RayQuery_WorldRayDirection,RayQuery_RayTMin,"+
  415. "RayQuery_CandidateTriangleRayT,RayQuery_CommittedRayT,RayQuery_CandidateInstanceIndex,RayQuery_CandidateInstanceID,RayQuery_CandidateGeometryIndex,RayQuery_CandidatePrimitiveIndex,"+
  416. "RayQuery_CandidateObjectRayOrigin,RayQuery_CandidateObjectRayDirection,RayQuery_CommittedInstanceIndex,RayQuery_CommittedInstanceID,RayQuery_CommittedGeometryIndex,RayQuery_CommittedPrimitiveIndex,"+
  417. "RayQuery_CommittedObjectRayOrigin,RayQuery_CommittedObjectRayDirection,RayQuery_CandidateProceduralPrimitiveNonOpaque,RayQuery_CandidateTriangleFrontFace,RayQuery_CommittedTriangleFrontFace,"+
  418. "RayQuery_CandidateTriangleBarycentrics,RayQuery_CommittedTriangleBarycentrics,RayQuery_CommittedStatus,RayQuery_CandidateType,RayQuery_CandidateObjectToWorld3x4,"+
  419. "RayQuery_CandidateWorldToObject3x4,RayQuery_CommittedObjectToWorld3x4,RayQuery_CommittedWorldToObject3x4,RayQuery_CandidateInstanceContributionToHitGroupIndex,RayQuery_CommittedInstanceContributionToHitGroupIndex").split(","):
  420. self.name_idx[i].category = "Inline Ray Query"
  421. self.name_idx[i].shader_model = 6,5
  422. for i in "Unpack4x8".split(","):
  423. self.name_idx[i].category = "Unpacking intrinsics"
  424. self.name_idx[i].shader_model = 6,6
  425. for i in "Pack4x8".split(","):
  426. self.name_idx[i].category = "Packing intrinsics"
  427. self.name_idx[i].shader_model = 6,6
  428. for i in "IsHelperLane".split(","):
  429. self.name_idx[i].category = "Helper Lanes"
  430. self.name_idx[i].shader_model = 6,6
  def populate_llvm_instructions(self):
      """Register the instructions that map directly to LLVM instructions.

      The list mirrors include\\llvm\\IR\\Instruction.def. Each entry is
      forwarded to ``self.add_llvm_instr`` in ascending opcode order with:
      instruction class, LLVM opcode, name, LLVM C++ class name, description,
      overload-type string and parameter list, plus an optional ``counters``
      keyword.

      Some instructions don't have their operands defined here because they
      are very specific and expanding generality isn't worth it; for example,
      branching refers to basic block arguments. Those entries use an empty
      overload string and an empty parameter list.

      Opcodes 54-56 and 58 are not registered by this method.
      """
      # Return parameter typed with the overload type ("$o").
      # NOTE(review): the description text says "no return value" even though
      # the type is "$o"; it is runtime data, so it is kept as-is here.
      retoload_param = db_dxil_param(0, "$o", "", "no return value")

      # Overload-type strings. None of them include the 8-bit ("8") overload.
      oload_all_arith = "hfd1wil"
      oload_all_arith_v = "v" + oload_all_arith   # additionally allows void
      oload_int_arith = "wil"
      oload_int_arith_b = "1wil"                  # integers plus 1-bit
      oload_float_arith = "hfd"

      # Parameter lists shared by every cast / binary instruction below.
      oload_cast_params = [
          retoload_param,
          db_dxil_param(1, "$o", "value", "Value to cast/convert"),
      ]
      oload_binary_params = [
          retoload_param,
          db_dxil_param(1, "$o", "a", "first value"),
          db_dxil_param(2, "$o", "b", "second value"),
      ]

      # Counter tuples passed through the ``counters`` keyword. ``None`` in
      # the table means the keyword is not passed at all, so the callee's own
      # default applies.
      ints = ('ints',)
      uints = ('uints',)
      floats = ('floats',)
      fence = ('fence',)
      atomic = ('atomic',)

      # (class, opcode, name, LLVM class, doc, overloads, params, counters)
      instructions = [
          ("TERM", 1, "Ret", "ReturnInst", "returns a value (possibly void), from a function.", oload_all_arith_v, [retoload_param], None),
          ("TERM", 2, "Br", "BranchInst", "branches (conditional or unconditional)", "", [], None),
          ("TERM", 3, "Switch", "SwitchInst", "performs a multiway switch", "", [], None),
          ("TERM", 4, "IndirectBr", "IndirectBrInst", "branches indirectly", "", [], None),
          ("TERM", 5, "Invoke", "InvokeInst", "invokes function with normal and exceptional returns", "", [], None),
          ("TERM", 6, "Resume", "ResumeInst", "resumes the propagation of an exception", "", [], None),
          ("TERM", 7, "Unreachable", "UnreachableInst", "is unreachable", "", [], None),

          ("BINARY", 8, "Add", "BinaryOperator", "returns the sum of its two operands", oload_int_arith, oload_binary_params, ints),
          ("BINARY", 9, "FAdd", "BinaryOperator", "returns the sum of its two operands", oload_float_arith, oload_binary_params, floats),
          ("BINARY", 10, "Sub", "BinaryOperator", "returns the difference of its two operands", oload_int_arith, oload_binary_params, ints),
          ("BINARY", 11, "FSub", "BinaryOperator", "returns the difference of its two operands", oload_float_arith, oload_binary_params, floats),
          ("BINARY", 12, "Mul", "BinaryOperator", "returns the product of its two operands", oload_int_arith, oload_binary_params, ints),
          ("BINARY", 13, "FMul", "BinaryOperator", "returns the product of its two operands", oload_float_arith, oload_binary_params, floats),
          ("BINARY", 14, "UDiv", "BinaryOperator", "returns the quotient of its two unsigned operands", oload_int_arith, oload_binary_params, uints),
          ("BINARY", 15, "SDiv", "BinaryOperator", "returns the quotient of its two signed operands", oload_int_arith, oload_binary_params, ints),
          ("BINARY", 16, "FDiv", "BinaryOperator", "returns the quotient of its two operands", oload_float_arith, oload_binary_params, floats),
          ("BINARY", 17, "URem", "BinaryOperator", "returns the remainder from the unsigned division of its two operands", oload_int_arith, oload_binary_params, uints),
          ("BINARY", 18, "SRem", "BinaryOperator", "returns the remainder from the signed division of its two operands", oload_int_arith, oload_binary_params, ints),
          ("BINARY", 19, "FRem", "BinaryOperator", "returns the remainder from the division of its two operands", oload_float_arith, oload_binary_params, floats),
          ("BINARY", 20, "Shl", "BinaryOperator", "shifts left (logical)", oload_int_arith, oload_binary_params, uints),
          ("BINARY", 21, "LShr", "BinaryOperator", "shifts right (logical), with zero bit fill", oload_int_arith, oload_binary_params, uints),
          ("BINARY", 22, "AShr", "BinaryOperator", "shifts right (arithmetic), with 'a' operand sign bit fill", oload_int_arith, oload_binary_params, ints),
          ("BINARY", 23, "And", "BinaryOperator", "returns a bitwise logical and of its two operands", oload_int_arith_b, oload_binary_params, uints),
          ("BINARY", 24, "Or", "BinaryOperator", "returns a bitwise logical or of its two operands", oload_int_arith_b, oload_binary_params, uints),
          ("BINARY", 25, "Xor", "BinaryOperator", "returns a bitwise logical xor of its two operands", oload_int_arith_b, oload_binary_params, uints),

          ("MEMORY", 26, "Alloca", "AllocaInst", "allocates memory on the stack frame of the currently executing function", "", [], None),
          ("MEMORY", 27, "Load", "LoadInst", "reads from memory", "", [], None),
          ("MEMORY", 28, "Store", "StoreInst", "writes to memory", "", [], None),
          ("MEMORY", 29, "GetElementPtr", "GetElementPtrInst", "gets the address of a subelement of an aggregate value", "", [], None),
          ("MEMORY", 30, "Fence", "FenceInst", "introduces happens-before edges between operations", "", [], fence),
          ("MEMORY", 31, "AtomicCmpXchg", "AtomicCmpXchgInst", "atomically modifies memory", "", [], atomic),
          ("MEMORY", 32, "AtomicRMW", "AtomicRMWInst", "atomically modifies memory", "", [], atomic),

          ("CAST", 33, "Trunc", "TruncInst", "truncates an integer", oload_int_arith_b, oload_cast_params, ints),
          ("CAST", 34, "ZExt", "ZExtInst", "zero extends an integer", oload_int_arith_b, oload_cast_params, uints),
          ("CAST", 35, "SExt", "SExtInst", "sign extends an integer", oload_int_arith_b, oload_cast_params, ints),
          ("CAST", 36, "FPToUI", "FPToUIInst", "converts a floating point to UInt", oload_all_arith, oload_cast_params, floats),
          ("CAST", 37, "FPToSI", "FPToSIInst", "converts a floating point to SInt", oload_all_arith, oload_cast_params, floats),
          ("CAST", 38, "UIToFP", "UIToFPInst", "converts a UInt to floating point", oload_all_arith, oload_cast_params, floats),
          ("CAST", 39, "SIToFP", "SIToFPInst", "converts a SInt to floating point", oload_all_arith, oload_cast_params, floats),
          ("CAST", 40, "FPTrunc", "FPTruncInst", "truncates a floating point", oload_float_arith, oload_cast_params, floats),
          ("CAST", 41, "FPExt", "FPExtInst", "extends a floating point", oload_float_arith, oload_cast_params, floats),
          ("CAST", 42, "PtrToInt", "PtrToIntInst", "converts a pointer to integer", "i", oload_cast_params, None),
          ("CAST", 43, "IntToPtr", "IntToPtrInst", "converts an integer to Pointer", "i", oload_cast_params, None),
          ("CAST", 44, "BitCast", "BitCastInst", "performs a bit-preserving type cast", oload_all_arith, oload_cast_params, None),
          ("CAST", 45, "AddrSpaceCast", "AddrSpaceCastInst", "casts a value addrspace", "", oload_cast_params, None),

          ("OTHER", 46, "ICmp", "ICmpInst", "compares integers", oload_int_arith_b, oload_binary_params, ints),
          ("OTHER", 47, "FCmp", "FCmpInst", "compares floating points", oload_float_arith, oload_binary_params, floats),
          ("OTHER", 48, "PHI", "PHINode", "is a PHI node instruction", "", [], None),
          ("OTHER", 49, "Call", "CallInst", "calls a function", "", [], None),
          ("OTHER", 50, "Select", "SelectInst", "selects an instruction", "", [], None),
          ("OTHER", 51, "UserOp1", "Instruction", "may be used internally in a pass", "", [], None),
          ("OTHER", 52, "UserOp2", "Instruction", "internal to passes only", "", [], None),
          ("OTHER", 53, "VAArg", "VAArgInst", "vaarg instruction", "", [], None),
          ("OTHER", 57, "ExtractValue", "ExtractValueInst", "extracts from aggregate", "", [], None),
          ("OTHER", 59, "LandingPad", "LandingPadInst", "represents a landing pad", "", [], None),
      ]

      for (llvm_class, opcode, name, llvm_name, doc,
           oload_types, params, counters) in instructions:
          if counters is None:
              self.add_llvm_instr(llvm_class, opcode, name, llvm_name, doc,
                                  oload_types, params)
          else:
              self.add_llvm_instr(llvm_class, opcode, name, llvm_name, doc,
                                  oload_types, params, counters=counters)
  504. def populate_dxil_operations(self):
  505. # $o in a parameter type means the overload type
  506. # $r in a parameter type means the resource type
  507. # $cb in a parameter type means cbuffer legacy load return type
  508. # overload types are a string of (v)oid, (h)alf, (f)loat, (d)ouble, (1)-bit, (8)-bit, (w)ord, (i)nt, (l)ong
  509. self.opcode_param = db_dxil_param(1, "i32", "opcode", "DXIL opcode")
  510. retvoid_param = db_dxil_param(0, "v", "", "no return value")
  511. next_op_idx = 0
  512. self.add_dxil_op("TempRegLoad", next_op_idx, "TempRegLoad", "helper load operation", "hfwi", "ro", [
  513. db_dxil_param(0, "$o", "", "register value"),
  514. db_dxil_param(2, "u32", "index", "linearized register index")])
  515. next_op_idx += 1
  516. self.add_dxil_op("TempRegStore", next_op_idx, "TempRegStore", "helper store operation", "hfwi", "", [
  517. retvoid_param,
  518. db_dxil_param(2, "u32", "index", "linearized register index"),
  519. db_dxil_param(3, "$o", "value", "value to store")])
  520. next_op_idx += 1
  521. self.add_dxil_op("MinPrecXRegLoad", next_op_idx, "MinPrecXRegLoad", "helper load operation for minprecision", "hw", "ro", [
  522. db_dxil_param(0, "$o", "", "register value"),
  523. db_dxil_param(2, "pf32", "regIndex", "pointer to indexable register"),
  524. db_dxil_param(3, "i32", "index", "index"),
  525. db_dxil_param(4, "u8", "component", "component")])
  526. next_op_idx += 1
  527. self.add_dxil_op("MinPrecXRegStore", next_op_idx, "MinPrecXRegStore", "helper store operation for minprecision", "hw", "", [
  528. retvoid_param,
  529. db_dxil_param(2, "pf32", "regIndex", "pointer to indexable register"),
  530. db_dxil_param(3, "i32", "index", "index"),
  531. db_dxil_param(4, "u8", "component", "component"),
  532. db_dxil_param(5, "$o", "value", "value to store")])
  533. next_op_idx += 1
  534. self.add_dxil_op("LoadInput", next_op_idx, "LoadInput", "loads the value from shader input", "hfwi", "rn", [
  535. db_dxil_param(0, "$o", "", "input value"),
  536. db_dxil_param(2, "u32", "inputSigId", "input signature element ID"),
  537. db_dxil_param(3, "u32", "rowIndex", "row index relative to element"),
  538. db_dxil_param(4, "u8", "colIndex", "column index relative to element"),
  539. db_dxil_param(5, "i32", "gsVertexAxis", "gsVertexAxis")],
  540. counters=('sig_ld',))
  541. next_op_idx += 1
  542. self.add_dxil_op("StoreOutput", next_op_idx, "StoreOutput", "stores the value to shader output", "hfwi", "", [ # note, cannot store bit even though load supports it
  543. retvoid_param,
  544. db_dxil_param(2, "u32", "outputSigId", "output signature element ID"),
  545. db_dxil_param(3, "u32", "rowIndex", "row index relative to element"),
  546. db_dxil_param(4, "u8", "colIndex", "column index relative to element"),
  547. db_dxil_param(5, "$o", "value", "value to store")],
  548. counters=('sig_st',))
  549. next_op_idx += 1
  def UFI(name, **mappings):
      """Pick a counter name from the leading letters of an operation name.

      The name is upper-cased first. Each keyword in ``mappings`` is then
      tried, in the order given, as a prefix of the upper-cased name; the
      first match returns that keyword's value. Keys are compared as written,
      so only upper-case keys can match.

      Without a matching keyword, a name starting with 'F' yields 'floats',
      one starting with 'U' yields 'uints', and anything else (including an
      empty name) yields 'ints'.
      """
      upper_name = name.upper()
      for prefix, counter in mappings.items():
          if upper_name.startswith(prefix):
              return counter
      # Fall back on the first letter; slicing keeps an empty name safe.
      by_first_letter = {'F': 'floats', 'U': 'uints'}
      return by_first_letter.get(upper_name[:1], 'ints')
  561. # Unary float operations are regular.
  562. for i in "FAbs,Saturate".split(","):
  563. self.add_dxil_op(i, next_op_idx, "Unary", "returns the " + i, "hfd", "rn", [
  564. db_dxil_param(0, "$o", "", "operation result"),
  565. db_dxil_param(2, "$o", "value", "input value")],
  566. counters=('floats',))
  567. next_op_idx += 1
  568. for i in "IsNaN,IsInf,IsFinite,IsNormal".split(","):
  569. self.add_dxil_op(i, next_op_idx, "IsSpecialFloat", "returns the " + i, "hf", "rn", [
  570. db_dxil_param(0, "i1", "", "operation result"),
  571. db_dxil_param(2, "$o", "value", "input value")],
  572. counters=('floats',))
  573. next_op_idx += 1
  574. for i in "Cos,Sin,Tan,Acos,Asin,Atan,Hcos,Hsin,Htan,Exp,Frc,Log,Sqrt,Rsqrt,Round_ne,Round_ni,Round_pi,Round_z".split(","):
  575. self.add_dxil_op(i, next_op_idx, "Unary", "returns the " + i, "hf", "rn", [
  576. db_dxil_param(0, "$o", "", "operation result"),
  577. db_dxil_param(2, "$o", "value", "input value")],
  578. counters=('floats',))
  579. next_op_idx += 1
  580. # Unary int operations are regular.
  581. for i in "Bfrev".split(","):
  582. self.add_dxil_op(i, next_op_idx, "Unary", "returns the reverse bit pattern of the input value", "wil", "rn", [
  583. db_dxil_param(0, "$o", "", "operation result"),
  584. db_dxil_param(2, "$o", "value", "input value")],
  585. counters=('uints',))
  586. next_op_idx += 1
  587. for i in "Countbits,FirstbitLo".split(","):
  588. self.add_dxil_op(i, next_op_idx, "UnaryBits", "returns the " + i, "wil", "rn", [
  589. db_dxil_param(0, "i32", "", "operation result"),
  590. db_dxil_param(2, "$o", "value", "input value")],
  591. counters=('uints',))
  592. next_op_idx += 1
  593. for i in "FirstbitHi,FirstbitSHi".split(","):
  594. self.add_dxil_op(i, next_op_idx, "UnaryBits", "returns src != 0? (BitWidth-1 - " + i + ") : -1", "wil", "rn", [
  595. db_dxil_param(0, "i32", "", "operation result"),
  596. db_dxil_param(2, "$o", "value", "input value")],
  597. counters=('uints',))
  598. next_op_idx += 1
  599. # Binary float operations
  600. for i in "FMax,FMin".split(","):
  601. self.add_dxil_op(i, next_op_idx, "Binary", "returns the " + i + " of the input values", "hfd", "rn", [
  602. db_dxil_param(0, "$o", "", "operation result"),
  603. db_dxil_param(2, "$o", "a", "input value"),
  604. db_dxil_param(3, "$o", "b", "input value")],
  605. counters=('floats',))
  606. next_op_idx += 1
  607. # Binary int operations
  608. for i in "IMax,IMin,UMax,UMin".split(","):
  609. self.add_dxil_op(i, next_op_idx, "Binary", "returns the " + i + " of the input values", "wil", "rn", [
  610. db_dxil_param(0, "$o", "", "operation result"),
  611. db_dxil_param(2, "$o", "a", "input value"),
  612. db_dxil_param(3, "$o", "b", "input value")],
  613. counters=(UFI(i),))
  614. next_op_idx += 1
  615. # Binary int operations with two outputs
  616. for i in "IMul,UMul,UDiv".split(","):
  617. self.add_dxil_op(i, next_op_idx, "BinaryWithTwoOuts", "returns the " + i + " of the input values", "i", "rn", [
  618. db_dxil_param(0, "twoi32", "", "operation result"),
  619. db_dxil_param(2, "$o", "a", "input value"),
  620. db_dxil_param(3, "$o", "b", "input value")],
  621. counters=(UFI(i),))
  622. next_op_idx += 1
  623. # Binary int operations with carry
  624. for i in "UAddc,USubb".split(","):
  625. self.add_dxil_op(i, next_op_idx, "BinaryWithCarryOrBorrow", "returns the " + i + " of the input values", "i", "rn", [
  626. db_dxil_param(0, "i32c", "", "operation result with carry/borrow value"),
  627. db_dxil_param(2, "$o", "a", "input value"),
  628. db_dxil_param(3, "$o", "b", "input value")],
  629. counters=('uints',))
  630. next_op_idx += 1
  631. # Tertiary float.
  632. self.add_dxil_op("FMad", next_op_idx, "Tertiary", "performs a fused multiply add (FMA) of the form a * b + c", "hfd", "rn", [
  633. db_dxil_param(0, "$o", "", "the fused multiply-addition of parameters a * b + c"),
  634. db_dxil_param(2, "$o", "a", "first value for FMA, the first factor"),
  635. db_dxil_param(3, "$o", "b", "second value for FMA, the second factor"),
  636. db_dxil_param(4, "$o", "c", "third value for FMA, the addend")])
  637. next_op_idx += 1
  638. self.add_dxil_op("Fma", next_op_idx, "Tertiary", "performs a fused multiply add (FMA) of the form a * b + c", "d", "rn", [
  639. db_dxil_param(0, "$o", "", "the double-precision fused multiply-addition of parameters a * b + c, accurate to 0.5 units of least precision (ULP)"),
  640. db_dxil_param(2, "$o", "a", "first value for FMA, the first factor"),
  641. db_dxil_param(3, "$o", "b", "second value for FMA, the second factor"),
  642. db_dxil_param(4, "$o", "c", "third value for FMA, the addend")],
  643. counters=('floats',))
  644. next_op_idx += 1
  645. # Tertiary int.
  646. for i in "IMad,UMad".split(","):
  647. self.add_dxil_op(i, next_op_idx, "Tertiary", "performs an integral " + i, "wil", "rn", [
  648. db_dxil_param(0, "$o", "", "the operation result"),
  649. db_dxil_param(2, "$o", "a", "first value for FMA, the first factor"),
  650. db_dxil_param(3, "$o", "b", "second value for FMA, the second factor"),
  651. db_dxil_param(4, "$o", "c", "third value for FMA, the addend")],
  652. counters=(UFI(i),))
  653. next_op_idx += 1
  654. for i in "Msad,Ibfe,Ubfe".split(","):
  655. self.add_dxil_op(i, next_op_idx, "Tertiary", "performs an integral " + i, "il", "rn", [
  656. db_dxil_param(0, "$o", "", "the operation result"),
  657. db_dxil_param(2, "$o", "a", "first value for FMA, the first factor"),
  658. db_dxil_param(3, "$o", "b", "second value for FMA, the second factor"),
  659. db_dxil_param(4, "$o", "c", "third value for FMA, the addend")],
  660. counters=(UFI(i, M='uints'),))
  661. next_op_idx += 1
  662. # Quaternary
  663. self.add_dxil_op("Bfi", next_op_idx, "Quaternary", "given a bit range from the LSB of a number, places that number of bits in another number at any offset", "i", "rn", [
  664. db_dxil_param(0, "$o", "", "the operation result"),
  665. db_dxil_param(2, "$o", "width", "the bitfield width to take from the value"),
  666. db_dxil_param(3, "$o", "offset", "the bitfield offset to replace in the value"),
  667. db_dxil_param(4, "$o", "value", "the number the bits are taken from"),
  668. db_dxil_param(5, "$o", "replacedValue", "the number with bits to be replaced")],
  669. counters=('uints',))
  670. next_op_idx += 1
  671. # Dot
  672. self.add_dxil_op("Dot2", next_op_idx, "Dot2", "two-dimensional vector dot-product", "hf", "rn", [
  673. db_dxil_param(0, "$o", "", "the operation result"),
  674. db_dxil_param(2, "$o", "ax", "the first component of the first vector"),
  675. db_dxil_param(3, "$o", "ay", "the second component of the first vector"),
  676. db_dxil_param(4, "$o", "bx", "the first component of the second vector"),
  677. db_dxil_param(5, "$o", "by", "the second component of the second vector")],
  678. counters=('floats',))
  679. next_op_idx += 1
  680. self.add_dxil_op("Dot3", next_op_idx, "Dot3", "three-dimensional vector dot-product", "hf", "rn", [
  681. db_dxil_param(0, "$o", "", "the operation result"),
  682. db_dxil_param(2, "$o", "ax", "the first component of the first vector"),
  683. db_dxil_param(3, "$o", "ay", "the second component of the first vector"),
  684. db_dxil_param(4, "$o", "az", "the third component of the first vector"),
  685. db_dxil_param(5, "$o", "bx", "the first component of the second vector"),
  686. db_dxil_param(6, "$o", "by", "the second component of the second vector"),
  687. db_dxil_param(7, "$o", "bz", "the third component of the second vector")],
  688. counters=('floats',))
  689. next_op_idx += 1
  690. self.add_dxil_op("Dot4", next_op_idx, "Dot4", "four-dimensional vector dot-product", "hf", "rn", [
  691. db_dxil_param(0, "$o", "", "the operation result"),
  692. db_dxil_param(2, "$o", "ax", "the first component of the first vector"),
  693. db_dxil_param(3, "$o", "ay", "the second component of the first vector"),
  694. db_dxil_param(4, "$o", "az", "the third component of the first vector"),
  695. db_dxil_param(5, "$o", "aw", "the fourth component of the first vector"),
  696. db_dxil_param(6, "$o", "bx", "the first component of the second vector"),
  697. db_dxil_param(7, "$o", "by", "the second component of the second vector"),
  698. db_dxil_param(8, "$o", "bz", "the third component of the second vector"),
  699. db_dxil_param(9, "$o", "bw", "the fourth component of the second vector")],
  700. counters=('floats',))
  701. next_op_idx += 1
  702. # Resources.
  703. self.add_dxil_op("CreateHandle", next_op_idx, "CreateHandle", "creates the handle to a resource", "v", "ro", [
  704. db_dxil_param(0, "res", "", "the handle to the resource"),
  705. db_dxil_param(2, "i8", "resourceClass", "the class of resource to create (SRV, UAV, CBuffer, Sampler)", is_const=True), # maps to DxilResourceBase::Class
  706. db_dxil_param(3, "i32", "rangeId", "range identifier for resource", is_const=True),
  707. db_dxil_param(4, "i32", "index", "zero-based index into range"),
  708. db_dxil_param(5, "i1", "nonUniformIndex", "non-uniform resource index", is_const=True)])
  709. next_op_idx += 1
  710. self.add_dxil_op("CBufferLoad", next_op_idx, "CBufferLoad", "loads a value from a constant buffer resource", "hfd8wil", "ro", [
  711. db_dxil_param(0, "$o", "", "the value for the constant buffer variable"),
  712. db_dxil_param(2, "res", "handle", "cbuffer handle"),
  713. db_dxil_param(3, "u32", "byteOffset", "linear byte offset of value"),
  714. db_dxil_param(4, "u32", "alignment", "load access alignment", is_const=True)])
  715. next_op_idx += 1
  716. self.add_dxil_op("CBufferLoadLegacy", next_op_idx, "CBufferLoadLegacy", "loads a value from a constant buffer resource", "hfdwil", "ro", [
  717. db_dxil_param(0, "$cb", "", "the value for the constant buffer variable"),
  718. db_dxil_param(2, "res", "handle", "cbuffer handle"),
  719. db_dxil_param(3, "u32", "regIndex", "0-based index into cbuffer instance")])
  720. next_op_idx += 1
  721. self.add_dxil_op("Sample", next_op_idx, "Sample", "samples a texture", "hf", "ro", [
  722. db_dxil_param(0, "$r", "", "the sampled value"),
  723. db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
  724. db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
  725. db_dxil_param(4, "f", "coord0", "coordinate"),
  726. db_dxil_param(5, "f", "coord1", "coordinate, undef for Texture1D"),
  727. db_dxil_param(6, "f", "coord2", "coordinate, undef for Texture1D, Texture1DArray or Texture2D"),
  728. db_dxil_param(7, "f", "coord3", "coordinate, defined only for TextureCubeArray"),
  729. db_dxil_param(8, "i32", "offset0", "optional offset, applicable to Texture1D, Texture1DArray, and as part of offset1"),
  730. db_dxil_param(9, "i32", "offset1", "optional offset, applicable to Texture2D, Texture2DArray, and as part of offset2"),
  731. db_dxil_param(10, "i32", "offset2", "optional offset, applicable to Texture3D"),
  732. db_dxil_param(11, "f", "clamp", "clamp value")],
  733. counters=('tex_norm',))
  734. next_op_idx += 1
  735. self.add_dxil_op("SampleBias", next_op_idx, "SampleBias", "samples a texture after applying the input bias to the mipmap level", "hf", "ro", [
  736. db_dxil_param(0, "$r", "", "the sampled value"),
  737. db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
  738. db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
  739. db_dxil_param(4, "f", "coord0", "coordinate"),
  740. db_dxil_param(5, "f", "coord1", "coordinate, undef for Texture1D"),
  741. db_dxil_param(6, "f", "coord2", "coordinate, undef for Texture1D, Texture1DArray or Texture2D"),
  742. db_dxil_param(7, "f", "coord3", "coordinate, defined only for TextureCubeArray"),
  743. db_dxil_param(8, "i32", "offset0", "optional offset, applicable to Texture1D, Texture1DArray, and as part of offset1"),
  744. db_dxil_param(9, "i32", "offset1", "optional offset, applicable to Texture2D, Texture2DArray, and as part of offset2"),
  745. db_dxil_param(10, "i32", "offset2", "optional offset, applicable to Texture3D"),
  746. db_dxil_param(11, "f", "bias", "bias value"),
  747. db_dxil_param(12, "f", "clamp", "clamp value")],
  748. counters=('tex_bias',))
  749. next_op_idx += 1
  750. self.add_dxil_op("SampleLevel", next_op_idx, "SampleLevel", "samples a texture using a mipmap-level offset", "hf", "ro", [
  751. db_dxil_param(0, "$r", "", "the sampled value"),
  752. db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
  753. db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
  754. db_dxil_param(4, "f", "coord0", "coordinate"),
  755. db_dxil_param(5, "f", "coord1", "coordinate, undef for Texture1D"),
  756. db_dxil_param(6, "f", "coord2", "coordinate, undef for Texture1D, Texture1DArray or Texture2D"),
  757. db_dxil_param(7, "f", "coord3", "coordinate, defined only for TextureCubeArray"),
  758. db_dxil_param(8, "i32", "offset0", "optional offset, applicable to Texture1D, Texture1DArray, and as part of offset1"),
  759. db_dxil_param(9, "i32", "offset1", "optional offset, applicable to Texture2D, Texture2DArray, and as part of offset2"),
  760. db_dxil_param(10, "i32", "offset2", "optional offset, applicable to Texture3D"),
  761. db_dxil_param(11, "f", "LOD", "level of detail, biggest map if less than or equal to zero; fraction used to interpolate across levels")],
  762. counters=('tex_norm',))
  763. next_op_idx += 1
  764. self.add_dxil_op("SampleGrad", next_op_idx, "SampleGrad", "samples a texture using a gradient to influence the way the sample location is calculated", "hf", "ro", [
  765. db_dxil_param(0, "$r", "", "the sampled value"),
  766. db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
  767. db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
  768. db_dxil_param(4, "f", "coord0", "coordinate"),
  769. db_dxil_param(5, "f", "coord1", "coordinate, undef for Texture1D"),
  770. db_dxil_param(6, "f", "coord2", "coordinate, undef for Texture1D, Texture1DArray or Texture2D"),
  771. db_dxil_param(7, "f", "coord3", "coordinate, defined only for TextureCubeArray"),
  772. db_dxil_param(8, "i32", "offset0", "optional offset, applicable to Texture1D, Texture1DArray, and as part of offset1"),
  773. db_dxil_param(9, "i32", "offset1", "optional offset, applicable to Texture2D, Texture2DArray, and as part of offset2"),
  774. db_dxil_param(10, "i32", "offset2", "optional offset, applicable to Texture3D"),
  775. db_dxil_param(11, "f", "ddx0", "rate of change of the texture coordinate in the x direction"),
  776. db_dxil_param(12, "f", "ddx1", "rate of change of the texture coordinate in the x direction"),
  777. db_dxil_param(13, "f", "ddx2", "rate of change of the texture coordinate in the x direction"),
  778. db_dxil_param(14, "f", "ddy0", "rate of change of the texture coordinate in the y direction"),
  779. db_dxil_param(15, "f", "ddy1", "rate of change of the texture coordinate in the y direction"),
  780. db_dxil_param(16, "f", "ddy2", "rate of change of the texture coordinate in the y direction"),
  781. db_dxil_param(17, "f", "clamp", "clamp value")],
  782. counters=('tex_grad',))
  783. next_op_idx += 1
  784. self.add_dxil_op("SampleCmp", next_op_idx, "SampleCmp", "samples a texture and compares a single component against the specified comparison value", "hf", "ro", [
  785. db_dxil_param(0, "$r", "", "the value for the constant buffer variable"),
  786. db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
  787. db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
  788. db_dxil_param(4, "f", "coord0", "coordinate"),
  789. db_dxil_param(5, "f", "coord1", "coordinate, undef for Texture1D"),
  790. db_dxil_param(6, "f", "coord2", "coordinate, undef for Texture1D, Texture1DArray or Texture2D"),
  791. db_dxil_param(7, "f", "coord3", "coordinate, defined only for TextureCubeArray"),
  792. db_dxil_param(8, "i32", "offset0", "optional offset, applicable to Texture1D, Texture1DArray, and as part of offset1"),
  793. db_dxil_param(9, "i32", "offset1", "optional offset, applicable to Texture2D, Texture2DArray, and as part of offset2"),
  794. db_dxil_param(10, "i32", "offset2", "optional offset, applicable to Texture3D"),
  795. db_dxil_param(11, "f", "compareValue", "the value to compare with"),
  796. db_dxil_param(12, "f", "clamp", "clamp value")],
  797. counters=('tex_cmp',))
  798. next_op_idx += 1
  799. self.add_dxil_op("SampleCmpLevelZero", next_op_idx, "SampleCmpLevelZero", "samples a texture and compares a single component against the specified comparison value", "hf", "ro", [
  800. db_dxil_param(0, "$r", "", "the value for the constant buffer variable"),
  801. db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
  802. db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
  803. db_dxil_param(4, "f", "coord0", "coordinate"),
  804. db_dxil_param(5, "f", "coord1", "coordinate, undef for Texture1D"),
  805. db_dxil_param(6, "f", "coord2", "coordinate, undef for Texture1D, Texture1DArray or Texture2D"),
  806. db_dxil_param(7, "f", "coord3", "coordinate, defined only for TextureCubeArray"),
  807. db_dxil_param(8, "i32", "offset0", "optional offset, applicable to Texture1D, Texture1DArray, and as part of offset1"),
  808. db_dxil_param(9, "i32", "offset1", "optional offset, applicable to Texture2D, Texture2DArray, and as part of offset2"),
  809. db_dxil_param(10, "i32", "offset2", "optional offset, applicable to Texture3D"),
  810. db_dxil_param(11, "f", "compareValue", "the value to compare with")],
  811. counters=('tex_cmp',))
  812. next_op_idx += 1
  813. self.add_dxil_op("TextureLoad", next_op_idx, "TextureLoad", "reads texel data without any filtering or sampling", "hfwi", "ro", [
  814. db_dxil_param(0, "$r", "", "the loaded value"),
  815. db_dxil_param(2, "res", "srv", "handle of SRV or UAV to sample"),
  816. db_dxil_param(3, "i32", "mipLevelOrSampleCount", "sample count for Texture2DMS, mip level otherwise"),
  817. db_dxil_param(4, "i32", "coord0", "coordinate"),
  818. db_dxil_param(5, "i32", "coord1", "coordinate"),
  819. db_dxil_param(6, "i32", "coord2", "coordinate"),
  820. db_dxil_param(7, "i32", "offset0", "optional offset"),
  821. db_dxil_param(8, "i32", "offset1", "optional offset"),
  822. db_dxil_param(9, "i32", "offset2", "optional offset")],
  823. counters=('tex_load',))
  824. next_op_idx += 1
  825. self.add_dxil_op("TextureStore", next_op_idx, "TextureStore", "reads texel data without any filtering or sampling", "hfwi", "", [
  826. db_dxil_param(0, "v", "", ""),
  827. db_dxil_param(2, "res", "srv", "handle of UAV to store to"),
  828. db_dxil_param(3, "i32", "coord0", "coordinate"),
  829. db_dxil_param(4, "i32", "coord1", "coordinate"),
  830. db_dxil_param(5, "i32", "coord2", "coordinate"),
  831. db_dxil_param(6, "$o", "value0", "value"),
  832. db_dxil_param(7, "$o", "value1", "value"),
  833. db_dxil_param(8, "$o", "value2", "value"),
  834. db_dxil_param(9, "$o", "value3", "value"),
  835. db_dxil_param(10,"i8", "mask", "written value mask")],
  836. counters=('tex_store',))
  837. next_op_idx += 1
  838. self.add_dxil_op("BufferLoad", next_op_idx, "BufferLoad", "reads from a TypedBuffer", "hfwi", "ro", [
  839. db_dxil_param(0, "$r", "", "the loaded value"),
  840. db_dxil_param(2, "res", "srv", "handle of TypedBuffer SRV to sample"),
  841. db_dxil_param(3, "i32", "index", "element index"),
  842. db_dxil_param(4, "i32", "wot", "coordinate")],
  843. counters=('tex_load',))
  844. next_op_idx += 1
  845. self.add_dxil_op("BufferStore", next_op_idx, "BufferStore", "writes to a RWTypedBuffer", "hfwi", "", [
  846. db_dxil_param(0, "v", "", ""),
  847. db_dxil_param(2, "res", "uav", "handle of UAV to store to"),
  848. db_dxil_param(3, "i32", "coord0", "coordinate in elements"),
  849. db_dxil_param(4, "i32", "coord1", "coordinate (unused?)"),
  850. db_dxil_param(5, "$o", "value0", "value"),
  851. db_dxil_param(6, "$o", "value1", "value"),
  852. db_dxil_param(7, "$o", "value2", "value"),
  853. db_dxil_param(8, "$o", "value3", "value"),
  854. db_dxil_param(9, "i8", "mask", "written value mask")],
  855. counters=('tex_store',))
  856. next_op_idx += 1
  857. self.add_dxil_op("BufferUpdateCounter", next_op_idx, "BufferUpdateCounter", "atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV", "v", "", [
  858. db_dxil_param(0, "i32", "", "the new value in the buffer"),
  859. db_dxil_param(2, "res", "uav", "handle to a structured buffer UAV with the count or append flag"),
  860. db_dxil_param(3, "i8", "inc", "1 to increase, 0 to decrease")],
  861. counters=('atomic',))
  862. next_op_idx += 1
  863. self.add_dxil_op("CheckAccessFullyMapped", next_op_idx, "CheckAccessFullyMapped", "determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource", "i", "ro", [
  864. db_dxil_param(0, "i1", "", "nonzero if all values accessed mapped tiles in a tiled resource"),
  865. db_dxil_param(2, "u32", "status", "status result from the Sample, Gather or Load operation")])
  866. next_op_idx += 1
  867. self.add_dxil_op("GetDimensions", next_op_idx, "GetDimensions", "gets texture size information", "v", "ro", [
  868. db_dxil_param(0, "dims", "", "dimension information for texture"),
  869. db_dxil_param(2, "res", "handle", "resource handle to query"),
  870. db_dxil_param(3, "i32", "mipLevel", "mip level to query")])
  871. next_op_idx += 1
  872. self.add_dxil_op("TextureGather", next_op_idx, "TextureGather", "gathers the four texels that would be used in a bi-linear filtering operation", "hfwi", "ro", [
  873. db_dxil_param(0, "$r", "", "dimension information for texture"),
  874. db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
  875. db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
  876. db_dxil_param(4, "f", "coord0", "coordinate"),
  877. db_dxil_param(5, "f", "coord1", "coordinate, undef for Texture1D"),
  878. db_dxil_param(6, "f", "coord2", "coordinate, undef for Texture1D, Texture1DArray or Texture2D"),
  879. db_dxil_param(7, "f", "coord3", "coordinate, defined only for TextureCubeArray"),
  880. db_dxil_param(8, "i32", "offset0", "optional offset, applicable to Texture1D, Texture1DArray, and as part of offset1"),
  881. db_dxil_param(9, "i32", "offset1", "optional offset, applicable to Texture2D, Texture2DArray, and as part of offset2"),
  882. db_dxil_param(10, "i32", "channel", "channel to sample")],
  883. counters=('tex_norm',))
  884. next_op_idx += 1
  885. self.add_dxil_op("TextureGatherCmp", next_op_idx, "TextureGatherCmp", "same as TextureGather, except this instrution performs comparison on texels, similar to SampleCmp", "hfwi", "ro", [
  886. db_dxil_param(0, "$r", "", "gathered texels"),
  887. db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
  888. db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
  889. db_dxil_param(4, "f", "coord0", "coordinate"),
  890. db_dxil_param(5, "f", "coord1", "coordinate, undef for Texture1D"),
  891. db_dxil_param(6, "f", "coord2", "coordinate, undef for Texture1D, Texture1DArray or Texture2D"),
  892. db_dxil_param(7, "f", "coord3", "coordinate, defined only for TextureCubeArray"),
  893. db_dxil_param(8, "i32", "offset0", "optional offset, applicable to Texture1D, Texture1DArray, and as part of offset1"),
  894. db_dxil_param(9, "i32", "offset1", "optional offset, applicable to Texture2D, Texture2DArray, and as part of offset2"),
  895. db_dxil_param(10, "i32", "channel", "channel to sample"),
  896. db_dxil_param(11, "f", "compareVale", "value to compare with")],
  897. counters=('tex_cmp',))
  898. next_op_idx += 1
  899. self.add_dxil_op("Texture2DMSGetSamplePosition", next_op_idx, "Texture2DMSGetSamplePosition", "gets the position of the specified sample", "v", "ro", [
  900. db_dxil_param(0, "SamplePos", "", "sample position"),
  901. db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
  902. db_dxil_param(3, "i32", "index", "zero-based sample index")])
  903. next_op_idx += 1
  904. self.add_dxil_op("RenderTargetGetSamplePosition", next_op_idx, "RenderTargetGetSamplePosition", "gets the position of the specified sample", "v", "ro", [
  905. db_dxil_param(0, "SamplePos", "", "sample position"),
  906. db_dxil_param(2, "i32", "index", "zero-based sample index")])
  907. next_op_idx += 1
  908. self.add_dxil_op("RenderTargetGetSampleCount", next_op_idx, "RenderTargetGetSampleCount", "gets the number of samples for a render target", "v", "ro", [
  909. db_dxil_param(0, "u32", "", "number of sampling locations for a render target")])
  910. next_op_idx += 1
  911. # Atomics. Note that on TGSM, atomics are performed with LLVM instructions.
  912. self.add_dxil_op("AtomicBinOp", next_op_idx, "AtomicBinOp", "performs an atomic operation on two operands", "li", "", [
  913. db_dxil_param(0, "$o", "", "the original value in the location updated"),
  914. db_dxil_param(2, "res", "handle", "typed int or uint UAV handle"),
  915. db_dxil_param(3, "i32", "atomicOp", "atomic operation as per DXIL::AtomicBinOpCode"),
  916. db_dxil_param(4, "i32", "offset0", "offset in elements"),
  917. db_dxil_param(5, "i32", "offset1", "offset"),
  918. db_dxil_param(6, "i32", "offset2", "offset"),
  919. db_dxil_param(7, "$o", "newValue", "new value")],
  920. counters=('atomic',))
  921. next_op_idx += 1
  922. self.add_dxil_op("AtomicCompareExchange", next_op_idx, "AtomicCompareExchange", "atomic compare and exchange to memory", "li", "", [
  923. db_dxil_param(0, "$o", "", "the original value in the location updated"),
  924. db_dxil_param(2, "res", "handle", "typed int or uint UAV handle"),
  925. db_dxil_param(3, "i32", "offset0", "offset in elements"),
  926. db_dxil_param(4, "i32", "offset1", "offset"),
  927. db_dxil_param(5, "i32", "offset2", "offset"),
  928. db_dxil_param(6, "$o", "compareValue", "value to compare for exchange"),
  929. db_dxil_param(7, "$o", "newValue", "new value")],
  930. counters=('atomic',))
  931. next_op_idx += 1
  932. # Synchronization.
  933. self.add_dxil_op("Barrier", next_op_idx, "Barrier", "inserts a memory barrier in the shader", "v", "nd", [
  934. retvoid_param,
  935. db_dxil_param(2, "i32", "barrierMode", "a mask of DXIL::BarrierMode values", is_const=True)],
  936. counters=('barrier',))
  937. next_op_idx += 1
  938. # Pixel shader
  939. self.add_dxil_op("CalculateLOD", next_op_idx, "CalculateLOD", "calculates the level of detail", "f", "ro", [
  940. db_dxil_param(0, "f", "", "level of detail"),
  941. db_dxil_param(2, "res", "handle", "resource handle"),
  942. db_dxil_param(3, "res", "sampler", "sampler handle"),
  943. db_dxil_param(4, "f", "coord0", "coordinate"),
  944. db_dxil_param(5, "f", "coord1", "coordinate"),
  945. db_dxil_param(6, "f", "coord2", "coordinate"),
  946. db_dxil_param(7, "i1", "clamped", "1 if clampled LOD should be calculated, 0 for unclamped")])
  947. next_op_idx += 1
  948. self.add_dxil_op("Discard", next_op_idx, "Discard", "discard the current pixel", "v", "", [
  949. retvoid_param,
  950. db_dxil_param(2, "i1", "condition", "condition for conditional discard")])
  951. next_op_idx += 1
  952. self.add_dxil_op("DerivCoarseX", next_op_idx, "Unary", "computes the rate of change of components per stamp", "hf", "rn", [
  953. db_dxil_param(0, "$o", "", "rate of change in value with regards to RenderTarget x direction"),
  954. db_dxil_param(2, "$o", "value", "input to rate of change")])
  955. next_op_idx += 1
  956. self.add_dxil_op("DerivCoarseY", next_op_idx, "Unary", "computes the rate of change of components per stamp", "hf", "rn", [
  957. db_dxil_param(0, "$o", "", "rate of change in value with regards to RenderTarget y direction"),
  958. db_dxil_param(2, "$o", "value", "input to rate of change")])
  959. next_op_idx += 1
  960. self.add_dxil_op("DerivFineX", next_op_idx, "Unary", "computes the rate of change of components per pixel", "hf", "rn", [
  961. db_dxil_param(0, "$o", "", "rate of change in value with regards to RenderTarget x direction"),
  962. db_dxil_param(2, "$o", "value", "input to rate of change")])
  963. next_op_idx += 1
  964. self.add_dxil_op("DerivFineY", next_op_idx, "Unary", "computes the rate of change of components per pixel", "hf", "rn", [
  965. db_dxil_param(0, "$o", "", "rate of change in value with regards to RenderTarget y direction"),
  966. db_dxil_param(2, "$o", "value", "input to rate of change")])
  967. next_op_idx += 1
  968. self.add_dxil_op("EvalSnapped", next_op_idx, "EvalSnapped", "evaluates an input attribute at pixel center with an offset", "hf", "rn", [
  969. db_dxil_param(0, "$o", "", "result"),
  970. db_dxil_param(2, "i32", "inputSigId", "input signature element ID"),
  971. db_dxil_param(3, "i32", "inputRowIndex", "row index of an input attribute"),
  972. db_dxil_param(4, "i8", "inputColIndex", "column index of an input attribute"),
  973. db_dxil_param(5, "i32", "offsetX", "2D offset from the pixel center using a 16x16 grid"),
  974. db_dxil_param(6, "i32", "offsetY", "2D offset from the pixel center using a 16x16 grid")])
  975. next_op_idx += 1
  976. self.add_dxil_op("EvalSampleIndex", next_op_idx, "EvalSampleIndex", "evaluates an input attribute at a sample location", "hf", "rn", [
  977. db_dxil_param(0, "$o", "", "result"),
  978. db_dxil_param(2, "i32", "inputSigId", "input signature element ID"),
  979. db_dxil_param(3, "i32", "inputRowIndex", "row index of an input attribute"),
  980. db_dxil_param(4, "i8", "inputColIndex", "column index of an input attribute"),
  981. db_dxil_param(5, "i32", "sampleIndex", "sample location")])
  982. next_op_idx += 1
  983. self.add_dxil_op("EvalCentroid", next_op_idx, "EvalCentroid", "evaluates an input attribute at pixel center", "hf", "rn", [
  984. db_dxil_param(0, "$o", "", "result"),
  985. db_dxil_param(2, "i32", "inputSigId", "input signature element ID"),
  986. db_dxil_param(3, "i32", "inputRowIndex", "row index of an input attribute"),
  987. db_dxil_param(4, "i8", "inputColIndex", "column index of an input attribute")])
  988. next_op_idx += 1
  989. self.add_dxil_op("SampleIndex", next_op_idx, "SampleIndex", "returns the sample index in a sample-frequency pixel shader", "i", "rn", [
  990. db_dxil_param(0, "i32", "", "result")])
  991. next_op_idx += 1
  992. self.add_dxil_op("Coverage", next_op_idx, "Coverage", "returns the coverage mask input in a pixel shader", "i", "rn", [
  993. db_dxil_param(0, "i32", "", "result")])
  994. next_op_idx += 1
  995. self.add_dxil_op("InnerCoverage", next_op_idx, "InnerCoverage", "returns underestimated coverage input from conservative rasterization in a pixel shader", "i", "rn", [
  996. db_dxil_param(0, "i32", "", "result")])
  997. next_op_idx += 1
  998. # Compute shader.
  999. self.add_dxil_op("ThreadId", next_op_idx, "ThreadId", "reads the thread ID", "i", "rn", [
  1000. db_dxil_param(0, "i32", "", "thread ID component"),
  1001. db_dxil_param(2, "i32", "component", "component to read (x,y,z)")])
  1002. next_op_idx += 1
  1003. self.add_dxil_op("GroupId", next_op_idx, "GroupId", "reads the group ID (SV_GroupID)", "i", "rn", [
  1004. db_dxil_param(0, "i32", "", "group ID component"),
  1005. db_dxil_param(2, "i32", "component", "component to read")])
  1006. next_op_idx += 1
  1007. self.add_dxil_op("ThreadIdInGroup", next_op_idx, "ThreadIdInGroup", "reads the thread ID within the group (SV_GroupThreadID)", "i", "rn", [
  1008. db_dxil_param(0, "i32", "", "thread ID in group component"),
  1009. db_dxil_param(2, "i32", "component", "component to read (x,y,z)")])
  1010. next_op_idx += 1
  1011. self.add_dxil_op("FlattenedThreadIdInGroup", next_op_idx, "FlattenedThreadIdInGroup", "provides a flattened index for a given thread within a given group (SV_GroupIndex)", "i", "rn", [
  1012. db_dxil_param(0, "i32", "", "result")])
  1013. next_op_idx += 1
  1014. # Geometry shader
  1015. self.add_dxil_op("EmitStream", next_op_idx, "EmitStream", "emits a vertex to a given stream", "v", "", [
  1016. retvoid_param,
  1017. db_dxil_param(2, "i8", "streamId", "target stream ID for operation")],
  1018. counters=('gs_emit',))
  1019. next_op_idx += 1
  1020. self.add_dxil_op("CutStream", next_op_idx, "CutStream", "completes the current primitive topology at the specified stream", "v", "", [
  1021. retvoid_param,
  1022. db_dxil_param(2, "i8", "streamId", "target stream ID for operation")],
  1023. counters=('gs_cut',))
  1024. next_op_idx += 1
  1025. self.add_dxil_op("EmitThenCutStream", next_op_idx, "EmitThenCutStream", "equivalent to an EmitStream followed by a CutStream", "v", "", [
  1026. retvoid_param,
  1027. db_dxil_param(2, "i8", "streamId", "target stream ID for operation")],
  1028. counters=('gs_emit','gs_cut'))
  1029. next_op_idx += 1
  1030. self.add_dxil_op("GSInstanceID", next_op_idx, "GSInstanceID", "GSInstanceID", "i", "rn", [
  1031. db_dxil_param(0, "i32", "", "result")])
  1032. next_op_idx += 1
  1033. # Double precision
  1034. self.add_dxil_op("MakeDouble", next_op_idx, "MakeDouble", "creates a double value", "d", "rn", [
  1035. db_dxil_param(0, "d", "", "result"),
  1036. db_dxil_param(2, "i32", "lo", "low part of double"),
  1037. db_dxil_param(3, "i32", "hi", "high part of double")])
  1038. next_op_idx += 1
  1039. self.add_dxil_op("SplitDouble", next_op_idx, "SplitDouble", "splits a double into low and high parts", "d", "rn", [
  1040. db_dxil_param(0, "splitdouble", "", "result"),
  1041. db_dxil_param(2, "d", "value", "value to split")])
  1042. next_op_idx += 1
  1043. # Domain & Hull shader.
  1044. self.add_dxil_op("LoadOutputControlPoint", next_op_idx, "LoadOutputControlPoint", "LoadOutputControlPoint", "hfwi", "rn", [
  1045. db_dxil_param(0, "$o", "", "result"),
  1046. db_dxil_param(2, "i32", "inputSigId", "input signature element ID"),
  1047. db_dxil_param(3, "i32", "row", "row, relative to the element"),
  1048. db_dxil_param(4, "i8", "col", "column, relative to the element"),
  1049. db_dxil_param(5, "i32", "index", "vertex/point index")],
  1050. counters=('sig_ld',))
  1051. next_op_idx += 1
  1052. self.add_dxil_op("LoadPatchConstant", next_op_idx, "LoadPatchConstant", "LoadPatchConstant", "hfwi", "rn", [
  1053. db_dxil_param(0, "$o", "", "result"),
  1054. db_dxil_param(2, "i32", "inputSigId", "input signature element ID"),
  1055. db_dxil_param(3, "i32", "row", "row, relative to the element"),
  1056. db_dxil_param(4, "i8", "col", "column, relative to the element")],
  1057. counters=('sig_ld',))
  1058. next_op_idx += 1
  1059. # Domain shader.
  1060. self.add_dxil_op("DomainLocation", next_op_idx, "DomainLocation", "DomainLocation", "f", "rn", [
  1061. db_dxil_param(0, "f", "", "result"),
  1062. db_dxil_param(2, "i8", "component", "input", is_const=True)])
  1063. next_op_idx += 1
  1064. # Hull shader.
  1065. self.add_dxil_op("StorePatchConstant", next_op_idx, "StorePatchConstant", "StorePatchConstant", "hfwi", "", [
  1066. retvoid_param,
  1067. db_dxil_param(2, "i32", "outputSigID", "output signature element ID"),
  1068. db_dxil_param(3, "i32", "row", "row, relative to the element"),
  1069. db_dxil_param(4, "i8", "col", "column, relative to the element"),
  1070. db_dxil_param(5, "$o", "value", "value to store")],
  1071. counters=('sig_st',))
  1072. next_op_idx += 1
  1073. self.add_dxil_op("OutputControlPointID", next_op_idx, "OutputControlPointID", "OutputControlPointID", "i", "rn", [
  1074. db_dxil_param(0, "i32", "", "result")])
  1075. next_op_idx += 1
  1076. self.add_dxil_op("PrimitiveID", next_op_idx, "PrimitiveID", "PrimitiveID", "i", "rn", [
  1077. db_dxil_param(0, "i32", "", "result")])
  1078. next_op_idx += 1
  1079. self.add_dxil_op("CycleCounterLegacy", next_op_idx, "CycleCounterLegacy", "CycleCounterLegacy", "v", "", [
  1080. db_dxil_param(0, "twoi32", "", "result")])
  1081. next_op_idx += 1
  1082. # Add wave intrinsics.
  1083. self.add_dxil_op("WaveIsFirstLane", next_op_idx, "WaveIsFirstLane", "returns 1 for the first lane in the wave", "v", "", [
  1084. db_dxil_param(0, "i1", "", "operation result")])
  1085. next_op_idx += 1
  1086. self.add_dxil_op("WaveGetLaneIndex", next_op_idx, "WaveGetLaneIndex", "returns the index of the current lane in the wave", "v", "rn", [
  1087. db_dxil_param(0, "i32", "", "operation result")])
  1088. next_op_idx += 1
  1089. self.add_dxil_op("WaveGetLaneCount", next_op_idx, "WaveGetLaneCount", "returns the number of lanes in the wave", "v", "rn", [
  1090. db_dxil_param(0, "i32", "", "operation result")])
  1091. next_op_idx += 1
  1092. self.add_dxil_op("WaveAnyTrue", next_op_idx, "WaveAnyTrue", "returns 1 if any of the lane evaluates the value to true", "v", "", [
  1093. db_dxil_param(0, "i1", "", "operation result"),
  1094. db_dxil_param(2, "i1", "cond", "condition to test")])
  1095. next_op_idx += 1
  1096. self.add_dxil_op("WaveAllTrue", next_op_idx, "WaveAllTrue", "returns 1 if all the lanes evaluate the value to true", "v", "", [
  1097. db_dxil_param(0, "i1", "", "operation result"),
  1098. db_dxil_param(2, "i1", "cond", "condition to test")])
  1099. next_op_idx += 1
  1100. self.add_dxil_op("WaveActiveAllEqual", next_op_idx, "WaveActiveAllEqual", "returns 1 if all the lanes have the same value", "hfd18wil", "", [
  1101. db_dxil_param(0, "i1", "", "operation result"),
  1102. db_dxil_param(2, "$o", "value", "value to compare")])
  1103. next_op_idx += 1
  1104. self.add_dxil_op("WaveActiveBallot", next_op_idx, "WaveActiveBallot", "returns a struct with a bit set for each lane where the condition is true", "v", "", [
  1105. db_dxil_param(0, "fouri32", "", "operation result"),
  1106. db_dxil_param(2, "i1", "cond", "condition to ballot on")])
  1107. next_op_idx += 1
  1108. self.add_dxil_op("WaveReadLaneAt", next_op_idx, "WaveReadLaneAt", "returns the value from the specified lane", "hfd18wil", "", [
  1109. db_dxil_param(0, "$o", "", "operation result"),
  1110. db_dxil_param(2, "$o", "value", "value to read"),
  1111. db_dxil_param(3, "i32", "lane", "lane index")])
  1112. next_op_idx += 1
  1113. self.add_dxil_op("WaveReadLaneFirst", next_op_idx, "WaveReadLaneFirst", "returns the value from the first lane", "hfd18wil", "", [
  1114. db_dxil_param(0, "$o", "", "operation result"),
  1115. db_dxil_param(2, "$o", "value", "value to read")])
  1116. next_op_idx += 1
  1117. self.add_dxil_op("WaveActiveOp", next_op_idx, "WaveActiveOp", "returns the result the operation across waves", "hfd18wil", "", [
  1118. db_dxil_param(0, "$o", "", "operation result"),
  1119. db_dxil_param(2, "$o", "value", "input value"),
  1120. db_dxil_param(3, "i8", "op", "kind of operation to perform", enum_name="WaveOpKind", is_const=True),
  1121. db_dxil_param(4, "i8", "sop", "sign of operands", enum_name="SignedOpKind", is_const=True)])
  1122. next_op_idx += 1
  1123. self.add_enum_type("SignedOpKind", "Sign vs. unsigned operands for operation", [
  1124. (0, "Signed", "signed integer or floating-point operands"),
  1125. (1, "Unsigned", "unsigned integer operands")])
  1126. self.add_enum_type("WaveOpKind", "Kind of cross-lane operation", [
  1127. (0, "Sum", "sum of values"),
  1128. (1, "Product", "product of values"),
  1129. (2, "Min", "minimum value"),
  1130. (3, "Max", "maximum value")])
  1131. self.add_dxil_op("WaveActiveBit", next_op_idx, "WaveActiveBit", "returns the result of the operation across all lanes", "8wil", "", [
  1132. db_dxil_param(0, "$o", "", "operation result"),
  1133. db_dxil_param(2, "$o", "value", "input value"),
  1134. db_dxil_param(3, "i8", "op", "kind of operation to perform", enum_name="WaveBitOpKind", is_const=True)])
  1135. next_op_idx += 1
  1136. self.add_enum_type("WaveBitOpKind", "Kind of bitwise cross-lane operation", [
  1137. (0, "And", "bitwise and of values"),
  1138. (1, "Or", "bitwise or of values"),
  1139. (2, "Xor", "bitwise xor of values")])
  1140. self.add_dxil_op("WavePrefixOp", next_op_idx, "WavePrefixOp", "returns the result of the operation on prior lanes", "hfd8wil", "", [
  1141. db_dxil_param(0, "$o", "", "operation result"),
  1142. db_dxil_param(2, "$o", "value", "input value"),
  1143. db_dxil_param(3, "i8", "op", "0=sum,1=product", enum_name="WaveOpKind", is_const=True),
  1144. db_dxil_param(4, "i8", "sop", "sign of operands", enum_name="SignedOpKind", is_const=True)])
  1145. next_op_idx += 1
  1146. self.add_dxil_op("QuadReadLaneAt", next_op_idx, "QuadReadLaneAt", "reads from a lane in the quad", "hfd18wil", "", [
  1147. db_dxil_param(0, "$o", "", "operation result"),
  1148. db_dxil_param(2, "$o", "value", "value to read"),
  1149. db_dxil_param(3, "u32", "quadLane", "lane to read from (0-4)", max_value = 3, is_const=True)])
  1150. next_op_idx += 1
  1151. self.add_enum_type("QuadOpKind", "Kind of quad-level operation", [
  1152. (0, "ReadAcrossX", "returns the value from the other lane in the quad in the horizontal direction"),
  1153. (1, "ReadAcrossY", "returns the value from the other lane in the quad in the vertical direction"),
  1154. (2, "ReadAcrossDiagonal", "returns the value from the lane across the quad in horizontal and vertical direction")])
  1155. self.add_dxil_op("QuadOp", next_op_idx, "QuadOp", "returns the result of a quad-level operation", "hfd8wil", "", [
  1156. db_dxil_param(0, "$o", "", "operation result"),
  1157. db_dxil_param(2, "$o", "value", "value for operation"),
  1158. db_dxil_param(3, "i8", "op", "operation", enum_name = "QuadOpKind", is_const=True)])
  1159. next_op_idx += 1
  1160. # Add bitcasts
  1161. self.add_dxil_op("BitcastI16toF16", next_op_idx, "BitcastI16toF16", "bitcast between different sizes", "v", "rn", [
  1162. db_dxil_param(0, "h", "", "operation result"),
  1163. db_dxil_param(2, "i16", "value", "input value")])
  1164. next_op_idx += 1
  1165. self.add_dxil_op("BitcastF16toI16", next_op_idx, "BitcastF16toI16", "bitcast between different sizes", "v", "rn", [
  1166. db_dxil_param(0, "i16", "", "operation result"),
  1167. db_dxil_param(2, "h", "value", "input value")])
  1168. next_op_idx += 1
  1169. self.add_dxil_op("BitcastI32toF32", next_op_idx, "BitcastI32toF32", "bitcast between different sizes", "v", "rn", [
  1170. db_dxil_param(0, "f", "", "operation result"),
  1171. db_dxil_param(2, "i32", "value", "input value")])
  1172. next_op_idx += 1
  1173. self.add_dxil_op("BitcastF32toI32", next_op_idx, "BitcastF32toI32", "bitcast between different sizes", "v", "rn", [
  1174. db_dxil_param(0, "i32", "", "operation result"),
  1175. db_dxil_param(2, "f", "value", "input value")])
  1176. next_op_idx += 1
  1177. self.add_dxil_op("BitcastI64toF64", next_op_idx, "BitcastI64toF64", "bitcast between different sizes", "v", "rn", [
  1178. db_dxil_param(0, "d", "", "operation result"),
  1179. db_dxil_param(2, "i64", "value", "input value")])
  1180. next_op_idx += 1
  1181. self.add_dxil_op("BitcastF64toI64", next_op_idx, "BitcastF64toI64", "bitcast between different sizes", "v", "rn", [
  1182. db_dxil_param(0, "i64", "", "operation result"),
  1183. db_dxil_param(2, "d", "value", "input value")])
  1184. next_op_idx += 1
  1185. self.add_dxil_op("LegacyF32ToF16", next_op_idx, "LegacyF32ToF16", "legacy fuction to convert float (f32) to half (f16) (this is not related to min-precision)", "v", "rn", [
  1186. db_dxil_param(0, "i32", "", "low 16 bits - half value, high 16 bits - zeroes"),
  1187. db_dxil_param(2, "f", "value", "float value to convert")])
  1188. next_op_idx += 1
  1189. self.add_dxil_op("LegacyF16ToF32", next_op_idx, "LegacyF16ToF32", "legacy fuction to convert half (f16) to float (f32) (this is not related to min-precision)", "v", "rn", [
  1190. db_dxil_param(0, "f", "", "converted float value"),
  1191. db_dxil_param(2, "i32", "value", "half value to convert")])
  1192. next_op_idx += 1
  1193. self.add_dxil_op("LegacyDoubleToFloat", next_op_idx, "LegacyDoubleToFloat", "legacy fuction to convert double to float", "v", "rn", [
  1194. db_dxil_param(0, "f", "", "float value"),
  1195. db_dxil_param(2, "d", "value", "double value to convert")])
  1196. next_op_idx += 1
  1197. self.add_dxil_op("LegacyDoubleToSInt32", next_op_idx, "LegacyDoubleToSInt32", "legacy fuction to convert double to int32", "v", "rn", [
  1198. db_dxil_param(0, "i32", "", "i32 value"),
  1199. db_dxil_param(2, "d", "value", "double value to convert")])
  1200. next_op_idx += 1
  1201. self.add_dxil_op("LegacyDoubleToUInt32", next_op_idx, "LegacyDoubleToUInt32", "legacy fuction to convert double to uint32", "v", "rn", [
  1202. db_dxil_param(0, "i32", "", "i32 value"),
  1203. db_dxil_param(2, "d", "value", "double value to convert")])
  1204. next_op_idx += 1
  1205. self.add_dxil_op("WaveAllBitCount", next_op_idx, "WaveAllOp", "returns the count of bits set to 1 across the wave", "v", "", [
  1206. db_dxil_param(0, "i32", "", "operation result"),
  1207. db_dxil_param(2, "i1", "value", "input value")])
  1208. next_op_idx += 1
  1209. # WavePrefixBitCount has a different signature than WavePrefixOp, so setting its opclass to WavePrefixOp is not correct.
  1210. # It works for now because WavePrefixOp and WavePrefixBitCount don't interfere on overload types.
  1211. # Keep it unchanged for back-compat.
  1212. self.add_dxil_op("WavePrefixBitCount", next_op_idx, "WavePrefixOp", "returns the count of bits set to 1 on prior lanes", "v", "", [
  1213. db_dxil_param(0, "i32", "", "operation result"),
  1214. db_dxil_param(2, "i1", "value", "input value")])
  1215. next_op_idx += 1
  1216. # End of DXIL 1.0 opcodes.
  1217. self.set_op_count_for_version(1, 0, next_op_idx)
  1218. self.add_dxil_op("AttributeAtVertex", next_op_idx, "AttributeAtVertex", "returns the values of the attributes at the vertex.", "hfiw", "rn", [
  1219. db_dxil_param(0, "$o", "", "result"),
  1220. db_dxil_param(2, "i32", "inputSigId", "input signature element ID"),
  1221. db_dxil_param(3, "i32", "inputRowIndex", "row index of an input attribute"),
  1222. db_dxil_param(4, "i8", "inputColIndex", "column index of an input attribute"),
  1223. db_dxil_param(5, "i8", "VertexID", "Vertex Index")
  1224. ])
  1225. next_op_idx += 1
  1226. self.add_dxil_op("ViewID", next_op_idx, "ViewID", "returns the view index", "i", "rn", [
  1227. db_dxil_param(0, "i32", "", "result")])
  1228. next_op_idx += 1
  1229. # End of DXIL 1.1 opcodes.
  1230. self.set_op_count_for_version(1, 1, next_op_idx)
  1231. self.add_dxil_op("RawBufferLoad", next_op_idx, "RawBufferLoad", "reads from a raw buffer and structured buffer", "hfwidl", "ro", [
  1232. db_dxil_param(0, "$r", "", "the loaded value"),
  1233. db_dxil_param(2, "res", "srv", "handle of TypedBuffer SRV to sample"),
  1234. db_dxil_param(3, "i32", "index", "element index for StructuredBuffer, or byte offset for ByteAddressBuffer"),
  1235. db_dxil_param(4, "i32", "elementOffset", "offset into element for StructuredBuffer, or undef for ByteAddressBuffer"),
  1236. db_dxil_param(5, "i8", "mask", "loading value mask", is_const=True),
  1237. db_dxil_param(6, "i32", "alignment", "relative load access alignment", is_const=True)],
  1238. counters=('tex_load',))
  1239. next_op_idx += 1
  1240. self.add_dxil_op("RawBufferStore", next_op_idx, "RawBufferStore", "writes to a RWByteAddressBuffer or RWStructuredBuffer", "hfwidl", "", [
  1241. db_dxil_param(0, "v", "", ""),
  1242. db_dxil_param(2, "res", "uav", "handle of UAV to store to"),
  1243. db_dxil_param(3, "i32", "index", "element index for StructuredBuffer, or byte offset for ByteAddressBuffer"),
  1244. db_dxil_param(4, "i32", "elementOffset", "offset into element for StructuredBuffer, or undef for ByteAddressBuffer"),
  1245. db_dxil_param(5, "$o", "value0", "value"),
  1246. db_dxil_param(6, "$o", "value1", "value"),
  1247. db_dxil_param(7, "$o", "value2", "value"),
  1248. db_dxil_param(8, "$o", "value3", "value"),
  1249. db_dxil_param(9, "i8", "mask", "mask of contiguous components stored starting at first component (valid: 1, 3, 7, 15)", is_const=True),
  1250. db_dxil_param(10, "i32", "alignment", "relative store access alignment", is_const=True)],
  1251. counters=('tex_store',))
  1252. next_op_idx += 1
  1253. # End of DXIL 1.2 opcodes.
  1254. self.set_op_count_for_version(1, 2, next_op_idx)
  1255. assert next_op_idx == 141, "next operation index is %d rather than 141 and thus opcodes are broken" % next_op_idx
  1256. self.add_dxil_op("InstanceID", next_op_idx, "InstanceID", "The user-provided InstanceID on the bottom-level acceleration structure instance within the top-level structure", "i", "rn", [
  1257. db_dxil_param(0, "i32", "", "result")])
  1258. next_op_idx += 1
  1259. self.add_dxil_op("InstanceIndex", next_op_idx, "InstanceIndex", "The autogenerated index of the current instance in the top-level structure", "i", "rn", [
  1260. db_dxil_param(0, "i32", "", "result")])
  1261. next_op_idx += 1
  1262. self.add_dxil_op("HitKind", next_op_idx, "HitKind", "Returns the value passed as HitKind in ReportIntersection(). If intersection was reported by fixed-function triangle intersection, HitKind will be one of HIT_KIND_TRIANGLE_FRONT_FACE or HIT_KIND_TRIANGLE_BACK_FACE.", "i", "rn", [
  1263. db_dxil_param(0, "i32", "", "result")])
  1264. next_op_idx += 1
  1265. self.add_dxil_op("RayFlags", next_op_idx, "RayFlags", "uint containing the current ray flags.", "i", "rn", [
  1266. db_dxil_param(0, "i32", "", "result")])
  1267. next_op_idx += 1
  1268. self.add_dxil_op("DispatchRaysIndex", next_op_idx, "DispatchRaysIndex", "The current x and y location within the Width and Height", "i", "rn", [
  1269. db_dxil_param(0, "i32", "", "result"),
  1270. db_dxil_param(2, "i8", "col", "column, relative to the element")])
  1271. next_op_idx += 1
  1272. self.add_dxil_op("DispatchRaysDimensions", next_op_idx, "DispatchRaysDimensions", "The Width and Height values from the D3D12_DISPATCH_RAYS_DESC structure provided to the originating DispatchRays() call.", "i", "rn", [
  1273. db_dxil_param(0, "i32", "", "result"),
  1274. db_dxil_param(2, "i8", "col", "column, relative to the element")])
  1275. next_op_idx += 1
  1276. self.add_dxil_op("WorldRayOrigin", next_op_idx, "WorldRayOrigin", "The world-space origin for the current ray.", "f", "rn", [
  1277. db_dxil_param(0, "f", "", "result"),
  1278. db_dxil_param(2, "i8", "col", "column, relative to the element")])
  1279. next_op_idx += 1
  1280. self.add_dxil_op("WorldRayDirection", next_op_idx, "WorldRayDirection", "The world-space direction for the current ray.", "f", "rn", [
  1281. db_dxil_param(0, "f", "", "result"),
  1282. db_dxil_param(2, "i8", "col", "column, relative to the element")])
  1283. next_op_idx += 1
  1284. self.add_dxil_op("ObjectRayOrigin", next_op_idx, "ObjectRayOrigin", "Object-space origin for the current ray.", "f", "rn", [
  1285. db_dxil_param(0, "f", "", "result"),
  1286. db_dxil_param(2, "i8", "col", "column, relative to the element")])
  1287. next_op_idx += 1
  1288. self.add_dxil_op("ObjectRayDirection", next_op_idx, "ObjectRayDirection", "Object-space direction for the current ray.", "f", "rn", [
  1289. db_dxil_param(0, "f", "", "result"),
  1290. db_dxil_param(2, "i8", "col", "column, relative to the element")])
  1291. next_op_idx += 1
  1292. self.add_dxil_op("ObjectToWorld", next_op_idx, "ObjectToWorld", "Matrix for transforming from object-space to world-space.", "f", "rn", [
  1293. db_dxil_param(0, "f", "", "result"),
  1294. db_dxil_param(2, "i32", "row", "row, relative to the element"),
  1295. db_dxil_param(3, "i8", "col", "column, relative to the element")])
  1296. next_op_idx += 1
  1297. self.add_dxil_op("WorldToObject", next_op_idx, "WorldToObject", "Matrix for transforming from world-space to object-space.", "f", "rn", [
  1298. db_dxil_param(0, "f", "", "result"),
  1299. db_dxil_param(2, "i32", "row", "row, relative to the element"),
  1300. db_dxil_param(3, "i8", "col", "column, relative to the element")])
  1301. next_op_idx += 1
  1302. self.add_dxil_op("RayTMin", next_op_idx, "RayTMin", "float representing the parametric starting point for the ray.", "f", "rn", [
  1303. db_dxil_param(0, "f", "", "result")])
  1304. next_op_idx += 1
  1305. self.add_dxil_op("RayTCurrent", next_op_idx, "RayTCurrent", "float representing the current parametric ending point for the ray", "f", "ro", [
  1306. db_dxil_param(0, "f", "", "result")])
  1307. next_op_idx += 1
  1308. self.add_dxil_op("IgnoreHit", next_op_idx, "IgnoreHit", "Used in an any hit shader to reject an intersection and terminate the shader", "v", "nr", [
  1309. db_dxil_param(0, "v", "", "")])
  1310. next_op_idx += 1
  1311. self.add_dxil_op("AcceptHitAndEndSearch", next_op_idx, "AcceptHitAndEndSearch", "Used in an any hit shader to abort the ray query and the intersection shader (if any). The current hit is committed and execution passes to the closest hit shader with the closest hit recorded so far", "v", "nr", [
  1312. db_dxil_param(0, "v", "", "")])
  1313. next_op_idx += 1
  1314. self.add_dxil_op("TraceRay", next_op_idx, "TraceRay", "initiates raytrace", "u", "", [
  1315. db_dxil_param(0, "v", "", ""),
  1316. db_dxil_param(2, "res", "AccelerationStructure", "Top-level acceleration structure to use"),
  1317. db_dxil_param(3, "i32", "RayFlags", "Valid combination of Ray_flags"),
  1318. db_dxil_param(4, "i32", "InstanceInclusionMask", "Bottom 8 bits of InstanceInclusionMask are used to include/rejectgeometry instances based on the InstanceMask in each instance: if(!((InstanceInclusionMask & InstanceMask) & 0xff)) { ignore intersection }"),
  1319. db_dxil_param(5, "i32", "RayContributionToHitGroupIndex", "Offset to add into Addressing calculations within shader tables for hit group indexing. Only the bottom 4 bits of this value are used"),
  1320. db_dxil_param(6, "i32", "MultiplierForGeometryContributionToShaderIndex", "Stride to multiply by per-geometry GeometryContributionToHitGroupIndex in Addressing calculations within shader tables for hit group indexing. Only the bottom 4 bits of this value are used"),
  1321. db_dxil_param(7, "i32", "MissShaderIndex", "Miss shader index in Addressing calculations within shader tables. Only the bottom 16 bits of this value are used"),
  1322. db_dxil_param(8, "f", "Origin_X", "Origin x of the ray"),
  1323. db_dxil_param(9, "f", "Origin_Y", "Origin y of the ray"),
  1324. db_dxil_param(10, "f", "Origin_Z", "Origin z of the ray"),
  1325. db_dxil_param(11, "f", "TMin", "Tmin of the ray"),
  1326. db_dxil_param(12, "f", "Direction_X", "Direction x of the ray"),
  1327. db_dxil_param(13, "f", "Direction_Y", "Direction y of the ray"),
  1328. db_dxil_param(14, "f", "Direction_Z", "Direction z of the ray"),
  1329. db_dxil_param(15, "f", "TMax", "Tmax of the ray"),
  1330. db_dxil_param(16, "udt", "payload", "User-defined intersection attribute structure")])
  1331. next_op_idx += 1
  1332. self.add_dxil_op("ReportHit", next_op_idx, "ReportHit", "returns true if hit was accepted", "u", "", [
  1333. db_dxil_param(0, "i1", "", "result"),
  1334. db_dxil_param(2, "f", "THit", "parametric distance of the intersection"),
  1335. db_dxil_param(3, "i32", "HitKind", "User-specified value in range of 0-127 to identify the type of hit. Read by any_hit or closes_hit shaders with HitKind()"),
  1336. db_dxil_param(4, "udt", "Attributes", "User-defined intersection attribute structure")])
  1337. next_op_idx += 1
  1338. self.add_dxil_op("CallShader", next_op_idx, "CallShader", "Call a shader in the callable shader table supplied through the DispatchRays() API", "u", "", [
  1339. db_dxil_param(0, "v", "", "result"),
  1340. db_dxil_param(2, "i32", "ShaderIndex", "Provides index into the callable shader table supplied through the DispatchRays() API"),
  1341. db_dxil_param(3, "udt", "Parameter", "User-defined parameters to pass to the callable shader,This parameter structure must match the parameter structure used in the callable shader pointed to in the shader table")])
  1342. next_op_idx += 1
  1343. self.add_dxil_op("CreateHandleForLib", next_op_idx, "CreateHandleForLib", "create resource handle from resource struct for library", "o", "ro", [
  1344. db_dxil_param(0, "res", "", "result"),
  1345. db_dxil_param(2, "obj", "Resource", "resource to create the handle")])
  1346. next_op_idx += 1
  1347. # Maps to PrimitiveIndex() intrinsics for raytracing (same meaning as PrimitiveID)
  1348. self.add_dxil_op("PrimitiveIndex", next_op_idx, "PrimitiveIndex", "PrimitiveIndex for raytracing shaders", "i", "rn", [
  1349. db_dxil_param(0, "i32", "", "result")])
  1350. next_op_idx += 1
  1351. # End of DXIL 1.3 opcodes.
  1352. self.set_op_count_for_version(1, 3, next_op_idx)
  1353. assert next_op_idx == 162, "next operation index is %d rather than 162 and thus opcodes are broken" % next_op_idx
  1354. self.add_dxil_op("Dot2AddHalf", next_op_idx, "Dot2AddHalf", "2D half dot product with accumulate to float", "f", "rn", [
  1355. db_dxil_param(0, "$o", "", "accumulated result"),
  1356. db_dxil_param(2, "$o", "acc", "input accumulator"),
  1357. db_dxil_param(3, "h", "ax", "the first component of the first vector"),
  1358. db_dxil_param(4, "h", "ay", "the second component of the first vector"),
  1359. db_dxil_param(5, "h", "bx", "the first component of the second vector"),
  1360. db_dxil_param(6, "h", "by", "the second component of the second vector")],
  1361. counters=('floats',))
  1362. next_op_idx += 1
  1363. self.add_dxil_op("Dot4AddI8Packed", next_op_idx, "Dot4AddPacked", "signed dot product of 4 x i8 vectors packed into i32, with accumulate to i32", "i", "rn", [
  1364. db_dxil_param(0, "i32", "", "accumulated result"),
  1365. db_dxil_param(2, "i32", "acc", "input accumulator"),
  1366. db_dxil_param(3, "i32", "a", "first packed 4 x i8 for dot product"),
  1367. db_dxil_param(4, "i32", "b", "second packed 4 x i8 for dot product")],
  1368. counters=('ints',))
  1369. next_op_idx += 1
  1370. self.add_dxil_op("Dot4AddU8Packed", next_op_idx, "Dot4AddPacked", "unsigned dot product of 4 x u8 vectors packed into i32, with accumulate to i32", "i", "rn", [
  1371. db_dxil_param(0, "i32", "", "accumulated result"),
  1372. db_dxil_param(2, "i32", "acc", "input accumulator"),
  1373. db_dxil_param(3, "i32", "a", "first packed 4 x u8 for dot product"),
  1374. db_dxil_param(4, "i32", "b", "second packed 4 x u8 for dot product")],
  1375. counters=('uints',))
  1376. next_op_idx += 1
  1377. # End of DXIL 1.4 opcodes.
  1378. self.set_op_count_for_version(1, 4, next_op_idx)
  1379. assert next_op_idx == 165, "next operation index is %d rather than 165 and thus opcodes are broken" % next_op_idx
  1380. self.add_dxil_op("WaveMatch", next_op_idx, "WaveMatch", "returns the bitmask of active lanes that have the same value", "hfd8wil", "", [
  1381. db_dxil_param(0, "fouri32", "", "operation result"),
  1382. db_dxil_param(2, "$o", "value", "input value")])
  1383. next_op_idx += 1
  1384. self.add_dxil_op("WaveMultiPrefixOp", next_op_idx, "WaveMultiPrefixOp", "returns the result of the operation on groups of lanes identified by a bitmask", "hfd8wil", "", [
  1385. db_dxil_param(0, "$o", "", "operation result"),
  1386. db_dxil_param(2, "$o", "value", "input value"),
  1387. db_dxil_param(3, "i32", "mask0", "mask 0"),
  1388. db_dxil_param(4, "i32", "mask1", "mask 1"),
  1389. db_dxil_param(5, "i32", "mask2", "mask 2"),
  1390. db_dxil_param(6, "i32", "mask3", "mask 3"),
  1391. db_dxil_param(7, "i8", "op", "operation", enum_name="WaveMultiPrefixOpKind", is_const=True),
  1392. db_dxil_param(8, "i8", "sop", "sign of operands", enum_name="SignedOpKind", is_const=True)])
  1393. next_op_idx += 1
  1394. self.add_enum_type("WaveMultiPrefixOpKind", "Kind of cross-lane for multi-prefix operation", [
  1395. (0, "Sum", "sum of values"),
  1396. (1, "And", "bitwise and of values"),
  1397. (2, "Or", "bitwise or of values"),
  1398. (3, "Xor", "bitwise xor of values"),
  1399. (4, "Product", "product of values")])
  1400. self.add_dxil_op("WaveMultiPrefixBitCount", next_op_idx, "WaveMultiPrefixBitCount", "returns the count of bits set to 1 on groups of lanes identified by a bitmask", "v", "", [
  1401. db_dxil_param(0, "i32", "", "operation result"),
  1402. db_dxil_param(2, "i1", "value", "input value"),
  1403. db_dxil_param(3, "i32", "mask0", "mask 0"),
  1404. db_dxil_param(4, "i32", "mask1", "mask 1"),
  1405. db_dxil_param(5, "i32", "mask2", "mask 2"),
  1406. db_dxil_param(6, "i32", "mask3", "mask 3")])
  1407. next_op_idx += 1
  1408. # Mesh Shader
  1409. self.add_dxil_op("SetMeshOutputCounts", next_op_idx, "SetMeshOutputCounts", "Mesh shader intrinsic SetMeshOutputCounts", "v", "", [
  1410. retvoid_param,
  1411. db_dxil_param(2, "i32", "numVertices", "number of output vertices"),
  1412. db_dxil_param(3, "i32", "numPrimitives", "number of output primitives")])
  1413. next_op_idx += 1
  1414. self.add_dxil_op("EmitIndices", next_op_idx, "EmitIndices", "emit a primitive's vertex indices in a mesh shader", "v", "", [
  1415. retvoid_param,
  1416. db_dxil_param(2, "u32", "PrimitiveIndex", "a primitive's index"),
  1417. db_dxil_param(3, "u32", "VertexIndex0", "a primitive's first vertex index"),
  1418. db_dxil_param(4, "u32", "VertexIndex1", "a primitive's second vertex index"),
  1419. db_dxil_param(5, "u32", "VertexIndex2", "a primitive's third vertex index")])
  1420. next_op_idx += 1
  1421. self.add_dxil_op("GetMeshPayload", next_op_idx, "GetMeshPayload", "get the mesh payload which is from amplification shader", "u", "ro", [
  1422. db_dxil_param(0, "$o", "", "mesh payload result")])
  1423. next_op_idx += 1
  1424. self.add_dxil_op("StoreVertexOutput", next_op_idx, "StoreVertexOutput", "stores the value to mesh shader vertex output", "hfwi", "", [
  1425. retvoid_param,
  1426. db_dxil_param(2, "u32", "outputSigId", "vertex output signature element ID"),
  1427. db_dxil_param(3, "u32", "rowIndex", "row index relative to element"),
  1428. db_dxil_param(4, "u8", "colIndex", "column index relative to element"),
  1429. db_dxil_param(5, "$o", "value", "value to store"),
  1430. db_dxil_param(6, "u32", "vertexIndex", "vertex index")],
  1431. counters=('sig_st',))
  1432. next_op_idx += 1
  1433. self.add_dxil_op("StorePrimitiveOutput", next_op_idx, "StorePrimitiveOutput", "stores the value to mesh shader primitive output", "hfwi", "", [
  1434. retvoid_param,
  1435. db_dxil_param(2, "u32", "outputSigId", "primitive output signature element ID"),
  1436. db_dxil_param(3, "u32", "rowIndex", "row index relative to element"),
  1437. db_dxil_param(4, "u8", "colIndex", "column index relative to element"),
  1438. db_dxil_param(5, "$o", "value", "value to store"),
  1439. db_dxil_param(6, "u32", "primitiveIndex", "primitive index")],
  1440. counters=('sig_st',))
  1441. next_op_idx += 1
  1442. # Amplification Shader
  1443. self.add_dxil_op("DispatchMesh", next_op_idx, "DispatchMesh", "Amplification shader intrinsic DispatchMesh", "u", "", [
  1444. retvoid_param,
  1445. db_dxil_param(2, "i32", "threadGroupCountX", "thread group count x"),
  1446. db_dxil_param(3, "i32", "threadGroupCountY", "thread group count y"),
  1447. db_dxil_param(4, "i32", "threadGroupCountZ", "thread group count z"),
  1448. db_dxil_param(5, "$o", "payload", "payload")])
  1449. next_op_idx += 1
  1450. # Sampler feedback
  1451. self.add_dxil_op("WriteSamplerFeedback", next_op_idx, "WriteSamplerFeedback", "updates a feedback texture for a sampling operation", "v", "", [
  1452. db_dxil_param(0, "v", "", ""),
  1453. db_dxil_param(2, "res", "feedbackTex", "handle of feedback texture UAV"),
  1454. db_dxil_param(3, "res", "sampledTex", "handled of sampled texture SRV"),
  1455. db_dxil_param(4, "res", "sampler", "handle of sampler"),
  1456. db_dxil_param(5, "f", "c0", "coordinate c0"),
  1457. db_dxil_param(6, "f", "c1", "coordinate c1"),
  1458. db_dxil_param(7, "f", "c2", "coordinate c2"),
  1459. db_dxil_param(8, "f", "c3", "coordinate c3"),
  1460. db_dxil_param(9, "f", "clamp", "clamp")],
  1461. counters=('tex_store',))
  1462. next_op_idx += 1
  1463. self.add_dxil_op("WriteSamplerFeedbackBias", next_op_idx, "WriteSamplerFeedbackBias", "updates a feedback texture for a sampling operation with a bias on the mipmap level", "v", "", [
  1464. db_dxil_param(0, "v", "", ""),
  1465. db_dxil_param(2, "res", "feedbackTex", "handle of feedback texture UAV"),
  1466. db_dxil_param(3, "res", "sampledTex", "handled of sampled texture SRV"),
  1467. db_dxil_param(4, "res", "sampler", "handle of sampler"),
  1468. db_dxil_param(5, "f", "c0", "coordinate c0"),
  1469. db_dxil_param(6, "f", "c1", "coordinate c1"),
  1470. db_dxil_param(7, "f", "c2", "coordinate c2"),
  1471. db_dxil_param(8, "f", "c3", "coordinate c3"),
  1472. db_dxil_param(9, "f", "bias", "bias in [-16.f,15.99f]"),
  1473. db_dxil_param(10, "f", "clamp", "clamp")],
  1474. counters=('tex_store',))
  1475. next_op_idx += 1
  1476. self.add_dxil_op("WriteSamplerFeedbackLevel", next_op_idx, "WriteSamplerFeedbackLevel", "updates a feedback texture for a sampling operation with a mipmap-level offset", "v", "", [
  1477. db_dxil_param(0, "v", "", ""),
  1478. db_dxil_param(2, "res", "feedbackTex", "handle of feedback texture UAV"),
  1479. db_dxil_param(3, "res", "sampledTex", "handled of sampled texture SRV"),
  1480. db_dxil_param(4, "res", "sampler", "handle of sampler"),
  1481. db_dxil_param(5, "f", "c0", "coordinate c0"),
  1482. db_dxil_param(6, "f", "c1", "coordinate c1"),
  1483. db_dxil_param(7, "f", "c2", "coordinate c2"),
  1484. db_dxil_param(8, "f", "c3", "coordinate c3"),
  1485. db_dxil_param(9, "f", "lod", "LOD")],
  1486. counters=('tex_store',))
  1487. next_op_idx += 1
  1488. self.add_dxil_op("WriteSamplerFeedbackGrad", next_op_idx, "WriteSamplerFeedbackGrad", "updates a feedback texture for a sampling operation with explicit gradients", "v", "", [
  1489. db_dxil_param(0, "v", "", ""),
  1490. db_dxil_param(2, "res", "feedbackTex", "handle of feedback texture UAV"),
  1491. db_dxil_param(3, "res", "sampledTex", "handled of sampled texture SRV"),
  1492. db_dxil_param(4, "res", "sampler", "handle of sampler"),
  1493. db_dxil_param(5, "f", "c0", "coordinate c0"),
  1494. db_dxil_param(6, "f", "c1", "coordinate c1"),
  1495. db_dxil_param(7, "f", "c2", "coordinate c2"),
  1496. db_dxil_param(8, "f", "c3", "coordinate c3"),
  1497. db_dxil_param(9, "f", "ddx0", "rate of change of coordinate c0 in the x direction"),
  1498. db_dxil_param(10, "f", "ddx1", "rate of change of coordinate c1 in the x direction"),
  1499. db_dxil_param(11, "f", "ddx2", "rate of change of coordinate c2 in the x direction"),
  1500. db_dxil_param(12, "f", "ddy0", "rate of change of coordinate c0 in the y direction"),
  1501. db_dxil_param(13, "f", "ddy1", "rate of change of coordinate c1 in the y direction"),
  1502. db_dxil_param(14, "f", "ddy2", "rate of change of coordinate c2 in the y direction"),
  1503. db_dxil_param(15, "f", "clamp", "clamp")],
  1504. counters=('tex_store',))
  1505. next_op_idx += 1
  1506. # RayQuery
  1507. self.add_dxil_op("AllocateRayQuery", next_op_idx, "AllocateRayQuery", "allocates space for RayQuery and return handle", "v", "", [
  1508. db_dxil_param(0, "i32", "", "handle to RayQuery state"),
  1509. db_dxil_param(2, "u32", "constRayFlags", "Valid combination of RAY_FLAGS", is_const=True)])
  1510. next_op_idx += 1
  1511. self.add_dxil_op("RayQuery_TraceRayInline", next_op_idx, "RayQuery_TraceRayInline", "initializes RayQuery for raytrace", "v", "", [
  1512. db_dxil_param(0, "v", "", ""),
  1513. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle"),
  1514. db_dxil_param(3, "res", "accelerationStructure", "Top-level acceleration structure to use"),
  1515. db_dxil_param(4, "i32", "rayFlags", "Valid combination of RAY_FLAGS, combined with constRayFlags provided to AllocateRayQuery"),
  1516. db_dxil_param(5, "i32", "instanceInclusionMask", "Bottom 8 bits of InstanceInclusionMask are used to include/rejectgeometry instances based on the InstanceMask in each instance: if(!((InstanceInclusionMask & InstanceMask) & 0xff)) { ignore intersection }"),
  1517. db_dxil_param(6, "f", "origin_X", "Origin x of the ray"),
  1518. db_dxil_param(7, "f", "origin_Y", "Origin y of the ray"),
  1519. db_dxil_param(8, "f", "origin_Z", "Origin z of the ray"),
  1520. db_dxil_param(9, "f", "tMin", "Tmin of the ray"),
  1521. db_dxil_param(10, "f", "direction_X", "Direction x of the ray"),
  1522. db_dxil_param(11, "f", "direction_Y", "Direction y of the ray"),
  1523. db_dxil_param(12, "f", "direction_Z", "Direction z of the ray"),
  1524. db_dxil_param(13, "f", "tMax", "Tmax of the ray")])
  1525. next_op_idx += 1
  1526. self.add_dxil_op("RayQuery_Proceed", next_op_idx, "RayQuery_Proceed", "advances a ray query", "1", "", [
  1527. db_dxil_param(0, "i1", "", "operation result"),
  1528. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle")])
  1529. next_op_idx += 1
  1530. self.add_dxil_op("RayQuery_Abort", next_op_idx, "RayQuery_Abort", "aborts a ray query", "v", "", [
  1531. db_dxil_param(0, "v", "", ""),
  1532. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle")])
  1533. next_op_idx += 1
  1534. self.add_dxil_op("RayQuery_CommitNonOpaqueTriangleHit", next_op_idx, "RayQuery_CommitNonOpaqueTriangleHit", "commits a non opaque triangle hit", "v", "", [
  1535. db_dxil_param(0, "v", "", ""),
  1536. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle")])
  1537. next_op_idx += 1
  1538. self.add_dxil_op("RayQuery_CommitProceduralPrimitiveHit", next_op_idx, "RayQuery_CommitProceduralPrimitiveHit", "commits a procedural primitive hit", "v", "", [
  1539. db_dxil_param(0, "v", "", ""),
  1540. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle"),
  1541. db_dxil_param(3, "f", "t", "Procedural primitive hit distance (t) to commit.")])
  1542. next_op_idx += 1
  1543. self.add_dxil_op("RayQuery_CommittedStatus", next_op_idx, "RayQuery_StateScalar", "returns uint status (COMMITTED_STATUS) of the committed hit in a ray query", "i", "ro", [
  1544. db_dxil_param(0, "i32", "", "operation result"),
  1545. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle")])
  1546. next_op_idx += 1
  1547. self.add_dxil_op("RayQuery_CandidateType", next_op_idx, "RayQuery_StateScalar", "returns uint candidate type (CANDIDATE_TYPE) of the current hit candidate in a ray query, after Proceed() has returned true", "i", "ro", [
  1548. db_dxil_param(0, "i32", "", "operation result"),
  1549. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle")])
  1550. next_op_idx += 1
  1551. self.add_dxil_op("RayQuery_CandidateObjectToWorld3x4", next_op_idx, "RayQuery_StateMatrix", "returns matrix for transforming from object-space to world-space for a candidate hit.", "f", "ro", [
  1552. db_dxil_param(0, "f", "", "operation result"),
  1553. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle"),
  1554. db_dxil_param(3, "i32", "row", "row [0..2], relative to the element"),
  1555. db_dxil_param(4, "i8", "col", "column [0..3], relative to the element")])
  1556. next_op_idx += 1
  1557. self.add_dxil_op("RayQuery_CandidateWorldToObject3x4", next_op_idx, "RayQuery_StateMatrix", "returns matrix for transforming from world-space to object-space for a candidate hit.", "f", "ro", [
  1558. db_dxil_param(0, "f", "", "operation result"),
  1559. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle"),
  1560. db_dxil_param(3, "i32", "row", "row [0..2], relative to the element"),
  1561. db_dxil_param(4, "i8", "col", "column [0..3], relative to the element")])
  1562. next_op_idx += 1
  1563. self.add_dxil_op("RayQuery_CommittedObjectToWorld3x4", next_op_idx, "RayQuery_StateMatrix", "returns matrix for transforming from object-space to world-space for a Committed hit.", "f", "ro", [
  1564. db_dxil_param(0, "f", "", "operation result"),
  1565. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle"),
  1566. db_dxil_param(3, "i32", "row", "row [0..2], relative to the element"),
  1567. db_dxil_param(4, "i8", "col", "column [0..3], relative to the element")])
  1568. next_op_idx += 1
  1569. self.add_dxil_op("RayQuery_CommittedWorldToObject3x4", next_op_idx, "RayQuery_StateMatrix", "returns matrix for transforming from world-space to object-space for a Committed hit.", "f", "ro", [
  1570. db_dxil_param(0, "f", "", "operation result"),
  1571. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle"),
  1572. db_dxil_param(3, "i32", "row", "row [0..2], relative to the element"),
  1573. db_dxil_param(4, "i8", "col", "column [0..3], relative to the element")])
  1574. next_op_idx += 1
  1575. self.add_dxil_op("RayQuery_CandidateProceduralPrimitiveNonOpaque", next_op_idx, "RayQuery_StateScalar", "returns if current candidate procedural primitive is non opaque", "1", "ro", [
  1576. db_dxil_param(0, "i1", "", "operation result"),
  1577. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle")])
  1578. next_op_idx += 1
  1579. self.add_dxil_op("RayQuery_CandidateTriangleFrontFace", next_op_idx, "RayQuery_StateScalar", "returns if current candidate triangle is front facing", "1", "ro", [
  1580. db_dxil_param(0, "i1", "", "operation result"),
  1581. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle")])
  1582. next_op_idx += 1
  1583. self.add_dxil_op("RayQuery_CommittedTriangleFrontFace", next_op_idx, "RayQuery_StateScalar", "returns if current committed triangle is front facing", "1", "ro", [
  1584. db_dxil_param(0, "i1", "", "operation result"),
  1585. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle")])
  1586. next_op_idx += 1
  1587. self.add_dxil_op("RayQuery_CandidateTriangleBarycentrics", next_op_idx, "RayQuery_StateVector", "returns candidate triangle hit barycentrics", "f", "ro", [
  1588. db_dxil_param(0, "f", "", "operation result"),
  1589. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle"),
  1590. db_dxil_param(3, "i8", "component", "component [0..2]",is_const=True)])
  1591. next_op_idx += 1
  1592. self.add_dxil_op("RayQuery_CommittedTriangleBarycentrics", next_op_idx, "RayQuery_StateVector", "returns committed triangle hit barycentrics", "f", "ro", [
  1593. db_dxil_param(0, "f", "", "operation result"),
  1594. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle"),
  1595. db_dxil_param(3, "i8", "component", "component [0..2]",is_const=True)])
  1596. next_op_idx += 1
  1597. self.add_dxil_op("RayQuery_RayFlags", next_op_idx, "RayQuery_StateScalar", "returns ray flags", "i", "ro", [
  1598. db_dxil_param(0, "i32", "", "operation result"),
  1599. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle")])
  1600. next_op_idx += 1
  1601. self.add_dxil_op("RayQuery_WorldRayOrigin", next_op_idx, "RayQuery_StateVector", "returns world ray origin", "f", "ro", [
  1602. db_dxil_param(0, "f", "", "operation result"),
  1603. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle"),
  1604. db_dxil_param(3, "i8", "component", "component [0..2]",is_const=True)])
  1605. next_op_idx += 1
  1606. self.add_dxil_op("RayQuery_WorldRayDirection", next_op_idx, "RayQuery_StateVector", "returns world ray direction", "f", "ro", [
  1607. db_dxil_param(0, "f", "", "operation result"),
  1608. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle"),
  1609. db_dxil_param(3, "i8", "component", "component [0..2]",is_const=True)])
  1610. next_op_idx += 1
  1611. self.add_dxil_op("RayQuery_RayTMin", next_op_idx, "RayQuery_StateScalar", "returns float representing the parametric starting point for the ray.", "f", "ro", [
  1612. db_dxil_param(0, "f", "", "operation result"),
  1613. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle")])
  1614. next_op_idx += 1
  1615. self.add_dxil_op("RayQuery_CandidateTriangleRayT", next_op_idx, "RayQuery_StateScalar", "returns float representing the parametric point on the ray for the current candidate triangle hit.", "f", "ro", [
  1616. db_dxil_param(0, "f", "", "operation result"),
  1617. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle")])
  1618. next_op_idx += 1
  1619. self.add_dxil_op("RayQuery_CommittedRayT", next_op_idx, "RayQuery_StateScalar", "returns float representing the parametric point on the ray for the current committed hit.", "f", "ro", [
  1620. db_dxil_param(0, "f", "", "operation result"),
  1621. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle")])
  1622. next_op_idx += 1
  1623. self.add_dxil_op("RayQuery_CandidateInstanceIndex", next_op_idx, "RayQuery_StateScalar", "returns candidate hit instance index", "i", "ro", [
  1624. db_dxil_param(0, "i32", "", "operation result"),
  1625. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle")])
  1626. next_op_idx += 1
  1627. self.add_dxil_op("RayQuery_CandidateInstanceID", next_op_idx, "RayQuery_StateScalar", "returns candidate hit instance ID", "i", "ro", [
  1628. db_dxil_param(0, "i32", "", "operation result"),
  1629. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle")])
  1630. next_op_idx += 1
  1631. self.add_dxil_op("RayQuery_CandidateGeometryIndex", next_op_idx, "RayQuery_StateScalar", "returns candidate hit geometry index", "i", "ro", [
  1632. db_dxil_param(0, "i32", "", "operation result"),
  1633. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle")])
  1634. next_op_idx += 1
  1635. self.add_dxil_op("RayQuery_CandidatePrimitiveIndex", next_op_idx, "RayQuery_StateScalar", "returns candidate hit geometry index", "i", "ro", [
  1636. db_dxil_param(0, "i32", "", "operation result"),
  1637. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle")])
  1638. next_op_idx += 1
  1639. self.add_dxil_op("RayQuery_CandidateObjectRayOrigin", next_op_idx, "RayQuery_StateVector", "returns candidate hit object ray origin", "f", "ro", [
  1640. db_dxil_param(0, "f", "", "operation result"),
  1641. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle"),
  1642. db_dxil_param(3, "i8", "component", "component [0..2]",is_const=True)])
  1643. next_op_idx += 1
  1644. self.add_dxil_op("RayQuery_CandidateObjectRayDirection", next_op_idx, "RayQuery_StateVector", "returns candidate object ray direction", "f", "ro", [
  1645. db_dxil_param(0, "f", "", "operation result"),
  1646. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle"),
  1647. db_dxil_param(3, "i8", "component", "component [0..2]",is_const=True)])
  1648. next_op_idx += 1
  1649. self.add_dxil_op("RayQuery_CommittedInstanceIndex", next_op_idx, "RayQuery_StateScalar", "returns committed hit instance index", "i", "ro", [
  1650. db_dxil_param(0, "i32", "", "operation result"),
  1651. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle")])
  1652. next_op_idx += 1
  1653. self.add_dxil_op("RayQuery_CommittedInstanceID", next_op_idx, "RayQuery_StateScalar", "returns committed hit instance ID", "i", "ro", [
  1654. db_dxil_param(0, "i32", "", "operation result"),
  1655. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle")])
  1656. next_op_idx += 1
  1657. self.add_dxil_op("RayQuery_CommittedGeometryIndex", next_op_idx, "RayQuery_StateScalar", "returns committed hit geometry index", "i", "ro", [
  1658. db_dxil_param(0, "i32", "", "operation result"),
  1659. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle")])
  1660. next_op_idx += 1
  1661. self.add_dxil_op("RayQuery_CommittedPrimitiveIndex", next_op_idx, "RayQuery_StateScalar", "returns committed hit geometry index", "i", "ro", [
  1662. db_dxil_param(0, "i32", "", "operation result"),
  1663. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle")])
  1664. next_op_idx += 1
  1665. self.add_dxil_op("RayQuery_CommittedObjectRayOrigin", next_op_idx, "RayQuery_StateVector", "returns committed hit object ray origin", "f", "ro", [
  1666. db_dxil_param(0, "f", "", "operation result"),
  1667. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle"),
  1668. db_dxil_param(3, "i8", "component", "component [0..2]",is_const=True)])
  1669. next_op_idx += 1
  1670. self.add_dxil_op("RayQuery_CommittedObjectRayDirection", next_op_idx, "RayQuery_StateVector", "returns committed object ray direction", "f", "ro", [
  1671. db_dxil_param(0, "f", "", "operation result"),
  1672. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle"),
  1673. db_dxil_param(3, "i8", "component", "component [0..2]",is_const=True)])
  1674. next_op_idx += 1
  1675. self.add_dxil_op("GeometryIndex", next_op_idx, "GeometryIndex", "The autogenerated index of the current geometry in the bottom-level structure", "i", "rn", [
  1676. db_dxil_param(0, "i32", "", "result")])
  1677. next_op_idx += 1
  1678. self.add_dxil_op("RayQuery_CandidateInstanceContributionToHitGroupIndex", next_op_idx, "RayQuery_StateScalar", "returns candidate hit InstanceContributionToHitGroupIndex", "i", "ro", [
  1679. db_dxil_param(0, "i32", "", "operation result"),
  1680. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle")])
  1681. next_op_idx += 1
  1682. self.add_dxil_op("RayQuery_CommittedInstanceContributionToHitGroupIndex", next_op_idx, "RayQuery_StateScalar", "returns committed hit InstanceContributionToHitGroupIndex", "i", "ro", [
  1683. db_dxil_param(0, "i32", "", "operation result"),
  1684. db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle")])
  1685. next_op_idx += 1
  1686. # End of DXIL 1.5 opcodes.
  1687. self.set_op_count_for_version(1, 5, next_op_idx)
  1688. assert next_op_idx == 216, "216 is expected next operation index but encountered %d and thus opcodes are broken" % next_op_idx
  1689. self.add_dxil_op("AnnotateHandle", next_op_idx, "AnnotateHandle", "annotate handle with resource properties", "v", "rn", [
  1690. db_dxil_param(0, "res", "", "annotated handle"),
  1691. db_dxil_param(2, "res", "res", "input handle"),
  1692. db_dxil_param(3, "resproperty", "props", "details like component type, strutrure stride...", is_const=True)])
  1693. next_op_idx += 1
  1694. self.add_dxil_op("CreateHandleFromBinding", next_op_idx, "CreateHandleFromBinding", "create resource handle from binding", "v", "rn", [
  1695. db_dxil_param(0, "res", "", "result"),
  1696. db_dxil_param(2, "resbind", "bind", "resource binding", is_const=True), #{ rangeLowerBound, rangeUpperBound, spaceID, resourceClass }
  1697. db_dxil_param(3, "i32", "index", "index"),
  1698. db_dxil_param(4, "i1", "nonUniformIndex", "non-uniform resource index", is_const=True)])
  1699. next_op_idx += 1
  1700. self.add_dxil_op("CreateHandleFromHeap", next_op_idx, "CreateHandleFromHeap", "create resource handle from heap", "v", "rn", [
  1701. db_dxil_param(0, "res", "", "result"),
  1702. db_dxil_param(2, "i32", "index", "heap index"),
  1703. db_dxil_param(3, "i1", "samplerHeap", "If samplerHeap is 1, the heap indexed is the sampler descriptor heap, otherwise it is the CBV_SRV_UAV (resource) descriptor heap", is_const=True),
  1704. db_dxil_param(4, "i1", "nonUniformIndex", "non-uniform resource index", is_const=True)])
  1705. next_op_idx += 1
  1706. self.add_dxil_op("Unpack4x8", next_op_idx, "Unpack4x8", "unpacks 4 8-bit signed or unsigned values into int32 or int16 vector", "iw", "rn", [
  1707. db_dxil_param(0, "$vec4", "", "result"),
  1708. db_dxil_param(2, "i8", "unpackMode", "signed/unsigned"),
  1709. db_dxil_param(3, "i32", "pk", "packed 4 x i8")])
  1710. next_op_idx += 1
  1711. self.add_dxil_op("Pack4x8", next_op_idx, "Pack4x8", "packs vector of 4 signed or unsigned values into a packed datatype, drops or clamps unused bits", "iw", "rn", [
  1712. db_dxil_param(0, "i32", "", "result packed 4 x i8"),
  1713. db_dxil_param(2, "i8", "packMode", "trunc/unsigned clamp/signed clamp"),
  1714. db_dxil_param(3, "$o", "x", "the first component of the vector"),
  1715. db_dxil_param(4, "$o", "y", "the second component of the vector"),
  1716. db_dxil_param(5, "$o", "z", "the third component of the vector"),
  1717. db_dxil_param(6, "$o", "w", "the fourth component of the vector")])
  1718. next_op_idx += 1
  1719. self.add_dxil_op("IsHelperLane", next_op_idx, "IsHelperLane", "returns true on helper lanes in pixel shaders", "1", "ro", [
  1720. db_dxil_param(0, "i1", "", "result")])
  1721. next_op_idx += 1
  1722. # End of DXIL 1.6 opcodes.
  1723. self.set_op_count_for_version(1, 6, next_op_idx)
  1724. assert next_op_idx == 222, "222 is expected next operation index but encountered %d and thus opcodes are broken" % next_op_idx
  1725. # Set interesting properties.
  1726. self.build_indices()
  1727. for i in "CalculateLOD,DerivCoarseX,DerivCoarseY,DerivFineX,DerivFineY,Sample,SampleBias,SampleCmp".split(","):
  1728. self.name_idx[i].is_gradient = True
  1729. for i in "DerivCoarseX,DerivCoarseY,DerivFineX,DerivFineY".split(","):
  1730. assert self.name_idx[i].is_gradient == True, "all derivatives are marked as requiring gradients"
  1731. self.name_idx[i].is_deriv = True
  1732. # TODO - some arguments are required to be immediate constants in DXIL, eg resource kinds; add this information
  1733. # consider - report instructions that are overloaded on a single type, then turn them into non-overloaded version of that type
  1734. self.verify_dense(self.get_dxil_insts(), lambda x : x.dxil_opid, lambda x : x.name)
  1735. for i in self.instr:
  1736. self.verify_dense(i.ops, lambda x : x.pos, lambda x : i.name)
  1737. for i in self.instr:
  1738. if i.is_dxil_op:
  1739. assert i.oload_types != "", "overload for DXIL operation %s should not be empty - use void if n/a" % (i.name)
  1740. assert i.oload_types == "v" or i.oload_types.find("v") < 0, "void overload should be exclusive to other types (%s)" % i.name
  1741. assert type(i.oload_types) is str, "overload for %s should be a string - use empty if n/a" % (i.name)
  1742. # Verify that all operations in each class have the same signature.
  1743. import itertools
  1744. class_sort_func = lambda x, y: x < y
  1745. class_key_func = lambda x : x.dxil_class
  1746. instr_ordered_by_class = sorted([i for i in self.instr if i.is_dxil_op], key=class_key_func)
  1747. instr_grouped_by_class = itertools.groupby(instr_ordered_by_class, key=class_key_func)
  1748. def calc_oload_sig(inst):
  1749. result = ""
  1750. for o in inst.ops:
  1751. result += o.llvm_type
  1752. return result
  1753. for k, g in instr_grouped_by_class:
  1754. group = list(g)
  1755. if len(group) > 1:
  1756. first = group[0]
  1757. first_group = calc_oload_sig(first)
  1758. for other in group[1:]:
  1759. other_group = calc_oload_sig(other)
  1760. # TODO: uncomment assert when opcodes are fixed
  1761. #assert first_group == other_group, "overload signature %s for instruction %s differs from %s in %s" % (first.name, first_group, other.name, other_group)
  def populate_extended_docs(self):
      """Update the documentation with text from external files.

      Reads ``hctdb_inst_docs.txt`` from the directory containing this
      module and copies its text onto the entries of ``self.name_idx``.

      File format, as handled below:
        * lines starting with ``#`` are ignored;
        * ``* Inst: <name> - <doc>`` starts the section for instruction
          ``<name>``; ``<doc>`` (everything after the first ``-``) becomes
          the entry's ``doc``.  If the header has no ``-`` the entry's
          existing ``doc`` is left untouched;
        * ``* BLOCK-BEGIN`` / ``* BLOCK-END`` bracket text that is appended
          to the remarks verbatim;
        * every other line is stripped and appended to the remarks on a
          line of its own.

      Raises:
        KeyError: if an instruction named in the file is not in
          ``self.name_idx``.
        AssertionError: on nested or unmatched block markers.
      """
      inst_starter = "* Inst: "
      block_starter = "* BLOCK-BEGIN"
      block_end = "* BLOCK-END"

      def apply_docs(name, doc, remarks):
          # Copy the text gathered for one instruction onto its entry.
          inst = self.name_idx[name]
          if doc is not None:
              inst.doc = doc
          inst.remarks = remarks.strip()

      thisdir = os.path.dirname(os.path.realpath(__file__))
      with open(os.path.join(thisdir, "hctdb_inst_docs.txt")) as ops_file:
          inst_name = ""
          inst_doc = None
          inst_remarks = ""
          is_block = False
          for idx, line in enumerate(ops_file):
              if line.startswith("#"):
                  continue
              if line.startswith(block_starter):
                  assert not is_block, "unexpected block begin at line %i" % (idx+1)
                  is_block = True
                  continue
              if line.startswith(block_end):
                  assert is_block, "unexpected block end at line %i" % (idx+1)
                  is_block = False
                  continue
              if line.startswith(inst_starter):
                  # A new header closes the section of the previous instruction.
                  if inst_name:
                      apply_docs(inst_name, inst_doc, inst_remarks)
                      inst_remarks = ""
                  # partition() avoids the find() == -1 pitfall, which used to
                  # drop the last character of the name when no '-' was present.
                  head, sep, tail = line[len(inst_starter):].partition("-")
                  inst_name = head.strip()
                  inst_doc = tail.strip() if sep else None
              else:
                  inst_remarks += line if is_block else "\n" + line.strip()
          # Flush the final section; both doc and remarks must be stored here,
          # otherwise the last instruction in the file never receives its doc.
          if inst_name:
              apply_docs(inst_name, inst_doc, inst_remarks)
  def populate_metadata(self):
      "Append a db_dxil_metadata entry to self.metadata for each allowed named metadata."
      # For now this is only a flat list of (name, description) pairs.
      # Note that dx.typevar.* is not the name of a metadata node: it is the
      # prefix for global variables that will be referenced by structure
      # type annotations.
      named_metadata = (
          ("dx.controlflow.hints", "Provides control flow hints to an instruction."),
          ("dx.entryPoints", "Entry point functions."),
          ("dx.precise", "Marks an instruction as precise."),
          ("dx.resources", "Resources used by the entry point shaders."),
          ("dx.shaderModel", "Shader model for the module."),
          ("dx.typeAnnotations", "Provides annotations for types."),
          ("dx.typevar.*", "."),
          ("dx.valver", "Optional validator version."),
          ("dx.version", "Optional DXIL version for the module."),
      )
      target = self.metadata
      for md_name, md_doc in named_metadata:
          target.append(db_dxil_metadata(md_name, md_doc))
  def populate_passes(self):
      """Fill self.passes with the known optimizer passes and their options.

      Each pass is registered through the local add_pass() helper as a
      db_dxil_pass carrying a name, a type name, a description and the
      library category that was current when it was registered.  Options are
      given as small dicts whose keys map onto db_dxil_pass_arg as follows:
      'n' -> name (required), 'i' -> ident, 't' -> type_name,
      'c' -> is_ctor_param, 'd' -> doc.

      After registration the method asserts that pass names are unique and
      that no non-constructor option name is used twice, and stores the
      collected option names in self.pass_idx_args.
      """
      # Populate passes and their options.
      p = self.passes
      # add_pass() reads category_lib when it is called, so each reassignment
      # below applies to every pass registered after it.
      category_lib = "set this before add_pass"
      def add_pass(name, type_name, doc, opts):
          # Build one pass description, attach its options and register it.
          apass = db_dxil_pass(name, type_name=type_name, doc=doc, category_lib=category_lib)
          for o in opts:
              assert 'n' in o, "option in %s has no 'n' member" % name
              apass.args.append(db_dxil_pass_arg(o['n'], ident=o.get('i'), type_name=o.get('t'), is_ctor_param=o.get('c'), doc=o.get('d')))
          p.append(apass)
      category_lib = "llvm"
      # Add discriminators is a DWARF 4 thing, useful for the profiler.
      # Consider removing lib\Transforms\Utils\AddDiscriminators.cpp altogether
      add_pass("add-discriminators", "AddDiscriminators", "Add DWARF path discriminators",
          [{'n':"no-discriminators", 'i':"NoDiscriminators", 't':"bool"}])
      # Sample profile is part of the sample profiling infrastructure.
      # Consider removing lib\Transforms\Scalar\SampleProfile.cpp
      add_pass("sample-profile", "SampleProfileLoader", "Sample Profile loader", [
          {'n':"sample-profile-file", 'i':"SampleProfileFile", 't':"string"},
          {'n':"sample-profile-max-propagate-iterations", 'i':"SampleProfileMaxPropagateIterations", 't':"unsigned"}])
      # inline and always-inline share a base class - those are the arguments we document for each of them.
      inliner_args = [
          {'n':'InsertLifetime', 't':'bool', 'c':1, 'd':'Insert @llvm.lifetime intrinsics'},
          {'n':'InlineThreshold', 't':'unsigned', 'c':1, 'd':'Insert @llvm.lifetime intrinsics'}]
      add_pass("inline", "SimpleInliner", "Function Integration/Inlining", inliner_args)
      #    {'n':"OptLevel", 't':"unsigned", 'c':1},
      #    {'n':"SizeOptLevel", 't':'unsigned', 'c':1}
      add_pass('always-inline', 'AlwaysInliner', 'Inliner for always_inline functions', inliner_args)
      #    {'n':'InsertLifetime', 't':'bool', 'c':1, 'd':'Insert @llvm.lifetime intrinsics'}
      # Consider a review of the target-specific wrapper.
      add_pass("tti", "TargetTransformInfoWrapperPass", "Target Transform Information", [
          {'n':'TIRA', 't':'TargetIRAnalysis', 'c':1}])
      add_pass("verify", "VerifierLegacyPass", "Module Verifier", [
          {'n':'FatalErrors', 't':'bool', 'c':1},
          {'n':'verify-debug-info', 'i':'VerifyDebugInfo', 't':'bool'}])
      add_pass("targetlibinfo", "TargetLibraryInfoWrapperPass", "Target Library Information", [
          {'n':'TLIImpl', 't':'TargetLibraryInfoImpl', 'c':1},
          {'n':'vector-library', 'i':'ClVectorLibrary', 't':'TargetLibraryInfoImpl::VectorLibrary'}])
      add_pass("cfl-aa", "CFLAliasAnalysis", "CFL-Based AA implementation", [])
      add_pass("tbaa", "TypeBasedAliasAnalysis", "Type-Based Alias Analysis", [
          {'n':"enable-tbaa", 'i':'EnableTBAA', 't':'bool', 'd':'Use to disable TBAA functionality'}])
      add_pass("scoped-noalias", "ScopedNoAliasAA", "Scoped NoAlias Alias Analysis", [
          {'n':"enable-scoped-noalias", 'i':'EnableScopedNoAlias', 't':'bool', 'd':'Use to disable scoped no-alias'}])
      add_pass("basicaa", "BasicAliasAnalysis", "Basic Alias Analysis (stateless AA impl)", [])
      add_pass("simplifycfg", "CFGSimplifyPass", "Simplify the CFG", [
          {'n':'Threshold', 't':'int', 'c':1},
          {'n':'Ftor', 't':'std::function<bool(const Function &)>', 'c':1},
          {'n':'bonus-inst-threshold', 'i':'UserBonusInstThreshold', 't':'unsigned', 'd':'Control the number of bonus instructions (default = 1)'}])
      # UseNewSROA is used by PassManagerBuilder::populateFunctionPassManager, not a pass per se.
      add_pass("sroa", "SROA", "Scalar Replacement Of Aggregates", [
          {'n':'RequiresDomTree', 't':'bool', 'c':1},
          {'n':'SkipHLSLMat', 't':'bool', 'c':1},
          {'n':'force-ssa-updater', 'i':'ForceSSAUpdater', 't':'bool', 'd':'Force the pass to not use DomTree and mem2reg, insteadforming SSA values through the SSAUpdater infrastructure.'},
          {'n':'sroa-random-shuffle-slices', 'i':'SROARandomShuffleSlices', 't':'bool', 'd':'Enable randomly shuffling the slices to help uncover instability in their order.'},
          {'n':'sroa-strict-inbounds', 'i':'SROAStrictInbounds', 't':'bool', 'd':'Experiment with completely strict handling of inbounds GEPs.'}])
      add_pass("dxil-cond-mem2reg", "DxilConditionalMem2Reg", "Dxil Conditional Mem2Reg", [
          {'n':'NoOpt', 't':'bool', 'c':1},
          ])
      add_pass('scalarrepl', 'SROA_DT', 'Scalar Replacement of Aggregates (DT)', [
          {'n':'Threshold', 't':'int', 'c':1},
          {'n':'StructMemberThreshold', 't':'int', 'c':1},
          {'n':'ArrayElementThreshold', 't':'int', 'c':1},
          {'n':'ScalarLoadThreshold', 't':'int', 'c':1}])
      add_pass('scalarrepl-ssa', 'SROA_SSAUp', 'Scalar Replacement of Aggregates (SSAUp)', [
          {'n':'Threshold', 't':'int', 'c':1},
          {'n':'StructMemberThreshold', 't':'int', 'c':1},
          {'n':'ArrayElementThreshold', 't':'int', 'c':1},
          {'n':'ScalarLoadThreshold', 't':'int', 'c':1}])
      add_pass('early-cse', 'EarlyCSELegacyPass', 'Early CSE', [])
      # More branch weight support.
      add_pass('lower-expect', 'LowerExpectIntrinsic', "Lower 'expect' Intrinsics", [
          {'n':'likely-branch-weight', 'i':'LikelyBranchWeight', 't':'uint32_t', 'd':'Weight of the branch likely to be taken (default = 64)'},
          {'n':'unlikely-branch-weight', 'i':'UnlikelyBranchWeight', 't':'uint32_t', 'd':'Weight of the branch unlikely to be taken (default = 4)'}])
      # Consider removing lib\Transforms\Utils\SymbolRewriter.cpp
      add_pass('rewrite-symbols', 'RewriteSymbols', 'Rewrite Symbols', [
          {'n':'DL', 't':'SymbolRewriter::RewriteDescriptorList', 'c':1},
          {'n':'rewrite-map-file', 'i':'RewriteMapFiles', 't':'string'}])
      add_pass('mergefunc', 'MergeFunctions', 'Merge Functions', [
          {'n':'mergefunc-sanity', 'i':'NumFunctionsForSanityCheck', 't':'unsigned', 'd':"How many functions in module could be used for MergeFunctions pass sanity check. '0' disables this check. Works only with '-debug' key."}])
      # Consider removing GlobalExtensions globals altogether.
      add_pass('barrier', 'BarrierNoop', 'A No-Op Barrier Pass', [])
      add_pass('dce', 'DCE', 'Dead Code Elimination', [])
      add_pass('die', 'DeadInstElimination', 'Dead Instruction Elimination', [])
      add_pass('globaldce', 'GlobalDCE', 'Dead Global Elimination', [])
      add_pass('mem2reg', 'PromotePass', 'Promote Memory to Register', [])
      add_pass('scalarizer', 'Scalarizer', 'Scalarize vector operations', [])
      # Passes registered from here on are tagged with the "pix" category.
      category_lib="pix"
      add_pass('hlsl-dxil-add-pixel-hit-instrmentation', 'DxilAddPixelHitInstrumentation', 'DXIL Count completed PS invocations and costs', [
          {'n':'force-early-z','t':'int','c':1},
          {'n':'add-pixel-cost','t':'int','c':1},
          {'n':'rt-width','t':'int','c':1},
          {'n':'sv-position-index','t':'int','c':1},
          {'n':'num-pixels','t':'int','c':1}])
      add_pass('hlsl-dxil-constantColor', 'DxilOutputColorBecomesConstant', 'DXIL Constant Color Mod', [
          {'n':'mod-mode','t':'int','c':1},
          {'n':'constant-red','t':'float','c':1},
          {'n':'constant-green','t':'float','c':1},
          {'n':'constant-blue','t':'float','c':1},
          {'n':'constant-alpha','t':'float','c':1}])
      add_pass('hlsl-dxil-remove-discards', 'DxilRemoveDiscards', 'HLSL DXIL Remove all discard instructions', [])
      add_pass('hlsl-dxil-force-early-z', 'DxilForceEarlyZ', 'HLSL DXIL Force the early Z global flag, if shader has no discard calls', [])
      add_pass('hlsl-dxil-pix-meshshader-output-instrumentation', 'DxilPIXMeshShaderOutputInstrumentation', 'DXIL mesh shader output instrumentation for PIX', [
          {'n':'UAVSize','t':'int','c':1}])
      add_pass('hlsl-dxil-pix-shader-access-instrumentation', 'DxilShaderAccessTracking', 'HLSL DXIL shader access tracking for PIX', [
          {'n':'config','t':'int','c':1},
          {'n':'checkForDynamicIndexing','t':'bool','c':1}])
      add_pass('hlsl-dxil-debug-instrumentation', 'DxilDebugInstrumentation', 'HLSL DXIL debug instrumentation for PIX', [
          {'n':'UAVSize','t':'int','c':1},
          {'n':'parameter0','t':'int','c':1},
          {'n':'parameter1','t':'int','c':1},
          {'n':'parameter2','t':'int','c':1}])
      add_pass('dxil-annotate-with-virtual-regs', 'DxilAnnotateWithVirtualRegister', 'Annotates each instruction in the DXIL module with a virtual register number', [])
      add_pass('dxil-dbg-value-to-dbg-declare', 'DxilDbgValueToDbgDeclare', 'Converts llvm.dbg.value uses to llvm.dbg.declare.', [])
      add_pass('hlsl-dxil-reduce-msaa-to-single', 'DxilReduceMSAAToSingleSample', 'HLSL DXIL Reduce all MSAA reads to single-sample reads', [])
      # Passes registered from here on are tagged with the "dxil_gen" category.
      category_lib="dxil_gen"
      add_pass('hlsl-hlemit', 'HLEmitMetadata', 'HLSL High-Level Metadata Emit.', [])
      add_pass("hl-expand-store-intrinsics", "HLExpandStoreIntrinsics", "Expand HLSL store intrinsics", [])
      add_pass("hl-legalize-parameter", "HLLegalizeParameter", "Legalize parameter", [])
      add_pass('scalarrepl-param-hlsl', 'SROA_Parameter_HLSL', 'Scalar Replacement of Aggregates HLSL (parameters)', [])
      add_pass('static-global-to-alloca', 'LowerStaticGlobalIntoAlloca', 'Lower static global into Alloca', [])
      add_pass('hlmatrixlower', 'HLMatrixLowerPass', 'HLSL High-Level Matrix Lower', [])
      add_pass('matrixbitcastlower', 'MatrixBitcastLowerPass', 'Matrix Bitcast lower', [])
      add_pass("reg2mem_hlsl", "RegToMemHlsl", "Demote values with phi-node usage to stack slots", [])
      add_pass('dynamic-vector-to-array', 'DynamicIndexingVectorToArray', 'Replace dynamic indexing vector with array', [
          {'n':'ReplaceAllVectors','t':'bool','c':1}])
      add_pass('hlsl-dxil-promote-local-resources', 'DxilPromoteLocalResources', 'DXIL promote local resource use', [])
      add_pass('hlsl-dxil-promote-static-resources', 'DxilPromoteStaticResources', 'DXIL promote static resource use', [])
      add_pass('hlsl-dxil-legalize-resources', 'DxilLegalizeResources', 'DXIL legalize resource use', [])
      add_pass('hlsl-dxil-legalize-eval-operations', 'DxilLegalizeEvalOperations', 'DXIL legalize eval operations', [])
      add_pass('dxilgen', 'DxilGenerationPass', 'HLSL DXIL Generation', [
          {'n':'NotOptimized','t':'bool','c':1}])
      add_pass('invalidate-undef-resource', 'InvalidateUndefResources', 'Invalidate undef resources', [])
      add_pass('simplify-inst', 'SimplifyInst', 'Simplify Instructions', [])
      add_pass('hlsl-dxil-precise', 'DxilPrecisePropagatePass', 'DXIL precise attribute propagate', [])
      add_pass('dxil-legalize-sample-offset', 'DxilLegalizeSampleOffsetPass', 'DXIL legalize sample offset', [])
      add_pass('dxil-gvn-hoist', 'DxilSimpleGVNHoist', 'DXIL simple gvn hoist', [])
      add_pass('hlsl-hlensure', 'HLEnsureMetadata', 'HLSL High-Level Metadata Ensure', [])
      add_pass('multi-dim-one-dim', 'MultiDimArrayToOneDimArray', 'Flatten multi-dim array into one-dim array', [])
      add_pass('resource-handle', 'ResourceToHandle', 'Lower resource into handle', [])
      add_pass('hlsl-passes-nopause', 'NoPausePasses', 'Clears metadata used for pause and resume', [])
      add_pass('hlsl-passes-pause', 'PausePasses', 'Prepare to pause passes', [])
      add_pass('hlsl-passes-resume', 'ResumePasses', 'Prepare to resume passes', [])
      add_pass('hlsl-dxil-condense', 'DxilCondenseResources', 'DXIL Condense Resources', [])
      add_pass('hlsl-dxil-lower-handle-for-lib', 'DxilLowerCreateHandleForLib', 'DXIL Lower createHandleForLib', [])
      add_pass('hlsl-dxil-allocate-resources-for-lib', 'DxilAllocateResourcesForLib', 'DXIL Allocate Resources For Library', [])
      add_pass('hlsl-dxil-convergent-mark', 'DxilConvergentMark', 'Mark convergent', [])
      add_pass('hlsl-dxil-convergent-clear', 'DxilConvergentClear', 'Clear convergent before dxil emit', [])
      add_pass('hlsl-dxil-eliminate-output-dynamic', 'DxilEliminateOutputDynamicIndexing', 'DXIL eliminate ouptut dynamic indexing', [])
      add_pass('hlsl-dxilfinalize', 'DxilFinalizeModule', 'HLSL DXIL Finalize Module', [])
      add_pass('hlsl-dxilemit', 'DxilEmitMetadata', 'HLSL DXIL Metadata Emit', [])
      add_pass('hlsl-dxilload', 'DxilLoadMetadata', 'HLSL DXIL Metadata Load', [])
      add_pass('dxil-dfe', 'DxilDeadFunctionElimination', 'Remove all unused function except entry from DxilModule', [])
      add_pass('hl-dfe', 'HLDeadFunctionElimination', 'Remove all unused function except entry from HLModule', [])
      add_pass('hl-preprocess', 'HLPreprocess', 'Preprocess HLModule after inline', [])
      add_pass('hlsl-dxil-expand-trig', 'DxilExpandTrigIntrinsics', 'DXIL expand trig intrinsics', [])
      add_pass('hlsl-hca', 'HoistConstantArray', 'HLSL constant array hoisting', [])
      add_pass('hlsl-dxil-preserve-all-outputs', 'DxilPreserveAllOutputs', 'DXIL write to all outputs in signature', [])
      add_pass('red', 'ReducibilityAnalysis', 'Reducibility Analysis', [])
      add_pass('viewid-state', 'ComputeViewIdState', 'Compute information related to ViewID', [])
      add_pass('hlsl-translate-dxil-opcode-version', 'DxilTranslateRawBuffer', 'Translates one version of dxil to another', [])
      add_pass('hlsl-dxil-cleanup-addrspacecast', 'DxilCleanupAddrSpaceCast', 'HLSL DXIL Cleanup Address Space Cast (part of hlsl-dxilfinalize)', [])
      add_pass('dxil-fix-array-init', 'DxilFixConstArrayInitializer', 'Dxil Fix Array Initializer', [])
      add_pass('hlsl-validate-wave-sensitivity', 'DxilValidateWaveSensitivity', 'HLSL DXIL wave sensitiveity validation', [])
      add_pass('dxil-elim-vector', 'DxilEliminateVector', 'Dxil Eliminate Vectors', [])
      add_pass('dxil-rewrite-output-arg-debug-info', 'DxilRewriteOutputArgDebugInfo', 'Dxil Rewrite Output Arg Debug Info', [])
      add_pass('dxil-finalize-preserves', 'DxilFinalizePreserves', 'Dxil Finalize Preserves', [])
      add_pass('dxil-insert-preserves', 'DxilInsertPreserves', 'Dxil Insert Noops', [
          {'n':'AllowPreserves', 't':'bool', 'c':1},
          ])
      add_pass('dxil-preserves-to-select', 'DxilPreserveToSelect', 'Dxil Preserves To Select', [])
      add_pass('dxil-delete-loop', 'DxilLoopDeletion', 'Dxil Loop Deletion', [])
      add_pass('dxil-value-cache', 'DxilValueCache', 'Dxil Value Cache',[])
      add_pass('hlsl-cleanup-dxbreak', 'CleanupDxBreak', 'HLSL Remove unnecessary dx.break conditions', [])
      add_pass('dxil-rename-resources', 'DxilRenameResources', 'Rename resources to prevent merge by name during linking', [
          {'n':'prefix', 'i':'Prefix', 't':'string', 'd':'Prefix to add to resource names'},
          {'n':'from-binding', 'i':'FromBinding', 't':'bool', 'c':1, 'd':'Append binding to name when bound'},
          {'n':'keep-name', 'i':'KeepName', 't':'bool', 'c':1, 'd':'Keep name when appending binding'},
          ])
      add_pass('hlsl-dxil-resources-to-handle', 'DxilMutateResourceToHandle', 'Mutate resource to handle',[])
      # Back to the "llvm" category for the remaining passes.
      category_lib="llvm"
      add_pass('ipsccp', 'IPSCCP', 'Interprocedural Sparse Conditional Constant Propagation', [])
      add_pass('globalopt', 'GlobalOpt', 'Global Variable Optimizer', [])
      add_pass('deadargelim', 'DAE', 'Dead Argument Elimination', [])
      # Should we get rid of this, or invest in bugpoint support?
      add_pass('deadarghaX0r', 'DAH', 'Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)', [])
      add_pass('instcombine', 'InstructionCombiningPass', 'Combine redundant instructions', [])
      add_pass('prune-eh', 'PruneEH', 'Remove unused exception handling info', [])
      add_pass('functionattrs', 'FunctionAttrs', 'Deduce function attributes', [])
      add_pass('argpromotion', 'ArgPromotion', "Promote 'by reference' arguments to scalars", [
          {'n':'maxElements', 't':'unsigned', 'c':1}])
      add_pass('jump-threading', 'JumpThreading', 'Jump Threading', [
          {'n':'Threshold', 't':'int', 'c':1},
          {'n':'jump-threading-threshold', 'i':'BBDuplicateThreshold', 't':'unsigned', 'd':'Max block size to duplicate for jump threading'}])
      add_pass('correlated-propagation', 'CorrelatedValuePropagation', 'Value Propagation', [])
      # createTailCallEliminationPass is removed - but is this checked before?
      add_pass('reassociate', 'Reassociate', 'Reassociate expressions', [])
      add_pass('loop-rotate', 'LoopRotate', 'Rotate Loops', [
          {'n':'MaxHeaderSize', 't':'int', 'c':1},
          {'n':'rotation-max-header-size', 'i':'DefaultRotationThreshold', 't':'unsigned', 'd':'The default maximum header size for automatic loop rotation'}])
      add_pass('licm', 'LICM', 'Loop Invariant Code Motion', [
          {'n':'disable-licm-promotion', 'i':'DisablePromotion', 't':'bool', 'd':'Disable memory promotion in LICM pass'}])
      add_pass('loop-unswitch', 'LoopUnswitch', 'Unswitch loops', [
          {'n':'Os', 't':'bool', 'c':1, 'd':'Optimize for size'},
          {'n':'loop-unswitch-threshold', 'i':'Threshold', 't':'unsigned', 'd':'Max loop size to unswitch'}])
      # C:\nobackup\work\HLSLonLLVM\lib\Transforms\IPO\PassManagerBuilder.cpp:353
      add_pass('indvars', 'IndVarSimplify', "Induction Variable Simplification", [])
      add_pass('loop-idiom', 'LoopIdiomRecognize', "Recognize loop idioms", [])
      add_pass('dxil-loop-unroll', 'DxilLoopUnroll', 'DxilLoopUnroll', [
          {'n':'MaxIterationAttempt', 't':'unsigned', 'c':1, 'd':'Maximum number of iterations to attempt when iteratively unrolling.'},
          {'n':'OnlyWarnOnFail', 't':'bool', 'c':1, 'd':'Whether to just warn when unrolling fails.'},
          ])
      add_pass('dxil-erase-dead-region', 'DxilEraseDeadRegion', 'DxilEraseDeadRegion', [])
      add_pass('dxil-remove-dead-blocks', 'DxilRemoveDeadBlocks', 'DxilRemoveDeadBlocks', [])
      add_pass('dxil-o0-legalize', 'DxilNoOptLegalize', 'DXIL No-Opt Legalize', [])
      add_pass('dxil-o0-simplify-inst', 'DxilNoOptSimplifyInstructions', 'DXIL No-Opt Simplify Inst', [])
      add_pass('loop-deletion', 'LoopDeletion', "Delete dead loops", [])
      add_pass('loop-interchange', 'LoopInterchange', 'Interchanges loops for cache reuse', [])
      add_pass('loop-unroll', 'LoopUnroll', 'Unroll loops', [
          {'n':'Threshold', 't':'int', 'c':1},
          {'n':'Count', 't':'int', 'c':1},
          {'n':'AllowPartial', 't':'int', 'c':1},
          {'n':'Runtime', 't':'int', 'c':1},
          {'n':'unroll-threshold', 'i':'UnrollThreshold', 't':'unsigned', 'd':'The baseline cost threshold for loop unrolling'},
          {'n':'unroll-percent-dynamic-cost-saved-threshold', 'i':'UnrollPercentDynamicCostSavedThreshold', 't':'unsigned', 'd':'The percentage of estimated dynamic cost which must be saved by unrolling to allow unrolling up to the max threshold.'},
          {'n':'unroll-dynamic-cost-savings-discount', 'i':'UnrollDynamicCostSavingsDiscount', 't':'unsigned', 'd':"This is the amount discounted from the total unroll cost when the unrolled form has a high dynamic cost savings (triggered by the '-unroll-perecent-dynamic-cost-saved-threshold' flag)."},
          {'n':'unroll-max-iteration-count-to-analyze', 'i':'UnrollMaxIterationsCountToAnalyze', 't':'unsigned', 'd':"Don't allow loop unrolling to simulate more than this number of iterations when checking full unroll profitability"},
          {'n':'unroll-count', 'i':'UnrollCount', 't':'unsigned', 'd':'Use this unroll count for all loops including those with unroll_count pragma values, for testing purposes'},
          {'n':'unroll-allow-partial', 'i':'UnrollAllowPartial', 't':'bool', 'd':'Allows loops to be partially unrolled until -unroll-threshold loop size is reached.'},
          {'n':'unroll-runtime', 'i':'UnrollRuntime', 't':'bool', 'd':'Unroll loops with run-time trip counts'},
          {'n':'pragma-unroll-threshold', 'i':'PragmaUnrollThreshold', 't':'unsigned', 'd':'Unrolled size limit for loops with an unroll(full) or unroll_count pragma.'}])
      add_pass('mldst-motion', 'MergedLoadStoreMotion', 'MergedLoadStoreMotion', [])
      add_pass('gvn', 'GVN', 'Global Value Numbering', [
          {'n':'noloads', 't':'bool', 'c':1},
          {'n':'enable-pre', 'i':'EnablePRE', 't':'bool'},
          {'n':'enable-load-pre', 'i':'EnableLoadPRE', 't':'bool'},
          {'n':'max-recurse-depth', 'i':'MaxRecurseDepth', 't':'uint32_t', 'd':'Max recurse depth'}])
      add_pass('sccp', 'SCCP', 'Sparse Conditional Constant Propagation', [])
      add_pass('bdce', 'BDCE', 'Bit-Tracking Dead Code Elimination', [])
      add_pass('dse', 'DSE', 'Dead Store Elimination', [])
      add_pass('loop-reroll', 'LoopReroll', 'Reroll loops', [
          {'n':'max-reroll-increment', 'i':'MaxInc', 't':'unsigned', 'd':'The maximum increment for loop rerolling'},
          {'n':'reroll-num-tolerated-failed-matches', 'i':'NumToleratedFailedMatches', 't':'unsigned', 'd':'The maximum number of failures to tolerate during fuzzy matching.'}])
      add_pass('load-combine', 'LoadCombine', 'Combine Adjacent Loads', [])
      add_pass('adce', 'ADCE', 'Aggressive Dead Code Elimination', [])
      add_pass('float2int', 'Float2Int', 'Float to int', [
          {'n':'float2int-max-integer-bw', 'i':'MaxIntegerBW', 't':'unsigned', 'd':'Max integer bitwidth to consider in float2int'}])
      add_pass('loop-distribute', 'LoopDistribute', 'Loop Distribition', [
          {'n':'loop-distribute-verify', 'i':'LDistVerify', 't':'bool', 'd':'Turn on DominatorTree and LoopInfo verification after Loop Distribution'},
          {'n':'loop-distribute-non-if-convertible', 'i':'DistributeNonIfConvertible', 't':'bool', 'd':'Whether to distribute into a loop that may not be if-convertible by the loop vectorizer'}])
      add_pass('alignment-from-assumptions', 'AlignmentFromAssumptions', 'Alignment from assumptions', [])
      add_pass('strip-dead-prototypes', 'StripDeadPrototypesPass', 'Strip Unused Function Prototypes', [])
      add_pass('elim-avail-extern', 'EliminateAvailableExternally', 'Eliminate Available Externally Globals', [])
      add_pass('constmerge', 'ConstantMerge', 'Merge Duplicate Global Constants', [])
      add_pass('lowerbitsets', 'LowerBitSets', 'Lower bitset metadata', [
          {'n':'lowerbitsets-avoid-reuse', 'i':'AvoidReuse', 't':'bool', 'd':'Try to avoid reuse of byte array addresses using aliases'}])
      # TODO: turn STATISTICS macros into ETW events
      # assert no duplicate names
      self.pass_idx_args = set()
      p_names = set()  # pass names seen so far
      p_ids = set()  # non-constructor option names seen so far
      for ap in p:
          assert ap.name not in p_names
          p_names.add(ap.name)
          for anarg in ap.args:
              # A non-constructor option name may belong to at most one pass;
              # constructor parameters are exempt from the uniqueness check.
              assert anarg.is_ctor_param or anarg.name not in p_ids, "argument %s in %s is not ctor and is duplicate" % (anarg.name, ap.name)
              if not anarg.is_ctor_param:
                  p_ids.add(anarg.name)
                  # NOTE(review): nesting of the next line under this `if` could not
                  # be confirmed - verify whether pass_idx_args is meant to hold
                  # only non-constructor option names or every option name.
                  self.pass_idx_args.add(anarg.name)
  def build_semantics(self):
      """Register the signature-related enums and build the two signature tables.

      Appends SemanticKind, SigPointKind, Float32DenormMode, PackingKind and
      SemanticInterpretationKind to ``self.enums`` (in that order) and sets:

      * ``self.sigpoint_table``: rows parsed from SigPointCSV, header row first.
      * ``self.interpretation_table``: rows parsed from
        SemanticInterpretationCSV, header row first.

      Raises AssertionError when a table is not rectangular or when its
      labels do not line up with the corresponding enum's value names.
      """

      def parse_csv(text, what):
          "Split comma-separated text into rows of stripped cells, skipping blank lines."
          rows = [[cell.strip() for cell in line.split(',')]
                  for line in text.splitlines() if line.strip()]
          # Ensure table is rectangular
          for row in rows[1:]:
              assert len(row) == len(rows[0]), \
                  "%s row '%s' has %d columns, expected %d" % (what, row[0], len(row), len(rows[0]))
          return rows

      SemanticKind = db_dxil_enum("SemanticKind", "Semantic kind; Arbitrary or specific system value.", [
          (0, "Arbitrary", ""),
          (1, "VertexID", ""),
          (2, "InstanceID", ""),
          (3, "Position", ""),
          (4, "RenderTargetArrayIndex", ""),
          (5, "ViewPortArrayIndex", ""),
          (6, "ClipDistance", ""),
          (7, "CullDistance", ""),
          (8, "OutputControlPointID", ""),
          (9, "DomainLocation", ""),
          (10, "PrimitiveID", ""),
          (11, "GSInstanceID", ""),
          (12, "SampleIndex", ""),
          (13, "IsFrontFace", ""),
          (14, "Coverage", ""),
          (15, "InnerCoverage", ""),
          (16, "Target", ""),
          (17, "Depth", ""),
          (18, "DepthLessEqual", ""),
          (19, "DepthGreaterEqual", ""),
          (20, "StencilRef", ""),
          (21, "DispatchThreadID", ""),
          (22, "GroupID", ""),
          (23, "GroupIndex", ""),
          (24, "GroupThreadID", ""),
          (25, "TessFactor", ""),
          (26, "InsideTessFactor", ""),
          (27, "ViewID", ""),
          (28, "Barycentrics", ""),
          (29, "ShadingRate", ""),
          (30, "CullPrimitive", ""),
          (31, "Invalid", ""),
          ])
      self.enums.append(SemanticKind)

      # Values are contiguous from 0, so each value equals the index of the
      # matching data row in SigPointCSV below (Invalid is row 20).
      SigPointKind = db_dxil_enum("SigPointKind", "Signature Point is more specific than shader stage or signature as it is unique in both stage and item dimensionality or frequency.", [
          (0, "VSIn", "Ordinary Vertex Shader input from Input Assembler"),
          (1, "VSOut", "Ordinary Vertex Shader output that may feed Rasterizer"),
          (2, "PCIn", "Patch Constant function non-patch inputs"),
          (3, "HSIn", "Hull Shader function non-patch inputs"),
          (4, "HSCPIn", "Hull Shader patch inputs - Control Points"),
          (5, "HSCPOut", "Hull Shader function output - Control Point"),
          (6, "PCOut", "Patch Constant function output - Patch Constant data passed to Domain Shader"),
          (7, "DSIn", "Domain Shader regular input - Patch Constant data plus system values"),
          (8, "DSCPIn", "Domain Shader patch input - Control Points"),
          (9, "DSOut", "Domain Shader output - vertex data that may feed Rasterizer"),
          (10, "GSVIn", "Geometry Shader vertex input - qualified with primitive type"),
          (11, "GSIn", "Geometry Shader non-vertex inputs (system values)"),
          (12, "GSOut", "Geometry Shader output - vertex data that may feed Rasterizer"),
          (13, "PSIn", "Pixel Shader input"),
          (14, "PSOut", "Pixel Shader output"),
          (15, "CSIn", "Compute Shader input"),
          (16, "MSIn", "Mesh Shader input"),
          (17, "MSOut", "Mesh Shader vertices output"),
          (18, "MSPOut", "Mesh Shader primitives output"),
          (19, "ASIn", "Amplification Shader input"),
          (20, "Invalid", ""),
          ])
      self.enums.append(SigPointKind)

      # PackingKind is only appended to self.enums after the SigPoint table
      # has been validated (see below); the append order is kept as-is.
      PackingKind = db_dxil_enum("PackingKind", "Kind of signature point", [
          (0, "None", "No packing should be performed"),
          (1, "InputAssembler", "Vertex Shader input from Input Assembler"),
          (2, "Vertex", "Vertex that may feed the Rasterizer"),
          (3, "PatchConstant", "Patch constant signature"),
          (4, "Target", "Render Target (Pixel Shader Output)"),
          (5, "Invalid", ""),
          ])

      Float32DenormMode = db_dxil_enum("Float32DenormMode", "float32 denorm behavior", [
          (0, "Any", "Undefined behavior for denormal numbers"),
          (1, "Preserve", "Preserve both input and output"),
          (2, "FTZ", "Preserve denormal inputs. Flush denorm outputs"),
          (3, "Reserve3", "Reserved Value. Not used for now"),
          (4, "Reserve4", "Reserved Value. Not used for now"),
          (5, "Reserve5", "Reserved Value. Not used for now"),
          (6, "Reserve6", "Reserved Value. Not used for now"),
          (7, "Reserve7", "Reserved Value. Not used for now"),
          ])
      self.enums.append(Float32DenormMode)

      # One row per SigPointKind value, in enum order (checked below).
      # Cells are stripped when parsed, so spacing here is cosmetic.
      SigPointCSV = """
          SigPoint, Related, ShaderKind, PackingKind, SignatureKind
          VSIn, Invalid, Vertex, InputAssembler, Input
          VSOut, Invalid, Vertex, Vertex, Output
          PCIn, HSCPIn, Hull, None, Invalid
          HSIn, HSCPIn, Hull, None, Invalid
          HSCPIn, Invalid, Hull, Vertex, Input
          HSCPOut, Invalid, Hull, Vertex, Output
          PCOut, Invalid, Hull, PatchConstant, PatchConstOrPrim
          DSIn, Invalid, Domain, PatchConstant, PatchConstOrPrim
          DSCPIn, Invalid, Domain, Vertex, Input
          DSOut, Invalid, Domain, Vertex, Output
          GSVIn, Invalid, Geometry, Vertex, Input
          GSIn, GSVIn, Geometry, None, Invalid
          GSOut, Invalid, Geometry, Vertex, Output
          PSIn, Invalid, Pixel, Vertex, Input
          PSOut, Invalid, Pixel, Target, Output
          CSIn, Invalid, Compute, None, Invalid
          MSIn, Invalid, Mesh, None, Invalid
          MSOut, Invalid, Mesh, Vertex, Output
          MSPOut, Invalid, Mesh, Vertex, PatchConstOrPrim
          ASIn, Invalid, Amplification, None, Invalid
          Invalid, Invalid, Invalid, Invalid, Invalid
          """
      table = parse_csv(SigPointCSV, 'SigPointCSV')
      # Make sure labels match enums, otherwise the table isn't aligned or in-sync
      assert [row[0] for row in table[1:]] == SigPointKind.value_names(), \
          'SigPointKind does not align with SigPointCSV row labels'
      self.sigpoint_table = table
      self.enums.append(PackingKind)

      # Every interpretation kind needs its own value; the values are
      # contiguous from 0 with Invalid last.
      SemanticInterpretationKind = db_dxil_enum("SemanticInterpretationKind", "Defines how a semantic is interpreted at a particular SignaturePoint", [
          (0, "NA", "Not Available"),
          (1, "SV", "Normal System Value"),
          (2, "SGV", "System Generated Value (sorted last)"),
          (3, "Arb", "Treated as Arbitrary"),
          (4, "NotInSig", "Not included in signature (intrinsic access)"),
          (5, "NotPacked", "Included in signature, but does not contribute to packing"),
          (6, "Target", "Special handling for SV_Target"),
          (7, "TessFactor", "Special handling for tessellation factors"),
          (8, "Shadow", "Shadow element must be added to a signature for compatibility"),
          (9, "ClipCull", "Special packing rules for SV_ClipDistance or SV_CullDistance"),
          (10, "Invalid", ""),
          ])
      self.enums.append(SemanticInterpretationKind)

      # The following has SampleIndex, Coverage, and InnerCoverage as loaded with instructions rather than from the signature
      # Rows are SemanticKind values and columns are SigPointKind values (both without Invalid).
      # NOTE(review): cell suffixes such as " _41", " _50", " _61" and " _64" look like
      # minimum shader model tags - confirm against the code that consumes this table.
      SemanticInterpretationCSV = """
          Semantic,VSIn,VSOut,PCIn,HSIn,HSCPIn,HSCPOut,PCOut,DSIn,DSCPIn,DSOut,GSVIn,GSIn,GSOut,PSIn,PSOut,CSIn,MSIn,MSOut,MSPOut,ASIn
          Arbitrary,Arb,Arb,NA,NA,Arb,Arb,Arb,Arb,Arb,Arb,Arb,NA,Arb,Arb,NA,NA,NA,Arb,Arb,NA
          VertexID,SV,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
          InstanceID,SV,Arb,NA,NA,Arb,Arb,NA,NA,Arb,Arb,Arb,NA,Arb,Arb,NA,NA,NA,NA,NA,NA
          Position,Arb,SV,NA,NA,SV,SV,Arb,Arb,SV,SV,SV,NA,SV,SV,NA,NA,NA,SV,NA,NA
          RenderTargetArrayIndex,Arb,SV,NA,NA,SV,SV,Arb,Arb,SV,SV,SV,NA,SV,SV,NA,NA,NA,NA,SV,NA
          ViewPortArrayIndex,Arb,SV,NA,NA,SV,SV,Arb,Arb,SV,SV,SV,NA,SV,SV,NA,NA,NA,NA,SV,NA
          ClipDistance,Arb,ClipCull,NA,NA,ClipCull,ClipCull,Arb,Arb,ClipCull,ClipCull,ClipCull,NA,ClipCull,ClipCull,NA,NA,NA,ClipCull,NA,NA
          CullDistance,Arb,ClipCull,NA,NA,ClipCull,ClipCull,Arb,Arb,ClipCull,ClipCull,ClipCull,NA,ClipCull,ClipCull,NA,NA,NA,ClipCull,NA,NA
          OutputControlPointID,NA,NA,NA,NotInSig,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
          DomainLocation,NA,NA,NA,NA,NA,NA,NA,NotInSig,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
          PrimitiveID,NA,NA,NotInSig,NotInSig,NA,NA,NA,NotInSig,NA,NA,NA,Shadow,SGV,SGV,NA,NA,NA,NA,SV,NA
          GSInstanceID,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NotInSig,NA,NA,NA,NA,NA,NA,NA,NA
          SampleIndex,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Shadow _41,NA,NA,NA,NA,NA,NA
          IsFrontFace,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,SGV,SGV,NA,NA,NA,NA,NA,NA
          Coverage,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NotInSig _50,NotPacked _41,NA,NA,NA,NA,NA
          InnerCoverage,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NotInSig _50,NA,NA,NA,NA,NA,NA
          Target,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Target,NA,NA,NA,NA,NA
          Depth,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NotPacked,NA,NA,NA,NA,NA
          DepthLessEqual,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NotPacked _50,NA,NA,NA,NA,NA
          DepthGreaterEqual,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NotPacked _50,NA,NA,NA,NA,NA
          StencilRef,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NotPacked _50,NA,NA,NA,NA,NA
          DispatchThreadID,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NotInSig,NotInSig,NA,NA,NotInSig
          GroupID,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NotInSig,NotInSig,NA,NA,NotInSig
          GroupIndex,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NotInSig,NotInSig,NA,NA,NotInSig
          GroupThreadID,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NotInSig,NotInSig,NA,NA,NotInSig
          TessFactor,NA,NA,NA,NA,NA,NA,TessFactor,TessFactor,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
          InsideTessFactor,NA,NA,NA,NA,NA,NA,TessFactor,TessFactor,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
          ViewID,NotInSig _61,NA,NotInSig _61,NotInSig _61,NA,NA,NA,NotInSig _61,NA,NA,NA,NotInSig _61,NA,NotInSig _61,NA,NA,NotInSig,NA,NA,NA
          Barycentrics,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NotPacked _61,NA,NA,NA,NA,NA,NA
          ShadingRate,NA,SV _64,NA,NA,SV _64,SV _64,NA,NA,SV _64,SV _64,SV _64,NA,SV _64,SV _64,NA,NA,NA,NA,SV,NA
          CullPrimitive,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NotInSig,NA,NA,NA,NA,NotPacked,NA
          """
      table = parse_csv(SemanticInterpretationCSV, 'SemanticInterpretationCSV')
      # Make sure labels match enums, otherwise the table isn't aligned or in-sync
      assert table[0][1:] == SigPointKind.value_names()[:-1], \
          'SigPointKind does not align with SemanticInterpretationCSV column labels' # exclude Invalid
      assert [row[0] for row in table[1:]] == SemanticKind.value_names()[:-1], \
          'SemanticKind does not align with SemanticInterpretationCSV row labels' # exclude Invalid
      self.interpretation_table = table
  2245. def build_valrules(self):
  2246. self.add_valrule_msg("Bitcode.Valid", "TODO - Module must be bitcode-valid", "Module bitcode is invalid.")
  2247. self.add_valrule_msg("Container.PartMatches", "DXIL Container Parts must match Module", "Container part '%0' does not match expected for module.")
  2248. self.add_valrule_msg("Container.PartRepeated", "DXIL Container must have only one of each part type", "More than one container part '%0'.")
  2249. self.add_valrule_msg("Container.PartMissing", "DXIL Container requires certain parts, corresponding to module", "Missing part '%0' required by module.")
  2250. self.add_valrule_msg("Container.PartInvalid", "DXIL Container must not contain unknown parts", "Unknown part '%0' found in DXIL container.")
  2251. self.add_valrule_msg("Container.RootSignatureIncompatible", "Root Signature in DXIL Container must be compatible with shader", "Root Signature in DXIL container is not compatible with shader.")
  2252. self.add_valrule("Meta.Required", "TODO - Required metadata missing.")
  2253. self.add_valrule_msg("Meta.Known", "Named metadata should be known", "Named metadata '%0' is unknown.")
  2254. self.add_valrule("Meta.Used", "All metadata must be used by dxil.")
  2255. self.add_valrule_msg("Meta.Target", "Target triple must be 'dxil-ms-dx'", "Unknown target triple '%0'.")
  2256. self.add_valrule("Meta.WellFormed", "TODO - Metadata must be well-formed in operand count and types.")
  2257. self.add_valrule("Meta.SemanticLen", "Semantic length must be at least 1 and at most 64.")
  2258. self.add_valrule_msg("Meta.InterpModeValid", "Interpolation mode must be valid", "Invalid interpolation mode for '%0'.")
  2259. self.add_valrule_msg("Meta.SemaKindValid", "Semantic kind must be valid", "Semantic kind for '%0' is invalid.")
  2260. self.add_valrule_msg("Meta.NoSemanticOverlap", "Semantics must not overlap", "Semantic '%0' overlap at %1.")
  2261. self.add_valrule_msg("Meta.SemaKindMatchesName", "Semantic name must match system value, when defined.", "Semantic name %0 does not match System Value kind %1.")
  2262. self.add_valrule_msg("Meta.DuplicateSysValue", "System value may only appear once in signature", "System value %0 appears more than once in the same signature.")
  2263. self.add_valrule_msg("Meta.SemanticIndexMax", "System value semantics have a maximum valid semantic index", "%0 semantic index exceeds maximum (%1).")
  2264. self.add_valrule_msg("Meta.SystemValueRows", "System value may only have 1 row", "rows for system value semantic %0 must be 1.")
  2265. self.add_valrule_msg("Meta.SemanticShouldBeAllocated", "Semantic should have a valid packing location", "%0 Semantic '%1' should have a valid packing location.")
  2266. self.add_valrule_msg("Meta.SemanticShouldNotBeAllocated", "Semantic should have a packing location of -1", "%0 Semantic '%1' should have a packing location of -1.")
  2267. self.add_valrule("Meta.ValueRange", "Metadata value must be within range.")
  2268. self.add_valrule("Meta.FlagsUsage", "Flags must match usage.")
  2269. self.add_valrule("Meta.DenseResIDs", "Resource identifiers must be zero-based and dense.")
  2270. self.add_valrule_msg("Meta.SignatureOverlap", "Signature elements may not overlap in packing location.", "signature element %0 at location (%1,%2) size (%3,%4) overlaps another signature element.")
  2271. self.add_valrule_msg("Meta.SignatureOutOfRange", "Signature elements must fit within maximum signature size", "signature element %0 at location (%1,%2) size (%3,%4) is out of range.")
  2272. self.add_valrule_msg("Meta.SignatureIndexConflict", "Only elements with compatible indexing rules may be packed together", "signature element %0 at location (%1,%2) size (%3,%4) has an indexing conflict with another signature element packed into the same row.")
  2273. self.add_valrule_msg("Meta.SignatureIllegalComponentOrder", "Component ordering for packed elements must be: arbitrary < system value < system generated value", "signature element %0 at location (%1,%2) size (%3,%4) violates component ordering rule (arb < sv < sgv).")
  2274. self.add_valrule_msg("Meta.SignatureDataWidth", "Data width must be identical for all elements packed into the same row.", "signature element %0 at location (%1, %2) size (%3, %4) has data width that differs from another element packed into the same row.")
  2275. self.add_valrule_msg("Meta.IntegerInterpMode", "Interpolation mode on integer must be Constant", "signature element %0 specifies invalid interpolation mode for integer component type.")
  2276. self.add_valrule_msg("Meta.InterpModeInOneRow", "Interpolation mode must be identical for all elements packed into the same row.", "signature element %0 at location (%1,%2) size (%3,%4) has interpolation mode that differs from another element packed into the same row.")
  2277. self.add_valrule("Meta.SemanticCompType", "%0 must be %1.")
  2278. self.add_valrule_msg("Meta.ClipCullMaxRows", "Combined elements of SV_ClipDistance and SV_CullDistance must fit in two rows.", "ClipDistance and CullDistance occupy more than the maximum of 2 rows combined.")
  2279. self.add_valrule_msg("Meta.ClipCullMaxComponents", "Combined elements of SV_ClipDistance and SV_CullDistance must fit in 8 components", "ClipDistance and CullDistance use more than the maximum of 8 components combined.")
  2280. self.add_valrule("Meta.SignatureCompType", "signature %0 specifies unrecognized or invalid component type.")
  2281. self.add_valrule("Meta.TessellatorPartition", "Invalid Tessellator Partitioning specified. Must be integer, pow2, fractional_odd or fractional_even.")
  2282. self.add_valrule("Meta.TessellatorOutputPrimitive", "Invalid Tessellator Output Primitive specified. Must be point, line, triangleCW or triangleCCW.")
  2283. self.add_valrule("Meta.MaxTessFactor", "Hull Shader MaxTessFactor must be [%0..%1]. %2 specified.")
  2284. self.add_valrule("Meta.ValidSamplerMode", "Invalid sampler mode on sampler .")
  2285. self.add_valrule("Meta.GlcNotOnAppendConsume", "globallycoherent cannot be used with append/consume buffers: '%0'.")
  2286. self.add_valrule_msg("Meta.StructBufAlignment", "StructuredBuffer stride not aligned","structured buffer element size must be a multiple of %0 bytes (actual size %1 bytes).")
  2287. self.add_valrule_msg("Meta.StructBufAlignmentOutOfBound", "StructuredBuffer stride out of bounds","structured buffer elements cannot be larger than %0 bytes (actual size %1 bytes).")
  2288. self.add_valrule("Meta.EntryFunction", "entrypoint not found.")
  2289. self.add_valrule("Meta.InvalidControlFlowHint", "Invalid control flow hint.")
  2290. self.add_valrule("Meta.BranchFlatten", "Can't use branch and flatten attributes together.")
  2291. self.add_valrule("Meta.ForceCaseOnSwitch", "Attribute forcecase only works for switch.")
  2292. self.add_valrule("Meta.ControlFlowHintNotOnControlFlow", "Control flow hint only works on control flow inst.")
  2293. self.add_valrule("Meta.TextureType", "elements of typed buffers and textures must fit in four 32-bit quantities.")
  2294. self.add_valrule("Meta.BarycentricsInterpolation", "SV_Barycentrics cannot be used with 'nointerpolation' type.")
  2295. self.add_valrule("Meta.BarycentricsFloat3", "only 'float3' type is allowed for SV_Barycentrics.")
  2296. self.add_valrule("Meta.BarycentricsTwoPerspectives", "There can only be up to two input attributes of SV_Barycentrics with different perspective interpolation mode.")
  2297. self.add_valrule("Meta.NoEntryPropsForEntry", "Entry point %0 must have entry properties.")
  2298. self.add_valrule("Instr.Oload", "DXIL intrinsic overload must be valid.")
  2299. self.add_valrule_msg("Instr.CallOload", "Call to DXIL intrinsic must match overload signature", "Call to DXIL intrinsic '%0' does not match an allowed overload signature.")
  2300. self.add_valrule("Instr.PtrBitCast", "Pointer type bitcast must be have same size.")
  2301. self.add_valrule("Instr.MinPrecisonBitCast", "Bitcast on minprecison types is not allowed.")
  2302. self.add_valrule("Instr.StructBitCast", "Bitcast on struct types is not allowed.")
  2303. self.add_valrule("Instr.Status", "Resource status should only be used by CheckAccessFullyMapped.")
  2304. self.add_valrule("Instr.CheckAccessFullyMapped", "CheckAccessFullyMapped should only be used on resource status.")
  2305. self.add_valrule_msg("Instr.OpConst", "DXIL intrinsic requires an immediate constant operand", "%0 of %1 must be an immediate constant.")
  2306. self.add_valrule("Instr.Allowed", "Instructions must be of an allowed type.")
  2307. self.add_valrule("Instr.OpCodeReserved", "Instructions must not reference reserved opcodes.")
  2308. self.add_valrule_msg("Instr.OperandRange", "DXIL intrinsic operand must be within defined range", "expect %0 between %1, got %2.")
  2309. self.add_valrule("Instr.NoReadingUninitialized", "Instructions should not read uninitialized value.")
  2310. self.add_valrule("Instr.NoGenericPtrAddrSpaceCast", "Address space cast between pointer types must have one part to be generic address space.")
  2311. self.add_valrule("Instr.InBoundsAccess", "Access to out-of-bounds memory is disallowed.")
  2312. self.add_valrule("Instr.OpConstRange", "Constant values must be in-range for operation.")
  2313. self.add_valrule("Instr.ImmBiasForSampleB", "bias amount for sample_b must be in the range [%0,%1], but %2 was specified as an immediate.")
  2314. # If streams have not been declared, you must use cut instead of cut_stream in GS - is there an equivalent rule here?
  2315. # Need to clean up all error messages and actually implement.
  2316. # Midlevel
  2317. self.add_valrule("Instr.NoIndefiniteLog", "No indefinite logarithm.")
  2318. self.add_valrule("Instr.NoIndefiniteAsin", "No indefinite arcsine.")
  2319. self.add_valrule("Instr.NoIndefiniteAcos", "No indefinite arccosine.")
  2320. self.add_valrule("Instr.NoIDivByZero", "No signed integer division by zero.")
  2321. self.add_valrule("Instr.NoUDivByZero", "No unsigned integer division by zero.")
  2322. self.add_valrule("Instr.NoIndefiniteDsxy", "No indefinite derivative calculation.")
  2323. self.add_valrule("Instr.MinPrecisionNotPrecise", "Instructions marked precise may not refer to minprecision values.")
  2324. # Backend
  2325. self.add_valrule("Instr.OnlyOneAllocConsume", "RWStructuredBuffers may increment or decrement their counters, but not both.")
  2326. # CCompiler
  2327. self.add_valrule("Instr.TextureOffset", "offset texture instructions must take offset which can resolve to integer literal in the range -8 to 7.")
  2328. # D3D12
  2329. self.add_valrule_msg("Instr.CannotPullPosition", "pull-model evaluation of position disallowed", "%0 does not support pull-model evaluation of position.")
  2330. #self.add_valrule("Instr.ERR_GUARANTEED_RACE_CONDITION_UAV", "TODO - race condition writing to shared resource detected, consider making this write conditional.") warning on fxc.
  2331. #self.add_valrule("Instr.ERR_GUARANTEED_RACE_CONDITION_GSM", "TODO - race condition writing to shared memory detected, consider making this write conditional.") warning on fxc.
  2332. #self.add_valrule("Instr.ERR_INFINITE_LOOP", "TODO - ERR_INFINITE_LOOP") fxc will report error if it can prove the loop is infinite.
  2333. self.add_valrule("Instr.EvalInterpolationMode", "Interpolation mode on %0 used with eval_* instruction must be linear, linear_centroid, linear_noperspective, linear_noperspective_centroid, linear_sample or linear_noperspective_sample.")
  2334. self.add_valrule("Instr.ResourceCoordinateMiss", "coord uninitialized.")
  2335. self.add_valrule("Instr.ResourceCoordinateTooMany", "out of bound coord must be undef.")
  2336. self.add_valrule("Instr.ResourceOffsetMiss", "offset uninitialized.")
  2337. self.add_valrule("Instr.ResourceOffsetTooMany", "out of bound offset must be undef.")
  2338. self.add_valrule("Instr.UndefResultForGetDimension", "GetDimensions used undef dimension %0 on %1.")
  2339. self.add_valrule("Instr.SamplerModeForLOD", "lod instruction requires sampler declared in default mode.")
  2340. self.add_valrule("Instr.SamplerModeForSample", "sample/_l/_d/_cl_s/gather instruction requires sampler declared in default mode.")
  2341. self.add_valrule("Instr.SamplerModeForSampleC", "sample_c_*/gather_c instructions require sampler declared in comparison mode.")
  2342. self.add_valrule("Instr.SampleCompType", "sample_* instructions require resource to be declared to return UNORM, SNORM or FLOAT.")
  2343. self.add_valrule("Instr.BarrierModeUselessUGroup", "sync can't specify both _ugroup and _uglobal. If both are needed, just specify _uglobal.")
  2344. self.add_valrule("Instr.BarrierModeNoMemory", "sync must include some form of memory barrier - _u (UAV) and/or _g (Thread Group Shared Memory). Only _t (thread group sync) is optional.")
  2345. self.add_valrule("Instr.BarrierModeForNonCS", "sync in a non-Compute/Amplification/Mesh Shader must only sync UAV (sync_uglobal).")
  2346. self.add_valrule("Instr.WriteMaskForTypedUAVStore", "store on typed uav must write to all four components of the UAV.")
  2347. self.add_valrule("Instr.ResourceKindForCalcLOD","lod requires resource declared as texture1D/2D/3D/Cube/CubeArray/1DArray/2DArray.")
  2348. self.add_valrule("Instr.ResourceKindForSample", "sample/_l/_d requires resource declared as texture1D/2D/3D/Cube/1DArray/2DArray/CubeArray.")
  2349. self.add_valrule("Instr.ResourceKindForSampleC", "samplec requires resource declared as texture1D/2D/Cube/1DArray/2DArray/CubeArray.")
  2350. self.add_valrule("Instr.ResourceKindForGather", "gather requires resource declared as texture/2D/Cube/2DArray/CubeArray.")
  2351. self.add_valrule("Instr.WriteMaskMatchValueForUAVStore", "uav store write mask must match store value mask, write mask is %0 and store value mask is %1.")
  2352. self.add_valrule("Instr.UndefinedValueForUAVStore", "Assignment of undefined values to UAV.")
  2353. self.add_valrule("Instr.ResourceKindForBufferLoadStore", "buffer load/store only works on Raw/Typed/StructuredBuffer.")
  2354. self.add_valrule("Instr.ResourceKindForTextureStore", "texture store only works on Texture1D/1DArray/2D/2DArray/3D.")
  2355. self.add_valrule("Instr.ResourceKindForGetDim", "Invalid resource kind on GetDimensions.")
  2356. self.add_valrule("Instr.ResourceKindForTextureLoad", "texture load only works on Texture1D/1DArray/2D/2DArray/3D/MS2D/MS2DArray.")
  2357. self.add_valrule("Instr.ResourceClassForSamplerGather", "sample, lod and gather should be on srv resource.")
  2358. self.add_valrule("Instr.ResourceClassForUAVStore", "store should be on uav resource.")
  2359. self.add_valrule("Instr.ResourceClassForLoad", "load can only run on UAV/SRV resource.")
  2360. self.add_valrule("Instr.ResourceMapToSingleEntry", "Fail to map resource to resource table.")
  2361. self.add_valrule("Instr.ResourceUser", "Resource should only be used by Load/GEP/Call.")
  2362. self.add_valrule("Instr.ResourceKindForTraceRay", "TraceRay should only use RTAccelerationStructure.")
  2363. self.add_valrule("Instr.OffsetOnUAVLoad", "uav load don't support offset.")
  2364. self.add_valrule("Instr.MipOnUAVLoad", "uav load don't support mipLevel/sampleIndex.")
  2365. self.add_valrule("Instr.SampleIndexForLoad2DMS", "load on Texture2DMS/2DMSArray require sampleIndex.")
  2366. self.add_valrule("Instr.CoordinateCountForRawTypedBuf", "raw/typed buffer don't need 2 coordinates.")
  2367. self.add_valrule("Instr.CoordinateCountForStructBuf", "structured buffer require 2 coordinates.")
  2368. self.add_valrule("Instr.MipLevelForGetDimension", "Use mip level on buffer when GetDimensions.")
  2369. self.add_valrule("Instr.DxilStructUser", "Dxil struct types should only be used by ExtractValue.")
  2370. self.add_valrule("Instr.DxilStructUserOutOfBound", "Index out of bound when extract value from dxil struct types.")
  2371. self.add_valrule("Instr.HandleNotFromCreateHandle", "Resource handle should returned by createHandle.")
  2372. self.add_valrule("Instr.BufferUpdateCounterOnUAV", "BufferUpdateCounter valid only on UAV.")
  2373. self.add_valrule("Instr.BufferUpdateCounterOnResHasCounter", "BufferUpdateCounter valid only when HasCounter is true.")
  2374. self.add_valrule("Instr.CBufferOutOfBound", "Cbuffer access out of bound.")
  2375. self.add_valrule("Instr.CBufferClassForCBufferHandle", "Expect Cbuffer for CBufferLoad handle.")
  2376. self.add_valrule("Instr.FailToResloveTGSMPointer", "TGSM pointers must originate from an unambiguous TGSM global variable.")
  2377. self.add_valrule("Instr.ExtractValue", "ExtractValue should only be used on dxil struct types and cmpxchg.")
  2378. self.add_valrule("Instr.TGSMRaceCond", "Race condition writing to shared memory detected, consider making this write conditional.")
  2379. self.add_valrule("Instr.AttributeAtVertexNoInterpolation", "Attribute %0 must have nointerpolation mode in order to use GetAttributeAtVertex function.")
  2380. self.add_valrule("Instr.CreateHandleImmRangeID", "Local resource must map to global resource.")
  2381. self.add_valrule("Instr.SignatureOperationNotInEntry", "Dxil operation for input output signature must be in entryPoints.")
  2382. self.add_valrule("Instr.MultipleSetMeshOutputCounts", "SetMeshOUtputCounts cannot be called multiple times.")
  2383. self.add_valrule("Instr.MissingSetMeshOutputCounts", "Missing SetMeshOutputCounts call.")
  2384. self.add_valrule("Instr.NonDominatingSetMeshOutputCounts", "Non-Dominating SetMeshOutputCounts call.")
  2385. self.add_valrule("Instr.MultipleGetMeshPayload", "GetMeshPayload cannot be called multiple times.")
  2386. self.add_valrule("Instr.NotOnceDispatchMesh", "DispatchMesh must be called exactly once in an Amplification shader.")
  2387. self.add_valrule("Instr.NonDominatingDispatchMesh", "Non-Dominating DispatchMesh call.")
  2388. # Some legacy rules:
  2389. # - space is only supported for shader targets 5.1 and higher
  2390. # - multiple rules regarding derivatives, which isn't a supported feature for DXIL
  2391. # - multiple rules regarding library functions, which isn't a supported feature for DXIL (at this time)
  2392. # - multiple rules regarding interfaces, which isn't a supported feature for DXIL
  2393. # - rules for DX9-style intrinsics, which aren't supported for DXIL
  2394. self.add_valrule_msg("Types.NoVector", "Vector types must not be present", "Vector type '%0' is not allowed.")
  2395. self.add_valrule_msg("Types.Defined", "Type must be defined based on DXIL primitives", "Type '%0' is not defined on DXIL primitives.")
  2396. self.add_valrule_msg("Types.IntWidth", "Int type must be of valid width", "Int type '%0' has an invalid width.")
  2397. self.add_valrule("Types.NoMultiDim", "Only one dimension allowed for array type.")
  2398. self.add_valrule("Types.NoPtrToPtr", "Pointers to pointers, or pointers in structures are not allowed.")
  2399. self.add_valrule("Types.I8", "I8 can only be used as immediate value for intrinsic or as i8* via bitcast by lifetime intrinsics.")
  2400. self.add_valrule_msg("Sm.Name", "Target shader model name must be known", "Unknown shader model '%0'.")
  2401. self.add_valrule_msg("Sm.DxilVersion", "Target shader model requires specific Dxil Version", "Shader model requires Dxil Version %0,%1.")
  2402. self.add_valrule_msg("Sm.Opcode", "Opcode must be defined in target shader model", "Opcode %0 not valid in shader model %1.")
  2403. self.add_valrule("Sm.Operand", "Operand must be defined in target shader model.")
  2404. self.add_valrule_msg("Sm.Semantic", "Semantic must be defined in target shader model", "Semantic '%0' is invalid as %1 %2.")
  2405. self.add_valrule_msg("Sm.NoInterpMode", "Interpolation mode must be undefined for VS input/PS output/patch constant.", "Interpolation mode for '%0' is set but should be undefined.")
  2406. self.add_valrule_msg("Sm.ConstantInterpMode", "Interpolation mode must be constant for MS primitive output.", "Interpolation mode for '%0' should be constant.")
  2407. self.add_valrule("Sm.NoPSOutputIdx", "Pixel shader output registers are not indexable.")# TODO restrict to PS
  2408. self.add_valrule("Sm.PSConsistentInterp", "Interpolation mode for PS input position must be linear_noperspective_centroid or linear_noperspective_sample when outputting oDepthGE or oDepthLE and not running at sample frequency (which is forced by inputting SV_SampleIndex or declaring an input linear_sample or linear_noperspective_sample).")
  2409. self.add_valrule("Sm.ThreadGroupChannelRange", "Declared Thread Group %0 size %1 outside valid range [%2..%3].")
  2410. self.add_valrule("Sm.MaxTheadGroup", "Declared Thread Group Count %0 (X*Y*Z) is beyond the valid maximum of %1.")
  2411. self.add_valrule("Sm.MaxTGSMSize", "Total Thread Group Shared Memory storage is %0, exceeded %1.")
  2412. self.add_valrule("Sm.WaveSizeValue", "Declared WaveSize %0 outside valid range [%1..%2], or not a power of 2.")
  2413. self.add_valrule("Sm.WaveSizeNeedsDxil16Plus", "WaveSize is valid only for DXIL version 1.6 and higher.")
  2414. self.add_valrule("Sm.ROVOnlyInPS", "RasterizerOrdered objects are only allowed in 5.0+ pixel shaders.")
  2415. self.add_valrule("Sm.TessFactorForDomain", "Required TessFactor for domain not found declared anywhere in Patch Constant data.")
  2416. self.add_valrule("Sm.TessFactorSizeMatchDomain", "TessFactor rows, columns (%0, %1) invalid for domain %2. Expected %3 rows and 1 column.")
  2417. self.add_valrule("Sm.InsideTessFactorSizeMatchDomain", "InsideTessFactor rows, columns (%0, %1) invalid for domain %2. Expected %3 rows and 1 column.")
  2418. self.add_valrule("Sm.DomainLocationIdxOOB", "DomainLocation component index out of bounds for the domain.")
  2419. self.add_valrule("Sm.HullPassThruControlPointCountMatch", "For pass thru hull shader, input control point count must match output control point count");
  2420. self.add_valrule("Sm.OutputControlPointsTotalScalars", "Total number of scalars across all HS output control points must not exceed .")
  2421. self.add_valrule("Sm.IsoLineOutputPrimitiveMismatch", "Hull Shader declared with IsoLine Domain must specify output primitive point or line. Triangle_cw or triangle_ccw output are not compatible with the IsoLine Domain.")
  2422. self.add_valrule("Sm.TriOutputPrimitiveMismatch", "Hull Shader declared with Tri Domain must specify output primitive point, triangle_cw or triangle_ccw. Line output is not compatible with the Tri domain.")
  2423. self.add_valrule("Sm.ValidDomain", "Invalid Tessellator Domain specified. Must be isoline, tri or quad.")
  2424. self.add_valrule("Sm.PatchConstantOnlyForHSDS", "patch constant signature only valid in HS and DS.")
  2425. self.add_valrule("Sm.StreamIndexRange", "Stream index (%0) must between 0 and %1.")
  2426. self.add_valrule("Sm.PSOutputSemantic", "Pixel Shader allows output semantics to be SV_Target, SV_Depth, SV_DepthGreaterEqual, SV_DepthLessEqual, SV_Coverage or SV_StencilRef, %0 found.")
  2427. self.add_valrule("Sm.PSMultipleDepthSemantic", "Pixel Shader only allows one type of depth semantic to be declared.")
  2428. self.add_valrule("Sm.PSTargetIndexMatchesRow", "SV_Target semantic index must match packed row location.")
  2429. self.add_valrule("Sm.PSTargetCol0", "SV_Target packed location must start at column 0.")
  2430. self.add_valrule("Sm.PSCoverageAndInnerCoverage", "InnerCoverage and Coverage are mutually exclusive.")
  2431. self.add_valrule("Sm.GSOutputVertexCountRange", "GS output vertex count must be [0..%0]. %1 specified.")
  2432. self.add_valrule("Sm.GSInstanceCountRange", "GS instance count must be [1..%0]. %1 specified.")
  2433. self.add_valrule("Sm.DSInputControlPointCountRange", "DS input control point count must be [0..%0]. %1 specified.")
  2434. self.add_valrule("Sm.HSInputControlPointCountRange", "HS input control point count must be [0..%0]. %1 specified.")
  2435. self.add_valrule("Sm.ZeroHSInputControlPointWithInput", "When HS input control point count is 0, no input signature should exist.")
  2436. self.add_valrule("Sm.OutputControlPointCountRange", "output control point count must be [0..%0]. %1 specified.")
  2437. self.add_valrule("Sm.GSValidInputPrimitive", "GS input primitive unrecognized.")
  2438. self.add_valrule("Sm.GSValidOutputPrimitiveTopology", "GS output primitive topology unrecognized.")
  2439. self.add_valrule("Sm.AppendAndConsumeOnSameUAV", "BufferUpdateCounter inc and dec on a given UAV (%d) cannot both be in the same shader for shader model less than 5.1.")
  2440. self.add_valrule("Sm.InvalidTextureKindOnUAV", "Texture2DMS[Array] or TextureCube[Array] resources are not supported with UAVs.")
  2441. self.add_valrule("Sm.InvalidResourceKind", "Invalid resources kind.")
  2442. self.add_valrule("Sm.InvalidResourceCompType","Invalid resource return type.")
  2443. self.add_valrule("Sm.InvalidSamplerFeedbackType","Invalid sampler feedback type.")
  2444. self.add_valrule("Sm.SampleCountOnlyOn2DMS","Only Texture2DMS/2DMSArray could has sample count.")
  2445. self.add_valrule("Sm.CounterOnlyOnStructBuf", "BufferUpdateCounter valid only on structured buffers.")
  2446. self.add_valrule("Sm.GSTotalOutputVertexDataRange", "Declared output vertex count (%0) multiplied by the total number of declared scalar components of output data (%1) equals %2. This value cannot be greater than %3.")
  2447. self.add_valrule_msg("Sm.MultiStreamMustBePoint", "When multiple GS output streams are used they must be pointlists", "Multiple GS output streams are used but '%0' is not pointlist.")
  2448. self.add_valrule("Sm.CompletePosition", "Not all elements of SV_Position were written.")
  2449. self.add_valrule("Sm.UndefinedOutput", "Not all elements of output %0 were written.")
  2450. self.add_valrule("Sm.CSNoSignatures", "Compute shaders must not have shader signatures.")
  2451. self.add_valrule("Sm.CBufferTemplateTypeMustBeStruct", "D3D12 constant/texture buffer template element can only be a struct.")
  2452. self.add_valrule_msg("Sm.ResourceRangeOverlap", "Resource ranges must not overlap", "Resource %0 with base %1 size %2 overlap with other resource with base %3 size %4 in space %5.")
  2453. self.add_valrule_msg("Sm.CBufferOffsetOverlap", "CBuffer offsets must not overlap", "CBuffer %0 has offset overlaps at %1.")
  2454. self.add_valrule_msg("Sm.CBufferElementOverflow", "CBuffer elements must not overflow", "CBuffer %0 size insufficient for element at offset %1.")
  2455. self.add_valrule_msg("Sm.CBufferArrayOffsetAlignment", "CBuffer array offset must be aligned to 16-bytes", "CBuffer %0 has unaligned array offset at %1.")
  2456. self.add_valrule_msg("Sm.OpcodeInInvalidFunction", "Invalid DXIL opcode usage like StorePatchConstant in patch constant function", "opcode '%0' should only be used in '%1'.")
  2457. self.add_valrule_msg("Sm.ViewIDNeedsSlot", "ViewID requires compatible space in pixel shader input signature", "Pixel shader input signature lacks available space for ViewID.")
  2458. self.add_valrule("Sm.64bitRawBufferLoadStore", "i64/f64 rawBufferLoad/Store overloads are allowed after SM 6.3.")
  2459. self.add_valrule("Sm.RayShaderSignatures", "Ray tracing shader '%0' should not have any shader signatures.")
  2460. self.add_valrule("Sm.RayShaderPayloadSize", "For shader '%0', %1 size is smaller than argument's allocation size.")
  2461. self.add_valrule("Sm.MeshShaderMaxVertexCount", "MS max vertex output count must be [0..%0]. %1 specified.")
  2462. self.add_valrule("Sm.MeshShaderMaxPrimitiveCount", "MS max primitive output count must be [0..%0]. %1 specified.")
  2463. self.add_valrule("Sm.MeshShaderPayloadSize", "For mesh shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes.")
  2464. self.add_valrule("Sm.MeshShaderPayloadSizeDeclared", "For mesh shader with entry '%0', payload size %1 is greater than declared size of %2 bytes.")
  2465. self.add_valrule("Sm.MeshShaderOutputSize", "For shader '%0', vertex plus primitive output size is greater than %1.")
  2466. self.add_valrule("Sm.MeshShaderInOutSize", "For shader '%0', payload plus output size is greater than %1.")
  2467. self.add_valrule("Sm.MeshVSigRowCount", "For shader '%0', vertex output signatures are taking up more than %1 rows.")
  2468. self.add_valrule("Sm.MeshPSigRowCount", "For shader '%0', primitive output signatures are taking up more than %1 rows.")
  2469. self.add_valrule("Sm.MeshTotalSigRowCount", "For shader '%0', vertex and primitive output signatures are taking up more than %1 rows.")
  2470. self.add_valrule("Sm.MaxMSSMSize", "Total Thread Group Shared Memory storage is %0, exceeded %1.")
  2471. self.add_valrule("Sm.AmplificationShaderPayloadSize", "For amplification shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes.")
  2472. self.add_valrule("Sm.AmplificationShaderPayloadSizeDeclared", "For amplification shader with entry '%0', payload size %1 is greater than declared size of %2 bytes.")
  2473. # fxc relaxed check of gradient check.
  2474. #self.add_valrule("Uni.NoUniInDiv", "TODO - No instruction requiring uniform execution can be present in divergent block")
  2475. #self.add_valrule("Uni.GradientFlow", "TODO - No divergent gradient operations inside flow control") # a bit more specific than the prior rule
  2476. #self.add_valrule("Uni.ThreadSync", "TODO - Thread sync operation must be in non-varying flow control due to a potential race condition, adding a sync after reading any values controlling shader execution at this point")
  2477. #self.add_valrule("Uni.NoWaveSensitiveGradient", "Gradient operations are not affected by wave-sensitive data or control flow.")
  2478. self.add_valrule("Flow.Reducible", "Execution flow must be reducible.")
  2479. self.add_valrule("Flow.NoRecusion", "Recursion is not permitted.")
  2480. self.add_valrule("Flow.DeadLoop", "Loop must have break.")
  2481. self.add_valrule_msg("Flow.FunctionCall", "Function with parameter is not permitted", "Function %0 with parameter is not permitted, it should be inlined.")
  2482. self.add_valrule_msg("Decl.DxilNsReserved", "The DXIL reserved prefixes must only be used by built-in functions and types", "Declaration '%0' uses a reserved prefix.")
  2483. self.add_valrule_msg("Decl.DxilFnExtern", "External function must be a DXIL function", "External function '%0' is not a DXIL function.")
  2484. self.add_valrule_msg("Decl.UsedInternal", "Internal declaration must be used", "Internal declaration '%0' is unused.")
  2485. self.add_valrule_msg("Decl.NotUsedExternal", "External declaration should not be used", "External declaration '%0' is unused.")
  2486. self.add_valrule_msg("Decl.UsedExternalFunction", "External function must be used", "External function '%0' is unused.")
  2487. self.add_valrule_msg("Decl.FnIsCalled", "Functions can only be used by call instructions", "Function '%0' is used for something other than calling.")
  2488. self.add_valrule_msg("Decl.FnFlattenParam", "Function parameters must not use struct types", "Type '%0' is a struct type but is used as a parameter in function '%1'.")
  2489. self.add_valrule_msg("Decl.FnAttribute", "Functions should only contain known function attributes", "Function '%0' contains invalid attribute '%1' with value '%2'.")
  2490. self.add_valrule_msg("Decl.ResourceInFnSig", "Resources not allowed in function signatures", "Function '%0' uses resource in function signature.")
  2491. self.add_valrule_msg("Decl.PayloadStruct", "Payload parameter must be struct type", "Argument '%0' must be a struct type for payload in shader function '%1'.")
  2492. self.add_valrule_msg("Decl.AttrStruct", "Attributes parameter must be struct type", "Argument '%0' must be a struct type for attributes in shader function '%1'.")
  2493. self.add_valrule_msg("Decl.ParamStruct", "Callable function parameter must be struct type", "Argument '%0' must be a struct type for callable shader function '%1'.")
  2494. self.add_valrule_msg("Decl.ExtraArgs", "Extra arguments not allowed for shader functions", "Extra argument '%0' not allowed for shader function '%1'.")
  2495. self.add_valrule_msg("Decl.ShaderReturnVoid", "Shader functions must return void", "Shader function '%0' must have void return type.")
  2496. self.add_valrule_msg("Decl.ShaderMissingArg", "payload/params/attributes parameter is required for certain shader types", "%0 shader '%1' missing required %2 parameter.")
  2497. # Assign sensible category names and build up an enumeration description
  2498. cat_names = {
  2499. "CONTAINER": "Container",
  2500. "BITCODE": "Bitcode",
  2501. "META": "Metadata",
  2502. "INSTR": "Instruction",
  2503. "FLOW": "Program flow",
  2504. "TYPES": "Type system",
  2505. "SM": "Shader model",
  2506. "UNI": "Uniform analysis",
  2507. "DECL": "Declaration"
  2508. }
  2509. valrule_enum = db_dxil_enum("ValidationRule", "Known validation rules")
  2510. valrule_enum.is_internal = True
  2511. for vr in self.val_rules:
  2512. vr.category = cat_names[vr.group_name]
  2513. vrval = db_dxil_enum_value(vr.enum_name, vr.rule_id, vr.doc)
  2514. vrval.category = vr.category
  2515. vrval.err_msg = vr.err_msg
  2516. valrule_enum.values.append(vrval)
  2517. self.enums.append(valrule_enum)
  def populate_counters(self):
      "Collect the 'counters' prop of every instruction and refresh self.counters."
      self.llvm_op_counters = set()
      self.dxil_op_counters = set()
      for inst in self.instr:
          # Instructions that carry no props attribute contribute no counters.
          names = getattr(inst, 'props', {}).get('counters', ())
          # A truthy dxil_opid selects the DXIL bucket; anything else goes to LLVM.
          bucket = self.dxil_op_counters if inst.dxil_opid else self.llvm_op_counters
          bucket.update(names)
      # Merge with whatever self.counters already held and keep the result sorted.
      merged = set(self.counters) | self.llvm_op_counters | self.dxil_op_counters
      self.counters = sorted(merged)
  def add_valrule(self, name, desc):
      "Append a validation rule whose error message is the same text as its documentation."
      # The rule id is the rule's position in self.val_rules at registration time.
      next_id = len(self.val_rules)
      rule = db_dxil_valrule(name, next_id, err_msg=desc, doc=desc)
      self.val_rules.append(rule)
  def add_valrule_msg(self, name, desc, err_msg):
      "Append a validation rule with documentation text desc and a separate error message."
      # The rule id is the rule's position in self.val_rules at registration time.
      next_id = len(self.val_rules)
      rule = db_dxil_valrule(name, next_id, err_msg=err_msg, doc=desc)
      self.val_rules.append(rule)
  def add_llvm_instr(self, kind, llvm_id, name, llvm_name, doc, oload_types, op_params, **props):
      "Create an instruction record for an LLVM instruction and append it to self.instr."
      inst = db_dxil_inst(
          name,
          llvm_id=llvm_id,
          llvm_name=llvm_name,
          doc=doc,
          ops=op_params,
          oload_types=oload_types)
      # Each recognized kind switches on exactly one boolean flag; other kinds set none.
      kind_flags = (
          ("TERM", "is_bb_terminator"),
          ("BINARY", "is_binary"),
          ("MEMORY", "is_memory"),
          ("CAST", "is_cast"),
      )
      for kind_tag, flag_attr in kind_flags:
          if kind == kind_tag:
              setattr(inst, flag_attr, True)
      # Any extra keyword arguments travel with the instruction as its props dict.
      inst.props = props
      self.instr.append(inst)
  def add_dxil_op(self, name, code_id, code_class, doc, oload_types, fn_attr, op_params, **props):
      "Register a DXIL operation that reuses the LLVM id/name of self.call_instr."
      # Slot 0 of op_params is the return value, so the opcode goes in at slot 1.
      # NOTE: this edits the caller's op_params list in place.
      op_params.insert(1, self.opcode_param)
      call = self.call_instr
      inst = db_dxil_inst(
          name,
          llvm_id=call.llvm_id,
          llvm_name=call.llvm_name,
          dxil_op=name,
          dxil_opid=code_id,
          doc=doc,
          ops=op_params,
          dxil_class=code_class,
          oload_types=oload_types,
          fn_attr=fn_attr)
      # Any extra keyword arguments travel with the instruction as its props dict.
      inst.props = props
      self.instr.append(inst)
  def add_dxil_op_reserved(self, name, code_id):
      "Register a placeholder DXIL operation of class 'Reserved' for the given opcode."
      # Parameter 0 is the return value, parameter 1 is the opcode.
      reserved_params = [db_dxil_param(0, "v", "", "reserved"), self.opcode_param]
      call = self.call_instr
      inst = db_dxil_inst(
          name,
          llvm_id=call.llvm_id,
          llvm_name=call.llvm_name,
          dxil_op=name,
          dxil_opid=code_id,
          doc="reserved",
          ops=reserved_params,
          dxil_class="Reserved",
          oload_types="v",
          fn_attr="")
      # Unlike add_dxil_op, no props attribute is assigned here.
      self.instr.append(inst)
  def get_instr_by_llvm_name(self, llvm_name):
      "Return the first instruction whose llvm_name equals llvm_name; raise StopIteration if none does."
      for candidate in self.instr:
          if candidate.llvm_name == llvm_name:
              return candidate
      # No match: keep the exception type callers of the next()-based lookup would see.
      raise StopIteration
  def get_dxil_insts(self):
      "Yield, in order, every instruction whose dxil_op is not the empty string."
      for inst in self.instr:
          if inst.dxil_op == "":
              # An empty dxil_op means this entry is not a DXIL operation; skip it.
              continue
          yield inst
  def print_stats(self):
      "Print the instruction count and the max/total parameter counts of the database."
      print ("Instruction count: %d" % len(self.instr))
      # Each instruction's ops list holds one entry that is not counted as a parameter.
      param_counts = [len(inst.ops) - 1 for inst in self.instr]
      print ("Max parameter count in instruction: %d" % max(param_counts))
      print ("Parameter count: %d" % sum(param_counts))
  2572. ###############################################################################
  2573. # HLSL-specific information. #
  2574. ###############################################################################
  class db_hlsl_attribute(object):
      "An HLSL attribute declaration"
      def __init__(self, title_name, scope, args, doc):
          lowered = title_name.lower()
          # Attribute name in lowercase, followed by the title-case spelling as given.
          self.name = lowered
          self.title_name = title_name
          # Scope code: one of l (loop), c (condition), s (switch), f (function).
          self.scope = scope
          # List of arguments the attribute accepts.
          self.args = args
          # Documentation text.
          self.doc = doc
  class db_hlsl_intrinsic(object):
      "An HLSL intrinsic declaration"
      def __init__(self, name, idx, opname, params, ns, ns_idx, doc, ro, rn, wv, unsigned_op, overload_idx, hidden):
          # SPIR-V Change: intrinsics in the VkIntrinsics namespace get a "Vk"
          # name prefix and use the IOP enum prefix, like the Intrinsics namespace.
          is_vk = ns == "VkIntrinsics"
          if is_vk or ns == "Intrinsics":
              id_prefix = "IOP"
          else:
              id_prefix = "MOP"
          full_name = "Vk" + name if is_vk else name

          self.name = full_name               # Function name (Vk-prefixed for VkIntrinsics)
          self.idx = idx                      # Unique number within namespace
          self.opname = opname                # D3D-style name
          self.params = params                # List of parameters
          self.ns = ns                        # Function namespace
          self.ns_idx = ns_idx                # Namespace index
          self.doc = doc                      # Documentation
          self.enum_name = "%s_%s" % (id_prefix, full_name)  # enum name
          self.readonly = ro                  # Only read memory
          self.readnone = rn                  # Not read memory
          self.wave = wv                      # Is wave-sensitive
          # Unsigned opcode, enum-prefixed when one was given; otherwise left as "".
          if unsigned_op == "":
              self.unsigned_op = unsigned_op
          else:
              self.unsigned_op = "%s_%s" % (id_prefix, unsigned_op)
          self.overload_param_index = overload_idx  # Parameter determines the overload type, -1 means ret type
          self.hidden = hidden                # Internal high-level op, not exposed to HLSL
          # Unique key: namespace index, name, parameter count and index joined by "!".
          self.key = "!".join((
              "%3d" % ns_idx,
              full_name,
              "%2d" % len(params),
              "%3d" % idx))
          self.vulkanSpecific = ns.startswith("Vk")  # Vulkan specific intrinsic - SPIRV change
  class db_hlsl_namespace(object):
      "A grouping of HLSL intrinsics"
      def __init__(self, name):
          # Namespace name, and the (initially empty) list of intrinsics grouped under it.
          self.name = name
          self.intrinsics = list()
  class db_hlsl_intrisic_param(object):
      "An HLSL parameter declaration for an intrinsic"
      def __init__(self, name, param_qual, template_id, template_list, component_id, component_list, rows, cols, type_name, idx, template_id_idx, component_id_idx):
          # Parameter name and its qualifier expressions.
          self.name = name
          self.param_qual = param_qual
          # Template ID and template list (each possibly an identifier).
          self.template_id = template_id
          self.template_list = template_list
          # Component ID and component list (each possibly an identifier).
          self.component_id = component_id
          self.component_list = component_list
          # Row and column counts for the parameter (each possibly an identifier).
          self.rows = rows
          self.cols = cols
          # Type name and argument index.
          self.type_name = type_name
          self.idx = idx
          # Numeric values of the template ID and component ID.
          self.template_id_idx = template_id_idx
          self.component_id_idx = component_id_idx
  2631. class db_hlsl(object):
  2632. "A database of HLSL language data"
  def __init__(self, intrinsic_defs):
      "Build the lookup tables, then load intrinsics from intrinsic_defs and derive namespaces and attributes."
      # Base type name used in intrinsic definitions -> component type constant name.
      self.base_types = {
          "bool":                "LICOMPTYPE_BOOL",
          "int":                 "LICOMPTYPE_INT",
          "int32_only":          "LICOMPTYPE_INT32_ONLY",
          "int64_only":          "LICOMPTYPE_INT64_ONLY",
          "int16_t":             "LICOMPTYPE_INT16",
          "uint":                "LICOMPTYPE_UINT",
          "uint16_t":            "LICOMPTYPE_UINT16",
          "u64":                 "LICOMPTYPE_UINT64",
          "any_int":             "LICOMPTYPE_ANY_INT",
          "any_int32":           "LICOMPTYPE_ANY_INT32",
          "any_int64":           "LICOMPTYPE_ANY_INT64",
          "uint_only":           "LICOMPTYPE_UINT_ONLY",
          "int8_t4_packed":      "LICOMPTYPE_INT8_4PACKED",
          "uint8_t4_packed":     "LICOMPTYPE_UINT8_4PACKED",
          "float16_t":           "LICOMPTYPE_FLOAT16",
          "float":               "LICOMPTYPE_FLOAT",
          "float32_only":        "LICOMPTYPE_FLOAT32_ONLY",
          "fldbl":               "LICOMPTYPE_FLOAT_DOUBLE",
          "any_float":           "LICOMPTYPE_ANY_FLOAT",
          "float_like":          "LICOMPTYPE_FLOAT_LIKE",
          "double":              "LICOMPTYPE_DOUBLE",
          "double_only":         "LICOMPTYPE_DOUBLE_ONLY",
          "numeric":             "LICOMPTYPE_NUMERIC",
          "numeric16_only":      "LICOMPTYPE_NUMERIC16_ONLY",
          "numeric32":           "LICOMPTYPE_NUMERIC32",
          "numeric32_only":      "LICOMPTYPE_NUMERIC32_ONLY",
          "any":                 "LICOMPTYPE_ANY",
          "sampler1d":           "LICOMPTYPE_SAMPLER1D",
          "sampler2d":           "LICOMPTYPE_SAMPLER2D",
          "sampler3d":           "LICOMPTYPE_SAMPLER3D",
          "sampler_cube":        "LICOMPTYPE_SAMPLERCUBE",
          "sampler_cmp":         "LICOMPTYPE_SAMPLERCMP",
          "sampler":             "LICOMPTYPE_SAMPLER",
          "resource":            "LICOMPTYPE_RESOURCE",
          "ray_desc":            "LICOMPTYPE_RAYDESC",
          "acceleration_struct": "LICOMPTYPE_ACCELERATION_STRUCT",
          "udt":                 "LICOMPTYPE_USER_DEFINED_TYPE",
          "void":                "LICOMPTYPE_VOID",
          "string":              "LICOMPTYPE_STRING",
          "Texture2D":           "LICOMPTYPE_TEXTURE2D",
          "Texture2DArray":      "LICOMPTYPE_TEXTURE2DARRAY",
          "wave":                "LICOMPTYPE_WAVE",
          "p32i8":               "LICOMPTYPE_INT8_4PACKED",
          "p32u8":               "LICOMPTYPE_UINT8_4PACKED",
          "any_int16or32":       "LICOMPTYPE_ANY_INT16_OR_32",
          "sint16or32_only":     "LICOMPTYPE_SINT16_OR_32_ONLY",
      }
      # Symbolic row/column size -> constant name.
      self.trans_rowcol = {
          "r":  "IA_R",
          "c":  "IA_C",
          "r2": "IA_R2",
          "c2": "IA_C2",
      }
      # Parameter qualifier keyword -> qualifier flag expression.
      self.param_qual = {
          "in":        "AR_QUAL_IN",
          "inout":     "AR_QUAL_IN | AR_QUAL_OUT",
          "out":       "AR_QUAL_OUT",
          "col_major": "AR_QUAL_COLMAJOR",
          "row_major": "AR_QUAL_ROWMAJOR",
      }
      # The tables above must exist before the definitions are parsed.
      self.intrinsics = []
      self.load_intrinsics(intrinsic_defs)
      self.create_namespaces()
      self.populate_attributes()
      self.opcode_namespace = "hlsl::IntrinsicOp"
  def create_namespaces(self):
      "Group self.intrinsics, ordered by key, into db_hlsl_namespace objects stored in self.namespaces."
      self.namespaces = {}
      current = None
      ordered = sorted(self.intrinsics, key=lambda intrinsic: intrinsic.key)
      for intrinsic in ordered:
          # A namespace object is created whenever the namespace name changes
          # between consecutive intrinsics in key order.
          starts_new_group = current is None or current.name != intrinsic.ns
          if starts_new_group:
              current = db_hlsl_namespace(intrinsic.ns)
              self.namespaces[intrinsic.ns] = current
          current.intrinsics.append(intrinsic)
  def load_intrinsics(self, intrinsic_defs):
      """Parse intrinsic definition lines and append the results to self.intrinsics.

      intrinsic_defs is iterated line by line. Blank lines and lines starting
      with // are skipped; "namespace NAME {" opens a namespace and
      "} namespace" closes it. Every other line must be an intrinsic
      declaration of the form

          <return type> [[attr,attr=value,...]] name(params) [: operand];

      Each declaration becomes a db_hlsl_intrinsic whose parameter list holds
      the return value at index 0 followed by the declared arguments.

      Raises AssertionError for a line, attribute or parameter description
      that cannot be parsed.
      """
      import re
      blank_re = re.compile(r"^\s*$")
      comment_re = re.compile(r"^\s*//")
      namespace_beg_re = re.compile(r"^namespace\s+(\w+)\s*{\s*$")
      namespace_end_re = re.compile(r"^}\s*namespace\s*$")
      intrinsic_re = re.compile(r"^\s*([^(]+)\s+\[\[(\S*)\]\]\s+(\w+)\s*\(\s*([^)]*)\s*\)\s*(:\s*\w+\s*)?;$")
      operand_re = re.compile(r"^:\s*(\w+)\s*$")
      bracket_cleanup_re = re.compile(r"<\s*(\S+)\s*,\s*(\S+)\s*>") # change <a,b> to <a@b> to help split params and parse
      params_split_re = re.compile(r"\s*,\s*")
      ws_split_re = re.compile(r"\s+")
      typeref_re = re.compile(r"\$type(\d+)$")
      type_matrix_re = re.compile(r"(\S+)<(\S+)@(\S+)>$")
      type_vector_re = re.compile(r"(\S+)<(\S+)>$")
      type_any_re = re.compile(r"(\S+)<>$")
      digits_re = re.compile(r"^\d+$")
      opt_param_match_re = re.compile(r"^\$match<(\S+)@(\S+)>$")
      # Base type prefixes that are treated as objects rather than scalars.
      object_type_prefixes = ("sampler", "string", "Texture", "wave", "acceleration_struct", "ray_desc")
      ns_idx = 0
      num_entries = 0

      def add_flag(val, new_val):
          "Join two flag expressions with ' | '; an empty or '0' value is replaced outright."
          if val == "" or val == "0":
              return new_val
          return val + " | " + new_val

      def translate_rowcol(val):
          "Return numeric row/column sizes unchanged; map symbolic ones through self.trans_rowcol."
          if digits_re.match(val):
              return val
          assert val in self.trans_rowcol, "unknown row/col %s" % val
          return self.trans_rowcol[val]

      def process_arg(desc, idx, done_args, intrinsic_name):
          "Process a single parameter description."
          opt_list = []
          desc = desc.strip()
          if desc == "...":
              param_name = "..."
              type_name = "..."
          else:
              opt_list = ws_split_re.split(desc)
              # A description needs at least "<type> <name>". split() never
              # returns an empty list, so a "> 0" check could not fail and a
              # one-token description would crash on the second pop() instead.
              assert len(opt_list) >= 2, "malformed parameter desc %s" % (desc)
              param_name = opt_list.pop() # last token is name
              type_name = opt_list.pop() # next-to-last is type specifier
          param_qual = "0"
          template_id = str(idx)
          template_list = "LITEMPLATE_ANY"
          component_id = str(idx)
          component_list = "LICOMPTYPE_ANY"
          rows = "1"
          cols = "1"
          if type_name == "$classT":
              assert idx == 0, "'$classT' can only be used as the return type"
              # template_id may be -1 in other places other than return type, for example in Stream.Append().
              # $unspec is a shorthand for return types only though.
              template_id = "-1"
              component_id = "0"
              type_name = "void"
          elif type_name == "$funcT":
              template_id = "-3"
              component_id = "0"
              type_name = "void"
          elif type_name == "...":
              assert idx != 0, "'...' can only be used in the parameter list"
              template_id = "-2"
              component_id = "0"
              type_name = "void"
          else:
              typeref_match = typeref_re.match(type_name)
              if typeref_match:
                  template_id = typeref_match.group(1)
                  component_id = template_id
                  assert idx != 1, "Can't use $type on the first argument"
                  assert template_id != "0", "Can't match an input to the return type"
                  done_idx = int(template_id) - 1
                  # Check against the arguments actually handed to this call so
                  # a bad reference fails here rather than as an IndexError below.
                  assert done_idx < len(done_args), "$type must refer to a processed arg"
                  done_arg = done_args[done_idx]
                  type_name = done_arg.type_name
          # Determine matrix/vector/any/scalar type names.
          type_matrix_match = type_matrix_re.match(type_name)
          if type_matrix_match:
              base_type = type_matrix_match.group(1)
              rows = type_matrix_match.group(2)
              cols = type_matrix_match.group(3)
              template_list = "LITEMPLATE_MATRIX"
          else:
              type_vector_match = type_vector_re.match(type_name)
              if type_vector_match:
                  base_type = type_vector_match.group(1)
                  cols = type_vector_match.group(2)
                  template_list = "LITEMPLATE_VECTOR"
              else:
                  type_any_match = type_any_re.match(type_name)
                  if type_any_match:
                      base_type = type_any_match.group(1)
                      rows = "r"
                      cols = "c"
                      template_list = "LITEMPLATE_ANY"
                  else:
                      base_type = type_name
                      if base_type.startswith(object_type_prefixes):
                          template_list = "LITEMPLATE_OBJECT"
                      else:
                          template_list = "LITEMPLATE_SCALAR"
          assert base_type in self.base_types, "Unknown base type '%s' in '%s'" % (base_type, desc)
          component_list = self.base_types[base_type]
          rows = translate_rowcol(rows)
          cols = translate_rowcol(cols)
          # Remaining tokens are either qualifier keywords or a $match<t@c> override.
          for opt in opt_list:
              if opt in self.param_qual:
                  param_qual = add_flag(param_qual, self.param_qual[opt])
              else:
                  opt_param_match = opt_param_match_re.match(opt)
                  assert opt_param_match, "Unknown parameter qualifier '%s'" % (opt)
                  template_id = opt_param_match.group(1)
                  component_id = opt_param_match.group(2)
          if component_list == "LICOMPTYPE_VOID":
              if type_name == "void":
                  template_list = "LITEMPLATE_VOID"
                  rows = "0"
                  cols = "0"
                  if template_id == "0":
                      param_qual = "0"
          # Keep these as numeric values.
          template_id_idx = int(template_id)
          component_id_idx = int(component_id)
          # Verify that references don't point to the right (except for the return value).
          assert idx == 0 or template_id_idx <= int(idx), "Argument '%s' has a forward reference" % (param_name)
          assert idx == 0 or component_id_idx <= int(idx), "Argument '%s' has a forward reference" % (param_name)
          # Negative ids are spelled as named constants in the emitted data.
          if template_id == "-1":
              template_id = "INTRIN_TEMPLATE_FROM_TYPE"
          elif template_id == "-2":
              template_id = "INTRIN_TEMPLATE_VARARGS"
          elif template_id == "-3":
              template_id = "INTRIN_TEMPLATE_FROM_FUNCTION"
          if component_id == "-1":
              component_id = "INTRIN_COMPTYPE_FROM_TYPE_ELT0"
          return db_hlsl_intrisic_param(param_name, param_qual, template_id, template_list, component_id, component_list, rows, cols, type_name, idx, template_id_idx, component_id_idx)

      def process_attr(attr):
          "Parse the comma-separated [[...]] attribute list of one intrinsic."
          attrs = attr.split(',')
          readonly = False # Only read memory
          readnone = False # Not read memory
          is_wave = False # Is wave-sensitive
          unsigned_op = "" # Unsigned opcode if exist
          overload_param_index = -1 # Parameter determines the overload type, -1 means ret type.
          hidden = False
          for a in attrs:
              if (a == ""):
                  continue
              if (a == "ro"):
                  readonly = True
                  continue
              if (a == "rn"):
                  readnone = True
                  continue
              if (a == "wv"):
                  is_wave = True
                  continue
              if (a == "hidden"):
                  hidden = True
                  continue
              # Everything else must be a key=value pair.
              assign = a.split('=')
              if (len(assign) != 2):
                  assert False, "invalid attr %s" % (a)
                  continue
              d = assign[0]
              v = assign[1]
              if (d == "unsigned_op"):
                  unsigned_op = v
                  continue
              if (d == "overload"):
                  overload_param_index = int(v)
                  continue
              assert False, "invalid attr %s" % (a)
          return readonly, readnone, is_wave, unsigned_op, overload_param_index, hidden

      current_namespace = None
      for line in intrinsic_defs:
          if blank_re.match(line): continue
          if comment_re.match(line): continue
          match_obj = namespace_beg_re.match(line)
          if match_obj:
              assert not current_namespace, "cannot open namespace without closing prior one"
              current_namespace = match_obj.group(1)
              # Intrinsic numbering restarts in every namespace.
              num_entries = 0
              ns_idx += 1
              continue
          if namespace_end_re.match(line):
              assert current_namespace, "cannot close namespace without previously opening it"
              current_namespace = None
              continue
          match_obj = intrinsic_re.match(line)
          if match_obj:
              assert current_namespace, "instruction missing namespace %s" % (line)
              # Get a D3D-style operand name for the instruction.
              # Unused for DXIL.
              opts = match_obj.group(1)
              attr = match_obj.group(2)
              name = match_obj.group(3)
              params = match_obj.group(4)
              op = match_obj.group(5)
              if op:
                  operand_match = operand_re.match(op)
                  if operand_match:
                      op = operand_match.group(1)
              if not op:
                  op = name
              readonly, readnone, is_wave, unsigned_op, overload_param_index, hidden = process_attr(attr)
              # Add an entry for this intrinsic.
              # Rewrite <a,b> as <a@b> so the comma split below keeps matrix types whole.
              opts = bracket_cleanup_re.sub(r"<\1@\2>", opts)
              params = bracket_cleanup_re.sub(r"<\1@\2>", params)
              ret_desc = "out " + opts + " " + name
              if len(params) > 0:
                  in_args = params_split_re.split(params)
              else:
                  in_args = []
              args = []
              # Argument indices start at 1; index 0 is the return value.
              for arg_idx, in_arg in enumerate(in_args, 1):
                  args.append(process_arg(in_arg, arg_idx, args, name))
              # We have to process the return type description last
              # to match the compiler's handling of it and allow
              # the return type to match an input type.
              # It needs to be the first entry, so prepend it.
              args.insert(0, process_arg(ret_desc, 0, args, name))
              # TODO: verify a single level of indirection
              self.intrinsics.append(db_hlsl_intrinsic(
                  name, num_entries, op, args, current_namespace, ns_idx, "pending doc for " + name,
                  readonly, readnone, is_wave, unsigned_op, overload_param_index, hidden))
              num_entries += 1
              continue
          assert False, "cannot parse line %s" % (line)
  def populate_attributes(self):
      """Populate basic definitions for attributes.

      Builds one db_hlsl_attribute per known HLSL attribute and stores the
      resulting list on self.attributes.

      Each entry is described by:
        title_name -- title-case attribute name
        scope      -- single-letter scope code
                      (NOTE(review): judging by the entries below this is
                      l=loop, c=conditional, s=switch, f=function -- confirm
                      against db_hlsl_attribute)
        doc        -- documentation string
        args       -- list of argument descriptors, each a dict with "name"
                      and "type" keys and an optional "count"
      """
      attributes = []

      def add_attr(title_name, scope, doc):
          # Attribute that takes no arguments.
          attributes.append(db_hlsl_attribute(title_name, scope, [], doc))

      def add_attr_arg(title_name, scope, doc, args):
          # Attribute with arguments. The parameter order follows the call
          # sites below (doc before args); db_hlsl_attribute itself takes
          # args before doc, so they are swapped back here. Previously the
          # helper declared (args, doc), which stored the doc string as the
          # argument list and the argument list as the doc.
          attributes.append(db_hlsl_attribute(title_name, scope, args, doc))

      # Attributes without arguments.
      add_attr("Allow_UAV_Condition", "l", "Allows a compute shader loop termination condition to be based off of a UAV read. The loop must not contain synchronization intrinsics")
      add_attr("Branch", "c", "Evaluate only one side of the if statement depending on the given condition")
      add_attr("Call", "s", "The bodies of the individual cases in the switch will be moved into hardware subroutines and the switch will be a series of subroutine calls")
      add_attr("EarlyDepthStencil", "f", "Forces depth-stencil testing before a shader executes")
      add_attr("FastOpt", "l", "Reduces the compile time but produces less aggressive optimizations")
      add_attr("Flatten", "c", "Evaluate both sides of the if statement and choose between the two resulting values")
      add_attr("ForceCase", "s", "Force a switch statement in the hardware")
      add_attr("Loop", "l", "Generate code that uses flow control to execute each iteration of the loop")

      # Attributes with arguments.
      add_attr_arg("ClipPlanes", "f", "Optional list of clip planes", [{"name":"ClipPlane", "type":"int", "count":6}])
      # The key must be the string "type"; the bare name `type` would use the
      # builtin type object as the dictionary key.
      add_attr_arg("Domain", "f", "Defines the patch type used in the HS", [{"name":"DomainType", "type":"string"}])
      add_attr_arg("Instance", "f", "Use this attribute to instance a geometry shader", [{"name":"Count", "type":"int"}])
      add_attr_arg("MaxTessFactor", "f", "Indicates the maximum value that the hull shader would return for any tessellation factor.", [{"name":"Count", "type":"int"}])
      add_attr_arg("MaxVertexCount", "f", "maxvertexcount doc", [{"name":"Count", "type":"int"}])
      # numthreads takes its dimensions in x, y, z order.
      add_attr_arg("NumThreads", "f", "Defines the number of threads to be executed in a single thread group.", [{"name":"x", "type":"int"},{"name":"y", "type":"int"},{"name":"z", "type":"int"}])
      add_attr_arg("OutputControlPoints", "f", "Defines the number of output control points per thread that will be created in the hull shader", [{"name":"Count", "type":"int"}])
      add_attr_arg("OutputTopology", "f", "Defines the output primitive type for the tessellator", [{"name":"Topology", "type":"string"}])
      # NOTE(review): "scheme" is the only type here that is not "int" or
      # "string"; left as-is -- confirm whether "string" was intended.
      add_attr_arg("Partitioning", "f", "Defines the tesselation scheme to be used in the hull shader", [{"name":"Scheme", "type":"scheme"}])
      add_attr_arg("PatchConstantFunc", "f", "Defines the function for computing patch constant data", [{"name":"FunctionName", "type":"string"}])
      add_attr_arg("RootSignature", "f", "RootSignature doc", [{"name":"SignatureName", "type":"string"}])
      add_attr_arg("Unroll", "l", "Unroll the loop until it stops executing or a max count", [{"name":"Count", "type":"int"}])

      self.attributes = attributes
  if __name__ == "__main__":
      # Script entry point: build the DXIL database, print it, then print
      # its statistics.
      db = db_dxil()
      print(db)
      db.print_stats()