DX12Driver.hx 61 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022
  1. package h3d.impl;
  2. #if (hldx && dx12)
  3. import h3d.impl.Driver;
  4. import dx.Dx12;
  5. import haxe.Int64;
  6. import h3d.mat.Pass;
  7. import h3d.mat.Stencil;
  8. private typedef Driver = Dx12;
  /**
      Node of a singly linked list of temporary GPU buffers.
      The driver keeps one list of available and one list of used buffers per frame.
  **/
  class TempBuffer {
      /** Following node in the list, or null at the tail. **/
      public var next : TempBuffer;
      /** Native GPU resource held by this node. **/
      public var buffer : GpuResource;
      public var size : Int;
      /** Frame counter recorded at last use; the driver compares it with its `frameCount`. **/
      public var lastUse : Int;

      public function new() {
      }

      /**
          Returns the number of nodes in the list that starts at this node
          (this node included, so the result is always >= 1).
      **/
      public inline function count() {
          var total = 0;
          var node = this;
          while( node != null ) {
              node = node.next;
              total++;
          }
          return total;
      }
  }
  /**
      Growable pool of `ManagedHeap` instances sharing the same descriptor type and size.
      `reset()` rewinds the pool, `next()` hands out heaps one after another,
      creating a new one only when the pool is exhausted.
  **/
  class ManagedHeapArray {
      var heaps : Array<ManagedHeap>;
      var type : DescriptorHeapType;
      var size : Int;
      var cursor : Int;

      /**
          @param type descriptor heap type used for every heap of the pool
          @param size number of descriptors given to each newly created heap
      **/
      public function new(type,size) {
          heaps = [];
          this.size = size;
          this.type = type;
      }

      /** Rewinds the pool so that the next call to `next()` returns the first heap again. **/
      public function reset() {
          cursor = 0;
      }

      /**
          Returns the heap at the current position and advances the position.
          A recycled heap is cleared before being returned; when no heap exists
          at that position a new one is created and appended to the pool.
      **/
      public function next() {
          var index = cursor;
          cursor = index + 1;
          var h = heaps[index];
          if( h != null ) {
              h.clear();
              return h;
          }
          h = new ManagedHeap(type, size);
          heaps.push(h);
          return h;
      }
  }
  /**
      Per-frame state of the DX12 driver. The driver creates `BUFFER_COUNT` of these
      and selects one per frame from the current back buffer index.
  **/
  class DxFrame {

      // ---- render surfaces ----
      public var backBuffer : ResourceData;
      public var depthBuffer : GpuResource;

      // ---- command recording ----
      public var allocator : CommandAllocator;
      public var commandList : CommandList;
      public var fenceValue : Int64;

      // ---- deferred release ----
      /** Resources released the next time this frame begins. **/
      public var toRelease : Array<Resource> = [];
      /** Textures whose `tmpBuf` gets moved to a release list and nulled when the following frame begins. **/
      public var tmpBufToNullify : Array<Texture> = [];
      /** Temporary buffers released (when non-null) the next time this frame begins. **/
      public var tmpBufToRelease : Array<dx.Dx12.GpuResource> = [];

      // ---- descriptor heaps ----
      public var shaderResourceViews : ManagedHeap;
      public var samplerViews : ManagedHeap;
      public var shaderResourceCache : ManagedHeapArray;
      public var samplerCache : ManagedHeapArray;

      // ---- temporary buffers (linked lists) ----
      public var availableBuffers : TempBuffer;
      public var usedBuffers : TempBuffer;

      // ---- queries ----
      public var queryHeaps : Array<QueryHeap> = [];
      public var queriesPending : Array<Query> = [];
      public var queryCurrentHeap : Int;
      public var queryHeapOffset : Int;
      public var queryBuffer : GpuResource;

      public function new() {
      }
  }
  /**
      Entry associating a raw byte key (`bytes` / `size`) with a native graphics pipeline state.
  **/
  class CachedPipeline {
      /** Raw key bytes. **/
      public var bytes : hl.Bytes;
      /** Length of `bytes`. **/
      public var size : Int;
      public var pipeline : GraphicsPipelineState;

      public function new() {
      }
  }
  /**
      Register / slot indices and texture counts for one shader stage
      (a `CompiledShader` holds one instance for the vertex stage and one for the fragment stage).
  **/
  class ShaderRegisters {
      public var globals : Int;
      public var params : Int;
      public var buffers : Int;
      public var textures : Int;
      public var samplers : Int;

      public var texturesCount : Int;
      public var textures2DCount : Int;

      public function new() {
      }
  }
  /**
      Driver-side data attached to one compiled `hxsl.RuntimeShader`:
      per-stage registers, vertex input layout, root signature and cached pipelines.
  **/
  class CompiledShader {
      /** Source runtime shader. **/
      public var shader : hxsl.RuntimeShader;

      public var vertexRegisters : ShaderRegisters;
      public var fragmentRegisters : ShaderRegisters;

      public var format : hxd.BufferFormat;
      public var inputLayout : hl.CArray<InputElementDesc>;
      public var inputCount : Int;

      public var rootSignature : RootSignature;
      /** Pipeline description used as the base for pipeline state creation. **/
      public var pipeline : GraphicsPipelineStateDesc;
      /** Cached pipelines, bucketed by an integer key. **/
      public var pipelines : Map<Int,hl.NativeArray<CachedPipeline>> = new Map();

      public function new() {
      }
  }
  /**
      Scratch native structures allocated once and reused by the driver
      instead of allocating new descriptors for every call.
      Field order is kept as declared since the class is a `@:struct`.
  **/
  @:struct class TempObjects {

      public var renderTargets : hl.BytesAccess<Address>;
      public var depthStencils : hl.BytesAccess<Address>;
      public var vertexViews : hl.CArray<VertexBufferView>;
      public var descriptors2 : hl.NativeArray<DescriptorHeap>;
      @:packed public var heap(default,null) : HeapProperties;
      @:packed public var barrier(default,null) : ResourceBarrier;
      @:packed public var clearColor(default,null) : ClearColor;
      @:packed public var clearValue(default,null) : ClearValue;
      @:packed public var viewport(default,null) : Viewport;
      @:packed public var rect(default,null) : Rect;
      @:packed public var tex2DSRV(default,null) : Tex2DSRV;
      @:packed public var texCubeSRV(default,null) : TexCubeSRV;
      @:packed public var tex2DArraySRV(default,null) : Tex2DArraySRV;
      @:packed public var bufferSRV(default,null) : BufferSRV;
      @:packed public var samplerDesc(default,null) : SamplerDesc;
      @:packed public var cbvDesc(default,null) : ConstantBufferViewDesc;
      @:packed public var rtvDesc(default,null) : RenderTargetViewDesc;
      public var pass : h3d.mat.Pass;

      /**
          Allocates the native scratch storage and presets the descriptor fields
          that keep the same value for the whole lifetime of the driver.
      **/
      public function new() {
          // native storage: 8 * 8 bytes for render target handles, 8 bytes for the depth handle
          renderTargets = new hl.Bytes(8 * 8);
          depthStencils = new hl.Bytes(8);
          vertexViews = hl.CArray.alloc(VertexBufferView, 16);
          descriptors2 = new hl.NativeArray(2);

          // default pass with a stencil object attached
          pass = new h3d.mat.Pass("default");
          pass.stencil = new h3d.mat.Stencil();

          // texture shader resource views
          tex2DSRV.dimension = TEXTURE2D;
          texCubeSRV.dimension = TEXTURECUBE;
          tex2DArraySRV.dimension = TEXTURE2DARRAY;
          tex2DArraySRV.mipLevels = -1;
          texCubeSRV.mipLevels = -1;
          tex2DSRV.mipLevels = -1;
          tex2DSRV.shader4ComponentMapping = ShaderComponentMapping.DEFAULT;
          texCubeSRV.shader4ComponentMapping = ShaderComponentMapping.DEFAULT;
          tex2DArraySRV.shader4ComponentMapping = ShaderComponentMapping.DEFAULT;

          // raw buffer shader resource view
          bufferSRV.dimension = BUFFER;
          bufferSRV.flags = RAW;
          bufferSRV.shader4ComponentMapping = ShaderComponentMapping.DEFAULT;

          // sampler defaults
          samplerDesc.comparisonFunc = NEVER;
          samplerDesc.maxLod = 1e30;

          // -1 targets all sub resources
          barrier.subResource = -1;
      }
  }
  /**
      Descriptor heap wrapper handing out descriptor ranges with a moving cursor.

      `cursor` is the next free slot, `start` the cursor value recorded by the last
      `next()` call and `limit` the `start` value before that: slots located between
      `limit` and `cursor` are considered still in use. When a request does not fit,
      the native heap is replaced by a larger one and the previous heap is passed to `onFree`.
  **/
  class ManagedHeap {

      /** Byte distance between two consecutive descriptors for this heap type. **/
      public var stride(default,null) : Int;
      var size : Int;
      var start : Int;
      var cursor : Int;
      var limit : Int;
      var type : DescriptorHeapType;
      var heap : DescriptorHeap;
      var address : Address;
      /** Offset to add to a CPU handle value to obtain the matching GPU handle value. **/
      var cpuToGpu : Int64;

      /** Number of descriptors that can still be allocated before reaching `limit`. **/
      public var available(get,never) : Int;

      /**
          @param type descriptor heap type
          @param size initial number of descriptors
      **/
      public function new(type,size=8) {
          this.type = type;
          this.stride = Driver.getDescriptorHandleIncrementSize(type);
          allocHeap(size);
      }

      /**
          Creates a new native heap of `size` descriptors and resets all cursors.
          The previous native heap (if any) is NOT released here: callers hand it to `onFree`.
      **/
      function allocHeap( size : Int ) {
          var desc = new DescriptorHeapDesc();
          desc.type = type;
          desc.numDescriptors = size;
          if( type == CBV_SRV_UAV || type == SAMPLER )
              desc.flags = SHADER_VISIBLE;
          heap = new DescriptorHeap(desc);
          limit = cursor = start = 0;
          this.size = size;
          address = heap.getHandle(false);
          cpuToGpu = heap.getHandle(true).value - address.value;
      }

      /**
          Size to use when replacing the heap: 1.5x the current size, but always
          at least one more slot than now (1.5x rounds down to the same value for
          sizes 0 and 1) and never less than `minCount`, so that the allocation
          that triggered the growth fits inside the new heap.
      **/
      function grownSize( minCount : Int ) : Int {
          var n = (size * 3) >> 1;
          if( n <= size ) n = size + 1;
          if( n < minCount ) n = minCount;
          return n;
      }

      /**
          Replaces the native heap by a larger one able to hold at least `minCount`
          descriptors, then notifies `onFree` with the previous heap.
      **/
      function replaceHeap( minCount : Int ) {
          var prev = heap;
          allocHeap(grownSize(minCount));
          onFree(prev);
      }

      /**
          Called with the previous native heap each time `alloc` had to replace it.
          The default implementation throws; owners that can defer the release override it.
      **/
      public dynamic function onFree( prev : DescriptorHeap ) {
          throw "Too many buffers";
      }

      /**
          Reserves `count` consecutive descriptors and returns the CPU address of the first one.
          Wraps around to the beginning of the heap when the end is reached, and replaces
          the heap by a larger one when the request would run into slots still in use.
      **/
      public function alloc( count : Int ) {
          if( cursor >= limit && cursor + count > size ) {
              // does not fit at the end: wrap around
              cursor = 0;
              // nothing has been recycled yet, so the beginning is still in use
              if( limit == 0 )
                  replaceHeap(count);
          }
          if( cursor < limit && cursor + count >= limit )
              replaceHeap(count);
          var pos = cursor;
          cursor += count;
          return address.offset(pos * stride);
      }

      inline function get_available() {
          var d = limit - cursor;
          return d <= 0 ? size + d : d;
      }

      /**
          Forces the heap to be replaced by a larger one. The previous native heap is
          passed to the `onFree` callback given as argument (not to the `onFree` field).
          Returns the new native heap.
      **/
      public inline function grow( onFree ) {
          var prev = heap;
          allocHeap(grownSize(0));
          onFree(prev);
          return heap;
      }

      /** Marks the whole heap as free. **/
      public function clear() {
          limit = cursor = start = 0;
      }

      /**
          Records a boundary: everything allocated before the previous boundary
          becomes reusable, and the current cursor becomes the new boundary.
      **/
      public function next() {
          limit = start;
          start = cursor;
      }

      /** Converts a CPU descriptor address of this heap into the matching GPU address. **/
      public inline function toGPU( address : Address ) : Address {
          return new Address(address.value + cpuToGpu);
      }
  }
  /**
      A native GPU resource together with the resource state last recorded for it by the driver.
  **/
  class ResourceData {
      public var res : GpuResource;
      /** State last set by the driver; compared and updated by `DX12Driver.transition`. **/
      public var state : ResourceState;

      public function new() {
      }
  }
  /**
      Resource data shared by buffer resources.
  **/
  class BufferData extends ResourceData {
      public var uploaded : Bool;
  }
  /**
      Buffer data for an index buffer, with its native index buffer view.
  **/
  class IndexBufferData extends BufferData {
      public var view : IndexBufferView;
      public var count : Int;
      public var bits : Int;
  }
  /**
      Buffer data for a vertex buffer, with its native vertex buffer view.
  **/
  class VertexBufferData extends BufferData {
      public var view : dx.Dx12.VertexBufferView;
      public var size : Int;
  }
  /**
      Resource data for a texture, including the clear color currently associated with it.
  **/
  class TextureData extends ResourceData {
      public var format : DxgiFormat;
      /** Clear color currently associated with the texture. **/
      public var color : h3d.Vector;
      public var tmpBuf : dx.Dx12.GpuResource;
      /** How many times `setClearColor` accepted a new color. **/
      var clearColorChanges : Int;

      /**
          Stores `c` as the new clear color.
          Returns false without storing anything when the color has already been
          changed more than 10 times or when `c` equals the current color;
          returns true when the color was updated.
      **/
      public function setClearColor( c : h3d.Vector ) {
          if( clearColorChanges > 10 )
              return false;
          var cur = color;
          var same = cur.r == c.r && cur.g == c.g && cur.b == c.b && cur.a == c.a;
          if( same )
              return false;
          clearColorChanges++;
          cur.load(c);
          return true;
      }
  }
  /**
      Driver-side data of a GPU query: location (heap index and offset) and result value.
  **/
  class QueryData {
      public var heap : Int;
      public var offset : Int;
      public var result : Float;

      public function new() {
      }
  }
  253. class DX12Driver extends h3d.impl.Driver {
  // ---- byte offsets inside `pipelineSignature` ----
  static inline var PSIGN_MATID = 0;                                  // offset 0
  static inline var PSIGN_COLOR_MASK = PSIGN_MATID + 4;               // offset 4
  static inline var PSIGN_UNUSED = PSIGN_COLOR_MASK + 1;              // offset 5
  static inline var PSIGN_STENCIL_MASK = PSIGN_UNUSED + 1;            // offset 6
  static inline var PSIGN_STENCIL_OPS = PSIGN_STENCIL_MASK + 2;       // offset 8
  static inline var PSIGN_RENDER_TARGETS = PSIGN_STENCIL_OPS + 4;     // offset 12
  static inline var PSIGN_LAYOUT = PSIGN_RENDER_TARGETS + 8;          // offset 20

  /** Raw bytes describing the current pipeline state (see the PSIGN_* offsets). **/
  var pipelineSignature = new hl.Bytes(64);
  var adlerOut = new hl.Bytes(4);

  // ---- device / window ----
  var driver : DriverInstance;
  var hasDeviceError = false;
  var window : dx.Window;
  var onContextLost : Void -> Void;

  // ---- frames and GPU synchronization ----
  var frames : Array<DxFrame>;
  var frame : DxFrame;
  var fence : Fence;
  var fenceEvent : WaitEvent;

  // ---- descriptor heaps and command signature ----
  var renderTargetViews : ManagedHeap;
  var depthStenciViews : ManagedHeap;
  var indirectCommand : CommandSignature;

  // ---- current state ----
  var currentFrame : Int;
  var fenceValue : Int64 = 0;
  /** Set whenever the state stored in `pipelineSignature` may have changed. **/
  var needPipelineFlush = false;
  var currentPass : h3d.mat.Pass;
  var currentWidth : Int;
  var currentHeight : Int;
  var currentShader : CompiledShader;
  var compiledShaders : Map<Int,CompiledShader> = new Map();
  var compiler : ShaderCompiler;
  var currentIndex : IndexBuffer;
  var tmp : TempObjects;
  /** Texture render targets currently bound (empty when no texture target is bound). **/
  var currentRenderTargets : Array<h3d.mat.Texture> = [];
  var defaultDepth : h3d.mat.Texture;
  var depthEnabled = true;
  var curStencilRef : Int = -1;
  /** Size of the current render target, as set by `initViewport`. **/
  var rtWidth : Int;
  var rtHeight : Int;
  var frameCount : Int;
  var tsFreq : haxe.Int64;

  // ---- global settings, read by `reset()` ----
  public static var INITIAL_RT_COUNT = 1024;
  public static var BUFFER_COUNT = 2;
  public static var DEVICE_NAME = null;
  public static var DEBUG = false;
  /**
      Creates the driver on the first window registered in `dx.Window` and initializes it.
  **/
  public function new() {
      var mainWindow = @:privateAccess dx.Window.windows[0];
      window = mainWindow;
      reset();
  }
  /**
      Reports every feature as supported, except `Queries` and `BottomLeftCoords`.
  **/
  override function hasFeature(f:Feature) {
      switch( f ) {
      case Queries, BottomLeftCoords:
          return false;
      default:
          return true;
      }
  }
  /**
      Always answers true: this driver does not reject any texture format here.
  **/
  override function isSupportedFormat(fmt:h3d.mat.Data.TextureFormat):Bool {
      var supported = true;
      return supported;
  }
  /**
      (Re)creates the whole native state: device, per-frame objects, fence,
      descriptor heaps, default depth texture, indirect draw signature and
      shader compiler. Ends by resizing to the current window size.
  **/
  function reset() {
      // device
      var initFlags = new DriverInitFlags();
      if( DEBUG )
          initFlags.set(DriverInitFlag.DEBUG);
      driver = Driver.create(window, initFlags, DEVICE_NAME);

      // one DxFrame per swap chain buffer
      frames = [];
      for( n in 0...BUFFER_COUNT ) {
          var fr = new DxFrame();
          fr.backBuffer = new ResourceData();
          fr.allocator = new CommandAllocator(DIRECT);
          fr.commandList = new CommandList(DIRECT, fr.allocator, null);
          // closed right away; beginFrame() resets the list before recording
          fr.commandList.close();
          fr.shaderResourceCache = new ManagedHeapArray(CBV_SRV_UAV, 1024);
          fr.samplerCache = new ManagedHeapArray(SAMPLER, 1024);
          frames.push(fr);
      }

      // GPU synchronization
      fence = new Fence(0, NONE);
      fenceEvent = new WaitEvent(false);

      tmp = new TempObjects();

      // view heaps: a replaced heap is released later through the current frame
      function releaseLater( prev : DescriptorHeap ) {
          frame.toRelease.push(prev);
      }
      renderTargetViews = new ManagedHeap(RTV, INITIAL_RT_COUNT);
      depthStenciViews = new ManagedHeap(DSV, INITIAL_RT_COUNT);
      renderTargetViews.onFree = releaseLater;
      depthStenciViews.onFree = releaseLater;

      // texture object standing for the per-frame depth buffer
      defaultDepth = new h3d.mat.Texture(0,0, Depth24Stencil8);
      defaultDepth.t = new TextureData();
      defaultDepth.t.state = DEPTH_WRITE;
      defaultDepth.name = "defaultDepth";

      // indirect indexed draw: one argument, 5 * 4 bytes per command
      var signDesc = new CommandSignatureDesc();
      signDesc.byteStride = 5 * 4;
      signDesc.numArgumentDescs = 1;
      signDesc.argumentDescs = new IndirectArgumentDesc();
      signDesc.argumentDescs.type = DRAW_INDEXED;
      indirectCommand = Driver.createCommandSignature(signDesc,null);

      tsFreq = Driver.getTimestampFrequency();
      compiler = new ShaderCompiler();
      resize(window.width, window.height);
  }
  /**
      Starts recording a new frame: selects the `DxFrame` matching the current back
      buffer, resets its command list, performs the deferred releases, recycles the
      temporary buffers and rebinds the default render target and descriptor heaps.
  **/
  function beginFrame() {
      frameCount = hxd.Timer.frameCount;
      currentFrame = Driver.getCurrentBackBufferIndex();
      var previous = frame;
      frame = frames[currentFrame];
      defaultDepth.t.res = frame.depthBuffer;
      frame.allocator.reset();
      frame.commandList.reset(frame.allocator, null);

      // resources queued for release the last time this frame was used
      var pending = frame.toRelease;
      while( pending.length > 0 )
          pending.pop().release();

      var tmpBufs = frame.tmpBufToRelease;
      while( tmpBufs.length > 0 ) {
          var buf = tmpBufs.pop();
          if( buf != null )
              buf.release();
      }

      // temporary texture buffers of the previous frame: detach them from their
      // texture now, release them the next time the current frame begins
      if( previous != null ) {
          var toNullify = previous.tmpBufToNullify;
          while( toNullify.length > 0 ) {
              var tex = toNullify.pop();
              tmpBufs.push(tex.tmpBuf);
              tex.tmpBuf = null;
          }
      }

      beginQueries();

      // merge the available buffers into the used list, releasing those
      // which have not been used during the last 120 frames
      var kept = frame.usedBuffers;
      var cur = frame.availableBuffers;
      while( cur != null ) {
          var following = cur.next;
          if( cur.lastUse < frameCount - 120 )
              cur.buffer.release();
          else {
              cur.next = kept;
              kept = cur;
          }
          cur = following;
      }
      frame.availableBuffers = kept;
      frame.usedBuffers = null;

      transition(frame.backBuffer, RENDER_TARGET);
      frame.commandList.iaSetPrimitiveTopology(TRIANGLELIST);

      renderTargetViews.next();
      depthStenciViews.next();
      curStencilRef = -1;
      currentIndex = null;

      setRenderTarget(null);

      // fresh shader-visible heaps for this frame
      frame.shaderResourceCache.reset();
      frame.samplerCache.reset();
      frame.shaderResourceViews = frame.shaderResourceCache.next();
      frame.samplerViews = frame.samplerCache.next();

      var heaps = tmp.descriptors2;
      heaps[0] = @:privateAccess frame.shaderResourceViews.heap;
      heaps[1] = @:privateAccess frame.samplerViews.heap;
      frame.commandList.setDescriptorHeaps(heaps);
  }
  /**
      Clears the currently bound targets.

      @param color when not null, every texture listed in `currentRenderTargets` is cleared with it
      @param depth when not null, the bound depth view is cleared to this depth
      @param stencil when not null, the bound depth view is cleared to this stencil value
  **/
  override function clear(?color:Vector, ?depth:Float, ?stencil:Int) {
      if( color != null ) {
          var cc = tmp.clearColor;
          cc.r = color.r;
          cc.g = color.g;
          cc.b = color.b;
          cc.a = color.a;
          // NOTE(review): this loop does nothing when `currentRenderTargets` is empty,
          // which looks like it is also the case while the back buffer is bound - confirm.
          for( i in 0...currentRenderTargets.length ) {
              var tex = currentRenderTargets[i];
              var recreate = tex != null && tex.t.setClearColor(color);
              if( recreate ) {
                  // the clear color changed: reallocate the texture so that it uses
                  // the new clear value, and release the previous resource later
                  var old = tex.t;
                  tex.t = allocTexture(tex);
                  @:privateAccess tex.t.clearColorChanges = old.clearColorChanges;
                  frame.toRelease.push(old.res);
                  Driver.createRenderTargetView(tex.t.res, null, tmp.renderTargets[i]);
              }
              frame.commandList.clearRenderTargetView(tmp.renderTargets[i], cc);
          }
      }
      if( depth == null && stencil == null )
          return;
      frame.commandList.clearDepthStencilView(tmp.depthStencils[0], depth != null ? (stencil != null ? BOTH : DEPTH) : STENCIL, (depth:Float), stencil);
  }
  /**
      Signals the fence with the current `fenceValue`, blocks until the fence
      event is triggered for that value, then increments `fenceValue`.
  **/
  function waitGpu() {
      var value = fenceValue;
      Driver.signal(fence, value);
      fence.setEvent(value, fenceEvent);
      // -1 is passed as the timeout value
      fenceEvent.wait(-1);
      fenceValue++;
  }
  /**
      Resizes the swap chain and recreates the per-frame depth buffers.
      Does nothing when the size is unchanged. An ongoing frame is flushed first,
      and a new frame is started once the buffers have been recreated.
  **/
  override function resize(width:Int, height:Int) {
      var unchanged = currentWidth == width && currentHeight == height;
      if( unchanged )
          return;

      currentWidth = rtWidth = width;
      currentHeight = rtHeight = height;
      @:privateAccess defaultDepth.width = width;
      @:privateAccess defaultDepth.height = height;

      if( frame != null )
          flushFrame(true);
      waitGpu();

      // release the buffers created for the previous size
      for( f in frames ) {
          var back = f.backBuffer.res;
          if( back != null )
              back.release();
          if( f.depthBuffer != null )
              f.depthBuffer.release();
      }

      Driver.resize(width, height, BUFFER_COUNT, R8G8B8A8_UNORM);

      renderTargetViews.clear();
      depthStenciViews.clear();

      for( i in 0...frames.length ) {
          var f = frames[i];

          // back buffer owned by the swap chain
          f.backBuffer.res = Driver.getBackBuffer(i);
          f.backBuffer.res.setName("Backbuffer#"+i);
          f.backBuffer.state = PRESENT;

          // matching depth/stencil buffer
          var depthDesc = new ResourceDesc();
          var heapFlags = new haxe.EnumFlags();
          depthDesc.dimension = TEXTURE2D;
          depthDesc.width = width;
          depthDesc.height = height;
          depthDesc.depthOrArraySize = 1;
          depthDesc.mipLevels = 1;
          depthDesc.sampleDesc.count = 1;
          depthDesc.format = D24_UNORM_S8_UINT;
          depthDesc.flags.set(ALLOW_DEPTH_STENCIL);

          tmp.heap.type = DEFAULT;
          tmp.clearValue.format = depthDesc.format;
          tmp.clearValue.depth = 1;
          tmp.clearValue.stencil= 0;

          f.depthBuffer = Driver.createCommittedResource(tmp.heap, heapFlags, depthDesc, DEPTH_WRITE, tmp.clearValue);
          f.depthBuffer.setName("Depthbuffer#"+i);
      }

      beginFrame();
  }
  /**
      Intentionally empty: nothing is done here when the engine begins a frame.
  **/
  override function begin(frame:Int) {
      // nothing to do
  }
  /**
      The driver is reported as disposed once a device error has been flagged.
  **/
  override function isDisposed() {
      var disposed = hasDeviceError;
      return disposed;
  }
  /**
      Registers the creation callback.
      `onCreate(false)` is scheduled through a 1 ms timer, and `onCreate(true)`
      is stored as the context-lost handler.
  **/
  override function init( onCreate : Bool -> Void, forceSoftware = false ) {
      onContextLost = function() onCreate(true);
      haxe.Timer.delay(function() onCreate(false), 1);
  }
  /**
      Returns "DX12", followed by a space and the device name when `details` is true.
  **/
  override function getDriverName(details:Bool) {
      if( !details )
          return "DX12";
      return "DX12" + (" " + Driver.getDeviceName());
  }
  /**
      Flags the device as being in error; `isDisposed()` returns true afterwards.
  **/
  public function forceDeviceError() {
      this.hasDeviceError = true;
  }
  /**
      Records a resource barrier moving `res` from its recorded state to `to`
      and remembers the new state. Nothing is recorded when the state already matches.
  **/
  function transition( res : ResourceData, to : ResourceState ) {
      var from = res.state;
      if( from == to )
          return;
      var barrier = tmp.barrier;
      barrier.resource = res.res;
      barrier.stateBefore = from;
      barrier.stateAfter = to;
      frame.commandList.resourceBarrier(barrier);
      res.state = to;
  }
  /**
      Encodes the format of a render target into a 4-bit value for the pipeline signature:
      bits 2-3 hold (channel count - 1), bits 0-1 hold (component kind + 1),
      where kind is 0 for 8-bit, 1 for 16-bit float and 2 for 32-bit float formats.
      Throws for any other format.
  **/
  function getRTBits( tex : h3d.mat.Texture ) {
      inline function pack( channels : Int, kind : Int ) {
          return ((channels - 1) << 2) | (kind + 1);
      }
      return switch( tex.format ) {
      // one channel
      case R8: pack(1, 0);
      case R16F: pack(1, 1);
      case R32F: pack(1, 2);
      // two channels
      case RG8: pack(2, 0);
      case RG16F: pack(2, 1);
      case RG32F: pack(2, 2);
      // three channels
      case RGB8: pack(3, 0);
      case RGB16F: pack(3, 1);
      case RGB32F: pack(3, 2);
      // four channels
      case RGBA: pack(4, 0);
      case RGBA16F: pack(4, 1);
      case RGBA32F: pack(4, 2);
      default:
          throw "Unsupported RT format "+tex.format;
      }
  }
  /**
      Returns the depth view to bind together with `tex`.
      - `tex` null: view on the default (per-frame) depth buffer.
      - `tex` without depth buffer: depth gets disabled and null is returned.
      - otherwise: view on the depth buffer of `tex`, which must have the same size as `tex`.
  **/
  function getDepthViewFromTexture( tex : h3d.mat.Texture, readOnly : Bool ) {
      if( tex == null )
          return getDepthView(null, readOnly);
      var depth = tex.depthBuffer;
      if( depth == null ) {
          depthEnabled = false;
          return null;
      }
      if( depth.width != tex.width || depth.height != tex.height )
          throw "Depth size mismatch";
      return getDepthView(depth, readOnly);
  }
  /**
      Creates a depth/stencil view for `depthBuffer` (or for the depth buffer of the
      current frame when null), stores it in `tmp.depthStencils[0]` and enables depth.
      A texture depth buffer is also transitioned to DEPTH_READ or DEPTH_WRITE
      depending on `readOnly`. Returns `tmp.depthStencils`.
  **/
  function getDepthView( depthBuffer : h3d.mat.Texture, readOnly : Bool ) {
      var useDefault = depthBuffer == null;
      var target = useDefault ? frame.depthBuffer : depthBuffer.t.res;
      var handle = depthStenciViews.alloc(1);

      var dsv = new DepthStencilViewDesc();
      dsv.arraySize = 1;
      dsv.mipSlice = 0;
      dsv.firstArraySlice = 0;
      dsv.format = D24_UNORM_S8_UINT;
      dsv.viewDimension = TEXTURE2D;
      if( readOnly ) {
          dsv.flags.set(READ_ONLY_DEPTH);
          dsv.flags.set(READ_ONLY_STENCIL);
      }
      Driver.createDepthStencilView(target, dsv, handle);

      var views = tmp.depthStencils;
      views[0] = handle;
      depthEnabled = true;
      if( !useDefault )
          transition(depthBuffer.t, readOnly ? DEPTH_READ : DEPTH_WRITE);
      return views;
  }
  /**
      Returns the texture object standing for the default depth buffer.
  **/
  override function getDefaultDepthBuffer():h3d.mat.Texture {
      return this.defaultDepth;
  }
  /**
      Records `w` x `h` as the current render target size, then sets a scissor
      rectangle and a viewport covering that whole size.
  **/
  function initViewport(w,h) {
      rtWidth = w;
      rtHeight = h;

      var vp = tmp.viewport;
      vp.width = w;
      vp.height = h;
      vp.maxDepth = 1;

      var scissor = tmp.rect;
      scissor.top = 0;
      scissor.left = 0;
      scissor.right = w;
      scissor.bottom = h;

      var list = frame.commandList;
      list.rsSetScissorRects(1, scissor);
      list.rsSetViewports(1, vp);
  }
  /**
      Binds a single render target.

      @param tex texture to render into, or null to render into the back buffer
      @param layer array slice / cube face to bind (array and cube textures)
      @param mipLevel mip level to bind
      @param depthBinding NotBound disables depth, ReadOnly binds a read-only depth view

      A texture bound for the first time (no `WasCleared` flag) is cleared to transparent black.
  **/
  override function setRenderTarget(tex:Null<h3d.mat.Texture>, layer:Int = 0, mipLevel:Int = 0, depthBinding : h3d.Engine.DepthBinding = ReadWrite) {
      var hasTex = tex != null;
      if( hasTex ) {
          if( tex.t == null )
              tex.alloc();
          transition(tex.t, RENDER_TARGET);
      }
      depthEnabled = depthBinding != NotBound;

      var rtv = renderTargetViews.alloc(1);
      var sliced = hasTex && (tex.flags.has(IsArray) || tex.flags.has(Cube));

      // an explicit view description is only needed for a sub resource
      var viewDesc = null;
      if( layer != 0 || mipLevel != 0 || sliced ) {
          viewDesc = tmp.rtvDesc;
          viewDesc.format = tex.t.format;
          if( sliced ) {
              viewDesc.viewDimension = TEXTURE2DARRAY;
              viewDesc.mipSlice = mipLevel;
              viewDesc.firstArraySlice = layer;
              viewDesc.arraySize = 1;
              viewDesc.planeSlice = 0;
          } else {
              viewDesc.viewDimension = TEXTURE2D;
              viewDesc.mipSlice = mipLevel;
              viewDesc.planeSlice = 0;
          }
      }
      Driver.createRenderTargetView(hasTex ? tex.t.res : frame.backBuffer.res, viewDesc, rtv);
      tmp.renderTargets[0] = rtv;

      // first use of the texture as a target: clear it to (0,0,0,0)
      if( hasTex && !tex.flags.has(WasCleared) ) {
          tex.flags.set(WasCleared);
          var cc = tmp.clearColor;
          cc.r = 0;
          cc.g = 0;
          cc.b = 0;
          cc.a = 0;
          frame.commandList.clearRenderTargetView(tmp.renderTargets[0], cc);
      }

      // getDepthViewFromTexture() may turn depthEnabled off again
      frame.commandList.omSetRenderTargets(1, tmp.renderTargets, true, depthEnabled ? getDepthViewFromTexture(tex, depthBinding == ReadOnly ) : null);

      while( currentRenderTargets.length > 0 )
          currentRenderTargets.pop();
      if( hasTex )
          currentRenderTargets.push(tex);

      var w = hasTex ? tex.width >> mipLevel : currentWidth;
      var h = hasTex ? tex.height >> mipLevel : currentHeight;
      if( w == 0 ) w = 1;
      if( h == 0 ) h = 1;
      initViewport(w, h);

      // the depth flag is only part of the signature for texture targets:
      // the back buffer signature is always 0
      pipelineSignature.setI32(PSIGN_RENDER_TARGETS, hasTex ? (getRTBits(tex) | (depthEnabled ? 0x80000000 : 0)) : 0);
      needPipelineFlush = true;
  }
  /**
      Binds several textures as simultaneous render targets.

      @param textures textures to bind; the first one provides the viewport size
             and the depth buffer
      @param depthBinding NotBound disables depth, ReadOnly binds a read-only depth view

      Each texture is allocated if needed and transitioned to RENDER_TARGET.
      A texture bound for the first time (no `WasCleared` flag) is cleared to
      transparent black. Returns early, leaving the binding incomplete, when a
      texture allocation flags a device error.
  **/
  override function setRenderTargets(textures:Array<h3d.mat.Texture>, depthBinding : h3d.Engine.DepthBinding = ReadWrite) {
      while( currentRenderTargets.length > textures.length )
          currentRenderTargets.pop();
      depthEnabled = depthBinding != NotBound;
      var t0 = textures[0];
      var texViews = renderTargetViews.alloc(textures.length);
      var bits = 0;
      for( i => t in textures ) {
          if ( t.t == null ) {
              t.alloc();
              if ( hasDeviceError ) return;
          }
          var view = texViews.offset(renderTargetViews.stride * i);
          Driver.createRenderTargetView(t.t.res, null, view);
          tmp.renderTargets[i] = view;
          currentRenderTargets[i] = t;
          // 4 signature bits per target
          bits |= getRTBits(t) << (i << 2);
          // the resource has to be in RENDER_TARGET state before the clear below
          // is recorded (same order as in setRenderTarget)
          transition(t.t, RENDER_TARGET);
          if ( !t.flags.has(WasCleared) ) {
              t.flags.set(WasCleared);
              var clear = tmp.clearColor;
              clear.r = 0;
              clear.g = 0;
              clear.b = 0;
              clear.a = 0;
              frame.commandList.clearRenderTargetView(tmp.renderTargets[i], clear);
          }
      }
      // getDepthViewFromTexture() may turn depthEnabled off again
      frame.commandList.omSetRenderTargets(textures.length, tmp.renderTargets, true, depthEnabled ? getDepthViewFromTexture(t0, depthBinding == ReadOnly) : null);
      initViewport(t0.width, t0.height);
      pipelineSignature.setI32(PSIGN_RENDER_TARGETS, bits | (depthEnabled ? 0x80000000 : 0));
      needPipelineFlush = true;
  }
  /**
      Binds `depthBuffer` as a writable depth target without any color target,
      and sets the viewport to the size of the depth buffer.
  **/
  override function setDepth(depthBuffer : h3d.mat.Texture) {
      var depthView = getDepthView(depthBuffer, false);
      depthEnabled = true;
      frame.commandList.omSetRenderTargets(0, null, true, depthView);

      // no color target is bound anymore
      while( currentRenderTargets.length > 0 )
          currentRenderTargets.pop();

      initViewport(depthBuffer.width, depthBuffer.height);

      // signature: depth flag only, no color target bits
      pipelineSignature.setI32(PSIGN_RENDER_TARGETS, 0x80000000);
      needPipelineFlush = true;
  }
  /**
      Sets the scissor rectangle.
      Calling it with x == 0, y == 0 and negative width and height restores a
      rectangle covering the whole current render target.
  **/
  override function setRenderZone(x:Int, y:Int, width:Int, height:Int) {
      var scissor = tmp.rect;
      var fullTarget = width < 0 && height < 0 && x == 0 && y == 0;
      if( fullTarget ) {
          scissor.left = 0;
          scissor.top = 0;
          scissor.right = rtWidth;
          scissor.bottom = rtHeight;
      } else {
          scissor.left = x;
          scissor.top = y;
          scissor.right = x + width;
          scissor.bottom = y + height;
      }
      frame.commandList.rsSetScissorRects(1, scissor);
  }
  /**
      Captures layer 0 / mip 0 of the first currently bound render target into
      `pixels`. Throws when no render target is bound.
  **/
  override function captureRenderBuffer( pixels : hxd.Pixels ) {
      var target = currentRenderTargets[0];
      if( target != null ) {
          captureTexPixels(pixels, target, 0, 0);
          return;
      }
      throw "Can't capture main render buffer in DirectX";
  }
  /**
      Allocates a `hxd.Pixels` in the texture format and fills it with the
      content of `tex` for the given `layer` and `mipLevel`.

      When `region` is given it is first clamped (in place) to the texture
      bounds; its size shifted by `mipLevel` gives the output size and its
      min corner gives the capture origin. Without a region the whole mip is
      captured. Output dimensions are never smaller than 1x1.
  **/
  override function capturePixels(tex:h3d.mat.Texture, layer:Int, mipLevel:Int, ?region:h2d.col.IBounds):hxd.Pixels {
      var srcWidth = tex.width;
      var srcHeight = tex.height;
      var originX = 0;
      var originY = 0;
      if( region != null ) {
          // Clamp the caller's bounds to the texture; this mutates `region`.
          if( region.xMax > tex.width ) region.xMax = tex.width;
          if( region.yMax > tex.height ) region.yMax = tex.height;
          if( region.xMin < 0 ) region.xMin = 0;
          if( region.yMin < 0 ) region.yMin = 0;
          srcWidth = region.width;
          srcHeight = region.height;
          originX = region.xMin;
          originY = region.yMin;
      }
      var w = srcWidth >> mipLevel;
      var h = srcHeight >> mipLevel;
      if( w == 0 ) w = 1;
      if( h == 0 ) h = 1;
      var pixels = hxd.Pixels.alloc(w, h, tex.format);
      captureTexPixels(pixels, tex, layer, mipLevel, originX, originY);
      return pixels;
  }
  /**
      Copies a `pixels.width` x `pixels.height` area of one sub-resource of
      `tex` into `pixels`, starting at texel (`x`, `y`).

      The copy goes through a temporary READBACK buffer: the current frame is
      flushed, the GPU is waited on, the buffer is mapped and copied row by row
      (the GPU row pitch may differ from the tightly packed `pixels` stride),
      then a new frame is started.

      Does nothing when `pixels` has a zero dimension.
  **/
  function captureTexPixels( pixels: hxd.Pixels, tex:h3d.mat.Texture, layer:Int, mipLevel:Int, x : Int = 0, y : Int = 0) {
      if( pixels.width == 0 || pixels.height == 0 )
          return;
      var totalSize : hl.BytesAccess<Int64> = new hl.Bytes(8);
      var src = new TextureCopyLocation();
      src.res = tex.t.res;
      src.subResourceIndex = mipLevel + layer * tex.mipLevels;
      var srcDesc = makeTextureDesc(tex);
      var dst = new TextureCopyLocation();
      dst.type = PLACED_FOOTPRINT;
      // Query the layout (footprint and total byte size) of the source sub-resource.
      Driver.getCopyableFootprints(srcDesc, src.subResourceIndex, 1, 0, dst.placedFootprint, null, null, totalSize);
      var desc = new ResourceDesc();
      var flags = new haxe.EnumFlags();
      desc.dimension = BUFFER;
      desc.width = totalSize[0];
      desc.height = 1;
      desc.depthOrArraySize = 1;
      desc.mipLevels = 1;
      desc.sampleDesc.count = 1;
      desc.layout = ROW_MAJOR;
      tmp.heap.type = READBACK;
      var tmpBuf = Driver.createCommittedResource(tmp.heap, flags, desc, COPY_DEST, null);
      // The box is expressed with absolute right/bottom edges, so the origin
      // must be added to the requested size (it previously was not, which
      // produced a wrong or empty box for any non-zero x / y).
      var box = new Box();
      box.left = x;
      box.right = x + pixels.width;
      box.top = y;
      box.bottom = y + pixels.height;
      box.back = 1;
      transition(tex.t, COPY_SOURCE);
      dst.res = tmpBuf;
      frame.commandList.copyTextureRegion(dst, 0, 0, 0, src, box);
      // Submit and wait so the readback buffer content is available on the CPU.
      flushFrame();
      waitGpu();
      var output = tmpBuf.map(0, null);
      var stride = hxd.Pixels.calcStride(pixels.width, tex.format);
      var rowStride = dst.placedFootprint.footprint.rowPitch;
      if( rowStride == stride )
          (pixels.bytes:hl.Bytes).blit(pixels.offset, output, 0, stride * pixels.height);
      else {
          // Row pitches differ: copy each row separately.
          for( i in 0...pixels.height )
              (pixels.bytes:hl.Bytes).blit(pixels.offset + i * stride, output, i * rowStride, stride);
      }
      tmpBuf.unmap(0,null);
      tmpBuf.release();
      beginFrame();
  }
  750. // ---- SHADERS -----
  // Lookup table of float vertex formats. Indices 0 and 1 have no entry;
  // indices 2, 3 and 4 map to the two-, three- and four-component 32-bit float formats.
  // NOTE(review): presumably indexed by the number of float components of an input - confirm against the users of this table.
  static var VERTEX_FORMATS = [null,null,R32G32_FLOAT,R32G32B32_FLOAT,R32G32B32A32_FLOAT];
  /**
      Looks for a precompiled binary for the given shader source.

      If `code` contains a `//BIN=` marker followed later by a `#`, the text
      between them is Base64-decoded and returned. Otherwise the lookup is
      delegated to `shaderCache` when one is set. Returns null when nothing is
      found. `vertex` is not used by this implementation.
  **/
  function getBinaryPayload( vertex : Bool, code : String ) {
      var markerPos = code.indexOf("//BIN=");
      if( markerPos >= 0 ) {
          var endPos = code.indexOf("#", markerPos);
          if( endPos >= 0 ) {
              // Skip the 6 characters of the marker itself.
              var payloadStart = markerPos + 6;
              return haxe.crypto.Base64.decode(code.substr(payloadStart, endPos - payloadStart));
          }
      }
      if( shaderCache == null )
          return null;
      return shaderCache.resolveShaderBinary(code);
  }
  /**
      Returns the bytecode for one shader stage.

      The HLSL source is generated once (prefixed with `rootStr`) and stored in
      `sh.code`. A binary payload resolved by `getBinaryPayload` takes
      precedence; otherwise the source is compiled with `profile`.
  **/
  function compileSource( sh : hxsl.RuntimeShader.RuntimeShaderData, profile, baseRegister, rootStr = "" ) {
      var args = [];
      var out = new hxsl.HlslOut();
      out.baseRegister = baseRegister;
      if( sh.code == null ) {
          var generated = out.run(sh.data);
          sh.code = rootStr + generated;
      }
      var precompiled = getBinaryPayload(sh.vertex, sh.code);
      if( precompiled != null )
          return precompiled;
      return compiler.compile(sh.code, profile, args);
  }
  /**
      Returns the generated HLSL of the vertex stage followed by the fragment
      stage, separated by four newlines. Each stage uses its own `HlslOut`.
  **/
  override function getNativeShaderCode( shader : hxsl.RuntimeShader ) {
      var vertexSource = new hxsl.HlslOut().run(shader.vertex.data);
      var fragmentSource = new hxsl.HlslOut().run(shader.fragment.data);
      return [vertexSource, fragmentSource].join("\n\n\n\n");
  }
  /**
      Builds a one-line `#define <name> ...` text describing the root signature:
      first the root flags (`0` when none is set, otherwise the flag names
      joined by `|`), then one entry per element of `params`.

      Constant parameters produce a `RootConstants(...)` entry; every other
      parameter is reinterpreted as a descriptor table and produces a
      `DescriptorTable(...)` entry for CBV / SRV / Sampler ranges. Tables
      without a range, and any error raised while describing a table (including
      the UAV "Not supported" case), are skipped silently.
  **/
  function stringifyRootSignature( sign : RootSignatureDesc, name : String, params : hl.CArray<RootParameterConstants> ) : String {
      var s = '#define ${name} "RootFlags(';
      if ( sign.flags.toInt() != 0 ) {
          // RootFlags
          for ( flagName in haxe.EnumTools.getConstructors(RootSignatureFlag) ) {
              if ( sign.flags.has(haxe.EnumTools.createByName(RootSignatureFlag, flagName)) )
                  s += Std.string(flagName) + '|';
          }
          // drop the trailing separator
          s = s.substr(0, s.length - 1);
      } else
          s += '0'; // no flags
      s += ')",';
      for ( param in params ) {
          var vis = 'SHADER_VISIBILITY_${param.shaderVisibility == VERTEX ? "VERTEX" : "PIXEL"}';
          if ( param.parameterType == CONSTANTS ) {
              var shaderRegister = param.shaderRegister;
              s += 'RootConstants(num32BitConstants=${param.num32BitValues},b${shaderRegister}, visibility=${vis}),';
              continue;
          }
          try {
              var table = unsafeCastTo(param, RootParameterDescriptorTable);
              if ( table == null ) continue;
              var range = table.descriptorRanges;
              if ( range == null ) continue;
              var baseShaderRegister = range.baseShaderRegister;
              switch ( range.rangeType ) {
              case CBV:
                  s += 'DescriptorTable(CBV(b${baseShaderRegister}), visibility = ${vis}),';
              case SRV:
                  s += 'DescriptorTable(SRV(t${baseShaderRegister},numDescriptors = ${range.numDescriptors}), visibility = ${vis}),';
              case SAMPLER:
                  s += 'DescriptorTable(Sampler(s${baseShaderRegister}, space=${range.registerSpace}, numDescriptors = ${range.numDescriptors}), visibility = ${vis}),';
              case UAV:
                  throw "Not supported";
              }
          } catch ( e : Dynamic ) {
              // best effort: ignore this parameter and move on to the next one
              continue;
          }
      }
      s += '\n';
      return s;
  }
  /**
      Reinterprets `v` as an `R` without any runtime check, by storing it in a
      one-element native array and reading it back through a cast of that array.
      `c` only serves to select the result type.
  **/
  inline function unsafeCastTo<T,R>( v : T, c : Class<R> ) : R {
      var holder = new hl.NativeArray<T>(1);
      holder[0] = v;
      var reinterpreted : hl.NativeArray<R> = cast holder;
      return reinterpreted[0];
  }
  /**
      Lays out the root signature parameters for the vertex and fragment stages
      of `shader`.

      Globals and params are root constants by default. When the estimated
      total exceeds 64 units, the params of one or both stages are moved to a
      CBV descriptor table instead; if that is still not enough, "Too many
      globals" is thrown.

      Returns the signature description, the raw parameter array, the first
      register used by the fragment stage and the per-stage register mapping.
  **/
  function computeRootSignature( shader : hxsl.RuntimeShader ) {
      var params = hl.CArray.alloc(RootParameterConstants,16);
      var paramsCount = 0, regCount = 0;
      // keeps the allocated ranges referenced from Haxe
      var texDescs = [];
      var vertexParamsCBV = false;
      var fragmentParamsCBV = false;

      // Takes the next parameter slot as a single-range descriptor table.
      function allocDescTable(vis) {
          var table = unsafeCastTo(params[paramsCount++], RootParameterDescriptorTable);
          table.parameterType = DESCRIPTOR_TABLE;
          table.numDescriptorRanges = 1;
          var range = new DescriptorRange();
          texDescs.push(range);
          table.descriptorRanges = range;
          table.shaderVisibility = vis;
          return range;
      }

      // Reserves a register (even when `size` is 0) and, for a non-empty block,
      // a parameter slot. Returns -1 for an empty block, the parameter index for
      // root constants, or the parameter index with bit 0x100 set for a CBV table.
      function allocConsts(size,vis,useCBV) {
          var reg = regCount++;
          if( size == 0 ) return -1;
          if( useCBV ) {
              var tableIndex = paramsCount;
              var range = allocDescTable(vis);
              range.rangeType = CBV;
              range.numDescriptors = 1;
              range.baseShaderRegister = reg;
              range.registerSpace = 0;
              return tableIndex | 0x100;
          }
          var constIndex = paramsCount++;
          var p = params[constIndex];
          p.parameterType = CONSTANTS;
          p.shaderRegister = reg;
          p.shaderVisibility = vis;
          p.num32BitValues = size << 2;
          return constIndex;
      }

      // Allocates every parameter of one stage: globals, params, buffers, then
      // one SRV table and one sampler table covering all textures.
      function allocParams( sh : hxsl.RuntimeShader.RuntimeShaderData ) {
          var vis = sh.vertex ? VERTEX : PIXEL;
          var regs = new ShaderRegisters();
          regs.globals = allocConsts(sh.globalsSize, vis, false);
          regs.params = allocConsts(sh.paramsSize, vis, sh.vertex ? vertexParamsCBV : fragmentParamsCBV);
          if( sh.bufferCount > 0 ) {
              regs.buffers = paramsCount;
              for( i in 0...sh.bufferCount )
                  allocConsts(1, vis, true);
          }
          if( sh.texturesCount > 0 ) {
              regs.texturesCount = sh.texturesCount;
              regs.textures = paramsCount;
              // Record the size of the 2D sampler array, if the shader declares one.
              var tex = sh.textures;
              while( tex != null ) {
                  switch( tex.type ) {
                  case TArray( TSampler2D , SConst(n) ): regs.textures2DCount = n;
                  default:
                  }
                  tex = tex.next;
              }
              var srvRange = allocDescTable(vis);
              srvRange.rangeType = SRV;
              srvRange.baseShaderRegister = 0;
              srvRange.registerSpace = 0;
              srvRange.numDescriptors = sh.texturesCount;
              regs.samplers = paramsCount;
              var samplerRange = allocDescTable(vis);
              samplerRange.rangeType = SAMPLER;
              samplerRange.baseShaderRegister = 0;
              samplerRange.registerSpace = 0;
              samplerRange.numDescriptors = sh.texturesCount;
          }
          return regs;
      }

      // Costs in units:
      // Descriptor Tables cost 1 each
      // Root CBVs cost 2 each
      // Root SRVs cost 2 each
      // Root UAVs cost 2 each
      // Root Constants cost 1 per 32-bit value
      function calcSize( sh : hxsl.RuntimeShader.RuntimeShaderData ) {
          var units = (sh.globalsSize + sh.paramsSize) << 2;
          units += sh.texturesCount;
          return units;
      }

      var totalVertex = calcSize(shader.vertex);
      var totalFragment = calcSize(shader.fragment);
      var total = totalVertex + totalFragment;
      if( total > 64 ) {
          var vertexCost = shader.vertex.paramsSize << 2;
          var fragmentCost = shader.fragment.paramsSize << 2;
          var withoutVP = total - vertexCost;
          var withoutFP = total - fragmentCost;
          // Move the vertex params first when that alone brings the total under
          // the limit, or when moving the fragment params alone would not.
          if( withoutVP < 64 || withoutFP > 64 ) {
              vertexParamsCBV = true;
              total -= vertexCost;
          }
          if( total > 64 ) {
              fragmentParamsCBV = true;
              total -= fragmentCost;
          }
          if( total > 64 )
              throw "Too many globals";
      }

      var vertexRegisters = allocParams(shader.vertex);
      var fragmentRegStart = regCount;
      var fragmentRegisters = allocParams(shader.fragment);
      if( paramsCount > params.length )
          throw "ASSERT : Too many parameters";

      var sign = new RootSignatureDesc();
      sign.flags.set(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT);
      sign.flags.set(DENY_HULL_SHADER_ROOT_ACCESS);
      sign.flags.set(DENY_DOMAIN_SHADER_ROOT_ACCESS);
      sign.flags.set(DENY_GEOMETRY_SHADER_ROOT_ACCESS);
      sign.numParameters = paramsCount;
      sign.parameters = params[0];

      return { sign : sign, fragmentRegStart : fragmentRegStart, vertexRegisters : vertexRegisters, fragmentRegisters : fragmentRegisters, params : params };
  }
  /**
      Builds the `CompiledShader` for `shader`: computes and serializes its
      root signature, compiles both stages, describes one input slot per vertex
      input and prepares a pipeline state description with default values.

      The pipeline state object itself is not created here.
  **/
  function compileShader( shader : hxsl.RuntimeShader ) : CompiledShader {
      var root = computeRootSignature(shader);

      var c = new CompiledShader();
      c.vertexRegisters = root.vertexRegisters;
      c.fragmentRegisters = root.fragmentRegisters;

      // The textual root signature is prepended to the generated source of both stages.
      var rootStr = stringifyRootSignature(root.sign, "ROOT_SIGNATURE", root.params);
      var vs = compileSource(shader.vertex, "vs_6_0", 0, rootStr);
      var ps = compileSource(shader.fragment, "ps_6_0", root.fragmentRegStart, rootStr);

      var signSize = 0;
      var signBytes = Driver.serializeRootSignature(root.sign, 1, signSize);
      var sign = new RootSignature(signBytes,signSize);

      // Collect the vertex inputs, in declaration order.
      var inputs = [];
      for( v in shader.vertex.data.vars ) {
          switch( v.kind ) {
          case Input:
              inputs.push(v);
          default:
          }
      }

      var inputLayout = hl.CArray.alloc(InputElementDesc, inputs.length);
      var format : Array<hxd.BufferFormat.BufferInput> = [];
      for( i => v in inputs ) {
          var elt = inputLayout[i];
          // A PerInstance(k) qualifier turns the input into per-instance data with step rate k.
          var perInst = 0;
          if( v.qualifiers != null ) {
              for( q in v.qualifiers ) {
                  switch( q ) {
                  case PerInstance(k): perInst = k;
                  default:
                  }
              }
          }
          elt.semanticName = @:privateAccess hxsl.HlslOut.semanticName(v.name).toUtf8();
          elt.inputSlot = i;
          format.push({ name : v.name, type : hxd.BufferFormat.InputFormat.fromHXSL(v.type) });
          if( perInst > 0 ) {
              elt.inputSlotClass = PER_INSTANCE_DATA;
              elt.instanceDataStepRate = perInst;
          } else
              elt.inputSlotClass = PER_VERTEX_DATA;
      }

      var p = new GraphicsPipelineStateDesc();
      p.rootSignature = sign;
      p.vs.bytecodeLength = vs.length;
      p.vs.shaderBytecode = vs;
      p.ps.bytecodeLength = ps.length;
      p.ps.shaderBytecode = ps;
      p.rasterizerState.fillMode = SOLID;
      p.rasterizerState.cullMode = NONE;
      p.primitiveTopologyType = TRIANGLE;
      p.numRenderTargets = 1;
      p.rtvFormats[0] = R8G8B8A8_UNORM;
      p.dsvFormat = UNKNOWN;
      p.sampleDesc.count = 1;
      p.sampleMask = -1;
      p.inputLayout.inputElementDescs = inputLayout[0];
      p.inputLayout.numElements = inputLayout.length;
      //Driver.createGraphicsPipelineState(p);

      c.format = hxd.BufferFormat.make(format);
      c.pipeline = p;
      c.rootSignature = sign;
      c.inputLayout = inputLayout;
      c.inputCount = inputs.length;
      c.shader = shader;

      for( i in 0...inputs.length )
          inputLayout[i].alignedByteOffset = 1; // will trigger error if not set in makePipeline()
      return c;
  }
  /**
      Queues the GPU resource held by `r` on the current frame's release list
      and resets `r` (no resource, state PRESENT).
  **/
  function disposeResource( r : ResourceData ) {
      var pending = r.res;
      frame.toRelease.push(pending);
      r.state = PRESENT;
      r.res = null;
  }
  1016. // ----- BUFFERS
  /**
      Creates a committed buffer resource of `size` bytes on a heap of type
      `heapType`, in the initial `state`. Overwrites `tmp.heap.type`.
  **/
  function allocGPU( size : Int, heapType, state ) {
      var heapFlags = new haxe.EnumFlags();
      var bufferDesc = new ResourceDesc();
      bufferDesc.dimension = BUFFER;
      bufferDesc.layout = ROW_MAJOR;
      bufferDesc.width = size;
      bufferDesc.height = 1;
      bufferDesc.depthOrArraySize = 1;
      bufferDesc.mipLevels = 1;
      bufferDesc.sampleDesc.count = 1;
      tmp.heap.type = heapType;
      return Driver.createCommittedResource(tmp.heap, heapFlags, bufferDesc, state, null);
  }
  /**
      Allocates the GPU storage for `m` on the default heap, in COPY_DEST state.

      Uniform buffers get a size rounded by `calcCBVSize` and no vertex view;
      every other buffer gets a vertex buffer view covering its exact size.
      `uploaded` starts as true only for buffers flagged `Dynamic`.
  **/
  override function allocBuffer( m : h3d.Buffer ) : GPUBuffer {
      var isUniform = m.flags.has(UniformBuffer);
      var dataSize = m.getMemSize();
      var allocSize = isUniform ? calcCBVSize(dataSize) : dataSize;

      var buf = new VertexBufferData();
      buf.state = COPY_DEST;
      buf.res = allocGPU(allocSize, DEFAULT, COPY_DEST);
      buf.size = allocSize;
      buf.uploaded = m.flags.has(Dynamic);

      if( !isUniform ) {
          var view = new VertexBufferView();
          view.bufferLocation = buf.res.getGpuVirtualAddress();
          view.sizeInBytes = dataSize;
          view.strideInBytes = m.format.strideBytes;
          buf.view = view;
      }
      return buf;
  }
  /**
      Allocates an index buffer holding `count` indices on the default heap.
      `bits` stores the shift converting an index count to bytes: 2 for 32-bit
      indices, 1 for 16-bit ones.
  **/
  override function allocIndexes( count : Int, is32 : Bool ) : IndexBuffer {
      var shift = is32 ? 2 : 1;
      var byteSize = count << shift;

      var buf = new IndexBufferData();
      buf.count = count;
      buf.bits = shift;
      buf.state = COPY_DEST;
      buf.res = allocGPU(byteSize, DEFAULT, COPY_DEST);

      var view = new IndexBufferView();
      view.format = is32 ? R32_UINT : R16_UINT;
      view.sizeInBytes = byteSize;
      view.bufferLocation = buf.res.getGpuVirtualAddress();
      buf.view = view;
      return buf;
  }
  /**
      Creates the GPU buffer backing an instance buffer: 5 32-bit values per
      command are copied from `bytes` through a temporary upload buffer, then
      the buffer is moved from COPY_DEST to NON_PIXEL_SHADER_RESOURCE.
  **/
  override function allocInstanceBuffer(b:InstanceBuffer, bytes:haxe.io.Bytes) {
      var byteSize = b.commandCount * 5 * 4;
      var gpuBuf = allocGPU(byteSize, DEFAULT, COPY_DEST);
      var staging = allocDynamicBuffer(bytes, byteSize);
      frame.commandList.copyBufferRegion(gpuBuf, 0, staging, 0, byteSize);
      b.data = gpuBuf;

      var barrier = tmp.barrier;
      barrier.resource = gpuBuf;
      barrier.stateBefore = COPY_DEST;
      barrier.stateAfter = NON_PIXEL_SHADER_RESOURCE;
      frame.commandList.resourceBarrier(barrier);
  }
  /** Releases the GPU data of a vertex/uniform buffer through `disposeResource`. **/
  override function disposeBuffer(v:Buffer) {
      var data = v.vbuf;
      disposeResource(data);
  }
  /** Releases the GPU data of an index buffer through `disposeResource`. **/
  override function disposeIndexes(v:IndexBuffer) {
      var indexData = v;
      disposeResource(indexData);
  }
  /**
      Queues the GPU resource of an instance buffer on the current frame's
      release list and clears `b.data`.
  **/
  override function disposeInstanceBuffer(b:InstanceBuffer) {
      var res : GpuResource = b.data;
      frame.toRelease.push(res);
      // disposeResource(b.data);
      b.data = null;
  }
  /**
      Copies `bytesCount` bytes, read from the start of `bytes`, into the GPU
      buffer `b` at byte offset `startByte`.

      `bytes` must already point at the first source byte (the callers in this
      file apply their own source offset before calling); `startByte` is only
      the destination offset.

      A buffer already marked `uploaded` goes through the per-frame pool of
      `allocDynamicBuffer`. Otherwise a dedicated upload buffer is created,
      queued for release with the frame, and `b.uploaded` is set.
  **/
  function updateBuffer( b : BufferData, bytes : hl.Bytes, startByte : Int, bytesCount : Int ) {
      var tmpBuf;
      if( b.uploaded ) {
          // Read from the start of `bytes`, like the branch below: `startByte`
          // is a destination offset and must not be applied to the source.
          tmpBuf = allocDynamicBuffer(bytes, bytesCount);
      } else {
          var size = calcCBVSize(bytesCount);
          tmpBuf = allocGPU(size, UPLOAD, GENERIC_READ);
          var ptr = tmpBuf.map(0, null);
          ptr.blit(0, bytes, 0, bytesCount);
          tmpBuf.unmap(0,null);
      }
      frame.commandList.copyBufferRegion(b.res, startByte, tmpBuf, 0, bytesCount);
      if( !b.uploaded ) {
          frame.toRelease.push(tmpBuf);
          b.uploaded = true;
      }
  }
  /**
      Uploads `indiceCount` indices from `buf` (starting at index `bufPos`) into
      `i` at index `startIndice`. Index positions are converted to bytes with
      `i.bits`; the buffer ends in INDEX_BUFFER state.
  **/
  override function uploadIndexBuffer(i:IndexBuffer, startIndice:Int, indiceCount:Int, buf:hxd.IndexBuffer, bufPos:Int) {
      transition(i, COPY_DEST);
      var source = hl.Bytes.getArray(buf.getNative()).offset(bufPos << i.bits);
      var destOffset = startIndice << i.bits;
      var byteCount = indiceCount << i.bits;
      updateBuffer(i, source, destOffset, byteCount);
      transition(i, INDEX_BUFFER);
  }
  /**
      Same as `uploadIndexBuffer` but reading the indices from raw bytes;
      `bufPos` is still expressed in indices and scaled with `i.bits`.
  **/
  override function uploadIndexBytes(i:IndexBuffer, startIndice:Int, indiceCount:Int, buf:haxe.io.Bytes, bufPos:Int) {
      transition(i, COPY_DEST);
      var source = @:privateAccess buf.b.offset(bufPos << i.bits);
      var destOffset = startIndice << i.bits;
      var byteCount = indiceCount << i.bits;
      updateBuffer(i, source, destOffset, byteCount);
      transition(i, INDEX_BUFFER);
  }
  /**
      Uploads `vertexCount` vertices from a float buffer into `b`, starting at
      vertex `startVertex`. `bufPos` is a float index (4 bytes each); vertex
      positions are scaled by the buffer stride. The buffer ends in
      VERTEX_AND_CONSTANT_BUFFER state.
  **/
  override function uploadBufferData(b:Buffer, startVertex:Int, vertexCount:Int, buf:hxd.FloatBuffer, bufPos:Int) {
      var source = hl.Bytes.getArray(buf.getNative()).offset(bufPos << 2);
      var target = b.vbuf;
      transition(target, COPY_DEST);
      var strideBytes = b.format.strideBytes;
      updateBuffer(target, source, startVertex * strideBytes, vertexCount * strideBytes);
      transition(target, VERTEX_AND_CONSTANT_BUFFER);
  }
  /**
      Same as `uploadBufferData` but reading from raw bytes; `bufPos` is a byte
      offset into `buf`.
  **/
  override function uploadBufferBytes(b:Buffer, startVertex:Int, vertexCount:Int, buf:haxe.io.Bytes, bufPos:Int) {
      var target = b.vbuf;
      transition(target, COPY_DEST);
      var source = @:privateAccess buf.b.offset(bufPos);
      var strideBytes = b.format.strideBytes;
      updateBuffer(target, source, startVertex * strideBytes, vertexCount * strideBytes);
      transition(target, VERTEX_AND_CONSTANT_BUFFER);
  }
  1122. // ------------ TEXTURES -------
  /**
      Maps the format of `t` to the matching DXGI format.
      Throws "assert" for an S3TC variant outside 1..7 and
      "Unsupported texture format ..." for any format not listed.
  **/
  function getTextureFormat( t : h3d.mat.Texture ) : DxgiFormat {
      var format : DxgiFormat = switch( t.format ) {
      // single channel
      case R8: R8_UNORM;
      case R16U: R16_UNORM;
      case R16F: R16_FLOAT;
      case R32F: R32_FLOAT;
      // two channels
      case RG8: R8G8_UNORM;
      case RG16U: R16G16_UNORM;
      case RG16F: R16G16_FLOAT;
      case RG32F: R32G32_FLOAT;
      // three channels
      case RGB32F: R32G32B32_FLOAT;
      case RG11B10UF: R11G11B10_FLOAT;
      // four channels
      case RGBA: R8G8B8A8_UNORM;
      case SRGB_ALPHA: R8G8B8A8_UNORM_SRGB;
      case RGB10A2: R10G10B10A2_UNORM;
      case RGBA16U: R16G16B16A16_UNORM;
      case RGBA16F: R16G16B16A16_FLOAT;
      case RGBA32F: R32G32B32A32_FLOAT;
      // block compressed
      case S3TC(n):
          switch( n ) {
          case 7: BC7_UNORM;
          case 6: BC6H_UF16;
          case 5: BC5_UNORM;
          case 4: BC4_UNORM;
          case 3: BC3_UNORM;
          case 2: BC2_UNORM;
          case 1: BC1_UNORM;
          default: throw "assert";
          }
      default:
          throw "Unsupported texture format " + t.format;
      }
      return format;
  }
  /**
      Builds the 2D texture resource description of `t`: size, layer count,
      mip count and DXGI format, with a sample count of 1.
  **/
  function makeTextureDesc(t:h3d.mat.Texture) {
      var texDesc = new ResourceDesc();
      texDesc.dimension = TEXTURE2D;
      texDesc.sampleDesc.count = 1;
      texDesc.width = t.width;
      texDesc.height = t.height;
      texDesc.mipLevels = t.mipLevels;
      texDesc.depthOrArraySize = t.layerCount;
      texDesc.format = getTextureFormat(t);
      return texDesc;
  }
  /**
      Allocates the GPU resource of texture `t` on the default heap.

      Render targets are created in RENDER_TARGET state with a clear value
      (the color of the previous allocation when there is one, transparent
      black otherwise); other textures start in COPY_DEST state. The
      `WasCleared` flag of `t` is reset.

      Throws when an S3TC texture has a width or height that is not a multiple of 4.
  **/
  override function allocTexture(t:h3d.mat.Texture):Texture {
      var badBlockSize = (t.width & 3) != 0 || (t.height & 3) != 0;
      if( t.format.match(S3TC(_)) && badBlockSize )
          throw t+" is compressed "+t.width+"x"+t.height+" but should be a 4x4 multiple";

      var isRT = t.flags.has(Target);
      var heapFlags = new haxe.EnumFlags();
      var desc = makeTextureDesc(t);
      var td = new TextureData();
      td.format = desc.format;
      tmp.heap.type = DEFAULT;

      var clear = null;
      if( isRT ) {
          var hasPrevColor = t.t != null && t.t.color != null;
          var color = hasPrevColor ? t.t.color : new h3d.Vector(0,0,0,0); // reuse prev color
          desc.flags.set(ALLOW_RENDER_TARGET);
          clear = tmp.clearValue;
          clear.format = desc.format;
          clear.color.r = color.r;
          clear.color.g = color.g;
          clear.color.b = color.b;
          clear.color.a = color.a;
          td.color = color;
      }

      var initialState = isRT ? RENDER_TARGET : COPY_DEST;
      td.state = initialState;
      td.res = Driver.createCommittedResource(tmp.heap, heapFlags, desc, initialState, clear);
      td.res.setName(t.name == null ? "Texture#"+t.id : t.name);
      t.lastFrame = frameCount;
      t.flags.unset(WasCleared);
      return td;
  }
  /**
      Allocates a single-mip, single-layer depth/stencil texture of the size of
      `b`, stored as R24G8_TYPELESS, created in DEPTH_WRITE state with a clear
      value of depth 1 / stencil 0.
  **/
  override function allocDepthBuffer(b:h3d.mat.Texture):Texture {
      var heapFlags = new haxe.EnumFlags();

      var depthDesc = new ResourceDesc();
      depthDesc.dimension = TEXTURE2D;
      depthDesc.format = R24G8_TYPELESS;
      depthDesc.width = b.width;
      depthDesc.height = b.height;
      depthDesc.depthOrArraySize = 1;
      depthDesc.mipLevels = 1;
      depthDesc.sampleDesc.count = 1;
      depthDesc.flags.set(ALLOW_DEPTH_STENCIL);

      tmp.heap.type = DEFAULT;
      tmp.clearValue.format = D24_UNORM_S8_UINT;
      tmp.clearValue.depth = 1;
      tmp.clearValue.stencil= 0;

      var td = new TextureData();
      td.state = DEPTH_WRITE;
      td.res = Driver.createCommittedResource(tmp.heap, heapFlags, depthDesc, DEPTH_WRITE, tmp.clearValue);
      return td;
  }
  /** Queues the GPU resource of `t` for release and detaches it from the texture. **/
  override function disposeTexture(t:h3d.mat.Texture) {
      var data = t.t;
      disposeResource(data);
      t.t = null;
  }
  /** Queues the GPU resource of depth buffer `t` for release and detaches it from the texture. **/
  override function disposeDepthBuffer(t:h3d.mat.Texture) {
      var data = t.t;
      disposeResource(data);
      t.t = null;
  }
  /**
      Uploads a bitmap to one mip level / side of `t` by extracting its pixels,
      forwarding them to `uploadTexturePixels` and disposing them afterwards.
  **/
  override function uploadTextureBitmap(t:h3d.mat.Texture, bmp:hxd.BitmapData, mipLevel:Int, side:Int) {
      var extracted = bmp.getPixels();
      uploadTexturePixels(t, extracted, mipLevel, side);
      extracted.dispose();
  }
  /**
      Uploads `pixels` (converted in place to the texture format) to the
      sub-resource of `t` selected by `mipLevel` and `side`.

      The data goes through an intermediate upload buffer sized for all the
      sub-resources of the texture, created on demand and stored in
      `t.t.tmpBuf`; the texture is pushed on `frame.tmpBufToNullify`. The
      texture ends in PIXEL_SHADER_RESOURCE state and is flagged `WasCleared`.

      Throws when `mipLevel` is out of range or when the update fails.
  **/
  override function uploadTexturePixels(t:h3d.mat.Texture, pixels:hxd.Pixels, mipLevel:Int, side:Int) {
      pixels.convert(t.format);
      if( mipLevel >= t.mipLevels ) throw "Mip level outside texture range : " + mipLevel + " (max = " + (t.mipLevels - 1) + ")";
      tmp.heap.type = UPLOAD;
      var subRes = mipLevel + side * t.mipLevels;
      var nbRes = t.mipLevels * t.layerCount;
      var texData = t.t;
      // Todo : optimize for video, currently allocating a new tmpBuf every frame.
      if( texData.tmpBuf == null ) {
          var tmpSize = texData.res.getRequiredIntermediateSize(0, nbRes).low;
          texData.tmpBuf = allocGPU(tmpSize, UPLOAD, GENERIC_READ);
      }
      // Total size of the sub-resources preceding `subRes` = offset in the upload buffer.
      var previousSize : hl.BytesAccess<Int64> = new hl.Bytes(8);
      Driver.getCopyableFootprints(makeTextureDesc(t), 0, subRes, 0, null, null, null, previousSize);
      var offset = previousSize[0];
      if( offset < 0 )
          offset = 0;
      var stride = @:privateAccess pixels.stride;
      switch( t.format ) {
      case S3TC(n): stride = pixels.width * ((n == 1 || n == 4) ? 2 : 4); // "uncompressed" stride ?
      default:
      }
      var upd = new SubResourceData();
      upd.data = (pixels.bytes:hl.Bytes).offset(pixels.offset);
      upd.rowPitch = stride;
      upd.slicePitch = pixels.dataSize;
      transition(texData, COPY_DEST);
      var ok = Driver.updateSubResource(frame.commandList, texData.res, texData.tmpBuf, offset, subRes, 1, upd);
      if( !ok )
          throw "Failed to update sub resource";
      transition(texData, PIXEL_SHADER_RESOURCE);
      frame.tmpBufToNullify.push(texData);
      t.flags.set(WasCleared);
  }
  /**
      Copies `from` into `to` on the GPU.

      Returns false without copying when `from` is not allocated, when format,
      size or layer count differ, or when `to` could not be allocated.
      `to` is allocated on demand; `from` is protected from auto-dispose while
      that happens and its `lastFrame` is restored afterwards.

      After the copy, every texture among the currently bound render targets
      that took part in it is put back in RENDER_TARGET state.
  **/
  override function copyTexture(from:h3d.mat.Texture, to:h3d.mat.Texture):Bool {
      if( from.t == null || from.format != to.format || from.width != to.width || from.height != to.height || from.layerCount != to.layerCount )
          return false;
      if( to.t == null ) {
          var prev = from.lastFrame;
          from.preventAutoDispose();
          to.alloc();
          from.lastFrame = prev;
          if( from.t == null ) throw "assert";
          if( to.t == null ) return false;
      }
      transition(from.t, COPY_SOURCE);
      transition(to.t, COPY_DEST);
      var dst = new TextureCopyLocation();
      var src = new TextureCopyLocation();
      dst.res = to.t.res;
      src.res = from.t.res;
      frame.commandList.copyTextureRegion(dst, 0, 0, 0, src, null);
      to.flags.set(WasCleared);
      // Both `from` and `to` may be bound as render targets at the same time:
      // restore each of them instead of stopping at the first match, which
      // would leave the other one in a copy state while still bound.
      for( t in currentRenderTargets )
          if( t == to || t == from )
              transition(t.t, RENDER_TARGET);
      return true;
  }
  1284. // ----- PIPELINE UPDATE
  /**
      Uploads the `which` kind of shader data for the current shader, vertex
      stage first, then fragment stage.
  **/
  override function uploadShaderBuffers(buffers:h3d.shader.Buffers, which:h3d.shader.Buffers.BufferKind) {
      var active = currentShader;
      uploadBuffers(buffers, buffers.vertex, which, active.shader.vertex, active.vertexRegisters);
      // Re-read currentShader: the first call may have re-selected it.
      active = currentShader;
      uploadBuffers(buffers, buffers.fragment, which, active.shader.fragment, active.fragmentRegisters);
  }
  /**
      Rounds `dataSize` up to the next multiple of 256 bytes
      (a value that is already a multiple is returned unchanged).
  **/
  function calcCBVSize( dataSize : Int ) {
      // the view must be a mult of 256
      return (dataSize + 0xFF) & ~0xFF;
  }
  /**
      Returns an upload buffer containing the first `dataSize` bytes of `data`.

      The size is rounded with `calcCBVSize` (minimum 1). A buffer from the
      frame's available list is reused when its size is at least the wanted
      size and less than twice that; it is then moved to the used list.
      Otherwise a new upload buffer is created and added to the used list.
  **/
  function allocDynamicBuffer( data : hl.Bytes, dataSize : Int ) {
      var wanted = calcCBVSize(dataSize);
      if( wanted == 0 ) wanted = 1;

      var tmpBuf = null;
      var prev = null;
      var cur = frame.availableBuffers;
      while( cur != null ) {
          var fits = cur.size >= wanted && cur.size < (wanted << 1);
          if( fits ) {
              tmpBuf = cur.buffer;
              // unlink from the available list
              if( prev == null )
                  frame.availableBuffers = cur.next;
              else
                  prev.next = cur.next;
              // push on the used list
              cur.lastUse = frameCount;
              cur.next = frame.usedBuffers;
              frame.usedBuffers = cur;
              break;
          }
          prev = cur;
          cur = cur.next;
      }

      if( tmpBuf == null ) {
          tmpBuf = allocGPU(wanted, UPLOAD, GENERIC_READ);
          var entry = new TempBuffer();
          entry.buffer = tmpBuf;
          entry.size = wanted;
          entry.lastUse = frameCount;
          entry.next = frame.usedBuffers;
          frame.usedBuffers = entry;
      }

      var mapped = tmpBuf.map(0, null);
      mapped.blit(0, data, 0, dataSize);
      tmpBuf.unmap(0,null);
      return tmpBuf;
  }
  /**
      Uploads one kind of data (`which`) for one shader stage.

      - Params: sent as root constants, or through a freshly filled constant
        buffer view when bit 0x100 of `regs.params` is set.
      - Globals: always sent as root constants.
      - Textures: one shader resource view and one sampler per texture slot.
        Missing or disposed textures are replaced by a default (a color texture
        for the 2D slots, the default cube texture otherwise). Textures that
        need it are re-allocated; if that changed the current shader, the
        original shader is selected again, everything is re-uploaded and the
        function returns.
      - Buffers: one constant buffer view per buffer; throws when a buffer has
        a vertex view (i.e. was not allocated as a uniform buffer).
  **/
  function uploadBuffers( buffers : h3d.shader.Buffers, buf : h3d.shader.Buffers.ShaderBuffers, which:h3d.shader.Buffers.BufferKind, shader : hxsl.RuntimeShader.RuntimeShaderData, regs : ShaderRegisters ) {
      switch( which ) {
      case Params:
          if( shader.paramsSize > 0 ) {
              var data = hl.Bytes.getArray(buf.params.toData());
              var dataSize = shader.paramsSize << 4;
              var viaCBV = (regs.params & 0x100) != 0;
              if( viaCBV ) {
                  // update CBV
                  var srv = frame.shaderResourceViews.alloc(1);
                  var cbv = allocDynamicBuffer(data,dataSize);
                  var cbvDesc = tmp.cbvDesc;
                  cbvDesc.bufferLocation = cbv.getGpuVirtualAddress();
                  cbvDesc.sizeInBytes = calcCBVSize(dataSize);
                  Driver.createConstantBufferView(cbvDesc, srv);
                  frame.commandList.setGraphicsRootDescriptorTable(regs.params & 0xFF, frame.shaderResourceViews.toGPU(srv));
              } else
                  frame.commandList.setGraphicsRoot32BitConstants(regs.params, dataSize >> 2, data, 0);
          }
      case Globals:
          if( shader.globalsSize > 0 )
              frame.commandList.setGraphicsRoot32BitConstants(regs.globals, shader.globalsSize << 2, hl.Bytes.getArray(buf.globals.toData()), 0);
      case Textures:
          if( regs.texturesCount > 0 ) {
              var srv = frame.shaderResourceViews.alloc(regs.texturesCount);
              var sampler = frame.samplerViews.alloc(regs.texturesCount);
              for( i in 0...regs.texturesCount ) {
                  var t = buf.tex[i];
                  // Substitute a default texture for a missing or disposed one.
                  if( t == null || t.isDisposed() ) {
                      if( i < regs.textures2DCount ) {
                          var color = h3d.mat.Defaults.loadingTextureColor;
                          t = h3d.mat.Texture.fromColor(color, (color >>> 24) / 255);
                      } else {
                          t = h3d.mat.Texture.defaultCubeTexture();
                      }
                  }
                  if( t != null && t.t == null && t.realloc != null ) {
                      var shaderBefore = currentShader;
                      t.alloc();
                      t.realloc();
                      if( hasDeviceError ) return;
                      if( shaderBefore != currentShader ) {
                          // realloc triggered a shader change !
                          // we need to reset the original shader and reupload everything
                          currentShader = null;
                          selectShader(shaderBefore.shader);
                          uploadShaderBuffers(buffers,Globals);
                          uploadShaderBuffers(buffers,Params);
                          uploadShaderBuffers(buffers,Textures);
                          return;
                      }
                  }
                  // Pick and fill the view description matching the texture kind.
                  var tdesc : ShaderResourceViewDesc;
                  if( t.flags.has(Cube) ) {
                      var cubeDesc = tmp.texCubeSRV;
                      cubeDesc.format = t.t.format;
                      cubeDesc.mostDetailedMip = t.startingMip;
                      tdesc = cubeDesc;
                  } else if( t.flags.has(IsArray) ) {
                      var arrayDesc = tmp.tex2DArraySRV;
                      arrayDesc.format = t.t.format;
                      arrayDesc.arraySize = t.layerCount;
                      arrayDesc.mostDetailedMip = t.startingMip;
                      tdesc = arrayDesc;
                  } else if ( t.isDepth() ) {
                      var depthDesc = tmp.tex2DSRV;
                      depthDesc.format = R24_UNORM_X8_TYPELESS;
                      depthDesc.mostDetailedMip = t.startingMip;
                      tdesc = depthDesc;
                  } else {
                      var texDesc = tmp.tex2DSRV;
                      texDesc.format = t.t.format;
                      texDesc.mostDetailedMip = t.startingMip;
                      tdesc = texDesc;
                  }
                  t.lastFrame = frameCount;
                  var state = if ( t.isDepth() )
                      DEPTH_READ;
                  else if ( shader.vertex )
                      NON_PIXEL_SHADER_RESOURCE;
                  else
                      PIXEL_SHADER_RESOURCE;
                  transition(t.t, state);
                  Driver.createShaderResourceView(t.t.res, tdesc, srv.offset(i * frame.shaderResourceViews.stride));
                  // Sampler built from the texture filter, mipmap, wrap and LOD bias settings.
                  var samplerDesc = tmp.samplerDesc;
                  samplerDesc.filter = switch( [t.filter, t.mipMap] ) {
                  case [Nearest, None|Nearest]: MIN_MAG_MIP_POINT;
                  case [Nearest, Linear]: MIN_MAG_POINT_MIP_LINEAR;
                  case [Linear, None|Nearest]: MIN_MAG_LINEAR_MIP_POINT;
                  case [Linear, Linear]: MIN_MAG_MIP_LINEAR;
                  }
                  samplerDesc.addressU = samplerDesc.addressV = samplerDesc.addressW = switch( t.wrap ) {
                  case Clamp: CLAMP;
                  case Repeat: WRAP;
                  }
                  samplerDesc.mipLODBias = t.lodBias;
                  Driver.createSampler(samplerDesc, sampler.offset(i * frame.samplerViews.stride));
              }
              frame.commandList.setGraphicsRootDescriptorTable(regs.textures, frame.shaderResourceViews.toGPU(srv));
              frame.commandList.setGraphicsRootDescriptorTable(regs.samplers, frame.samplerViews.toGPU(sampler));
          }
      case Buffers:
          if( shader.bufferCount > 0 ) {
              for( i in 0...shader.bufferCount ) {
                  var srv = frame.shaderResourceViews.alloc(1);
                  var b = buf.buffers[i];
                  var cbv = b.vbuf;
                  if( cbv.view != null )
                      throw "Buffer was allocated without UniformBuffer flag";
                  transition(cbv, VERTEX_AND_CONSTANT_BUFFER);
                  var cbvDesc = tmp.cbvDesc;
                  cbvDesc.bufferLocation = cbv.res.getGpuVirtualAddress();
                  cbvDesc.sizeInBytes = cbv.size;
                  Driver.createConstantBufferView(cbvDesc, srv);
                  frame.commandList.setGraphicsRootDescriptorTable(regs.buffers + i, frame.shaderResourceViews.toGPU(srv));
              }
          }
      }
  }
  /**
      Makes `shader` the current shader, compiling and caching it by id on
      first use. Returns false when it was already current; otherwise sets its
      root signature on the command list, requests a pipeline flush and
      returns true.
  **/
  override function selectShader( shader : hxsl.RuntimeShader ) {
      var compiled = compiledShaders.get(shader.id);
      if( compiled == null ) {
          compiled = compileShader(shader);
          compiledShaders.set(shader.id, compiled);
      }
      if( compiled != currentShader ) {
          currentShader = compiled;
          needPipelineFlush = true;
          frame.commandList.setGraphicsRootSignature(currentShader.rootSignature);
          return true;
      }
      return false;
  }
  /**
      Records the material state of `pass` (pass bits, color mask, stencil mask
      and stencil ops) into `pipelineSignature` and requests a pipeline flush.
      When the pass has a stencil, its reference value is also pushed to the
      command list if it differs from the last one set.
  **/
  override function selectMaterial( pass : h3d.mat.Pass ) @:privateAccess {
      needPipelineFlush = true;
      pipelineSignature.setI32(PSIGN_MATID, pass.bits);
      pipelineSignature.setUI8(PSIGN_COLOR_MASK, pass.colorMask);
      var stencil = pass.stencil;
      if( stencil == null ) {
          // no stencil: both signature slots are zeroed, which makePipeline reads as "stencil disabled"
          pipelineSignature.setUI16(PSIGN_STENCIL_MASK, 0);
          pipelineSignature.setI32(PSIGN_STENCIL_OPS, 0);
          return;
      }
      pipelineSignature.setUI16(PSIGN_STENCIL_MASK, stencil.maskBits & 0xFFFF);
      pipelineSignature.setI32(PSIGN_STENCIL_OPS, stencil.opBits);
      var reference = stencil.reference;
      if( curStencilRef != reference ) {
          curStencilRef = reference;
          frame.commandList.omSetStencilRef(reference);
      }
  }
  /**
      Binds a single vertex buffer for every input of the current shader.
      Each input slot gets a view with the buffer's location, size and stride,
      and the input's byte offset OR-ed with its precision is stored in the
      pipeline signature layout bytes.
      Throws "assert" if an input offset does not fit in one byte.
  **/
  override function selectBuffer(buffer:Buffer) {
      var slots = tmp.vertexViews;
      var vbuf = buffer.vbuf;
      var source = vbuf.view;
      var mapping = buffer.format.resolveMapping(currentShader.format);
      var count = currentShader.inputCount;
      for( slot in 0...count ) {
          var dst = slots[slot];
          var input = mapping[slot];
          dst.bufferLocation = source.bufferLocation;
          dst.sizeInBytes = source.sizeInBytes;
          dst.strideInBytes = source.strideInBytes;
          // the layout entry is a single byte in the signature
          if( input.offset >= 256 ) throw "assert";
          pipelineSignature.setUI8(PSIGN_LAYOUT + slot, input.offset | input.precision.toInt());
      }
      needPipelineFlush = true;
      frame.commandList.iaSetVertexBuffers(0, currentShader.inputCount, slots[0]);
  }
  /**
      Binds several vertex buffers at once. For each entry of the resolved
      mapping, the view of `buffers[entry.bufferIndex]` is copied into the
      matching slot and the entry's offset/precision byte is written to the
      pipeline signature.
      Throws "assert" if an input offset does not fit in one byte.
  **/
  override function selectMultiBuffers(formats:hxd.BufferFormat.MultiFormat,buffers:Array<h3d.Buffer>) {
      var slots = tmp.vertexViews;
      var mapping = formats.resolveMapping(currentShader.format);
      var count = mapping.length;
      for( slot in 0...count ) {
          var dst = slots[slot];
          var input = mapping[slot];
          var source = @:privateAccess buffers[input.bufferIndex].vbuf.view;
          dst.bufferLocation = source.bufferLocation;
          dst.sizeInBytes = source.sizeInBytes;
          dst.strideInBytes = source.strideInBytes;
          // the layout entry is a single byte in the signature
          if( input.offset >= 256 ) throw "assert";
          pipelineSignature.setUI8(PSIGN_LAYOUT + slot, input.offset | input.precision.toInt());
      }
      needPipelineFlush = true;
      frame.commandList.iaSetVertexBuffers(0, count, slots[0]);
  }
  // Lookup tables used by makePipeline to turn the integer fields unpacked
  // from the pass/stencil bits into D3D12 enum values. The position of each
  // entry is the integer value it is looked up with.

  // indexed by Pass.getCulling(passBits)
  static var CULL : Array<CullMode> = [
      NONE,
      BACK,
      FRONT,
      NONE,
  ];

  // indexed by Pass.getBlendOp / Pass.getBlendAlphaOp
  static var BLEND_OP : Array<BlendOp> = [
      ADD,
      SUBTRACT,
      REV_SUBTRACT,
      MIN,
      MAX,
  ];

  // indexed by Pass.getDepthTest and by the stencil front/back test values
  static var COMP : Array<ComparisonFunc> = [
      ALWAYS,
      NEVER,
      EQUAL,
      NOT_EQUAL,
      GREATER,
      GREATER_EQUAL,
      LESS,
      LESS_EQUAL,
  ];

  // indexed by Pass.getBlendSrc / Pass.getBlendDst (color channels)
  static var BLEND : Array<Blend> = [
      ONE,
      ZERO,
      SRC_ALPHA,
      SRC_COLOR,
      DEST_ALPHA,
      DEST_COLOR,
      INV_SRC_ALPHA,
      INV_SRC_COLOR,
      INV_DEST_ALPHA,
      INV_DEST_COLOR,
      SRC1_COLOR,
      SRC1_ALPHA,
      INV_SRC1_COLOR,
      INV_SRC1_ALPHA,
      SRC_ALPHA_SAT,
  ];

  // indexed by Pass.getBlendAlphaSrc / Pass.getBlendAlphaDst; same order as
  // BLEND but every *_COLOR factor is replaced by its *_ALPHA counterpart
  static var BLEND_ALPHA : Array<Blend> = [
      ONE,
      ZERO,
      SRC_ALPHA,
      SRC_ALPHA,
      DEST_ALPHA,
      DEST_ALPHA,
      INV_SRC_ALPHA,
      INV_SRC_ALPHA,
      INV_DEST_ALPHA,
      INV_DEST_ALPHA,
      SRC1_ALPHA,
      SRC1_ALPHA,
      INV_SRC1_ALPHA,
      INV_SRC1_ALPHA,
      SRC_ALPHA_SAT,
  ];

  // indexed by the Stencil.getFront*/getBack* pass/fail operation values
  static var STENCIL_OP : Array<StencilOp> = [
      KEEP,
      ZERO,
      REPLACE,
      INCR_SAT,
      INCR,
      DECR_SAT,
      DECR,
      INVERT,
  ];
  /**
      Builds a graphics pipeline state for `shader` from the current contents
      of `pipelineSignature` (pass bits, color mask, stencil mask/ops and the
      per-input layout bytes) and the current render targets.
      The shader's own pipeline description (`shader.pipeline`) and input
      layout are updated in place, then handed to
      `Driver.createGraphicsPipelineState`, whose result is returned.
  **/
  function makePipeline( shader : CompiledShader ) {
      var desc = shader.pipeline;

      // --- unpack the signature written by selectMaterial
      var passBits = pipelineSignature.getI32(PSIGN_MATID);
      var colorMask = pipelineSignature.getUI8(PSIGN_COLOR_MASK);
      var stencilMask = pipelineSignature.getUI16(PSIGN_STENCIL_MASK);
      var stencilOps = pipelineSignature.getI32(PSIGN_STENCIL_OPS);

      var colorSrc = Pass.getBlendSrc(passBits);
      var colorDst = Pass.getBlendDst(passBits);
      var alphaSrc = Pass.getBlendAlphaSrc(passBits);
      var alphaDst = Pass.getBlendAlphaDst(passBits);
      var colorOp = Pass.getBlendOp(passBits);
      var alphaOp = Pass.getBlendAlphaOp(passBits);
      var depthWrite = Pass.getDepthWrite(passBits);
      var depthTest = Pass.getDepthTest(passBits);
      var culling = Pass.getCulling(passBits);
      var wireframe = Pass.getWireframe(passBits);
      // wireframe rendering always disables culling
      if( wireframe != 0 ) culling = 0;

      // at least one render target slot is always described
      var targetCount = currentRenderTargets.length;
      if( targetCount == 0 ) targetCount = 1;
      desc.numRenderTargets = targetCount;

      // --- rasterizer
      desc.rasterizerState.cullMode = CULL[culling];
      desc.rasterizerState.fillMode = wireframe == 0 ? SOLID : WIREFRAME;

      // --- depth
      var depthStencil = desc.depthStencilDesc;
      // NOTE(review): the depth test is disabled when the compare value is 0 (COMP[0] = ALWAYS);
      // whether depth writes still happen in that case should be confirmed against the D3D12 docs.
      depthStencil.depthEnable = depthTest != 0;
      depthStencil.depthWriteMask = (depthWrite == 0 || !depthEnabled) ? ZERO : ALL;
      depthStencil.depthFunc = COMP[depthTest];

      // --- blending and render target formats (same blend state on every target)
      var blend = desc.blendState;
      var blendEnabled = colorSrc != 0 || colorDst != 1;
      for( i in 0...targetCount ) {
          var rtBlend = blend.renderTargets[i];
          rtBlend.blendEnable = blendEnabled;
          rtBlend.srcBlend = BLEND[colorSrc];
          rtBlend.dstBlend = BLEND[colorDst];
          rtBlend.srcBlendAlpha = BLEND_ALPHA[alphaSrc];
          rtBlend.dstBlendAlpha = BLEND_ALPHA[alphaDst];
          rtBlend.blendOp = BLEND_OP[colorOp];
          rtBlend.blendOpAlpha = BLEND_OP[alphaOp];
          rtBlend.renderTargetWriteMask = colorMask;
          // a null target falls back to the R8G8B8A8_UNORM format
          var target = currentRenderTargets[i];
          desc.rtvFormats[i] = target == null ? R8G8B8A8_UNORM : target.t.format;
      }
      desc.dsvFormat = depthEnabled ? D24_UNORM_S8_UINT : UNKNOWN;

      // --- input layout: each signature byte packs the byte offset (upper bits)
      // and the precision (two low bits), as written by selectBuffer/selectMultiBuffers
      for( i in 0...shader.inputCount ) {
          var element = shader.inputLayout[i];
          var packed = pipelineSignature.getUI8(PSIGN_LAYOUT + i);
          element.alignedByteOffset = packed & ~3;
          element.format = @:privateAccess switch( [shader.format.inputs[i].type, new hxd.BufferFormat.Precision(packed&3)] ) {
          case [DFloat, F32]: R32_FLOAT;
          case [DFloat, F16]: R16_FLOAT;
          case [DFloat, S8]: R8_SNORM;
          case [DFloat, U8]: R8_UNORM;
          case [DVec2, F32]: R32G32_FLOAT;
          case [DVec2, F16]: R16G16_FLOAT;
          case [DVec2, S8]: R8G8_SNORM;
          case [DVec2, U8]: R8G8_UNORM;
          case [DVec3, F32]: R32G32B32_FLOAT;
          case [DVec3, F16]: R16G16B16A16_FLOAT; // padding
          case [DVec3, S8]: R8G8B8A8_SNORM; // padding
          case [DVec3, U8]: R8G8B8A8_UNORM; // padding
          case [DVec4, F32]: R32G32B32A32_FLOAT;
          case [DVec4, F16]: R16G16B16A16_FLOAT;
          case [DVec4, S8]: R8G8B8A8_SNORM;
          case [DVec4, U8]: R8G8B8A8_UNORM;
          case [DBytes4, _]: R8G8B8A8_UINT;
          default: throw "assert";
          };
      }

      // --- stencil: enabled as soon as either signature slot is non-zero
      var stencilEnabled = stencilMask != 0 || stencilOps != 0;
      depthStencil.stencilEnable = stencilEnabled;
      if( stencilEnabled ) {
          var frontFace = depthStencil.frontFace;
          var backFace = depthStencil.backFace;
          // low byte of the mask is the read mask, high byte the write mask
          depthStencil.stencilReadMask = stencilMask & 0xFF;
          depthStencil.stencilWriteMask = stencilMask >> 8;
          frontFace.stencilFunc = COMP[Stencil.getFrontTest(stencilOps)];
          frontFace.stencilPassOp = STENCIL_OP[Stencil.getFrontPass(stencilOps)];
          frontFace.stencilFailOp = STENCIL_OP[Stencil.getFrontSTfail(stencilOps)];
          frontFace.stencilDepthFailOp = STENCIL_OP[Stencil.getFrontDPfail(stencilOps)];
          backFace.stencilFunc = COMP[Stencil.getBackTest(stencilOps)];
          backFace.stencilPassOp = STENCIL_OP[Stencil.getBackPass(stencilOps)];
          backFace.stencilFailOp = STENCIL_OP[Stencil.getBackSTfail(stencilOps)];
          backFace.stencilDepthFailOp = STENCIL_OP[Stencil.getBackDPfail(stencilOps)];
      }

      return Driver.createGraphicsPipelineState(desc);
  }
  /**
      Binds the pipeline state matching the current shader and pipeline
      signature, creating and caching it when it does not exist yet.
      Does nothing unless `needPipelineFlush` is set.

      Pipelines are cached per shader in `currentShader.pipelines`, keyed by a
      digest of the signature bytes. Each key maps to a small array (bucket) of
      `CachedPipeline`, so that different signatures sharing the same digest
      can coexist; the exact signature bytes are compared before reuse.
  **/
  function flushPipeline() {
      if( !needPipelineFlush ) return;
      needPipelineFlush = false;
      var signature = pipelineSignature;
      var signatureSize = PSIGN_LAYOUT + currentShader.inputCount;
      // digest mode 3 - presumably Adler-32 given the `adlerOut` name (confirm against hl.Format)
      adlerOut.setI32(0, 0);
      hl.Format.digest(adlerOut, signature, signatureSize, 3);
      var hash = adlerOut.getI32(0);
      var pipes = currentShader.pipelines.get(hash);
      if( pipes == null ) {
          pipes = new hl.NativeArray(1);
          currentShader.pipelines.set(hash, pipes);
      }
      // look for an exact match, or failing that the first free slot of the bucket
      var insert = -1;
      for( i in 0...pipes.length ) {
          var p = pipes[i];
          if( p == null ) {
              insert = i;
              break;
          }
          if( p.size == signatureSize && p.bytes.compare(0, signature, 0, signatureSize) == 0 ) {
              frame.commandList.setPipelineState(p.pipeline);
              return;
          }
      }
      if( insert < 0 ) {
          // bucket is full (digest collision): grow it by one, keep every
          // existing entry and store the new pipeline in the added slot
          insert = pipes.length;
          var pipes2 = new hl.NativeArray(insert + 1);
          pipes2.blit(0, pipes, 0, insert);
          currentShader.pipelines.set(hash, pipes2);
          pipes = pipes2;
      }
      var cp = new CachedPipeline();
      // keep a private copy of the signature: pipelineSignature is overwritten on later state changes
      cp.bytes = signature.sub(0, signatureSize);
      cp.size = signatureSize;
      cp.pipeline = makePipeline(currentShader);
      pipes[insert] = cp;
      frame.commandList.setPipelineState(cp.pipeline);
  }
  // QUERIES

  /** Number of timestamp queries held by one query heap (see endQuery). **/
  static inline var QUERY_COUNT : Int = 128;
  /**
      Creates a new query object. Only `TimeStamp` queries are supported;
      any other kind throws "Not implemented".
  **/
  override function allocQuery( queryKind : QueryKind ) : Query {
      return switch( queryKind ) {
      case TimeStamp: new Query();
      default: throw "Not implemented";
      }
  }
  /** Intentionally empty: this driver does nothing when a query is deleted. **/
  override function deleteQuery( q : Query ) {
      // no per-query cleanup is performed here
  }
  /** Intentionally empty: the timestamp is recorded by endQuery only. **/
  override function beginQuery( q : Query ) {
      // no command is issued when a query begins
  }
  /**
      Records a timestamp query for `q` on the current frame's command list.
      The query is assigned the next slot of the current query heap (created on
      demand with `QUERY_COUNT` entries) and pushed to `frame.queriesPending`.
      When the heap becomes full, the frame moves on to the next heap index.
  **/
  override function endQuery( q : Query ) {
      var heapIndex = frame.queryCurrentHeap;
      var heap = frame.queryHeaps[heapIndex];
      if( heap == null ) {
          var desc = new QueryHeapDesc();
          desc.type = TIMESTAMP;
          desc.count = QUERY_COUNT;
          heap = Driver.createQueryHeap(desc);
          frame.queryHeaps[heapIndex] = heap;
          // the readback buffer is sized from the number of heaps (see flushQueries),
          // so drop it and let it be reallocated
          var previous = frame.queryBuffer;
          if( previous != null ) {
              previous.release();
              frame.queryBuffer = null;
          }
      }
      var slot = frame.queryHeapOffset;
      frame.queryHeapOffset = slot + 1;
      q.offset = slot;
      q.heap = heapIndex;
      frame.commandList.endQuery(heap, TIMESTAMP, slot);
      frame.queriesPending.push(q);
      if( frame.queryHeapOffset == QUERY_COUNT ) {
          // heap exhausted: continue in the next one
          frame.queryHeapOffset = 0;
          frame.queryCurrentHeap++;
      }
  }
  /**
      Tells whether the result of `q` has been read back. beginQueries sets
      `q.heap` to -1 once it has stored the result.
  **/
  override function queryResultAvailable( q : Query ) {
      var stillPending = q.heap >= 0;
      return !stillPending;
  }
  /** Returns the value stored in `q.result` (filled in by beginQueries). **/
  override function queryResult( q : Query ) {
      var value = q.result;
      return value;
  }
  /**
      Reads back the results of every pending query of the current frame.
      Does nothing when the frame has no readback buffer or no pending query.
      Each pending query still attached to a heap gets its `result` computed
      from the 64-bit tick value stored at `heap * QUERY_COUNT + offset`, and
      is then marked as available by setting `heap` to -1.
  **/
  function beginQueries() {
      if( frame.queryBuffer == null || frame.queriesPending.length == 0 )
          return;
      var ticks : hl.BytesAccess<Int64> = frame.queryBuffer.map(0, null);
      var query = frame.queriesPending.pop();
      while( query != null ) {
          if( query.heap >= 0 ) {
              var value = ticks[query.heap * QUERY_COUNT + query.offset];
              // whole part and remainder are divided separately by tsFreq;
              // presumably tsFreq is ticks per second, making the result nanoseconds - confirm
              var whole = (value / tsFreq).low;
              var remainder = (value % tsFreq).low;
              query.result = (whole + remainder / tsFreq.low) * 1e9;
              query.heap = -1;
          }
          query = frame.queriesPending.pop();
      }
      frame.queryBuffer.unmap(0, null);
  }
  /**
      Records the commands that copy all timestamps written during this frame
      from the query heaps into the frame's readback buffer, then resets the
      heap cursor for the next use of the frame.
      Does nothing when no query was issued.

      Heap `i` is resolved at byte offset `i * QUERY_COUNT * 8`, which matches
      the `heap * QUERY_COUNT + offset` index used by beginQueries.
  **/
  function flushQueries() {
      // Number of heaps holding queries and number of queries in the last one.
      // When queryHeapOffset is 0, endQuery has just moved past a heap it filled
      // completely, so the last used heap holds QUERY_COUNT queries (not 0).
      var heapCount = frame.queryCurrentHeap;
      var lastCount = QUERY_COUNT;
      if( frame.queryHeapOffset > 0 ) {
          lastCount = frame.queryHeapOffset;
          heapCount++;
      }
      if( heapCount == 0 )
          return;
      if( frame.queryBuffer == null )
          frame.queryBuffer = allocGPU(frame.queryHeaps.length * QUERY_COUNT * 8, READBACK, COPY_DEST);
      var position = 0;
      for( i in 0...heapCount ) {
          // every heap but the last one is full
          var count = i < heapCount - 1 ? QUERY_COUNT : lastCount;
          frame.commandList.resolveQueryData(frame.queryHeaps[i], TIMESTAMP, 0, count, frame.queryBuffer, position);
          position += count * 8; // one 64-bit value per query
      }
      frame.queryCurrentHeap = 0;
      frame.queryHeapOffset = 0;
  }
  1718. // --- DRAW etc.
  /**
      Draws `ntriangles` indexed triangles from `ibuf`, starting at index
      `startIndex`. The pipeline is flushed first, the index buffer is bound
      only if it differs from the last one, and descriptor heaps are rotated
      afterwards if they are running low (see flushResources).
  **/
  override function draw( ibuf : IndexBuffer, startIndex : Int, ntriangles : Int ) {
      flushPipeline();
      var list = frame.commandList;
      if( currentIndex != ibuf ) {
          currentIndex = ibuf;
          list.iaSetIndexBuffer(ibuf.view);
      }
      var indexCount = ntriangles * 3;
      list.drawIndexedInstanced(indexCount, 1, startIndex, 0, 0);
      flushResources();
  }
  /**
      Draws instanced geometry from `ibuf`. When `commands.data` is set, the
      draw is issued indirectly through `indirectCommand` with
      `commands.commandCount` commands; otherwise a direct instanced draw is
      issued using the counts stored in `commands`.
  **/
  override function drawInstanced(ibuf:IndexBuffer, commands:InstanceBuffer) {
      flushPipeline();
      var list = frame.commandList;
      if( currentIndex != ibuf ) {
          currentIndex = ibuf;
          list.iaSetIndexBuffer(ibuf.view);
      }
      if( commands.data == null )
          list.drawIndexedInstanced(commands.indexCount, commands.commandCount, commands.startIndex, 0, 0);
      else
          list.executeIndirect(indirectCommand, commands.commandCount, commands.data, 0, null, 0);
      flushResources();
  }
  /**
      Switches the frame to fresh descriptor heaps when the current ones are
      running low: fewer than 128 shader resource views or fewer than 64
      sampler views available. Both heaps are replaced together and bound on
      the command list.
  **/
  function flushResources() {
      var enoughViews = frame.shaderResourceViews.available >= 128;
      var enoughSamplers = frame.samplerViews.available >= 64;
      if( enoughViews && enoughSamplers )
          return;
      frame.shaderResourceViews = frame.shaderResourceCache.next();
      frame.samplerViews = frame.samplerCache.next();
      var heaps = tmp.descriptors2;
      heaps[0] = @:privateAccess frame.shaderResourceViews.heap;
      heaps[1] = @:privateAccess frame.samplerViews.heap;
      frame.commandList.setDescriptorHeaps(heaps);
  }
  /**
      Submits the current frame: resolves pending queries, closes and executes
      the command list, then signals the fence with a new value that is stored
      on the frame so waitForFrame can wait for its completion.
      `onResize` is not used by this implementation.
  **/
  function flushFrame( onResize : Bool = false ) {
      flushQueries();
      var list = frame.commandList;
      list.close();
      list.execute();
      // forget the current shader so that the next selectShader call binds its root signature again
      currentShader = null;
      Driver.flushMessages();
      frame.fenceValue = fenceValue++;
      Driver.signal(fence, frame.fenceValue);
  }
  /**
      Presents the current back buffer and starts the next frame: the back
      buffer is transitioned to PRESENT, the frame is submitted and presented,
      then the driver waits for the frame owning the new back buffer before
      calling beginFrame.
      If a device error was flagged, the driver is disposed and reset and
      `onContextLost` is invoked.
  **/
  override function present() {
      transition(frame.backBuffer, PRESENT);
      flushFrame();
      Driver.present(window.vsync);
      var nextIndex = Driver.getCurrentBackBufferIndex();
      waitForFrame(nextIndex);
      beginFrame();
      if( !hasDeviceError )
          return;
      Sys.println("----------- OnContextLost ----------");
      hasDeviceError = false;
      dispose();
      reset();
      onContextLost();
  }
  /**
      Blocks until the fence has reached the value recorded for
      `frames[index]`. Returns immediately if it already has.
  **/
  function waitForFrame( index : Int ) {
      var target = frames[index];
      if( fence.getValue() >= target.fenceValue )
          return;
      fence.setEvent(target.fenceValue, fenceEvent);
      fenceEvent.wait(-1);
  }
  1781. }
  1782. #end