DX12Driver.hx 80 KB


  1. package h3d.impl;
  2. #if (hldx && dx12)
  3. import h3d.impl.Driver;
  4. import dx.Dx12;
  5. import haxe.Int64;
  6. import h3d.mat.Pass;
  7. import h3d.mat.Stencil;
  8. private typedef Driver = Dx12;
  /**
      Node of a singly linked list of temporary GPU buffers.
  **/
  class TempBuffer {
      /** Following node in the list, or null when this is the tail. **/
      public var next : TempBuffer;
      /** GPU resource held by this node. **/
      public var buffer : GpuResource;
      public var size : Int;
      /** Frame counter value; the driver compares it against its own frame counter to expire old buffers. **/
      public var lastUse : Int;

      public function new() {
      }

      /**
          Returns the number of nodes in the chain that starts at this node
          (this node included, so the result is at least 1).
      **/
      public inline function count() {
          var total = 0;
          var node = this;
          // `this` is never null, so the first iteration always runs
          do {
              total++;
              node = node.next;
          } while( node != null );
          return total;
      }
  }
  /**
      Growable sequence of `ManagedHeap` instances sharing the same type and size.
      `reset()` rewinds the sequence and `next()` hands out heaps one after another,
      recycling existing ones before creating new ones.
  **/
  class ManagedHeapArray {
      var heaps : Array<ManagedHeap>;
      var type : DescriptorHeapType;
      var size : Int;
      var cursor : Int;

      public function new(type,size) {
          heaps = [];
          this.type = type;
          this.size = size;
      }

      /** Rewinds the sequence: the following `next()` call returns the first heap again. **/
      public function reset() {
          cursor = 0;
      }

      /**
          Returns the heap at the current position and advances the position.
          An already existing heap is cleared before being returned; otherwise a new one
          is created and stored.
      **/
      public function next() {
          var index = cursor;
          cursor = index + 1;
          var heap = heaps[index];
          if( heap != null ) {
              heap.clear();
              return heap;
          }
          heap = new ManagedHeap(type, size);
          heaps.push(heap);
          return heap;
      }
  }
  /**
      Output record filled by `BumpAllocator.alloc`.
      NOTE: this is a `@:struct`, so the declaration order of the fields is kept as-is.
  **/
  @:struct class BumpAllocation {
      /** Resource the range was carved from. **/
      public var resource : GpuResource = null;
      /** CPU pointer to the first byte of the range. **/
      public var cpuAddress : hl.Bytes = null;
      /** Byte offset of the range inside `resource`. **/
      public var offset : Int = 0;
      /** Size of the range in bytes (after alignment rounding). **/
      public var byteSize : Int = 0;

      public function new() {
      }
  }
  /**
      Linear ("bump") allocator over a mapped UPLOAD buffer.
      When a request does not fit, it is forwarded to a chained allocator that is
      created on demand with a larger capacity.
  **/
  class BumpAllocator {
      var resource : GpuResource;
      var capacity : Int;
      var cpuAddress : hl.Bytes;
      var heap : HeapProperties;
      var offset : Int = 0;
      var next : BumpAllocator;

      /**
          Creates the backing buffer of `size` bytes and maps it.
      **/
      public function new( size : Int ) {
          capacity = size;
          heap = new HeapProperties();
          heap.type = UPLOAD;
          var desc = new ResourceDesc();
          desc.dimension = BUFFER;
          desc.width = capacity;
          desc.height = 1;
          desc.depthOrArraySize = 1;
          desc.mipLevels = 1;
          desc.sampleDesc.count = 1;
          desc.layout = ROW_MAJOR;
          var flags = new haxe.EnumFlags();
          resource = Driver.createCommittedResource(heap, flags, desc, GENERIC_READ, null);
          cpuAddress = resource.map(0, null);
      }

      /** Releases and forgets the chained allocator, if there is one. **/
      function dropChain() {
          if( next == null )
              return;
          next.release();
          next = null;
      }

      /** Rounds `value` up to a multiple of `alignment` (bit-mask based, same formula for both callers). **/
      static inline function alignUp( value : Int, alignment : Int ) : Int {
          var floor = value & ~(alignment - 1);
          return floor == value ? value : floor + alignment;
      }

      /** Makes the whole buffer available again and releases any chained allocator. **/
      public function reset() {
          offset = 0;
          dropChain();
      }

      /** Releases the backing resource, clears every field and releases the chain. **/
      public function release() {
          resource.release();
          resource = null;
          cpuAddress = null;
          heap = null;
          capacity = 0;
          offset = 0;
          dropChain();
      }

      /**
          Reserves `size` bytes (rounded up to `alignment`) and writes the result into `allocation`.
          @return the `allocation` object that was passed in.
      **/
      public inline function alloc( size : Int, alignment = 256, allocation : BumpAllocation ) {
          return tryAlloc(alignUp(size, alignment), alignment, allocation);
      }

      function tryAlloc( size, alignment = 256, allocation : BumpAllocation ) {
          var start = alignUp(offset, alignment);
          var end = size + start;
          if( end <= capacity ) {
              allocation.byteSize = size;
              allocation.offset = start;
              allocation.cpuAddress = cpuAddress.offset(start);
              allocation.resource = resource;
              offset = end;
              return allocation;
          }
          // Does not fit here: delegate to the chained allocator, creating it when needed
          if( next == null )
              next = new BumpAllocator(hxd.Math.imax(Std.int(capacity*3/2), size));
          return next.tryAlloc(size, alignment, allocation);
      }
  }
  /**
      State owned by one in-flight frame. The driver keeps one instance per back buffer.
  **/
  class DxFrame {
      // Swap chain / depth targets
      public var backBuffer : ResourceData;
      public var backBufferView : Address;
      public var depthBuffer : GpuResource;

      // Command recording
      public var allocator : CommandAllocator;
      public var commandList : CommandList;
      public var fenceValue : Int64;

      // Deferred cleanup, processed by the driver when a frame begins
      public var toRelease : Array<Resource> = [];
      public var tmpBufToNullify : Array<Texture> = [];
      public var tmpBufToRelease : Array<dx.Dx12.GpuResource> = [];

      // Descriptor heaps currently in use and the caches they are taken from
      public var shaderResourceViews : ManagedHeap;
      public var samplerViews : ManagedHeap;
      public var shaderResourceCache : ManagedHeapArray;
      public var samplerCache : ManagedHeapArray;

      // Temporary buffer lists (linked through TempBuffer.next)
      public var availableBuffers : TempBuffer;
      public var usedBuffers : TempBuffer;

      // Queries
      public var queryHeaps : Array<QueryHeap> = [];
      public var queriesPending : Array<Query> = [];
      public var queryCurrentHeap : Int;
      public var queryHeapOffset : Int;
      public var queryBuffer : GpuResource;

      public var bumpAllocator : BumpAllocator;

      public function new() {
      }
  }
  /**
      Register layout and cached binding state for one shader stage
      (see `CompiledShader.vertexRegisters` / `fragmentRegisters`).
  **/
  class ShaderRegisters {
      public var globals : Int;
      public var params : Int;
      public var buffers : Int;
      public var cbvCount : Int;
      public var storageCount : Int;
      public var textures : Int;
      public var samplers : Int;
      public var texturesCount : Int;
      public var texturesTypes : Array<hxsl.Ast.Type>;
      public var bufferTypes : Array<hxsl.Ast.BufferKind>;
      public var bufferStrides : Array<Int>;
      public var srv : Address;
      public var samplersView : Address;
      public var lastHeapCount : Int;
      public var lastTextures : Array<Texture> = [];
      public var lastTexturesBits : Array<Int> = [];

      public function new() {
      }
  }
  /**
      Everything the driver stores for a compiled runtime shader:
      per-stage registers, pipeline description/cache, root signature and input layout.
  **/
  class CompiledShader {
      public var vertexRegisters : ShaderRegisters;
      public var fragmentRegisters : ShaderRegisters;
      public var format : hxd.BufferFormat;
      public var pipeline : GraphicsPipelineStateDesc;
      public var pipelines : PipelineCache<GraphicsPipelineState> = new PipelineCache();
      public var rootSignature : RootSignature;
      public var inputLayout : hl.CArray<InputElementDesc>;
      public var inputCount : Int;
      public var shader : hxsl.RuntimeShader;
      public var isCompute : Bool;
      public var computePipeline : ComputePipelineState;

      public function new() {
      }
  }
  /**
      Argument pack holding a resource, its SRV and sampler descriptions and the
      descriptor addresses for both.
      NOTE: this is a `@:struct`, so the declaration order of the fields is kept as-is.
  **/
  @:struct class SrvArgs {
      public var res : GpuResource;
      @:packed public var resourceDesc : Tex2DSRV;
      @:packed public var samplerDesc : SamplerDesc;
      public var srvAddr : Address;
      public var samplerAddr : Address;
  }
  /**
      Scratch objects reused by the driver so that descriptors and native arrays
      do not have to be reallocated for every call.
      NOTE: this is a `@:struct`, so the declaration order of the fields is kept as-is.
  **/
  @:struct class TempObjects {
      public var renderTargets : hl.BytesAccess<Address>;
      public var depthStencils : hl.BytesAccess<Address>;
      public var copyableInfosBytes : hl.Bytes;
      public var vertexViews : hl.CArray<VertexBufferView>;
      public var descriptors2 : hl.NativeArray<DescriptorHeap>;
      public var barriers : hl.CArray<ResourceBarrier>;
      public var resourcesToTransition : hl.NativeArray<ResourceData>;
      /** Capacity of `barriers` and `resourcesToTransition`. **/
      public var maxBarriers : Int;
      /** Number of entries currently pending in `resourcesToTransition`. **/
      public var barrierCount : Int;
      @:packed public var heap(default,null) : HeapProperties;
      @:packed public var barrier(default,null) : ResourceBarrier;
      @:packed public var clearColor(default,null) : ClearColor;
      @:packed public var clearValue(default,null) : ClearValue;
      @:packed public var viewport(default,null) : Viewport;
      @:packed public var rect(default,null) : Rect;
      @:packed public var bufferSRV(default,null) : BufferSRV;
      @:packed public var samplerDesc(default,null) : SamplerDesc;
      @:packed public var vertexGlobalDesc(default,null) : ConstantBufferViewDesc;
      @:packed public var fragmentGlobalDesc(default,null) : ConstantBufferViewDesc;
      @:packed public var cbvDesc(default,null) : ConstantBufferViewDesc;
      @:packed public var rtvDesc(default,null) : RenderTargetViewDesc;
      @:packed public var uavDesc(default,null) : UAVBufferViewDesc;
      @:packed public var wtexDesc(default,null) : UAVTextureViewDesc;
      @:packed public var subResourceData(default, null) : SubResourceData;
      @:packed public var bumpAllocation(default,null) : BumpAllocation;
      @:packed public var srcTextureLocation(default, null) : TextureCopyLocation;
      @:packed public var dstTextureLocation(default, null) : TextureCopyLocation;
      public var pass : h3d.mat.Pass;

      public function new() {
          // Raw native storage
          // NOTE(review): 8 * 8 presumably means 8 handles of 8 bytes each - confirm
          renderTargets = new hl.Bytes(8 * 8);
          depthStencils = new hl.Bytes(8);
          copyableInfosBytes = new hl.Bytes(8 * 3);
          vertexViews = hl.CArray.alloc(VertexBufferView, 16);

          // Barrier batch: every entry targets all sub-resources
          maxBarriers = 100;
          barriers = hl.CArray.alloc( ResourceBarrier, maxBarriers );
          var allSubresource = #if (hldx >= version("1.16.0")) Driver.getConstant(RESOURCE_BARRIER_ALL_SUBRESOURCES) #else 0xffffffff #end;
          for ( index in 0...maxBarriers )
              barriers[index].subResource = allSubresource;
          resourcesToTransition = new hl.NativeArray(maxBarriers);
          barrierCount = 0;

          // Default pass with its own stencil state
          pass = new h3d.mat.Pass("default");
          pass.stencil = new h3d.mat.Stencil();

          // Constant parts of the reusable descriptors
          bufferSRV.dimension = BUFFER;
          bufferSRV.flags = RAW;
          bufferSRV.shader4ComponentMapping = ShaderComponentMapping.DEFAULT;
          samplerDesc.comparisonFunc = NEVER;
          samplerDesc.maxLod = 1e30;
          descriptors2 = new hl.NativeArray(2);
          uavDesc.viewDimension = BUFFER;
          barrier.subResource = -1; // all
      }
  }
  /**
      Wrapper around a `DescriptorHeap` that hands out consecutive descriptor slots.
      When the heap is exhausted a bigger one is created and the previous one is passed
      to `onFree`, which decides what to do with it (the default implementation throws).
  **/
  class ManagedHeap {
      /** Byte distance between two consecutive descriptors of this heap type. **/
      public var stride(default,null) : Int;
      /** Number of descriptors in the current heap. **/
      var size : Int;
      /** Index of the next free descriptor. **/
      var cursor : Int;
      var type : DescriptorHeapType;
      var heap : DescriptorHeap;
      /** CPU handle of the first descriptor of the current heap. **/
      var address : Address;
      /** Delta to add to a CPU handle to get the GPU one (0 when the heap is not shader visible). **/
      var cpuToGpu : Int64;
      /** Number of descriptors that can still be allocated without growing. **/
      public var available(get,never) : Int;

      public function new(type,size=8) {
          this.type = type;
          this.stride = Driver.getDescriptorHandleIncrementSize(type);
          allocHeap(size);
      }

      /**
          Replaces the current heap with a new one of `size` descriptors and rewinds the cursor.
          The previous heap is NOT released here; `alloc` hands it to `onFree`.
      **/
      function allocHeap( size : Int ) {
          var desc = new DescriptorHeapDesc();
          desc.type = type;
          desc.numDescriptors = size;
          if( type == CBV_SRV_UAV || type == SAMPLER )
              desc.flags = SHADER_VISIBLE;
          heap = new DescriptorHeap(desc);
          cursor = 0;
          this.size = size;
          address = heap.getHandle(false);
          cpuToGpu = desc.flags == SHADER_VISIBLE ? ( heap.getHandle(true).value - address.value ) : 0;
      }

      /**
          Called with the previous heap after `alloc` had to grow.
          The default implementation throws; owners that can defer the release override it.
      **/
      public dynamic function onFree( prev : DescriptorHeap ) {
          throw "Too many buffers";
      }

      /**
          Reserves `count` consecutive descriptors and returns the CPU address of the first one.
          If they do not fit, a new heap is created. Its size is the current size grown by 50%,
          or `count` when that is larger, so the returned range always lies inside the heap
          (the previous code could return a range overrunning the grown heap for large requests).
      **/
      public function alloc( count : Int ) {
          if( cursor + count > size ) {
              var prev = heap;
              var grown = (size * 3) >> 1;
              if( grown < count )
                  grown = count;
              allocHeap(grown); // also resets the cursor to 0
              onFree(prev);
          }
          var pos = cursor;
          cursor += count;
          return address.offset(pos * stride);
      }

      inline function get_available() {
          return size - cursor;
      }

      /** Rewinds the cursor so that all descriptors of the current heap can be reused. **/
      public function clear() {
          cursor = 0;
      }

      /** Converts a CPU descriptor address of this heap to the matching GPU address. **/
      public inline function toGPU( address : Address ) : Address {
          return new Address(address.value + cpuToGpu);
      }
  }
  /**
      GPU resource together with the state tracking used by the driver's barrier batching.
  **/
  class ResourceData {
      public var res : GpuResource;
      /** State the resource is in according to the barriers emitted so far. **/
      public var state : ResourceState;
      /** State requested through `transition`; differs from `state` while a barrier is pending. **/
      public var targetState : ResourceState;

      public function new() {
      }
  }
  /**
      Resource data for a buffer, carrying a vertex buffer view and an index buffer view.
  **/
  class VertexBufferData extends ResourceData {
      public var view : dx.Dx12.VertexBufferView;
      public var iview : dx.Dx12.IndexBufferView;
      public var size : Int;
  }
  /**
      Temporary upload state attached to a texture (see `TextureData.uploadBuffer`).
  **/
  class TextureUploadBuffer {
      public var tmpBuf : dx.Dx12.GpuResource;
      public var lastMipMapUploadPerSide : hl.Bytes;

      public function new() {
      }
  }
  /**
      Resource data for a texture: format, tracked clear color and optional upload buffer.
  **/
  class TextureData extends ResourceData {
      public var format : DxgiFormat;
      public var color : h3d.Vector4;
      public var uploadBuffer : TextureUploadBuffer;
      var clearColorChanges : Int;

      /**
          Records `c` as the new clear color.
          @return true when the stored color was changed; false when `c` equals the stored
          color or when the color has already been changed more than 10 times.
      **/
      public function setClearColor( c : h3d.Vector4 ) {
          if( clearColorChanges > 10 )
              return false;
          var current = this.color;
          var unchanged = current.r == c.r && current.g == c.g && current.b == c.b && current.a == c.a;
          if( unchanged )
              return false;
          clearColorChanges++;
          current.load(c);
          return true;
      }
  }
  /**
      Location of a query (heap index and offset) and its result value.
  **/
  class QueryData {
      public var heap : Int;
      public var offset : Int;
      public var result : Float;

      public function new() {
      }
  }
  330. class DX12Driver extends h3d.impl.Driver {
  var pipelineBuilder = new PipelineCache.PipelineBuilder();
  var driver : DriverInstance;
  /** Value reported by `isDisposed()`. **/
  var hasDeviceError = false;
  var window : dx.Window;
  var onContextLost : Void -> Void;

  // Frames: one DxFrame per back buffer, `frame` is the one currently recorded
  var frames : Array<DxFrame>;
  var frame : DxFrame;
  var fence : Fence;
  var fenceEvent : WaitEvent;

  // Descriptor heaps for render target views and depth stencil views
  var renderTargetViews : ManagedHeap;
  var depthStenciViews : ManagedHeap;
  var indirectCommand : CommandSignature;

  var currentFrame : Int;
  var fenceValue : Int64 = 0;
  var currentPass : h3d.mat.Pass;
  var currentWidth : Int;
  var currentHeight : Int;
  var currentShader : CompiledShader;
  var compiledShaders : Map<Int,CompiledShader> = new Map();
  var compiler : ShaderCompiler;
  var currentIndex : Buffer;
  /** Scratch objects shared by all driver calls. **/
  var tmp : TempObjects;
  var currentRenderTargets : Array<h3d.mat.Texture> = [];
  var defaultDepth : h3d.mat.Texture;
  var depthEnabled = true;
  var curStencilRef : Int = -1;
  /** Descriptor used by the last `setRenderTarget` call (null when the default view was used). **/
  var lastRtvDesc : RenderTargetViewDesc;
  var rtWidth : Int;
  var rtHeight : Int;
  var frameCount : Int;
  var tsFreq : haxe.Int64;
  var heapCount : Int;
  var currentPipelineState : PipelineState;
  var lastVertexGlobalBind : Int = -1;
  var lastFragmentGlobalBind : Int = -1;
  var useDepthClamp : Bool = false;

  // Tunables read when the driver is (re)created
  public static var INITIAL_RT_COUNT = 1024;
  public static var INITIAL_SRV_COUNT = 1024;
  public static var INITIAL_SAMPLER_COUNT = 1024;
  public static var INITIAL_BUMP_ALLOCATOR_SIZE = 2 * 1024 * 1024;
  public static var BUFFER_COUNT = #if console 3 #else 2 #end;
  public static var DEVICE_NAME = null;
  public static var DEBUG = false; // requires dxil.dll when set to true
  /**
      Binds the driver to the first window registered in `dx.Window` and creates all device objects.
  **/
  public function new() {
      this.window = @:privateAccess dx.Window.windows[0];
      reset();
  }
  /**
      Every feature is reported as supported except `Queries` and `BottomLeftCoords`.
  **/
  override function hasFeature(f:Feature) {
      var supported = switch( f ) {
          case Queries | BottomLeftCoords: false;
          default: true;
      };
      return supported;
  }
  /**
      Reports every texture format as supported (no per-format check is done here).
  **/
  override function isSupportedFormat(fmt:h3d.mat.Data.TextureFormat):Bool {
      return true;
  }
  /**
      Creates the native driver instance and every object that depends on it:
      per-frame data, fence, scratch objects, RTV/DSV heaps, the default depth texture
      (only when an engine is current), the indirect draw signature and the shader compiler.
      Ends by sizing the swap chain to the window.
  **/
  function reset() {
      var initFlags = new DriverInitFlags();
      if( DEBUG )
          initFlags.set(DriverInitFlag.DEBUG);
      driver = Driver.create(window, initFlags, DEVICE_NAME);

      // One frame object per back buffer; command lists start closed
      frames = [];
      for( _ in 0...BUFFER_COUNT ) {
          var fr = new DxFrame();
          fr.backBuffer = new ResourceData();
          fr.allocator = new CommandAllocator(DIRECT);
          fr.commandList = new CommandList(DIRECT, fr.allocator, null);
          fr.commandList.close();
          fr.shaderResourceCache = new ManagedHeapArray(CBV_SRV_UAV, INITIAL_SRV_COUNT);
          fr.samplerCache = new ManagedHeapArray(SAMPLER, INITIAL_SAMPLER_COUNT);
          if( fr.bumpAllocator != null )
              fr.bumpAllocator.release();
          fr.bumpAllocator = new BumpAllocator(INITIAL_BUMP_ALLOCATOR_SIZE);
          frames.push(fr);
      }

      fence = new Fence(0, NONE);
      fenceEvent = new WaitEvent(false);
      tmp = new TempObjects();

      // Old RTV/DSV heaps are released later, together with the frame that replaced them
      var deferRelease = function( prev : DescriptorHeap ) {
          frame.toRelease.push(prev);
      };
      renderTargetViews = new ManagedHeap(RTV, INITIAL_RT_COUNT);
      depthStenciViews = new ManagedHeap(DSV, INITIAL_RT_COUNT);
      renderTargetViews.onFree = deferRelease;
      depthStenciViews.onFree = deferRelease;

      if( h3d.Engine.getCurrent() != null ) {
          var depthData = new TextureData();
          depthData.state = depthData.targetState = DEPTH_WRITE;
          defaultDepth = new h3d.mat.Texture(0,0, Depth24Stencil8);
          defaultDepth.t = depthData;
          defaultDepth.name = "defaultDepth";
      }

      // Command signature with a single DRAW_INDEXED argument (5 * 4 bytes per command)
      var args = hl.CArray.alloc(IndirectArgumentDesc, 1);
      args[0].type = DRAW_INDEXED;
      var sig = new CommandSignatureDesc();
      sig.byteStride = 5 * 4;
      sig.numArgumentDescs = 1;
      sig.argumentDescs = args;
      indirectCommand = Driver.createCommandSignature(sig,null);

      tsFreq = Driver.getTimestampFrequency();
      compiler = new ShaderCompiler();
      resize(window.width, window.height);
  }
  /**
      Switches to the frame matching the current back buffer and prepares it for recording:
      resets its allocators, releases the resources queued for release, recycles temporary
      buffers, transitions the back buffer to RENDER_TARGET and binds fresh descriptor heaps.
  **/
  function beginFrame() {
      frameCount = hxd.Timer.frameCount;
      heapCount++;
      currentFrame = Driver.getCurrentBackBufferIndex();
      var previous = frame;
      frame = frames[currentFrame];
      defaultDepth.t.res = frame.depthBuffer;
      frame.allocator.reset();
      frame.commandList.reset(frame.allocator, null);
      frame.bumpAllocator.reset();

      // Deferred releases queued on this frame
      while( frame.toRelease.length > 0 )
          frame.toRelease.pop().release();
      while( frame.tmpBufToRelease.length > 0 ) {
          var pendingBuf = frame.tmpBufToRelease.pop();
          if( pendingBuf != null )
              pendingBuf.release();
      }
      // Upload buffers of the frame we are leaving are detached from their textures
      if( previous != null ) {
          while( previous.tmpBufToNullify.length > 0 ) {
              var texData = previous.tmpBufToNullify.pop();
              texData.uploadBuffer = null;
          }
      }
      beginQueries();

      // Temporary buffers: release the ones with lastUse older than 120 frames,
      // prepend the others to the used list which then becomes the available list
      var kept = frame.usedBuffers;
      var node = frame.availableBuffers;
      while( node != null ) {
          var following = node.next;
          if( node.lastUse < frameCount - 120 )
              node.buffer.release();
          else {
              node.next = kept;
              kept = node;
          }
          node = following;
      }
      frame.availableBuffers = kept;
      frame.usedBuffers = null;

      transition(frame.backBuffer, RENDER_TARGET);
      frame.commandList.iaSetPrimitiveTopology(TRIANGLELIST);
      renderTargetViews.clear();
      depthStenciViews.clear();
      curStencilRef = -1;
      currentIndex = null;
      frame.backBufferView = renderTargetViews.alloc(1);
      Driver.createRenderTargetView(frame.backBuffer.res, null, frame.backBufferView);
      setRenderTarget(null);

      // Take the first SRV and sampler heaps from the per-frame caches and bind them
      frame.shaderResourceCache.reset();
      frame.samplerCache.reset();
      frame.shaderResourceViews = frame.shaderResourceCache.next();
      frame.samplerViews = frame.samplerCache.next();
      var heaps = tmp.descriptors2;
      heaps[0] = @:privateAccess frame.shaderResourceViews.heap;
      heaps[1] = @:privateAccess frame.samplerViews.heap;
      frame.commandList.setDescriptorHeaps(heaps);
  }
  /**
      Clears the current render targets (or the back buffer when none is bound) with `color`,
      and the current depth/stencil view with `depth` / `stencil`. Null arguments are skipped.
      A texture whose stored clear color changes is re-allocated and the targets are rebound.
  **/
  override function clear(?color:Vector4, ?depth:Float, ?stencil:Int) {
      flushTransitions();
      if( color != null ) {
          var value = tmp.clearColor;
          value.r = color.r;
          value.g = color.g;
          value.b = color.b;
          value.a = color.a;
          var count = currentRenderTargets.length;
          var rebind = false;
          for( i in 0...count ) {
              var tex = currentRenderTargets[i];
              if( tex != null && tex.t.setClearColor(color) ) {
                  rebind = true;
                  // update texture to use another clear value
                  var old = tex.t;
                  tex.t = allocTexture(tex);
                  @:privateAccess tex.t.clearColorChanges = old.clearColorChanges;
                  frame.toRelease.push(old.res);
                  Driver.createRenderTargetView(tex.t.res, lastRtvDesc, tmp.renderTargets[i]);
              }
              tex.flags.set(WasCleared);
              frame.commandList.clearRenderTargetView(tmp.renderTargets[i], value);
          }
          if( rebind )
              frame.commandList.omSetRenderTargets(count, tmp.renderTargets, true, depthEnabled ? getDepthViewFromTexture(currentRenderTargets[0], currentRenderTargets[0].depthBuffer.t.state & DEPTH_WRITE == COMMON ) : null);
          // clear backbuffer
          if( count == 0 )
              frame.commandList.clearRenderTargetView(frame.backBufferView, value);
      }
      var hasDepth = depth != null;
      var hasStencil = stencil != null;
      if( hasDepth || hasStencil )
          frame.commandList.clearDepthStencilView(tmp.depthStencils[0], hasDepth ? (hasStencil ? BOTH : DEPTH) : STENCIL, (depth:Float), stencil);
  }
  /**
      Signals the fence with the current fence value, blocks until the fence event fires,
      then increments the fence value for the next wait.
  **/
  function waitGpu() {
      Driver.signal(fence, fenceValue);
      fence.setEvent(fenceValue, fenceEvent);
      // -1 is passed as the wait timeout
      fenceEvent.wait(-1);
      fenceValue++;
  }
  /**
      Resizes the swap chain and re-creates the per-frame depth buffers.
      Does nothing when there is no default depth texture or when the size is unchanged.
      The current frame is flushed and the GPU is waited for before buffers are released,
      and a new frame is begun at the end.
  **/
  override function resize(width:Int, height:Int) {
      if( defaultDepth == null )
          return;
      if( currentWidth == width && currentHeight == height )
          return;
      rtWidth = width;
      currentWidth = width;
      rtHeight = height;
      currentHeight = height;
      @:privateAccess defaultDepth.width = width;
      @:privateAccess defaultDepth.height = height;

      if( frame != null )
          flushFrame(true);
      waitGpu();

      // Drop the old back buffers (cancelling their pending transition first) and depth buffers
      for( fr in frames ) {
          var back = fr.backBuffer;
          if( back.res != null ) {
              if ( back.targetState != back.state )
                  cancelTransition(back);
              back.res.release();
          }
          if( fr.depthBuffer != null )
              fr.depthBuffer.release();
      }

      Driver.resize(width, height, BUFFER_COUNT, R8G8B8A8_UNORM);
      renderTargetViews.clear();
      depthStenciViews.clear();

      for( i in 0...frames.length ) {
          var fr = frames[i];
          var back = fr.backBuffer;
          back.res = Driver.getBackBuffer(i);
          back.res.setName("Backbuffer#"+i);
          back.state = back.targetState = PRESENT;

          var desc = new ResourceDesc();
          var flags = new haxe.EnumFlags();
          desc.dimension = TEXTURE2D;
          desc.width = width;
          desc.height = height;
          desc.depthOrArraySize = 1;
          desc.mipLevels = 1;
          desc.sampleDesc.count = 1;
          desc.format = D24_UNORM_S8_UINT;
          desc.flags.set(ALLOW_DEPTH_STENCIL);
          tmp.heap.type = DEFAULT;
          tmp.clearValue.format = desc.format;
          tmp.clearValue.depth = 1;
          tmp.clearValue.stencil= 0;
          fr.depthBuffer = Driver.createCommittedResource(tmp.heap, flags, desc, DEPTH_WRITE, tmp.clearValue);
          fr.depthBuffer.setName("Depthbuffer#"+i);
      }
      beginFrame();
  }
  /**
      Intentionally empty: this override does no work for `frame`.
  **/
  override function begin(frame:Int) {
  }
  /**
      The driver is reported as disposed once a device error has been flagged.
  **/
  override function isDisposed() {
      return this.hasDeviceError;
  }
  /**
      Stores `onCreate(true)` as the context-lost handler and schedules `onCreate(false)`
      through a 1 ms timer. `forceSoftware` is not used by this driver.
  **/
  override function init( onCreate : Bool -> Void, forceSoftware = false ) {
      var onLost = onCreate.bind(true);
      var onReady = onCreate.bind(false);
      onContextLost = onLost;
      haxe.Timer.delay(onReady, 1);
  }
  /**
      Returns "DX12", followed by a space and the device name when `details` is set.
  **/
  override function getDriverName(details:Bool) {
      if( !details )
          return "DX12";
      return "DX12" + (" "+Driver.getDeviceName());
  }
  /**
      Flags the driver as being in device-error state (see `isDisposed`).
  **/
  public function forceDeviceError() {
      this.hasDeviceError = true;
  }
  /**
      Removes `res` from the list of pending transitions and resets its target state
      to its current state. Throws "Resource not found" when `res` is not pending.
  **/
  inline function cancelTransition( res : ResourceData ) {
      var index = -1;
      for( i in 0...tmp.barrierCount ) {
          if( tmp.resourcesToTransition[i] == res ) {
              index = i;
              break;
          }
      }
      if( index < 0 )
          throw "Resource not found";
      // Close the gap by shifting the following entries down by one
      tmp.barrierCount -= 1;
      for( j in index...tmp.barrierCount )
          tmp.resourcesToTransition[j] = tmp.resourcesToTransition[j + 1];
      res.targetState = res.state;
  }
  /**
      Requests that `res` be in state `to` at the next `flushTransitions`.
      Nothing is recorded immediately: the resource is queued (once) and only its
      target state is updated by subsequent requests. Asking for the state the
      resource is already in cancels the pending request.
  **/
  function transition( res : ResourceData, to : ResourceState ) {
      if( res.targetState == to )
          return;
      if( res.state == to ) {
          cancelTransition(res);
          return;
      }
      if( tmp.barrierCount == tmp.maxBarriers ) {
          // Queue is full: emit what is pending, then grow the storage by 100 entries
          flushTransitions();
          tmp.maxBarriers += 100;
          tmp.barriers = hl.CArray.alloc(ResourceBarrier, tmp.maxBarriers);
          var allSubresource = #if (hldx >= version("1.16.0")) Driver.getConstant(RESOURCE_BARRIER_ALL_SUBRESOURCES) #else 0xffffffff #end;
          for ( index in 0...tmp.maxBarriers )
              tmp.barriers[index].subResource = allSubresource;
          tmp.resourcesToTransition = new hl.NativeArray<ResourceData>(tmp.maxBarriers);
      }
      // If state is different from targetState, a barrier has already been requested so we just have to update the targetState
      var alreadyQueued = res.state != res.targetState;
      if( !alreadyQueued )
          tmp.resourcesToTransition[tmp.barrierCount++] = res;
      res.targetState = to;
  }
  /**
      Records the barriers for every queued transition on the current command list
      and empties the queue. Entries whose native resource is null are skipped.
  **/
  function flushTransitions() {
      if( tmp.barrierCount <= 0 )
          return;
      var emitted = 0;
      for( i in 0...tmp.barrierCount ) {
          var data = tmp.resourcesToTransition[i];
          // A null native resource means the resource has been disposed
          if( data.res != null ) {
              var barrier = tmp.barriers[emitted];
              barrier.resource = data.res;
              barrier.stateBefore = data.state;
              barrier.stateAfter = data.targetState;
              data.state = data.targetState;
              emitted++;
          }
      }
      if( emitted > 0 ) {
          #if (hldx >= version("1.15.0"))
          frame.commandList.resourceBarriers(tmp.barriers, emitted);
          #else
          for( i in 0...emitted )
              frame.commandList.resourceBarrier(tmp.barriers[i]);
          #end
      }
      tmp.barrierCount = 0;
  }
  /**
      Returns the depth view to use together with render target `tex`.
      A null `tex` selects the frame depth buffer. A texture without depth buffer
      disables depth and returns null. Throws "Depth size mismatch" when the depth
      buffer and the texture have different sizes.
  **/
  function getDepthViewFromTexture( tex : h3d.mat.Texture, readOnly : Bool ) {
      if( tex == null )
          return getDepthView(null, readOnly);
      var depth = tex.depthBuffer;
      if( depth == null ) {
          depthEnabled = false;
          return null;
      }
      if( depth.width != tex.width || depth.height != tex.height )
          throw "Depth size mismatch";
      return getDepthView(depth, readOnly);
  }
  /**
      Allocates a depth stencil view for `depthBuffer` (or for the frame depth buffer when
      null), stores it in `tmp.depthStencils[0]`, enables depth and returns `tmp.depthStencils`.
      When the texture is in neither DEPTH_READ nor DEPTH_WRITE state a transition is requested.
  **/
  function getDepthView( depthBuffer : h3d.mat.Texture, readOnly : Bool ) {
      var useFrameDepth = depthBuffer == null;
      var res = useFrameDepth ? frame.depthBuffer : depthBuffer.t.res;
      var view = depthStenciViews.alloc(1);

      var viewDesc = new DepthStencilViewDesc();
      viewDesc.viewDimension = TEXTURE2D;
      viewDesc.format = useFrameDepth ? D24_UNORM_S8_UINT : toDxgiDepthFormat(depthBuffer.format);
      viewDesc.mipSlice = 0;
      viewDesc.firstArraySlice = 0;
      viewDesc.arraySize = 1;
      if( readOnly ) {
          viewDesc.flags.set(READ_ONLY_DEPTH);
          viewDesc.flags.set(READ_ONLY_STENCIL);
      }
      Driver.createDepthStencilView(res, viewDesc, view);

      var depths = tmp.depthStencils;
      depths[0] = view;
      depthEnabled = true;
      if ( depthBuffer != null && (depthBuffer.t.state & ( DEPTH_READ | DEPTH_WRITE ) == COMMON) )
          transition(depthBuffer.t, readOnly ? DEPTH_READ : DEPTH_WRITE);
      return depths;
  }
  /**
      Returns the driver's default depth texture (null when it was not created).
  **/
  override function getDefaultDepthBuffer():h3d.mat.Texture {
      return this.defaultDepth;
  }
  /**
      Records `w` x `h` as the current render target size and sets both the scissor
      rectangle and the viewport of the current command list to cover it.
  **/
  function initViewport(w,h) {
      rtWidth = w;
      rtHeight = h;

      var scissor = tmp.rect;
      scissor.top = 0;
      scissor.left = 0;
      scissor.right = w;
      scissor.bottom = h;

      var vp = tmp.viewport;
      vp.width = w;
      vp.height = h;
      vp.maxDepth = 1;

      frame.commandList.rsSetScissorRects(1, scissor);
      frame.commandList.rsSetViewports(1, vp);
  }
  /**
      Binds `tex` (or the back buffer when null) as the single render target.
      `layer` and `mipLevel` select the sub-resource, `depthBinding` controls whether and how
      depth is bound. A texture that was never cleared is cleared to transparent black.
      Also resets the viewport/scissor to the target size and updates the pipeline builder.
  **/
  override function setRenderTarget(tex:Null<h3d.mat.Texture>, layer:Int = 0, mipLevel:Int = 0, depthBinding : h3d.Engine.DepthBinding = ReadWrite) {
      var hasTex = tex != null;
      if( hasTex ) {
          if( tex.t == null ) tex.alloc();
          transition(tex.t, RENDER_TARGET);
      }
      depthEnabled = depthBinding != NotBound;

      var isArr = hasTex && (tex.flags.has(IsArray) || tex.flags.has(Cube));
      var is3D = hasTex && tex.flags.has(Is3D);
      var desc = null;
      // An explicit view description is only needed for non-default sub-resources
      if( layer != 0 || mipLevel != 0 || isArr || is3D ) {
          desc = tmp.rtvDesc;
          desc.format = tex.t.format;
          if( isArr )
              desc.viewDimension = TEXTURE2DARRAY;
          else if( is3D )
              desc.viewDimension = TEXTURE3D;
          else
              desc.viewDimension = TEXTURE2D;
          desc.mipSlice = mipLevel;
          if( isArr || is3D ) {
              desc.firstArraySlice = layer;
              desc.arraySize = 1;
          }
          desc.planeSlice = 0;
      }
      lastRtvDesc = desc;

      if( hasTex ) {
          var view = renderTargetViews.alloc(1);
          Driver.createRenderTargetView(tex.t.res, desc, view);
          tmp.renderTargets[0] = view;
          if( !tex.flags.has(WasCleared) ) {
              tex.flags.set(WasCleared);
              var zero = tmp.clearColor;
              zero.r = 0;
              zero.g = 0;
              zero.b = 0;
              zero.a = 0;
              flushTransitions();
              frame.commandList.clearRenderTargetView(tmp.renderTargets[0], zero);
          }
      } else
          tmp.renderTargets[0] = frame.backBufferView;

      frame.commandList.omSetRenderTargets(1, tmp.renderTargets, true, depthEnabled ? getDepthViewFromTexture(tex, depthBinding == ReadOnly ) : null);

      while( currentRenderTargets.length > 0 )
          currentRenderTargets.pop();
      if( hasTex )
          currentRenderTargets.push(tex);

      var w = currentWidth;
      var h = currentHeight;
      if( hasTex ) {
          w = tex.width >> mipLevel;
          h = tex.height >> mipLevel;
      }
      // Small mip levels never go below 1 pixel
      if( w == 0 ) w = 1;
      if( h == 0 ) h = 1;
      initViewport(w, h);
      pipelineBuilder.setRenderTarget(tex, depthEnabled);
  }
  /**
      Maps a depth pixel format to the matching DXGI format.
      A null format maps to the value 0; any non-depth format throws.
  **/
  function toDxgiDepthFormat( format : hxd.PixelFormat ) {
      return switch( format ) {
          case null: cast 0;
          case Depth16: D16_UNORM;
          case Depth24Stencil8 | Depth24: D24_UNORM_S8_UINT;
          case Depth32: D32_FLOAT;
          default: throw "Unsupported depth format "+ format;
      }
  }
  /**
      Binds several textures as simultaneous render targets. The first texture supplies
      the depth buffer and the viewport size. Textures that were never cleared are
      cleared to transparent black. Returns early when a device error is flagged after
      allocating a texture.
  **/
  override function setRenderTargets(textures:Array<h3d.mat.Texture>, depthBinding : h3d.Engine.DepthBinding = ReadWrite) {
      var count = textures.length;
      while( currentRenderTargets.length > count )
          currentRenderTargets.pop();
      depthEnabled = depthBinding != NotBound;
      lastRtvDesc = null;
      var first = textures[0];
      var firstView = renderTargetViews.alloc(count);
      for( i in 0...count ) {
          var tex = textures[i];
          if ( tex.t == null ) {
              tex.alloc();
              if ( hasDeviceError ) return;
          }
          var view = firstView.offset(renderTargetViews.stride * i);
          Driver.createRenderTargetView(tex.t.res, null, view);
          tmp.renderTargets[i] = view;
          currentRenderTargets[i] = tex;
          transition( tex.t, RENDER_TARGET);
          if ( tex.flags.has(WasCleared) )
              continue;
          tex.flags.set(WasCleared);
          var zero = tmp.clearColor;
          zero.r = 0;
          zero.g = 0;
          zero.b = 0;
          zero.a = 0;
          flushTransitions();
          frame.commandList.clearRenderTargetView(tmp.renderTargets[i], zero);
      }
      frame.commandList.omSetRenderTargets(textures.length, tmp.renderTargets, true, depthEnabled ? getDepthViewFromTexture(first, depthBinding == ReadOnly) : null);
      initViewport(first.width, first.height);
      pipelineBuilder.setRenderTargets(textures, depthEnabled);
  }
  /**
      Binds `depthBuffer` as a writable depth target with no color target,
      and sets the viewport to the depth buffer size.
  **/
  override function setDepth(depthBuffer : h3d.mat.Texture) {
      var depthView = getDepthView(depthBuffer, false);
      depthEnabled = true;
      frame.commandList.omSetRenderTargets(0, null, true, depthView);
      // No color target is bound any more
      while( currentRenderTargets.length > 0 )
          currentRenderTargets.pop();
      initViewport(depthBuffer.width, depthBuffer.height);
      pipelineBuilder.setDepth(depthBuffer);
  }
  /**
      Stores the depth clamp flag in `useDepthClamp`.
  **/
  override function setDepthClamp( enabled : Bool ) {
      this.useDepthClamp = enabled;
  }
  /**
      Forwards the depth bias values to the pipeline builder.
  **/
  override function setDepthBias( depthBias : Float, slopeScaledBias : Float ) {
      this.pipelineBuilder.setDepthBias(depthBias, slopeScaledBias);
  }
  /**
      Sets the scissor rectangle. A zero origin with negative width and height
      selects the whole current render target; otherwise the given zone is used.
  **/
  override function setRenderZone(x:Int, y:Int, width:Int, height:Int) {
      var scissor = tmp.rect;
      var wholeTarget = width < 0 && height < 0 && x == 0 && y == 0;
      if( wholeTarget ) {
          scissor.left = 0;
          scissor.top = 0;
          scissor.right = rtWidth;
          scissor.bottom = rtHeight;
      } else {
          scissor.left = x;
          scissor.top = y;
          scissor.right = x + width;
          scissor.bottom = y + height;
      }
      frame.commandList.rsSetScissorRects(1, scissor);
  }
  /**
      Reads back the first current render target (layer 0, mip 0) into `pixels`.
      Throws when no render target texture is bound.
  **/
  override function captureRenderBuffer( pixels : hxd.Pixels ) {
      var target = currentRenderTargets[0];
      if( target == null )
          throw "Can't capture main render buffer in DirectX";
      captureTexPixels(pixels, target, 0, 0);
  }
  // Reads back one layer / mip level of `tex` into newly allocated pixels.
  // When `region` is given it is clamped IN PLACE to the texture bounds and
  // only that region is captured. The render targets bound on entry are
  // re-bound after the capture.
  override function capturePixels(tex:h3d.mat.Texture, layer:Int, mipLevel:Int, ?region:h2d.col.IBounds):hxd.Pixels {
      var savedTargets = currentRenderTargets.copy();
      var srcWidth = tex.width;
      var srcHeight = tex.height;
      var originX = 0;
      var originY = 0;
      if( region != null ) {
          if( region.xMax > tex.width ) region.xMax = tex.width;
          if( region.yMax > tex.height ) region.yMax = tex.height;
          if( region.xMin < 0 ) region.xMin = 0;
          if( region.yMin < 0 ) region.yMin = 0;
          srcWidth = region.width;
          srcHeight = region.height;
          originX = region.xMin;
          originY = region.yMin;
      }
      // A mip level is never smaller than 1x1.
      var w = srcWidth >> mipLevel;
      var h = srcHeight >> mipLevel;
      if( w == 0 ) w = 1;
      if( h == 0 ) h = 1;
      var pixels = hxd.Pixels.alloc(w, h, tex.format);
      captureTexPixels(pixels, tex, layer, mipLevel, originX, originY);
      if( savedTargets.length > 0 )
          setRenderTargets(savedTargets);
      return pixels;
  }
  // Copies a rectangle of one subresource of `tex` into `pixels`.
  //
  // pixels   : destination; its width/height define the size of the rectangle.
  // layer    : array layer of the subresource.
  // mipLevel : mip level of the subresource.
  // x, y     : top-left corner of the rectangle inside the subresource.
  //
  // The texture is copied into a temporary READBACK buffer; the function then
  // calls flushFrame() and waitGpu() before mapping the buffer, and
  // beginFrame() once the data has been read.
  function captureTexPixels( pixels: hxd.Pixels, tex:h3d.mat.Texture, layer:Int, mipLevel:Int, x : Int = 0, y : Int = 0) {
      if( pixels.width == 0 || pixels.height == 0 )
          return;
      var totalSize : hl.BytesAccess<Int64> = tmp.copyableInfosBytes;
      var src = tmp.srcTextureLocation;
      src.res = tex.t.res;
      src.type = SUBRESOURCE_INDEX;
      src.subResourceIndex = mipLevel + layer * tex.mipLevels;
      var srcDesc = makeTextureDesc(tex);
      var dst = tmp.dstTextureLocation;
      dst.type = PLACED_FOOTPRINT;
      // Query the buffer layout (row pitch, total size) needed for this subresource.
      Driver.getCopyableFootprints(srcDesc, src.subResourceIndex, 1, 0, dst.placedFootprint, null, null, totalSize);
      var desc = new ResourceDesc();
      var flags = new haxe.EnumFlags();
      desc.dimension = BUFFER;
      desc.width = totalSize[0];
      desc.height = 1;
      desc.depthOrArraySize = 1;
      desc.mipLevels = 1;
      desc.sampleDesc.count = 1;
      desc.layout = ROW_MAJOR;
      tmp.heap.type = READBACK;
      var tmpBuf = Driver.createCommittedResource(tmp.heap, flags, desc, COPY_DEST, null);
      // Box edges are absolute coordinates: right/bottom must include the
      // origin, otherwise a capture with x/y != 0 copies a truncated region.
      var box = new Box();
      box.left = x;
      box.right = x + pixels.width;
      box.top = y;
      box.bottom = y + pixels.height;
      box.back = 1;
      transition(tex.t, COPY_SOURCE);
      flushTransitions();
      dst.res = tmpBuf;
      frame.commandList.copyTextureRegion(dst, 0, 0, 0, src, box);
      flushFrame();
      waitGpu();
      var output = tmpBuf.map(0, null);
      var stride = hxd.Pixels.calcStride(pixels.width, tex.format);
      var rowStride = dst.placedFootprint.footprint.rowPitch;
      if( rowStride == stride )
          (pixels.bytes:hl.Bytes).blit(pixels.offset, output, 0, stride * pixels.height);
      else {
          // Rows in the readback buffer are padded to rowPitch: copy row by row.
          for( i in 0...pixels.height )
              (pixels.bytes:hl.Bytes).blit(pixels.offset + i * stride, output, i * rowStride, stride);
      }
      tmpBuf.unmap(0,null);
      tmpBuf.release();
      beginFrame();
  }
  917. // ---- SHADERS -----
  // Table of float vertex formats; entries 2, 3 and 4 hold the 2-, 3- and
  // 4-component 32-bit float formats, entries 0 and 1 are null.
  // NOTE(review): presumably indexed by component count - confirm at the use site.
  918. static var VERTEX_FORMATS = [null,null,R32G32_FLOAT,R32G32B32_FLOAT,R32G32B32A32_FLOAT];
  // Looks for a precompiled binary for `code`.
  // A base64 payload embedded in the source between "//BIN=" and the next "#"
  // wins; otherwise the shader cache (when there is one) is queried.
  // Returns null when no binary is available.
  function getBinaryPayload( code : String, profile ) {
      var marker = code.indexOf("//BIN=");
      var stop = marker < 0 ? -1 : code.indexOf("#", marker);
      if( stop >= 0 ) {
          var start = marker + 6; // skip the "//BIN=" tag itself
          return haxe.crypto.Base64.decode(code.substr(start, stop - start));
      }
      if( shaderCache == null )
          return null;
      return shaderCache.resolveShaderBinary(code, profile);
  }
  // Extra arguments passed to the shader compiler by compileSource(); they are
  // also appended to the shader cache key. "-Zi" is left commented out.
  930. static final SHADER_ARGS : Array<String>= [/*"-Zi"*/];
  // Returns the compiled bytecode of `sh` for the given profile.
  // The HLSL source is generated once and stored in sh.code (prefixed by
  // rootStr). A binary found by getBinaryPayload() is reused; otherwise the
  // source is compiled and, when a shader cache exists, saved into it.
  function compileSource( sh : hxsl.RuntimeShader.RuntimeShaderData, profile, baseRegister, rootStr = "" ) {
      var hlsl = new hxsl.HlslOut();
      hlsl.baseRegister = baseRegister;
      if( sh.code == null )
          sh.code = rootStr + hlsl.run(sh.data);
      // The cache key is the profile followed by every compiler argument.
      var cacheKey = profile + SHADER_ARGS.join("");
      var bytes = getBinaryPayload(sh.code, cacheKey);
      if( bytes != null )
          return bytes;
      bytes = compiler.compile(sh.code, profile, SHADER_ARGS);
      if( shaderCache != null )
          shaderCache.saveCompiledShader(sh.code, bytes, cacheKey);
      return bytes;
  }
  // Returns the generated HLSL source: the vertex stage alone for a compute
  // shader, otherwise vertex and fragment separated by four newlines.
  override function getNativeShaderCode( shader : hxsl.RuntimeShader ) {
      var vsSource = new hxsl.HlslOut().run(shader.vertex.data);
      if( shader.mode == Compute )
          return vsSource;
      var psSource = new hxsl.HlslOut().run(shader.fragment.data);
      return vsSource + "\n\n\n\n" + psSource;
  }
  // Builds a textual "#define <name> ..." description of a root signature
  // from the first `paramsCount` entries of `params`: the root flags, then
  // one RootConstants(...) or DescriptorTable(...) entry per parameter.
  // Descriptor tables without ranges are skipped.
  function stringifyRootSignature( sign : RootSignatureDesc, name : String, params : hl.CArray<RootParameterDescriptorTable>, paramsCount : Int ) : String {
      var s = '#define ${name} "RootFlags(';
      if( sign.flags.toInt() != 0 ) {
          // Flag names joined by '|'.
          for( ctor in haxe.EnumTools.getConstructors(RootSignatureFlag) )
              if( sign.flags.has(haxe.EnumTools.createByName(RootSignatureFlag, ctor)) )
                  s += Std.string(ctor) + '|';
          // Drop the trailing separator.
          s = s.substr(0, s.length - 1);
      } else
          s += '0'; // no flags
      s += ')",';
      for( idx in 0...paramsCount ) {
          var param = params[idx];
          var vis = "SHADER_VISIBILITY_"+switch( param.shaderVisibility ) { case VERTEX: "VERTEX"; case PIXEL: "PIXEL"; default: "ALL"; };
          if( param.parameterType == CONSTANTS ) {
              var consts = unsafeCastTo(param, RootParameterConstants);
              var shaderRegister = consts.shaderRegister;
              s += 'RootConstants(num32BitConstants=${consts.num32BitValues},b${shaderRegister}, visibility=${vis}),';
              continue;
          }
          try {
              if( param.descriptorRanges == null ) continue;
              s += 'DescriptorTable(';
              for( j in 0...param.numDescriptorRanges ) {
                  var range = param.descriptorRanges[j];
                  var reg = range.baseShaderRegister;
                  switch( range.rangeType ) {
                  case CBV:
                      s += 'CBV(b${reg}, numDescriptors = ${range.numDescriptors}),';
                  case SRV:
                      s += 'SRV(t${reg}, numDescriptors = ${range.numDescriptors}),';
                  case SAMPLER:
                      s += 'Sampler(s${reg}, space=${range.registerSpace}, numDescriptors = ${range.numDescriptors}),';
                  case UAV:
                      s += 'UAV(u${reg}, numDescriptors = ${range.numDescriptors}),';
                  }
              }
              s += 'visibility = ${vis}),';
          } catch( e : Dynamic ) {
              // Best effort: a table that cannot be read is left as is.
              continue;
          }
      }
      s += '\n';
      return s;
  }
  // Reinterprets `v` as an `R` without any runtime check.
  // `c` only serves to select the result type.
  inline function unsafeCastTo<T,R>( v : T, c : Class<R> ) : R {
      #if (haxe_ver >= 5)
      return hl.Api.unsafeCast(v);
      #else
      // Before Haxe 5: round-trip through a one-slot native array.
      var holder = new hl.NativeArray<T>(1);
      holder[0] = v;
      return (cast holder : hl.NativeArray<R>)[0];
      #end
  }
  // Builds the root signature description of `shader`.
  //
  // Globals and params of each stage are placed as root constants when the
  // whole signature fits in the 64-unit budget; otherwise they are moved to
  // single-CBV descriptor tables (vertex params first, then fragment params,
  // then globals). Buffers, textures and samplers always use descriptor tables.
  //
  // Returns the signature desc, the per-stage register layout (with the first
  // shader register used by each stage), the parameter array, the number of
  // parameters used and every descriptor range allocated.
  // Throws "ASSERT : Too many parameters" when more than 16 root parameters
  // would be needed.
  function computeRootSignature( shader : hxsl.RuntimeShader ) {
      var allocatedParams = 16;
      var params = hl.CArray.alloc(RootParameterDescriptorTable,allocatedParams);
      var paramsCount = 0, regCount = 0;
      var ranges = [];
      var globalsParamsCBV = false;
      var vertexParamsCBV = false;
      var fragmentParamsCBV = false;
      // Reserves the next root parameter slot. The capacity is checked BEFORE
      // the slot is handed out so that `params` is never written out of bounds.
      function reserveParam() {
          if( paramsCount >= allocatedParams )
              throw "ASSERT : Too many parameters";
          return paramsCount++;
      }
      // Adds a descriptor table parameter with `rangeCount` appended ranges.
      function allocDescTable(vis, rangeCount = 1) {
          var p = params[reserveParam()];
          p.parameterType = DESCRIPTOR_TABLE;
          p.numDescriptorRanges = rangeCount;
          var rangeArr = hl.CArray.alloc(DescriptorRange,rangeCount);
          for ( i in 0...rangeCount) {
              var range = rangeArr[i];
              #if (hldx >= version("1.15.0"))
              range.offsetInDescriptorsFromTableStart = Driver.getConstant(DESCRIPTOR_RANGE_OFFSET_APPEND);
              #else
              range.offsetInDescriptorsFromTableStart = 0xffffffff;
              #end
              ranges.push(range);
          }
          p.descriptorRanges = rangeArr;
          p.shaderVisibility = vis;
          return rangeArr;
      }
      // Allocates a constant block of `size` vec4. A register is always
      // consumed, even when size == 0 (returns -1 in that case).
      // Returns the parameter index, OR-ed with 0x100 when stored as a CBV table.
      function allocConsts(size,vis,type) {
          var reg = regCount++;
          if( size == 0 ) return -1;
          if( type != null ) {
              var pid = paramsCount;
              var r = allocDescTable(vis)[0];
              r.rangeType = type;
              r.numDescriptors = 1;
              r.baseShaderRegister = reg;
              r.registerSpace = 0;
              return pid | 0x100;
          }
          var pid = reserveParam();
          var p = unsafeCastTo(params[pid], RootParameterConstants);
          p.parameterType = CONSTANTS;
          p.shaderRegister = reg;
          p.shaderVisibility = vis;
          p.num32BitValues = size << 2;
          return pid;
      }
      // Allocates every root parameter of one shader stage.
      function allocParams( sh : hxsl.RuntimeShader.RuntimeShaderData ) {
          var vis = switch( sh.kind ) {
          case Vertex: VERTEX;
          case Fragment: PIXEL;
          default: ALL;
          }
          var regs = new ShaderRegisters();
          regs.globals = allocConsts(sh.globalsSize, vis, globalsParamsCBV ? CBV : null);
          regs.params = allocConsts(sh.paramsSize, vis, (sh.kind == Fragment ? fragmentParamsCBV : vertexParamsCBV) ? CBV : null);
          regs.buffers = paramsCount;
          if( sh.bufferCount > 0 ) {
              regs.bufferTypes = [];
              regs.bufferStrides = [];
              var uavCount = 0;
              var p = sh.buffers;
              while( p != null ) {
                  switch( p.type ) {
                  case TBuffer(type,_,kind):
                      regs.bufferTypes.push(kind);
                      regs.bufferStrides.push(hxsl.Ast.Tools.size(type) << 2);
                      switch ( kind ) {
                      case Uniform, Partial:
                          regs.cbvCount++;
                      case Storage, StoragePartial:
                          regs.storageCount++;
                      case RW, RWPartial:
                          uavCount++;
                      default:
                          throw "assert";
                      }
                  default:
                      throw "assert";
                  }
                  p = p.next;
              }
              // One table for all buffers, one range per kind that is present.
              var rangeCount = 0;
              rangeCount += regs.cbvCount > 0 ? 1 : 0;
              rangeCount += regs.storageCount > 0 ? 1 : 0;
              rangeCount += uavCount > 0 ? 1 : 0;
              var rangArr = allocDescTable(vis, rangeCount);
              var i = 0;
              if ( regs.cbvCount > 0 ) {
                  var r = rangArr[i];
                  r.rangeType = CBV;
                  r.baseShaderRegister = regCount;
                  r.registerSpace = 0;
                  r.numDescriptors = regs.cbvCount;
                  regCount += regs.cbvCount;
                  i++;
              }
              if ( regs.storageCount > 0 ) {
                  var r = rangArr[i];
                  r.rangeType = SRV;
                  r.baseShaderRegister = regs.texturesCount;
                  r.registerSpace = 0;
                  r.numDescriptors = regs.storageCount;
                  i++;
              }
              if ( uavCount > 0 ) {
                  var r = rangArr[i];
                  r.rangeType = UAV;
                  r.baseShaderRegister = regCount;
                  r.registerSpace = 0;
                  r.numDescriptors = uavCount;
                  regCount += uavCount;
                  i++;
              }
          }
          if( sh.texturesCount > 0 ) {
              regs.texturesTypes = [];
              var uavCount = 0;
              for( v in sh.data.vars ) {
                  switch( v.type ) {
                  case TArray(t = TSampler(_) | TRWTexture(_), SConst(n)):
                      for( i in 0...n )
                          regs.texturesTypes.push(t);
                      if( t.match(TSampler(_)) )
                          regs.texturesCount += n;
                      else {
                          uavCount += n;
                      }
                  default:
                  }
              }
              regs.textures = paramsCount;
              var rangeCount = 0;
              rangeCount += regs.texturesCount > 0 ? 1 : 0;
              rangeCount += uavCount > 0 ? 1 : 0;
              var rangeArr = allocDescTable(vis, rangeCount);
              var i = 0;
              if( regs.texturesCount > 0 ) {
                  var r = rangeArr[i];
                  r.rangeType = SRV;
                  // Texture SRVs are placed after the storage buffer SRVs.
                  r.baseShaderRegister = regs.storageCount;
                  r.registerSpace = 0;
                  r.numDescriptors = regs.texturesCount;
                  i++;
                  // Samplers live in their own table.
                  regs.samplers = paramsCount;
                  var r = allocDescTable(vis)[0];
                  r.rangeType = SAMPLER;
                  r.baseShaderRegister = 0;
                  r.registerSpace = 0;
                  r.numDescriptors = regs.texturesCount;
              }
              if ( uavCount > 0 ) {
                  var r = rangeArr[i];
                  r.rangeType = UAV;
                  r.baseShaderRegister = regCount;
                  r.registerSpace = 0;
                  r.numDescriptors = uavCount;
                  regCount += uavCount;
                  i++;
              }
          }
          return regs;
      }
      // Costs in units:
      // Descriptor Tables cost 1 each
      // Root CBVs cost 2 each
      // Root SRVs cost 2 each
      // Root UAVs cost 2 each
      // Root Constants cost 1 per 32-bit value
      function calcSize( sh : hxsl.RuntimeShader.RuntimeShaderData ) {
          var s = (sh.globalsSize + sh.paramsSize) << 2;
          // 1 descriptor table for all textures and 1 descriptor table for all samplers
          s += ( sh.texturesCount > 0 ) ? 2 : 0;
          // 1 descriptor table for all buffers
          s += ( sh.bufferCount > 0 ) ? 1 : 0;
          return s;
      }
      var totalVertex = calcSize(shader.vertex);
      var isCompute = shader.mode == Compute;
      var totalFragment = isCompute ? 0 : calcSize(shader.fragment);
      var total = totalVertex + totalFragment;
      if( total > 64 ) {
          var vertexParamCostGain = 1 - (shader.vertex.paramsSize << 2);
          var fragmentParamCostGain = isCompute ? 0 : 1 - (shader.fragment.paramsSize << 2);
          // Remove the size cost of the root constant and add one descriptor table.
          var withoutVP = total + vertexParamCostGain;
          var withoutFP = total + fragmentParamCostGain;
          if( withoutVP <= 64 || ( withoutFP > 64 && withoutVP > 64 ) ) {
              vertexParamsCBV = true;
              total = withoutVP;
          }
          if( total > 64 && !isCompute ) {
              fragmentParamsCBV = true;
              total = total + fragmentParamCostGain;
          }
          if( total > 64 ) {
              globalsParamsCBV = true;
              var vertexGlobalCostGain = 1 - (shader.vertex.globalsSize << 2);
              var fragmentGlobalCostGain = isCompute ? 0 : 1 - (shader.fragment.globalsSize << 2);
              var withoutGlobal = total + vertexGlobalCostGain + fragmentGlobalCostGain;
              if ( withoutGlobal > 64 )
                  throw "Too many params. Should not be possible if every params fall into descriptor table.";
          }
      }
      var regs = [];
      for( s in shader.getShaders() )
          regs.push({ start : regCount, registers : allocParams(s) });
      var sign = new RootSignatureDesc();
      if( shader.mode == Compute ) {
          sign.flags.set(DENY_PIXEL_SHADER_ROOT_ACCESS);
          sign.flags.set(DENY_VERTEX_SHADER_ROOT_ACCESS);
      } else
          sign.flags.set(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT);
      sign.flags.set(DENY_HULL_SHADER_ROOT_ACCESS);
      sign.flags.set(DENY_DOMAIN_SHADER_ROOT_ACCESS);
      sign.flags.set(DENY_GEOMETRY_SHADER_ROOT_ACCESS);
      #if !xbogdk
      sign.flags.set(DENY_AMPLIFICATION_SHADER_ROOT_ACCESS);
      sign.flags.set(DENY_MESH_SHADER_ROOT_ACCESS);
      #end
      sign.numParameters = paramsCount;
      sign.parameters = cast params;
      return { sign : sign, registers : regs, params : params, paramsCount : paramsCount, ranges : ranges };
  }
  // Compiles every stage of `shader` and fills a CompiledShader.
  // A compute shader gets its pipeline state created right away. A graphics
  // shader only gets a pipeline description template plus its input layout;
  // the commented-out createGraphicsPipelineState call shows that no graphics
  // pipeline object is created here.
  function compileShader( shader : hxsl.RuntimeShader ) : CompiledShader {
      var isCompute = shader.mode == Compute;
      var res = computeRootSignature(shader);
      var c = new CompiledShader();
      var rootStr = stringifyRootSignature(res.sign, "ROOT_SIGNATURE", res.params, res.paramsCount);
      var vs = isCompute ? null : compileSource(shader.vertex, "vs_6_0", 0, rootStr);
      // Fragment registers start after the ones used by the vertex stage.
      var ps = isCompute ? null : compileSource(shader.fragment, "ps_6_0", res.registers[1].start, rootStr);
      var cs = isCompute ? compileSource(shader.compute, "cs_6_0", 0, rootStr) : null;

      var signSize = 0;
      var signBytes = Driver.serializeRootSignature(res.sign, 1, signSize);
      var sign = new RootSignature(signBytes,signSize);
      c.rootSignature = sign;
      c.shader = shader;
      c.vertexRegisters = res.registers[0].registers;

      if( isCompute ) {
          c.isCompute = true;
          var csDesc = new ComputePipelineStateDesc();
          csDesc.rootSignature = sign;
          csDesc.cs.shaderBytecode = cs;
          csDesc.cs.bytecodeLength = cs.length;
          c.computePipeline = Driver.createComputePipelineState(csDesc);
          return c;
      }
      c.fragmentRegisters = res.registers[1].registers;

      // Collect the vertex inputs and resolve their generated HLSL names.
      var inputs = [];
      for( v in shader.vertex.data.vars )
          if( v.kind == Input )
              inputs.push(v);
      var inputLayout = hl.CArray.alloc(InputElementDesc, inputs.length);
      var format : Array<hxd.BufferFormat.BufferInput> = [];
      var allNames = new Map();
      var varNames = new Map();
      for( v in inputs )
          hxsl.HlslOut.varName(v, varNames, allNames);

      for( slot => v in inputs ) {
          var elt = inputLayout[slot];
          var stepRate = 0;
          if( v.qualifiers != null )
              for( q in v.qualifiers )
                  switch( q ) {
                  case PerInstance(k): stepRate = k;
                  default:
                  }
          elt.semanticName = @:privateAccess hxsl.HlslOut.semanticName(varNames.get(v.id)).toUtf8();
          elt.inputSlot = slot;
          format.push({ name : v.name, type : hxd.BufferFormat.InputFormat.fromHXSL(v.type) });
          if( stepRate > 0 ) {
              elt.inputSlotClass = PER_INSTANCE_DATA;
              elt.instanceDataStepRate = stepRate;
          } else
              elt.inputSlotClass = PER_VERTEX_DATA;
          elt.alignedByteOffset = 1; // will trigger error if not set in makePipeline()
      }

      var p = new GraphicsPipelineStateDesc();
      p.rootSignature = sign;
      p.vs.shaderBytecode = vs;
      p.vs.bytecodeLength = vs.length;
      p.ps.shaderBytecode = ps;
      p.ps.bytecodeLength = ps.length;
      p.rasterizerState.fillMode = SOLID;
      p.rasterizerState.cullMode = NONE;
      p.primitiveTopologyType = TRIANGLE;
      p.numRenderTargets = 1;
      p.rtvFormats[0] = R8G8B8A8_UNORM;
      p.dsvFormat = UNKNOWN;
      p.sampleDesc.count = 1;
      p.sampleMask = -1;
      p.inputLayout.inputElementDescs = inputLayout;
      p.inputLayout.numElements = inputs.length;
      //Driver.createGraphicsPipelineState(p);

      c.format = hxd.BufferFormat.make(format);
      c.pipeline = p;
      c.inputLayout = inputLayout;
      c.inputCount = inputs.length;
      return c;
  }
  // Queues the native resource on the current frame's release list and
  // resets the tracked states of `r` to PRESENT.
  function disposeResource( r : ResourceData ) {
      var native = r.res;
      frame.toRelease.push(native);
      r.res = null;
      r.targetState = PRESENT;
      r.state = r.targetState;
  }
  1328. // ----- BUFFERS
  // Creates a committed buffer resource of `size` bytes in the given heap type
  // and initial state. `uav` adds the ALLOW_UNORDERED_ACCESS resource flag.
  function allocGPU( size : Int, heapType, state, uav=false ) {
      var heapFlags = new haxe.EnumFlags();
      var bufDesc = new ResourceDesc();
      bufDesc.dimension = BUFFER;
      bufDesc.layout = ROW_MAJOR;
      bufDesc.width = size;
      bufDesc.height = 1;
      bufDesc.depthOrArraySize = 1;
      bufDesc.mipLevels = 1;
      bufDesc.sampleDesc.count = 1;
      if( uav )
          bufDesc.flags.set(ALLOW_UNORDERED_ACCESS);
      tmp.heap.type = heapType;
      return Driver.createCommittedResource(tmp.heap, heapFlags, bufDesc, state, null);
  }
  // Allocates the GPU storage of a buffer.
  // Uniform and read-write buffers are rounded up with calcCBVSize().
  // Index buffers get an index view, uniform buffers get no view, anything
  // else gets a vertex view; views cover the unrounded size.
  override function allocBuffer( m : h3d.Buffer ) : GPUBuffer {
      var isUniform = m.flags.has(UniformBuffer);
      var isReadWrite = m.flags.has(ReadWriteBuffer);
      var byteSize = m.getMemSize();
      var allocSize = (isUniform || isReadWrite) ? calcCBVSize(byteSize) : byteSize;

      var buf = new VertexBufferData();
      buf.targetState = COPY_DEST;
      buf.state = buf.targetState;
      buf.res = allocGPU(allocSize, DEFAULT, COMMON, isReadWrite);

      if( !isUniform ) {
          if( m.flags.has(IndexBuffer) ) {
              var iview = new IndexBufferView();
              iview.bufferLocation = buf.res.getGpuVirtualAddress();
              iview.format = m.format.strideBytes == 4 ? R32_UINT : R16_UINT;
              iview.sizeInBytes = byteSize;
              buf.iview = iview;
          } else {
              var vview = new VertexBufferView();
              vview.bufferLocation = buf.res.getGpuVirtualAddress();
              vview.sizeInBytes = byteSize;
              vview.strideInBytes = m.format.strideBytes;
              buf.view = vview;
          }
      }
      buf.size = allocSize;
      return buf;
  }
  // Creates the GPU buffer holding the commands of `b` (5 x 4 bytes per
  // command) and schedules the copy of `bytes` into it.
  override function allocInstanceBuffer(b:InstanceBuffer, bytes:haxe.io.Bytes) {
      var commandBytes = 5 * 4;
      var dataSize = b.commandCount * commandBytes;
      var gpuBuf = new VertexBufferData();
      gpuBuf.targetState = COPY_DEST;
      gpuBuf.state = gpuBuf.targetState;
      gpuBuf.res = allocGPU(dataSize, DEFAULT, COMMON);
      // Stage the data in this frame's dynamic memory, then copy on the GPU.
      var staging = allocDynamicBuffer(bytes, dataSize);
      frame.commandList.copyBufferRegion(gpuBuf.res, 0, staging.resource, staging.offset, dataSize);
      b.data = gpuBuf;
  }
  // Uploads `vertexCount` entries of 5 x 4 bytes, read from `buf` at bufPos,
  // into the instance buffer starting at entry `startVertex`.
  override function uploadInstanceBufferBytes(b : InstanceBuffer, startVertex : Int, vertexCount : Int, buf : haxe.io.Bytes, bufPos : Int ) {
      var entryBytes = 5 * 4;
      var source = @:privateAccess buf.b.offset(bufPos);
      updateBuffer(b.data, source, startVertex * entryBytes, vertexCount * entryBytes);
  }
  // Releases the GPU resource backing `v` through disposeResource().
  override function disposeBuffer(v:Buffer) {
      var data = v.vbuf;
      disposeResource(data);
  }
  // Queues the native resource of the instance buffer for release and
  // detaches it from `b`.
  override function disposeInstanceBuffer(b:InstanceBuffer) {
      var native : GpuResource = b.data.res;
      frame.toRelease.push(native);
      // disposeResource(b.data);
      b.data = null;
  }
  // Copies `bytesCount` bytes from `bytes` into `b` at byte offset `startByte`.
  // The data is staged with allocDynamicBuffer() and the resource is
  // transitioned to COPY_DEST before the copy is recorded.
  function updateBuffer( b : ResourceData, bytes : hl.Bytes, startByte : Int, bytesCount : Int ) {
      var staging = allocDynamicBuffer(bytes, bytesCount);
      transition(b, COPY_DEST);
      flushTransitions();
      var cmd = frame.commandList;
      cmd.copyBufferRegion(b.res, startByte, staging.resource, staging.offset, bytesCount);
  }
  // Uploads `indiceCount` indices taken from `buf` starting at bufPos.
  // Index positions are turned into byte offsets with a shift of
  // strideBytes >> 1 (1 for 2-byte indices, 2 for 4-byte indices).
  override function uploadIndexData(i:Buffer, startIndice:Int, indiceCount:Int, buf:hxd.IndexBuffer, bufPos:Int) {
      var shift = i.format.strideBytes >> 1;
      var source = hl.Bytes.getArray(buf.getNative()).offset(bufPos << shift);
      updateBuffer(i.vbuf, source, startIndice << shift, indiceCount << shift);
  }
  // Uploads `vertexCount` vertices from a float buffer; bufPos is counted in
  // floats (4 bytes each).
  override function uploadBufferData(b:Buffer, startVertex:Int, vertexCount:Int, buf:hxd.FloatBuffer, bufPos:Int) {
      var stride = b.format.strideBytes;
      var source = hl.Bytes.getArray(buf.getNative()).offset(bufPos << 2);
      updateBuffer(b.vbuf, source, startVertex * stride, vertexCount * stride);
  }
  // Uploads `vertexCount` vertices from raw bytes; bufPos is a byte offset.
  override function uploadBufferBytes(b:Buffer, startVertex:Int, vertexCount:Int, buf:haxe.io.Bytes, bufPos:Int) {
      var stride = b.format.strideBytes;
      var source = @:privateAccess buf.b.offset(bufPos);
      updateBuffer(b.vbuf, source, startVertex * stride, vertexCount * stride);
  }
  // Reads `vertexCount` vertices of `b`, starting at `startVertex`, into `buf`
  // at byte position `bufPos`.
  // The range is copied into a temporary READBACK buffer; the function calls
  // flushFrame() and waitGpu() before mapping it, and beginFrame() afterwards.
  override function readBufferBytes(b:Buffer, startVertex:Int, vertexCount:Int, buf:haxe.io.Bytes, bufPos:Int) {
      var stride = b.format.strideBytes;
      var totalSize = vertexCount*stride;
      var desc = new ResourceDesc();
      var flags = new haxe.EnumFlags();
      desc.dimension = BUFFER;
      desc.width = totalSize;
      desc.height = 1;
      desc.depthOrArraySize = 1;
      desc.mipLevels = 1;
      desc.sampleDesc.count = 1;
      desc.layout = ROW_MAJOR;
      tmp.heap.type = READBACK;
      var tmpBuf = Driver.createCommittedResource(tmp.heap, flags, desc, COPY_DEST, null);
      transition(b.vbuf, COPY_SOURCE);
      flushTransitions();
      frame.commandList.copyBufferRegion(tmpBuf, 0, b.vbuf.res, startVertex*stride, totalSize);
      flushFrame();
      waitGpu();
      var output = tmpBuf.map(0, null);
      @:privateAccess buf.b.blit(bufPos, output, 0, totalSize);
      // Balance the map() above before releasing, as captureTexPixels() does.
      tmpBuf.unmap(0, null);
      tmpBuf.release();
      beginFrame();
  }
  1429. // ------------ TEXTURES -------
  // Maps the texture format of `t` to the matching DXGI format.
  // Throws on a format that has no mapping, and "assert" on an S3TC variant
  // outside 1..7.
  function getTextureFormat( t : h3d.mat.Texture ) : DxgiFormat {
      var format = t.format;
      // Block-compressed formats first.
      switch( format ) {
      case S3TC(variant):
          return switch( variant ) {
              case 1: BC1_UNORM;
              case 2: BC2_UNORM;
              case 3: BC3_UNORM;
              case 4: BC4_UNORM;
              case 5: BC5_UNORM;
              case 6: BC6H_UF16;
              case 7: BC7_UNORM;
              default: throw "assert";
          };
      default:
      }
      return switch( format ) {
          // 8-bit normalized
          case R8: R8_UNORM;
          case RG8: R8G8_UNORM;
          case RGBA: R8G8B8A8_UNORM;
          case SRGB_ALPHA: R8G8B8A8_UNORM_SRGB;
          // 16-bit normalized
          case R16U: R16_UNORM;
          case RG16U: R16G16_UNORM;
          case RGBA16U: R16G16B16A16_UNORM;
          // 16-bit float
          case R16F: R16_FLOAT;
          case RG16F: R16G16_FLOAT;
          case RGBA16F: R16G16B16A16_FLOAT;
          // 32-bit float
          case R32F: R32_FLOAT;
          case RG32F: R32G32_FLOAT;
          case RGB32F: R32G32B32_FLOAT;
          case RGBA32F: R32G32B32A32_FLOAT;
          // packed
          case RGB10A2: R10G10B10A2_UNORM;
          case RG11B10UF: R11G11B10_FLOAT;
          default: throw "Unsupported texture format " + format;
      };
  }
  // Builds the resource description of `t`: 2D or 3D depending on the Is3D
  // flag, with the texture's size, layer count, mip count and DXGI format.
  function makeTextureDesc(t:h3d.mat.Texture) {
      var d = new ResourceDesc();
      if( t.flags.has(Is3D) )
          d.dimension = TEXTURE3D;
      else
          d.dimension = TEXTURE2D;
      d.width = t.width;
      d.height = t.height;
      d.depthOrArraySize = t.layerCount;
      d.mipLevels = t.mipLevels;
      d.sampleDesc.count = 1;
      d.format = getTextureFormat(t);
      return d;
  }
  // Creates the GPU resource of a texture.
  // Compressed (S3TC) textures must have a size that is a multiple of 4.
  // Render targets are created in RENDER_TARGET state with an optimized clear
  // color (the previous one when the texture is being re-allocated).
  override function allocTexture(t:h3d.mat.Texture):Texture {
      var compressed = t.format.match(S3TC(_));
      if( compressed && ((t.width | t.height) & 3) != 0 )
          throw t+" is compressed "+t.width+"x"+t.height+" but should be a 4x4 multiple";
      var isTarget = t.flags.has(Target);
      var heapFlags = new haxe.EnumFlags();
      var desc = makeTextureDesc(t);
      var td = new TextureData();
      td.format = desc.format;
      tmp.heap.type = DEFAULT;
      var clearValue = null;
      if( isTarget ) {
          var color = t.t == null || t.t.color == null ? new h3d.Vector4(0,0,0,0) : t.t.color; // reuse prev color
          desc.flags.set(ALLOW_RENDER_TARGET);
          clearValue = tmp.clearValue;
          clearValue.format = desc.format;
          clearValue.color.r = color.r;
          clearValue.color.g = color.g;
          clearValue.color.b = color.b;
          clearValue.color.a = color.a;
          td.color = color;
      }
      if( t.flags.has(Writable) )
          desc.flags.set(ALLOW_UNORDERED_ACCESS);
      td.targetState = isTarget ? RENDER_TARGET : COPY_DEST;
      td.state = td.targetState;
      td.res = Driver.createCommittedResource(tmp.heap, heapFlags, desc, isTarget ? RENDER_TARGET : COMMON, clearValue);
      var debugName = t.name == null ? "Texture#"+t.id : t.name;
      td.res.setName(debugName);
      t.lastFrame = frameCount;
      t.flags.unset(WasCleared);
      return td;
  }
  // Creates a single-mip 2D depth-stencil resource for `b`, in DEPTH_WRITE
  // state, with an optimized clear value of depth 1 / stencil 0.
  override function allocDepthBuffer(b:h3d.mat.Texture):Texture {
      var heapFlags = new haxe.EnumFlags();
      var depthDesc = new ResourceDesc();
      depthDesc.dimension = TEXTURE2D;
      depthDesc.format = toDxgiDepthFormat(b.format);
      depthDesc.width = b.width;
      depthDesc.height = b.height;
      depthDesc.depthOrArraySize = 1;
      depthDesc.mipLevels = 1;
      depthDesc.sampleDesc.count = 1;
      depthDesc.flags.set(ALLOW_DEPTH_STENCIL);
      #if console
      depthDesc.flags = new haxe.EnumFlags<ResourceFlag>( depthDesc.flags.toInt() | 0x00800000 ); // FORCE_TEXTURE_COMPATIBILITY
      #end
      tmp.heap.type = DEFAULT;
      var clearValue = tmp.clearValue;
      clearValue.format = depthDesc.format;
      clearValue.depth = 1;
      clearValue.stencil = 0;
      var td = new TextureData();
      td.targetState = DEPTH_WRITE;
      td.state = td.targetState;
      td.res = Driver.createCommittedResource(tmp.heap, heapFlags, depthDesc, DEPTH_WRITE, clearValue);
      return td;
  }
  // Releases the GPU resource of `t` and clears its handle.
  override function disposeTexture(t:h3d.mat.Texture) {
      var data = t.t;
      disposeResource(data);
      t.t = null;
  }
  // Releases the GPU resource of the depth buffer `t` and clears its handle.
  override function disposeDepthBuffer(t:h3d.mat.Texture) {
      var data = t.t;
      disposeResource(data);
      t.t = null;
  }
  // Uploads a bitmap by extracting its pixels, uploading them, then disposing
  // the temporary pixels.
  override function uploadTextureBitmap(t:h3d.mat.Texture, bmp:hxd.BitmapData, mipLevel:Int, side:Int) {
      var extracted = bmp.getPixels();
      uploadTexturePixels(t, extracted, mipLevel, side);
      extracted.dispose();
  }
  // Uploads `pixels` (converted in place to the texture format) into one
  // subresource of `t`.
  // For array/cube textures `side` selects the layer; for 3D textures it is
  // the destination Z slice and the subresource is the mip level alone.
  // Throws when mipLevel is outside the texture's mip range.
  override function uploadTexturePixels(t:h3d.mat.Texture, pixels:hxd.Pixels, mipLevel:Int, side:Int) {
      pixels.convert(t.format);
      if( mipLevel >= t.mipLevels ) throw "Mip level outside texture range : " + mipLevel + " (max = " + (t.mipLevels - 1) + ")";
      var is3d = t.flags.has(Is3D);
      var subRes = mipLevel;
      if( !is3d )
          subRes += side * t.mipLevels;
      var uploadSize = t.t.res.getRequiredIntermediateSize(subRes, 1).low;
      if( is3d )
          uploadSize = Std.int(uploadSize / t.layerCount ); // a single slice is uploaded
      #if (hldx >= version("1.15.0"))
      var textureAlignment = Driver.getConstant(TEXTURE_DATA_PLACEMENT_ALIGNMENT);
      #else
      var textureAlignment = 512;
      #end
      var staging = frame.bumpAllocator.alloc(uploadSize, textureAlignment, tmp.bumpAllocation);
      transition(t.t, COPY_DEST);
      flushTransitions();

      var dst = tmp.dstTextureLocation;
      dst.res = t.t.res;
      dst.subResourceIndex = subRes;
      dst.type = SUBRESOURCE_INDEX;
      var dstDesc = makeTextureDesc(t);
      var src = tmp.srcTextureLocation;
      src.res = staging.resource;
      src.type = PLACED_FOOTPRINT;

      // Row count and row byte size are returned in the scratch bytes.
      var rowCountOut : hl.BytesAccess<Int64> = tmp.copyableInfosBytes;
      var rowBytesOut : hl.BytesAccess<Int64> = tmp.copyableInfosBytes.offset(8);
      Driver.getCopyableFootprints(dstDesc, subRes, 1, staging.offset, src.placedFootprint, rowCountOut, rowBytesOut, null);
      var rowPitch = src.placedFootprint.footprint.rowPitch;
      var data = (pixels.bytes:hl.Bytes).offset(pixels.offset);
      var rowCount = rowCountOut[0].low;
      var rowBytes = rowBytesOut[0].low;
      // Source rows are tightly packed, destination rows are rowPitch apart.
      for( row in 0...rowCount )
          staging.cpuAddress.blit(rowPitch * row, data, rowBytes * row, rowBytes);
      src.placedFootprint.footprint.depth = 1;
      frame.commandList.copyTextureRegion(dst, 0, 0, is3d ? side : 0, src, null);
      t.flags.set(WasCleared);
  }
  // GPU copy of every subresource of `from` into `to`.
  // Returns false when the source is not allocated, when the two textures
  // differ in format, size, layer count or mip count, or when the destination
  // could not be allocated. Textures that are currently bound as render
  // targets are transitioned back to RENDER_TARGET after the copy.
  override function copyTexture(from:h3d.mat.Texture, to:h3d.mat.Texture):Bool {
      if( from.t == null || from.format != to.format || from.width != to.width || from.height != to.height || from.layerCount != to.layerCount || from.mipLevels != to.mipLevels )
          return false;
      if( to.t == null ) {
          // Allocate the destination while keeping the source alive and its
          // lastFrame untouched.
          var savedLastFrame = from.lastFrame;
          from.preventAutoDispose();
          to.alloc();
          from.lastFrame = savedLastFrame;
          if( from.t == null ) throw "assert";
          if( to.t == null ) return false;
      }
      transition( from.t, COPY_SOURCE);
      transition( to.t, COPY_DEST);
      flushTransitions();

      var dstLoc = tmp.dstTextureLocation;
      var srcLoc = tmp.srcTextureLocation;
      dstLoc.res = to.t.res;
      srcLoc.res = from.t.res;
      dstLoc.type = SUBRESOURCE_INDEX;
      srcLoc.type = SUBRESOURCE_INDEX;

      var subResCount = to.mipLevels;
      if( !to.flags.has(Is3D) )
          subResCount = to.layerCount * to.mipLevels;
      for( sub in 0...subResCount ) {
          dstLoc.subResourceIndex = sub;
          srcLoc.subResourceIndex = sub;
          frame.commandList.copyTextureRegion(dstLoc, 0, 0, 0, srcLoc, null);
      }
      to.flags.set(WasCleared);
      for( rt in currentRenderTargets ) {
          if( rt != to && rt != from )
              continue;
          transition( rt.t, RENDER_TARGET );
      }
      return true;
  }
  1609. // ----- PIPELINE UPDATE
  // Uploads the buffers of kind `which` for the vertex stage of the current
  // shader and, unless it is a compute shader, for the fragment stage too.
  override function uploadShaderBuffers(buffers:h3d.shader.Buffers, which:h3d.shader.Buffers.BufferKind) {
      var sh = currentShader;
      uploadBuffers(buffers, buffers.vertex, which, sh.shader.vertex, sh.vertexRegisters);
      if( sh.isCompute )
          return;
      uploadBuffers(buffers, buffers.fragment, which, sh.shader.fragment, sh.fragmentRegisters);
  }
  // Rounds `dataSize` up to the next multiple of 256 (unchanged when it
  // already is one).
  function calcCBVSize( dataSize : Int ) {
      // the view must be a mult of 256
      return (dataSize + 0xFF) & ~0xFF;
  }
  /**
      Reserves `dataSize` bytes from the current frame's bump allocator and
      copies the first `dataSize` bytes of `data` to the allocation's CPU address.

      The allocator is asked to fill `tmp.bumpAllocation`; its return value is
      what this function returns.
  **/
  function allocDynamicBuffer( data : hl.Bytes, dataSize : Int ) : BumpAllocation {
      var result = frame.bumpAllocator.alloc(dataSize, tmp.bumpAllocation);
      result.cpuAddress.blit(0, data, 0, dataSize);
      return result;
  }
  /**
      Tells whether the texture descriptors recorded in `regs` are stale.

      Returns true when `regs.lastHeapCount` differs from the driver's current
      `heapCount`, or when for any texture slot the remembered native texture
      or `bits` value differs from what `buf.tex` currently holds (a null
      texture compares as native `null` / bits `-1`).
  **/
  function hasBuffersTexturesChanged ( buf : h3d.shader.Buffers.ShaderBuffers, regs : ShaderRegisters ) : Bool {
      if( regs.lastHeapCount != heapCount )
          return true;
      for( slot in 0...regs.texturesTypes.length ) {
          var tex = buf.tex[slot];
          var native = tex != null ? tex.t : null;
          if( regs.lastTextures[slot] != native )
              return true;
          if( regs.lastTexturesBits[slot] != ( tex != null ? tex.bits : -1 ) )
              return true;
      }
      return false;
  }
  // Ring buffer of pending SRV/sampler creation requests (allocated with 256 entries in createSRV).
  var srvRingBuf : hl.CArray<SrvArgs>;
  // Index of the next slot createSRV() writes to.
  var srvHead : Int = 1;
  // Index of the last slot processSRV() has handled.
  var srvTail : Int = 0;
  // Set once createSRV() has allocated the ring buffer.
  var srvThreadLaunched : Bool = false;

  /**
      Returns `srvHead - srvTail` modulo 256.
      Callers treat 1 as "ring buffer empty" and 0 as "ring buffer full".
  **/
  inline function computeSRVBufferDistance() : Int {
      return (srvHead - srvTail) & 0xFF;
  }
  /**
      Handles the ring buffer entry that follows `srvTail`: creates its shader
      resource view and its sampler, then advances `srvTail` to that entry.
      Does not check that an entry is actually pending.
  **/
  inline function processSRV() {
      var slot = (srvTail + 1) & 0xFF;
      var pending = srvRingBuf[slot];
      Driver.createShaderResourceView(pending.res, pending.resourceDesc, pending.srvAddr);
      Driver.createSampler(pending.samplerDesc, pending.samplerAddr);
      // publish the slot as consumed only after both descriptors were created
      srvTail = slot;
  }
  /**
      Endless loop that processes pending ring buffer entries one at a time
      and calls `Sys.sleep(0)` whenever the ring buffer is empty. Never returns.
  **/
  function runThread() {
      while( true ) {
          // a distance of 1 means the ring buffer is empty
          if( computeSRVBufferDistance() == 1 ) {
              Sys.sleep(0);
              continue;
          }
          processSRV();
      }
  }
  /**
      Maps a depth texture format to the DXGI format used when the texture is
      read through a shader resource view.
      Throws "Unsupported depth format ..." for any other format.
  **/
  inline function toDepthFormat(format : h3d.mat.Data.TextureFormat ) : DxgiFormat {
      return switch( format ) {
          case Depth16: R16_UNORM;
          case Depth24, Depth24Stencil8: R24_UNORM_X8_TYPELESS;
          case Depth32: R32_FLOAT;
          default: throw "Unsupported depth format "+ format;
      }
  }
  /**
      Queues the creation of the shader resource view and the sampler of
      texture `t` at the descriptor addresses `srvAddr` / `samplerAddr`.

      The first call allocates the 256-entry ring buffer and, outside of
      `console` builds, starts the thread running `runThread`. The request is
      written into the slot at `srvHead`, then `srvHead` is advanced. On
      `console` builds the request is processed immediately on the caller.
      Spins while the ring buffer is full.
  **/
  function createSRV( t : h3d.mat.Texture, srvAddr : Address, samplerAddr : Address ) {
      if( !srvThreadLaunched ) {
          srvThreadLaunched = true;
          srvRingBuf = hl.CArray.alloc(SrvArgs, 256);
          #if !console
          var worker = sys.thread.Thread.create(runThread);
          worker.setName("DX12");
          #end
      }

      // Check if ring buffer is full
      while( computeSRVBufferDistance() == 0 ) {}

      var slot = srvRingBuf[srvHead];

      // resource view description: the branch order decides which flag wins
      if( t.flags.has(Cube) ) {
          var cube = unsafeCastTo(slot.resourceDesc, TexCubeSRV);
          cube.dimension = TEXTURECUBE;
          cube.format = t.t.format;
          cube.shader4ComponentMapping = ShaderComponentMapping.DEFAULT;
          cube.mostDetailedMip = t.startingMip;
          cube.mipLevels = -1;
          cube.resourceMinLODClamp = 0;
      } else if( t.flags.has(IsArray) ) {
          var array = unsafeCastTo(slot.resourceDesc, Tex2DArraySRV);
          array.dimension = TEXTURE2DARRAY;
          array.format = t.t.format;
          array.shader4ComponentMapping = ShaderComponentMapping.DEFAULT;
          array.mostDetailedMip = t.startingMip;
          array.mipLevels = -1;
          array.firstArraySlice = 0;
          array.arraySize = t.layerCount;
          array.planeSlice = 0;
          array.resourceMinLODClamp = 0;
      } else if( t.flags.has(Is3D) ) {
          var volume = unsafeCastTo(slot.resourceDesc, Tex3DSRV);
          volume.dimension = TEXTURE3D;
          volume.format = t.t.format;
          volume.shader4ComponentMapping = ShaderComponentMapping.DEFAULT;
          volume.mostDetailedMip = t.startingMip;
          volume.mipLevels = -1;
          volume.resourceMinLODClamp = 0;
      } else {
          var flat = slot.resourceDesc;
          flat.dimension = TEXTURE2D;
          // depth textures are viewed through the format returned by toDepthFormat()
          flat.format = t.isDepth() ? toDepthFormat(t.format) : t.t.format;
          flat.shader4ComponentMapping = ShaderComponentMapping.DEFAULT;
          flat.mostDetailedMip = t.startingMip;
          flat.mipLevels = -1;
          flat.planeSlice = 0;
          flat.resourceMinLODClamp = 0;
      }

      // sampler description
      var sampler = slot.samplerDesc;
      sampler.comparisonFunc = NEVER;
      sampler.maxLod = 1e30;
      sampler.filter = switch( [t.filter, t.mipMap] ) {
          case [Nearest, None|Nearest]: MIN_MAG_MIP_POINT;
          case [Nearest, Linear]: MIN_MAG_POINT_MIP_LINEAR;
          case [Linear, None|Nearest]: MIN_MAG_LINEAR_MIP_POINT;
          case [Linear, Linear]: MIN_MAG_MIP_LINEAR;
      }
      // the same addressing mode is used on all three axes
      sampler.addressW = switch( t.wrap ) {
          case Clamp: CLAMP;
          case Repeat: WRAP;
      }
      sampler.addressV = sampler.addressW;
      sampler.addressU = sampler.addressV;
      sampler.mipLODBias = t.lodBias;

      slot.res = t.t.res;
      slot.srvAddr = srvAddr;
      slot.samplerAddr = samplerAddr;

      // publish the request
      srvHead = (srvHead + 1) & 0xFF;
      #if console
      processSRV();
      #end
  }
  /**
      Uploads one category (`which`) of shader data for one shader stage and
      binds it on the current frame command list.

      - `buffers` : the complete shader buffers, only used to re-upload everything
        when a texture `realloc()` changed the current shader.
      - `buf`     : the data of the stage being uploaded.
      - `shader`  : the runtime description of that stage (sizes and counts).
      - `regs`    : the root-signature registers of that stage.

      For `Params` and `Globals`, bit 0x100 of the register value selects the
      constant-buffer-view path (the low 8 bits are then the root parameter
      index); otherwise the data is set as root 32-bit constants.

      For `Textures`, descriptors are only rebuilt when
      `hasBuffersTexturesChanged` reports a change; sampled textures fill the
      slots starting at 0 and RW textures the slots starting at
      `regs.texturesCount`.

      For `Buffers`, uniform buffers fill the slots starting at 0, storage
      buffers the slots starting at `regs.cbvCount`, and RW buffers the slots
      starting at `regs.cbvCount + regs.storageCount`.

      Throws on a missing non-sampler texture, on an RW texture whose
      dimension/array-ness does not match or that lacks the `Writable` flag,
      and on buffers lacking the flag required by their binding type.
  **/
  function uploadBuffers( buffers : h3d.shader.Buffers, buf : h3d.shader.Buffers.ShaderBuffers, which:h3d.shader.Buffers.BufferKind, shader : hxsl.RuntimeShader.RuntimeShaderData, regs : ShaderRegisters ) {
      switch( which ) {
      case Params:
          if( shader.paramsSize > 0 ) {
              var data = hl.Bytes.getArray(buf.params.toData());
              var dataSize = shader.paramsSize << 4;
              if( regs.params & 0x100 != 0 ) {
                  // update CBV
                  var srv = frame.shaderResourceViews.alloc(1);
                  var alloc = allocDynamicBuffer(data,dataSize);
                  var desc = tmp.cbvDesc;
                  desc.bufferLocation = alloc.resource.getGpuVirtualAddress() + alloc.offset;
                  desc.sizeInBytes = alloc.byteSize;
                  Driver.createConstantBufferView(desc, srv);
                  if( currentShader.isCompute )
                      frame.commandList.setComputeRootDescriptorTable(regs.params & 0xFF, frame.shaderResourceViews.toGPU(srv));
                  else
                      frame.commandList.setGraphicsRootDescriptorTable(regs.params & 0xFF, frame.shaderResourceViews.toGPU(srv));
              } else if( currentShader.isCompute )
                  frame.commandList.setComputeRoot32BitConstants(regs.params, dataSize >> 2, data, 0);
              else
                  frame.commandList.setGraphicsRoot32BitConstants(regs.params, dataSize >> 2, data, 0);
          }
      case Globals:
          var isFragment = shader.kind == Fragment;
          // stays -1 unless the globals were bound through a descriptor table
          var bind = -1;
          if( shader.globalsSize > 0 ) {
              var data = hl.Bytes.getArray(buf.globals.toData());
              var dataSize = shader.globalsSize << 4;
              if( regs.globals & 0x100 != 0 ) {
                  // update CBV
                  var srv = frame.shaderResourceViews.alloc(1);
                  var alloc = allocDynamicBuffer(data,dataSize);
                  // the description is kept per stage so flushShaderBuffers() can recreate the view
                  var desc = isFragment ? tmp.fragmentGlobalDesc : tmp.vertexGlobalDesc;
                  desc.bufferLocation = alloc.resource.getGpuVirtualAddress() + alloc.offset;
                  desc.sizeInBytes = alloc.byteSize;
                  Driver.createConstantBufferView(desc, srv);
                  bind = regs.globals & 0xFF;
                  if( currentShader.isCompute )
                      frame.commandList.setComputeRootDescriptorTable(bind, frame.shaderResourceViews.toGPU(srv));
                  else
                      frame.commandList.setGraphicsRootDescriptorTable(bind, frame.shaderResourceViews.toGPU(srv));
              } else if( currentShader.isCompute )
                  frame.commandList.setComputeRoot32BitConstants(regs.globals, dataSize >> 2, data, 0);
              else
                  frame.commandList.setGraphicsRoot32BitConstants(regs.globals, dataSize >> 2, data, 0);
          }
          if ( isFragment )
              lastFragmentGlobalBind = bind;
          else
              lastVertexGlobalBind = bind;
      case Textures:
          if( shader.texturesCount > 0 ) {
              if ( hasBuffersTexturesChanged(buf, regs) ) {
                  regs.lastHeapCount = heapCount;
                  regs.srv = frame.shaderResourceViews.alloc(shader.texturesCount);
                  regs.samplersView = frame.samplerViews.alloc(regs.texturesCount);
                  if ( regs.lastTextures.length < shader.texturesCount ) {
                      regs.lastTextures.resize(shader.texturesCount);
                      regs.lastTexturesBits.resize(shader.texturesCount);
                  }
                  // sampled textures use slots [0, regs.texturesCount), RW textures the slots after them
                  var textureIndex = 0;
                  var uavIndex = regs.texturesCount;
                  for( i in 0...shader.texturesCount ) {
                      var t = buf.tex[i];
                      var pt = regs.texturesTypes[i];
                      if( t == null || t.isDisposed() ) {
                          // only plain samplers get a placeholder texture
                          switch ( pt ) {
                          case TSampler(TCube, false):
                              t = h3d.mat.Texture.defaultCubeTexture();
                          case TSampler(_, false):
                              var color = h3d.mat.Defaults.loadingTextureColor;
                              t = h3d.mat.Texture.fromColor(color, (color >>> 24) / 255);
                          default:
                              throw "Missing texture";
                          }
                      }
                      if( t != null && t.t == null && t.realloc != null ) {
                          var s = currentShader;
                          t.alloc();
                          t.realloc();
                          if( hasDeviceError ) return;
                          if( s != currentShader ) {
                              // realloc triggered a shader change !
                              // we need to reset the original shader and reupload everything
                              currentShader = null;
                              selectShader(s.shader);
                              uploadShaderBuffers(buffers,Globals);
                              uploadShaderBuffers(buffers,Params);
                              uploadShaderBuffers(buffers,Textures);
                              return;
                          }
                      }
                      // remember what was bound so hasBuffersTexturesChanged() can detect changes
                      regs.lastTextures[i] = buf.tex[i] != null ? buf.tex[i].t : null;
                      regs.lastTexturesBits[i] = buf.tex[i] != null ? buf.tex[i].bits : -1;
                      switch( pt ) {
                      case TRWTexture(dim,arr,chans):
                          var tdim : hxsl.Ast.TexDimension = t.flags.has(Cube) ? TCube : dim;
                          if( (arr != t.flags.has(IsArray)) || dim != tdim )
                              throw "Texture format does not match: "+t+"["+t.format+"] should be "+hxsl.Ast.Tools.toString(pt);
                          if( !t.flags.has(Writable) )
                              throw "Texture was allocated without Writable flag";
                          transition(t.t, UNORDERED_ACCESS);
                          var desc = tmp.wtexDesc;
                          desc.format = cast getTextureFormat(t);
                          desc.viewDimension = switch( [dim,arr] ) {
                              case [T1D, false]: TEXTURE1D;
                              case [T2D, false]: TEXTURE2D;
                              case [T3D, false]: TEXTURE3D;
                              case [T1D, true]: TEXTURE1DARRAY;
                              case [T2D, true]: TEXTURE2DARRAY;
                              default: throw "Unsupported RWTexture "+t;
                          }
                          desc.mipSlice = 0;
                          desc.planeSlice = 0;
                          if(t.flags.has(Is3D)){
                              desc.wSlice = t.get_layerCount();
                          }
                          if( arr ) {
                              desc.firstArraySlice = 0;
                              desc.arraySize = 1;
                          }
                          // the view is written directly into the table allocated in regs.srv:
                          // no separate descriptor is allocated for it
                          Driver.createUnorderedAccessView(t.t.res, null, desc, regs.srv.offset(uavIndex * frame.shaderResourceViews.stride));
                          uavIndex++;
                          continue;
                      default:
                          t.lastFrame = frameCount;
                          var state = if ( shader.kind == Fragment )
                              PIXEL_SHADER_RESOURCE;
                          else
                              NON_PIXEL_SHADER_RESOURCE;
                          transition(t.t, state);
                          createSRV(t, regs.srv.offset(textureIndex * frame.shaderResourceViews.stride), regs.samplersView.offset(textureIndex * frame.samplerViews.stride));
                          textureIndex++;
                      }
                  }
              } else {
                  // descriptors are reused as-is, only the resource states are requested again
                  for( i in 0...shader.texturesCount ) {
                      var t = buf.tex[i];
                      if (t == null || t.t == null)
                          continue;
                      var pt = regs.texturesTypes[i];
                      var state = switch( pt ) {
                          case TRWTexture(_,_,_):
                              UNORDERED_ACCESS;
                          default:
                              (shader.kind == Fragment ? PIXEL_SHADER_RESOURCE : NON_PIXEL_SHADER_RESOURCE);
                      }
                      transition(t.t, state);
                  }
              }
              if( currentShader.isCompute ) {
                  frame.commandList.setComputeRootDescriptorTable(regs.textures, frame.shaderResourceViews.toGPU(regs.srv));
                  if ( regs.texturesCount > 0 )
                      frame.commandList.setComputeRootDescriptorTable(regs.samplers, frame.samplerViews.toGPU(regs.samplersView));
              } else {
                  frame.commandList.setGraphicsRootDescriptorTable(regs.textures, frame.shaderResourceViews.toGPU(regs.srv));
                  if ( regs.texturesCount > 0 )
                      frame.commandList.setGraphicsRootDescriptorTable(regs.samplers, frame.samplerViews.toGPU(regs.samplersView));
              }
          }
      case Buffers:
          if( shader.bufferCount > 0 ) {
              var srv = frame.shaderResourceViews.alloc(shader.bufferCount);
              // slot layout inside the table: uniform buffers, then storage buffers, then RW buffers
              var cbvIndex = 0;
              var storageIndex = regs.cbvCount;
              var uavIndex = regs.cbvCount + regs.storageCount;
              for( i in 0...shader.bufferCount ) {
                  var b = buf.buffers[i];
                  var cbv = b.vbuf;
                  switch( regs.bufferTypes[i] ) {
                  case Uniform:
                      if( cbv.view != null )
                          throw "Buffer was allocated without UniformBuffer flag";
                      transition(cbv, VERTEX_AND_CONSTANT_BUFFER);
                      var desc = tmp.cbvDesc;
                      desc.bufferLocation = cbv.res.getGpuVirtualAddress();
                      desc.sizeInBytes = cbv.size;
                      Driver.createConstantBufferView(desc, srv.offset(cbvIndex * frame.shaderResourceViews.stride));
                      cbvIndex++;
                  case Storage:
                      var state = shader.kind == Fragment ? PIXEL_SHADER_RESOURCE : NON_PIXEL_SHADER_RESOURCE;
                      transition(cbv, state);
                      var desc = tmp.bufferSRV;
                      var stride = regs.bufferStrides[i];
                      desc.numElements = Std.int(cbv.size / stride);
                      desc.structureByteStride = stride;
                      desc.flags = NONE;
                      Driver.createShaderResourceView(cbv.res, desc, srv.offset(storageIndex * frame.shaderResourceViews.stride));
                      storageIndex++;
                  case RW:
                      if( !b.flags.has(ReadWriteBuffer) )
                          throw "Buffer was allocated without ReadWriteBuffer flag";
                      transition(cbv, UNORDERED_ACCESS);
                      var desc = tmp.uavDesc;
                      var stride = regs.bufferStrides[i];
                      desc.numElements = Std.int(cbv.size / stride);
                      desc.structureSizeInBytes = stride;
                      Driver.createUnorderedAccessView(cbv.res, null, desc, srv.offset(uavIndex * frame.shaderResourceViews.stride));
                      uavIndex++;
                  default:
                      throw "assert";
                  }
              }
              if( currentShader.isCompute )
                  frame.commandList.setComputeRootDescriptorTable(regs.buffers, frame.shaderResourceViews.toGPU(srv));
              else
                  frame.commandList.setGraphicsRootDescriptorTable(regs.buffers, frame.shaderResourceViews.toGPU(srv));
          }
      }
  }
  /**
      Makes `shader` the current shader, compiling and caching it by
      `shader.id` on first use.

      Returns false when the compiled shader is already current (nothing is
      recorded), true otherwise. On a change the root signature is set on the
      command list; for compute shaders the compute pipeline state is also set
      when it differs from the current pipeline state.
  **/
  override function selectShader( shader : hxsl.RuntimeShader ) {
      var compiled = compiledShaders.get(shader.id);
      if( compiled == null ) {
          compiled = compileShader(shader);
          compiledShaders.set(shader.id, compiled);
      }
      if( compiled == currentShader )
          return false;
      currentShader = compiled;
      pipelineBuilder.setShader(shader);
      if( !compiled.isCompute ) {
          frame.commandList.setGraphicsRootSignature(currentShader.rootSignature);
          return true;
      }
      frame.commandList.setComputeRootSignature(currentShader.rootSignature);
      var computeState = currentShader.computePipeline;
      if( currentPipelineState != computeState ) {
          frame.commandList.setPipelineState(computeState);
          currentPipelineState = computeState;
      }
      return true;
  }
  /**
      Forwards `pass` to the pipeline builder and updates the stencil
      reference on the command list when the pass has a stencil whose
      reference differs from the last one set.

      `pass.depthClamp` is temporarily OR-ed with `useDepthClamp` for the
      pipeline builder call and restored afterwards.
  **/
  override function selectMaterial( pass : h3d.mat.Pass ) @:privateAccess {
      var savedClamp = pass.depthClamp;
      pass.depthClamp = savedClamp || useDepthClamp;
      pipelineBuilder.selectMaterial(pass);
      pass.depthClamp = savedClamp;

      var stencil = pass.stencil;
      if( stencil == null )
          return;
      if( curStencilRef == stencil.reference )
          return;
      curStencilRef = stencil.reference;
      frame.commandList.omSetStencilRef(stencil.reference);
  }
  /**
      Binds a single vertex buffer for all inputs of the current shader.

      Every one of the `currentShader.inputCount` vertex views is filled from
      the buffer's own view, the pipeline builder is told the mapping and
      stride of each input, and the views are set on the command list.
  **/
  override function selectBuffer(buffer:Buffer) {
      var views = tmp.vertexViews;
      var source = buffer.vbuf.view;
      var mapping = buffer.format.resolveMapping(currentShader.format);
      var inputCount = currentShader.inputCount;
      for( input in 0...inputCount ) {
          var view = views[input];
          view.bufferLocation = source.bufferLocation;
          view.sizeInBytes = source.sizeInBytes;
          view.strideInBytes = source.strideInBytes;
          pipelineBuilder.setBuffer(input, mapping[input], view.strideInBytes);
      }
      transition(buffer.vbuf, VERTEX_AND_CONSTANT_BUFFER);
      frame.commandList.iaSetVertexBuffers(0, inputCount, views[0]);
  }
  /**
      Binds several vertex buffers at once.

      `formats` is resolved against the current shader format; each resulting
      mapping entry names (through `bufferIndex`) the buffer of `buffers` that
      feeds the corresponding vertex view. Each buffer used is transitioned to
      `VERTEX_AND_CONSTANT_BUFFER` and the views are set on the command list.
  **/
  override function selectMultiBuffers(formats:hxd.BufferFormat.MultiFormat,buffers:Array<h3d.Buffer>) {
      var views = tmp.vertexViews;
      var mapping = formats.resolveMapping(currentShader.format);
      var count = mapping.length;
      for( input in 0...count ) {
          var entry = mapping[input];
          var source = @:privateAccess buffers[entry.bufferIndex].vbuf;
          var sourceView = source.view;
          var view = views[input];
          view.bufferLocation = sourceView.bufferLocation;
          view.sizeInBytes = sourceView.sizeInBytes;
          view.strideInBytes = sourceView.strideInBytes;
          transition(source, VERTEX_AND_CONSTANT_BUFFER);
          pipelineBuilder.setBuffer(input, entry, view.strideInBytes);
      }
      frame.commandList.iaSetVertexBuffers(0, count, views[0]);
  }
  // Lookup tables used by makePipeline(): each one is indexed with the
  // getIndex() of the corresponding pass / stencil value.

  // pass.culling -> rasterizer cull mode
  static var CULL : Array<CullMode> = [
      NONE, BACK, FRONT, NONE
  ];

  // pass.blendOp / pass.blendAlphaOp -> blend operation
  static var BLEND_OP : Array<BlendOp> = [
      ADD, SUBTRACT, REV_SUBTRACT, MIN, MAX
  ];

  // pass.depthTest and stencil tests -> comparison function
  static var COMP : Array<ComparisonFunc> = [
      ALWAYS, NEVER,
      EQUAL, NOT_EQUAL,
      GREATER, GREATER_EQUAL,
      LESS, LESS_EQUAL
  ];

  // pass.blendSrc / pass.blendDst -> color blend factor
  static var BLEND : Array<Blend> = [
      ONE, ZERO,
      SRC_ALPHA, SRC_COLOR,
      DEST_ALPHA, DEST_COLOR,
      INV_SRC_ALPHA, INV_SRC_COLOR,
      INV_DEST_ALPHA, INV_DEST_COLOR,
      SRC1_COLOR, SRC1_ALPHA,
      INV_SRC1_COLOR, INV_SRC1_ALPHA,
      SRC_ALPHA_SAT
  ];

  // pass.blendAlphaSrc / pass.blendAlphaDst -> alpha blend factor
  // (same layout as BLEND with every *_COLOR entry replaced by its *_ALPHA counterpart)
  static var BLEND_ALPHA : Array<Blend> = [
      ONE, ZERO,
      SRC_ALPHA, SRC_ALPHA,
      DEST_ALPHA, DEST_ALPHA,
      INV_SRC_ALPHA, INV_SRC_ALPHA,
      INV_DEST_ALPHA, INV_DEST_ALPHA,
      SRC1_ALPHA, SRC1_ALPHA,
      INV_SRC1_ALPHA, INV_SRC1_ALPHA,
      SRC_ALPHA_SAT
  ];

  // stencil pass / fail / depth-fail operations
  static var STENCIL_OP : Array<StencilOp> = [
      KEEP, ZERO, REPLACE,
      INCR_SAT, INCR,
      DECR_SAT, DECR,
      INVERT
  ];
  /**
      Fills `shader.pipeline` from the pipeline builder's current pass, depth
      properties and buffer inputs plus the current render targets, then
      creates and returns a graphics pipeline state from it.

      Note: a wireframe pass has its `culling` forced to `None` (the pass
      object itself is modified).
      Throws "assert" for an input type / precision pair without a matching
      vertex format.
  **/
  function makePipeline( shader : CompiledShader ) {
      var p = shader.pipeline;
      var pass = pipelineBuilder.getCurrentPass();
      var depth = pipelineBuilder.getDepthProps();
      if( pass.wireframe ) pass.culling = None;

      // with no explicit render target, a single target is still declared
      var rtCount = currentRenderTargets.length;
      if( rtCount == 0 ) rtCount = 1;
      p.numRenderTargets = rtCount;

      // rasterizer
      p.rasterizerState.cullMode = CULL[pass.culling.getIndex()];
      p.rasterizerState.fillMode = pass.wireframe ? WIREFRAME : SOLID;
      p.rasterizerState.depthClipEnable = !pass.depthClamp;
      p.rasterizerState.depthBias = Std.int(depth.bias);
      p.rasterizerState.slopeScaledDepthBias = depth.slopeScaledBias;

      // depth
      p.depthStencilDesc.depthEnable = pass.depthTest != Always;
      p.depthStencilDesc.depthWriteMask = (pass.depthWrite && depthEnabled) ? ALL : ZERO;
      p.depthStencilDesc.depthFunc = COMP[pass.depthTest.getIndex()];
      p.dsvFormat = toDxgiDepthFormat(depth.format);

      // blending: the same settings are applied to every render target
      var blendState = p.blendState;
      for( i in 0...rtCount ) {
          var rtBlend = blendState.renderTargets[i];
          rtBlend.blendEnable = pass.blendSrc != One || pass.blendDst != Zero;
          rtBlend.srcBlend = BLEND[pass.blendSrc.getIndex()];
          rtBlend.dstBlend = BLEND[pass.blendDst.getIndex()];
          rtBlend.srcBlendAlpha = BLEND_ALPHA[pass.blendAlphaSrc.getIndex()];
          rtBlend.dstBlendAlpha = BLEND_ALPHA[pass.blendAlphaDst.getIndex()];
          rtBlend.blendOp = BLEND_OP[pass.blendOp.getIndex()];
          rtBlend.blendOpAlpha = BLEND_OP[pass.blendAlphaOp.getIndex()];
          rtBlend.renderTargetWriteMask = pass.colorMask;
          // a null target entry is given the R8G8B8A8_UNORM format
          var target = currentRenderTargets[i];
          p.rtvFormats[i] = target == null ? R8G8B8A8_UNORM : target.t.format;
      }
      // remaining render target slots are marked unused
      for( i in rtCount...8 )
          p.rtvFormats[i] = DxgiFormat.UNKNOWN;

      // vertex input layout
      for( i in 0...shader.inputCount ) {
          var element = shader.inputLayout[i];
          var input = pipelineBuilder.getBufferInput(i);
          element.alignedByteOffset = input.offset;
          element.format = @:privateAccess switch( [shader.format.inputs[i].type, input.precision] ) {
              case [DFloat, F32]: R32_FLOAT;
              case [DFloat, F16]: R16_FLOAT;
              case [DFloat, S8]: R8_SNORM;
              case [DFloat, U8]: R8_UNORM;
              case [DVec2, F32]: R32G32_FLOAT;
              case [DVec2, F16]: R16G16_FLOAT;
              case [DVec2, S8]: R8G8_SNORM;
              case [DVec2, U8]: R8G8_UNORM;
              case [DVec3, F32]: R32G32B32_FLOAT;
              case [DVec3, F16]: R16G16B16A16_FLOAT; // padding
              case [DVec3, S8]: R8G8B8A8_SNORM; // padding
              case [DVec3, U8]: R8G8B8A8_UNORM; // padding
              case [DVec4, F32]: R32G32B32A32_FLOAT;
              case [DVec4, F16]: R16G16B16A16_FLOAT;
              case [DVec4, S8]: R8G8B8A8_SNORM;
              case [DVec4, U8]: R8G8B8A8_UNORM;
              case [DBytes4, _]: R8G8B8A8_UINT;
              default: throw "assert";
          };
      }

      // stencil
      var stencil = pass.stencil;
      var dsDesc = p.depthStencilDesc;
      dsDesc.stencilEnable = stencil != null;
      if( stencil != null ) {
          dsDesc.stencilReadMask = stencil.readMask;
          dsDesc.stencilWriteMask = stencil.writeMask;

          var frontFace = dsDesc.frontFace;
          frontFace.stencilFunc = COMP[stencil.frontTest.getIndex()];
          frontFace.stencilPassOp = STENCIL_OP[stencil.frontPass.getIndex()];
          frontFace.stencilFailOp = STENCIL_OP[stencil.frontSTfail.getIndex()];
          frontFace.stencilDepthFailOp = STENCIL_OP[stencil.frontDPfail.getIndex()];

          var backFace = dsDesc.backFace;
          backFace.stencilFunc = COMP[stencil.backTest.getIndex()];
          backFace.stencilPassOp = STENCIL_OP[stencil.backPass.getIndex()];
          backFace.stencilFailOp = STENCIL_OP[stencil.backSTfail.getIndex()];
          backFace.stencilDepthFailOp = STENCIL_OP[stencil.backDPfail.getIndex()];
      }
      return Driver.createGraphicsPipelineState(p);
  }
  /**
      When the pipeline builder reports pending changes, looks up (or creates
      and caches) the matching graphics pipeline state of the current shader
      and sets it on the command list if it is not the current one.
  **/
  function flushPipeline() {
      if( !pipelineBuilder.needFlush )
          return;
      var entry = pipelineBuilder.lookup(currentShader.pipelines, currentShader.inputCount);
      if( entry.pipeline == null )
          entry.pipeline = makePipeline(currentShader);
      if( currentPipelineState == entry.pipeline )
          return;
      frame.commandList.setPipelineState(entry.pipeline);
      currentPipelineState = entry.pipeline;
  }
  2121. // QUERIES
  // Number of queries stored in each query heap.
  static inline var QUERY_COUNT = 128;

  /**
      Creates a query object. Only `TimeStamp` queries are supported:
      any other kind throws "Not implemented".
  **/
  override function allocQuery( queryKind : QueryKind ) : Query {
      if( queryKind == TimeStamp )
          return new Query();
      throw "Not implemented";
  }
  /**
      Intentionally empty: this driver does nothing when a query is deleted.
  **/
  override function deleteQuery( q : Query ) {
  }
  /**
      Intentionally empty: the timestamp is recorded by `endQuery` only.
  **/
  override function beginQuery( q : Query ) {
  }
  /**
      Records a timestamp for `q` in the current frame's current query heap.

      The heap is created on demand (with `QUERY_COUNT` entries); creating a
      heap also releases the frame's query buffer. `q.heap` / `q.offset` are
      set to the location used, and `q` is added to the frame's pending
      queries. Once a heap holds `QUERY_COUNT` queries the frame moves on to
      the next heap.
  **/
  override function endQuery( q : Query ) {
      var heapIndex = frame.queryCurrentHeap;
      var heap = frame.queryHeaps[heapIndex];
      if( heap == null ) {
          var heapDesc = new QueryHeapDesc();
          heapDesc.type = TIMESTAMP;
          heapDesc.count = QUERY_COUNT;
          heap = Driver.createQueryHeap(heapDesc);
          frame.queryHeaps[heapIndex] = heap;
          // flushQueries() sizes the buffer from the number of heaps and recreates it when null
          if( frame.queryBuffer != null ) {
              frame.queryBuffer.release();
              frame.queryBuffer = null;
          }
      }
      q.offset = frame.queryHeapOffset++;
      q.heap = heapIndex;
      frame.commandList.endQuery(heap, TIMESTAMP, q.offset);
      frame.queriesPending.push(q);
      if( frame.queryHeapOffset == QUERY_COUNT ) {
          // heap is full: continue in the next one
          frame.queryHeapOffset = 0;
          frame.queryCurrentHeap++;
      }
  }
  /**
      A query's result is available once its `heap` is negative
      (`beginQueries` sets it to -1 after storing the result).
  **/
  override function queryResultAvailable( q : Query ) {
      var resolved = q.heap < 0;
      return resolved;
  }
  /**
      Returns the value stored in `q.result`.
  **/
  override function queryResult( q : Query ) {
      var value = q.result;
      return value;
  }
  /**
      Reads back the results of the current frame's pending queries.

      Does nothing when the frame has no query buffer or no pending query.
      Otherwise the buffer is mapped, every pending query that still has a
      heap gets its `result` computed from the stored tick value and `tsFreq`
      (scaled by 1e9) and its `heap` set to -1, then the buffer is unmapped.
  **/
  function beginQueries() {
      if( frame.queryBuffer == null || frame.queriesPending.length == 0 )
          return;
      var ticks : hl.BytesAccess<Int64> = frame.queryBuffer.map(0, null);
      var q = frame.queriesPending.pop();
      while( q != null ) {
          if( q.heap >= 0 ) {
              var v = ticks[q.heap * QUERY_COUNT + q.offset];
              // whole seconds plus the fractional remainder, scaled by 1e9
              var wholeSeconds = (v / tsFreq).low;
              var leftover = (v % tsFreq).low;
              q.result = (wholeSeconds + leftover / tsFreq.low) * 1e9;
              q.heap = -1;
          }
          q = frame.queriesPending.pop();
      }
      frame.queryBuffer.unmap(0, null);
  }
  /**
      Records the resolve of every timestamp written during this frame into
      the frame's readback query buffer, then resets the frame's query
      cursor (`queryCurrentHeap` / `queryHeapOffset`) to 0.

      Does nothing when no query was recorded. The readback buffer is created
      on demand with room for `QUERY_COUNT` 8-byte values per existing heap.
      Heap `i` is resolved at byte offset `i * QUERY_COUNT * 8`, which is the
      layout `beginQueries` reads back.
  **/
  function flushQueries() {
      // a partially filled heap still has to be resolved
      if( frame.queryHeapOffset > 0 )
          frame.queryCurrentHeap++;
      var usedHeaps = frame.queryCurrentHeap;
      if( usedHeaps == 0 )
          return;
      if( frame.queryBuffer == null )
          frame.queryBuffer = allocGPU(frame.queryHeaps.length * QUERY_COUNT * 8, READBACK, COPY_DEST);
      // When the last heap was filled exactly, endQuery() has already reset
      // queryHeapOffset to 0: that heap holds QUERY_COUNT queries, not 0.
      var lastCount = frame.queryHeapOffset > 0 ? frame.queryHeapOffset : QUERY_COUNT;
      var position = 0;
      for( i in 0...usedHeaps ) {
          var count = i < usedHeaps - 1 ? QUERY_COUNT : lastCount;
          frame.commandList.resolveQueryData(frame.queryHeaps[i], TIMESTAMP, 0, count, frame.queryBuffer, position);
          position += count * 8;
      }
      frame.queryCurrentHeap = 0;
      frame.queryHeapOffset = 0;
  }
  2194. // --- DRAW etc.
  /**
      Draws `ntriangles` indexed triangles (one instance) from `ibuf`,
      starting at index `startIndex`.

      Pending pipeline changes are flushed first; the index buffer is only
      re-bound when it differs from the one currently bound.
  **/
  override function draw( ibuf : Buffer, startIndex : Int, ntriangles : Int ) {
      flushPipeline();
      var needsRebind = currentIndex != ibuf;
      if( needsRebind ) {
          currentIndex = ibuf;
          transition(ibuf.vbuf, INDEX_BUFFER);
          frame.commandList.iaSetIndexBuffer(ibuf.vbuf.iview);
      }
      flushTransitions();
      var indexCount = ntriangles * 3;
      frame.commandList.drawIndexedInstanced(indexCount, 1, startIndex, 0, 0);
  }
  /**
      Issues an instanced draw described by `commands` using index buffer `ibuf`.

      When `commands.data` is set the draw is indirect: the argument buffer
      (and the optional count buffer) are transitioned to `INDIRECT_ARGUMENT`
      and `executeIndirect` is recorded. Otherwise a direct
      `drawIndexedInstanced` is recorded from the values stored in `commands`.
  **/
  override function drawInstanced(ibuf:Buffer, commands:InstanceBuffer) {
      flushPipeline();
      if( currentIndex != ibuf ) {
          currentIndex = ibuf;
          transition(ibuf.vbuf, INDEX_BUFFER);
          frame.commandList.iaSetIndexBuffer(ibuf.vbuf.iview);
      }
      if( commands.data == null ) {
          // direct draw
          flushTransitions();
          frame.commandList.drawIndexedInstanced(commands.indexCount, commands.commandCount, commands.startIndex, 0, 0);
          return;
      }
      // indirect draw
      transition(commands.data, INDIRECT_ARGUMENT);
      var hasCount = commands.countBuffer != null;
      if( hasCount )
          transition(commands.countBuffer, INDIRECT_ARGUMENT);
      flushTransitions();
      frame.commandList.executeIndirect(indirectCommand, commands.commandCount, commands.data.res, commands.offset * InstanceBuffer.ELEMENT_SIZE, hasCount ? commands.countBuffer.res : null, commands.countOffset * 4);
  }
  /**
      Switches to fresh descriptor heaps when the current ones are running low
      (fewer than 128 shader resource descriptors or fewer than 64 sampler
      descriptors available).

      After the switch `heapCount` is incremented, the new heaps are set on
      the command list, and the vertex / fragment globals that were last bound
      through a descriptor table get a new constant buffer view in the new
      heap and are bound again.
  **/
  override function flushShaderBuffers() {
      var enoughRoom = frame.shaderResourceViews.available >= 128 && frame.samplerViews.available >= 64;
      if( enoughRoom )
          return;

      frame.shaderResourceViews = frame.shaderResourceCache.next();
      frame.samplerViews = frame.samplerCache.next();
      heapCount++;

      var heaps = tmp.descriptors2;
      heaps[0] = @:privateAccess frame.shaderResourceViews.heap;
      heaps[1] = @:privateAccess frame.samplerViews.heap;
      frame.commandList.setDescriptorHeaps(heaps);

      // a negative bind slot means there is nothing to bind again
      inline function rebindGlobal(bindSlot, desc) {
          if ( bindSlot >= 0 ) {
              var view = frame.shaderResourceViews.alloc(1);
              Driver.createConstantBufferView(desc, view);
              var gpuAddress = frame.shaderResourceViews.toGPU(view);
              if( currentShader.isCompute )
                  frame.commandList.setComputeRootDescriptorTable(bindSlot, gpuAddress);
              else
                  frame.commandList.setGraphicsRootDescriptorTable(bindSlot, gpuAddress);
          }
      }
      rebindGlobal(lastVertexGlobalBind, tmp.vertexGlobalDesc);
      rebindGlobal(lastFragmentGlobalBind, tmp.fragmentGlobalDesc);
  }
  /**
      Spins until the SRV ring buffer is empty (distance of 1).
  **/
  function flushSRV() {
      while( true ) {
          if( computeSRVBufferDistance() == 1 )
              break;
      }
  }
  /**
      Submits the current frame's command list.

      Queries are resolved, the command list is closed, the SRV ring buffer is
      drained, and only then the command list is executed. The cached pipeline
      state and shader are cleared, and the fence is signaled with a new value
      that is stored in `frame.fenceValue`.

      `onResize` is not used by this function.
  **/
  function flushFrame( onResize : Bool = false ) {
      flushQueries();
      var commands = frame.commandList;
      commands.close();
      // every queued descriptor must exist before the GPU work is submitted
      flushSRV();
      commands.execute();
      currentShader = null;
      currentPipelineState = null;
      Driver.flushMessages();
      frame.fenceValue = fenceValue++;
      Driver.signal(fence, frame.fenceValue);
  }
  /**
      Presents the current frame: transitions the back buffer to `PRESENT`,
      submits the frame, presents with the window's vsync setting, waits for
      the frame of the new back buffer index and begins the next frame.

      When a device error was flagged, the driver is disposed and reset and
      `onContextLost()` is called.
  **/
  override function present() {
      transition(frame.backBuffer, PRESENT);
      flushTransitions();
      flushFrame();
      Driver.present(window.vsync);
      waitForFrame(Driver.getCurrentBackBufferIndex());
      beginFrame();
      if( !hasDeviceError )
          return;
      Sys.println("----------- OnContextLost ----------");
      hasDeviceError = false;
      dispose();
      reset();
      onContextLost();
  }
  /**
      Blocks until the fence has reached the fence value recorded for
      `frames[index]`. Returns immediately when it already has.
  **/
  function waitForFrame( index : Int ) {
      var target = frames[index];
      if( fence.getValue() >= target.fenceValue )
          return;
      fence.setEvent(target.fenceValue, fenceEvent);
      fenceEvent.wait(-1);
  }
  /**
      Flushes pending transitions and records a compute dispatch of
      `x` * `y` * `z` groups, followed by `memoryBarrier()` when `barrier` is true.
  **/
  override function computeDispatch( x : Int = 1, y : Int = 1, z : Int = 1, barrier : Bool = true ) {
      flushTransitions();
      frame.commandList.dispatch(x, y, z);
      if( !barrier )
          return;
      memoryBarrier();
  }
  /**
      Records a UAV barrier with a null resource on the current command list,
      reusing the shared `tmp.barrier` structure.
  **/
  override function memoryBarrier() {
      var uavBarrier = tmp.barrier;
      @:privateAccess uavBarrier.type = UAV;
      uavBarrier.resource = null;
      frame.commandList.resourceBarrier(uavBarrier);
  }
  2294. }
  2295. #end