DX12Driver.hx 61 KB


  1. package h3d.impl;
  2. #if (hldx && dx12)
  3. import h3d.impl.Driver;
  4. import dx.Dx12;
  5. import haxe.Int64;
  6. import h3d.mat.Pass;
  7. import h3d.mat.Stencil;
  8. private typedef Driver = Dx12;
  /**
      Node of a singly linked list of temporary GPU buffers.
      The driver keeps two such lists per frame (see DxFrame.availableBuffers / usedBuffers).
  **/
  class TempBuffer {
      /** Next node of the list, or null when this is the tail. **/
      public var next : TempBuffer;
      /** GPU resource owned by this node. **/
      public var buffer : GpuResource;
      public var size : Int;
      /** Compared against the driver frame counter in beginFrame() to evict stale buffers. **/
      public var lastUse : Int;

      public function new() {
      }

      /**
          Returns the number of nodes reachable from this one (this node included).
      **/
      public inline function count() {
          var total = 0;
          var node = this;
          while( node != null ) {
              total++;
              node = node.next;
          }
          return total;
      }
  }
  /**
      Growable sequence of ManagedHeap instances that all share one descriptor type and size.
      `reset()` rewinds to the first heap, `next()` hands out heaps one after another,
      creating a new one only when the sequence is exhausted.
  **/
  class ManagedHeapArray {
      var heaps : Array<ManagedHeap>;
      var type : DescriptorHeapType;
      var size : Int;
      var cursor : Int;

      public function new(type,size) {
          heaps = [];
          this.size = size;
          this.type = type;
      }

      /** Rewinds the sequence; heaps already created are kept for reuse. **/
      public function reset() {
          cursor = 0;
      }

      /**
          Returns the heap at the current position and advances.
          An existing heap is cleared before being returned; a missing one is created and stored.
      **/
      public function next() {
          var index = cursor;
          cursor = index + 1;
          var existing = heaps[index];
          if( existing != null ) {
              existing.clear();
              return existing;
          }
          var created = new ManagedHeap(type, size);
          heaps.push(created);
          return created;
      }
  }
  /**
      Per back-buffer state: command recording objects, descriptor heaps,
      deferred-release queues, temporary buffer lists and query bookkeeping.
  **/
  class DxFrame {
      // --- swap chain resources ---
      public var backBuffer : ResourceData;
      public var backBufferView : Address;
      public var depthBuffer : GpuResource;

      // --- command recording ---
      public var allocator : CommandAllocator;
      public var commandList : CommandList;
      public var fenceValue : Int64;

      // --- deferred release queues, drained in DX12Driver.beginFrame() ---
      public var toRelease : Array<Resource>;
      public var tmpBufToNullify : Array<Texture>;
      public var tmpBufToRelease : Array<dx.Dx12.GpuResource>;

      // --- descriptor heaps ---
      public var shaderResourceViews : ManagedHeap;
      public var samplerViews : ManagedHeap;
      public var shaderResourceCache : ManagedHeapArray;
      public var samplerCache : ManagedHeapArray;

      // --- temporary buffers (linked lists) ---
      public var availableBuffers : TempBuffer;
      public var usedBuffers : TempBuffer;

      // --- queries ---
      public var queryHeaps : Array<QueryHeap>;
      public var queriesPending : Array<Query>;
      public var queryCurrentHeap : Int;
      public var queryHeapOffset : Int;
      public var queryBuffer : GpuResource;

      public function new() {
          // Every array starts empty; all other fields keep their default value.
          toRelease = [];
          tmpBufToNullify = [];
          tmpBufToRelease = [];
          queryHeaps = [];
          queriesPending = [];
      }
  }
  /**
      One entry of a pipeline cache bucket (see CompiledShader.pipelines).
      NOTE(review): `bytes`/`size` presumably hold the signature the pipeline was built for - confirm against the lookup code.
  **/
  class CachedPipeline {
      public var bytes : hl.Bytes;
      public var size : Int;
      public var pipeline : GraphicsPipelineState;

      public function new() {}
  }
  /**
      Root-parameter bookkeeping for one shader stage.
      The values are filled by DX12Driver.computeRootSignature().
  **/
  class ShaderRegisters {
      // Root parameter identifiers returned by the allocation helpers.
      public var globals : Int;
      public var params : Int;
      public var buffers : Int;
      public var textures : Int;
      public var samplers : Int;

      // Counters copied from the runtime shader data.
      public var texturesCount : Int;
      public var textures2DCount : Int;

      public function new() {}
  }
  /**
      Driver-side representation of a compiled hxsl.RuntimeShader:
      per-stage register layout, root signature, input layout and the cache of
      pipeline states built for it.
  **/
  class CompiledShader {
      public var vertexRegisters : ShaderRegisters;
      public var fragmentRegisters : ShaderRegisters;
      public var format : hxd.BufferFormat;
      public var pipeline : GraphicsPipelineStateDesc;
      /** Pipeline cache buckets indexed by an Int key. **/
      public var pipelines : Map<Int,hl.NativeArray<CachedPipeline>>;
      public var rootSignature : RootSignature;
      public var inputLayout : hl.CArray<InputElementDesc>;
      public var inputCount : Int;
      public var shader : hxsl.RuntimeShader;

      public function new() {
          pipelines = new Map();
      }
  }
  /**
      Scratch native structures reused by the driver for every call, so that
      no descriptor/struct has to be allocated while recording commands.
      Field order is left untouched because this is a @:struct with @:packed members.
  **/
  @:struct class TempObjects {
      public var renderTargets : hl.BytesAccess<Address>;
      public var depthStencils : hl.BytesAccess<Address>;
      public var vertexViews : hl.CArray<VertexBufferView>;
      public var descriptors2 : hl.NativeArray<DescriptorHeap>;
      @:packed public var heap(default,null) : HeapProperties;
      @:packed public var barrier(default,null) : ResourceBarrier;
      @:packed public var clearColor(default,null) : ClearColor;
      @:packed public var clearValue(default,null) : ClearValue;
      @:packed public var viewport(default,null) : Viewport;
      @:packed public var rect(default,null) : Rect;
      @:packed public var tex2DSRV(default,null) : Tex2DSRV;
      @:packed public var texCubeSRV(default,null) : TexCubeSRV;
      @:packed public var tex2DArraySRV(default,null) : Tex2DArraySRV;
      @:packed public var bufferSRV(default,null) : BufferSRV;
      @:packed public var samplerDesc(default,null) : SamplerDesc;
      @:packed public var cbvDesc(default,null) : ConstantBufferViewDesc;
      @:packed public var rtvDesc(default,null) : RenderTargetViewDesc;
      public var pass : h3d.mat.Pass;

      public function new() {
          // Native arrays / byte blocks.
          renderTargets = new hl.Bytes(8 * 8); // presumably 8 view handles of 8 bytes each - TODO confirm
          depthStencils = new hl.Bytes(8);
          vertexViews = hl.CArray.alloc(VertexBufferView, 16);
          descriptors2 = new hl.NativeArray(2);

          // Default pass used as a reference state.
          pass = new h3d.mat.Pass("default");
          pass.stencil = new h3d.mat.Stencil();

          // Texture shader resource view templates.
          tex2DSRV.dimension = TEXTURE2D;
          tex2DSRV.mipLevels = -1;
          tex2DSRV.shader4ComponentMapping = ShaderComponentMapping.DEFAULT;

          texCubeSRV.dimension = TEXTURECUBE;
          texCubeSRV.mipLevels = -1;
          texCubeSRV.shader4ComponentMapping = ShaderComponentMapping.DEFAULT;

          tex2DArraySRV.dimension = TEXTURE2DARRAY;
          tex2DArraySRV.mipLevels = -1;
          tex2DArraySRV.shader4ComponentMapping = ShaderComponentMapping.DEFAULT;

          // Raw buffer shader resource view template.
          bufferSRV.dimension = BUFFER;
          bufferSRV.flags = RAW;
          bufferSRV.shader4ComponentMapping = ShaderComponentMapping.DEFAULT;

          // Sampler template.
          samplerDesc.comparisonFunc = NEVER;
          samplerDesc.maxLod = 1e30;

          barrier.subResource = -1; // all
      }
  }
  /**
      Descriptor heap used as a ring allocator.
      `cursor` is the next free slot, `start` marks where the current frame began
      and `limit` where the previous one began (see `next()`); when an allocation
      would run into `limit` the heap is replaced by a bigger one.
  **/
  class ManagedHeap {
      /** Size in bytes of one descriptor, as reported by the driver for `type`. **/
      public var stride(default,null) : Int;
      var size : Int;
      var start : Int;
      var cursor : Int;
      var limit : Int;
      var type : DescriptorHeapType;
      var heap : DescriptorHeap;
      var address : Address;
      var cpuToGpu : Int64;
      public var available(get,never) : Int;

      public function new(type,size=8) {
          this.type = type;
          this.stride = Driver.getDescriptorHandleIncrementSize(type);
          allocHeap(size);
      }

      /**
          Creates a fresh native heap of `size` descriptors and resets all cursors.
          The previous heap (if any) is NOT released here: callers hand it to `onFree`.
      **/
      function allocHeap( size : Int ) {
          var desc = new DescriptorHeapDesc();
          desc.type = type;
          desc.numDescriptors = size;
          if( type == CBV_SRV_UAV || type == SAMPLER )
              desc.flags = SHADER_VISIBLE;
          heap = new DescriptorHeap(desc);
          this.size = size;
          limit = cursor = start = 0;
          address = heap.getHandle(false);
          // Offset between the CPU and GPU handles, only meaningful for shader visible heaps.
          cpuToGpu = desc.flags == SHADER_VISIBLE ? ( heap.getHandle(true).value - address.value ) : 0;
      }

      /**
          Called with the heap that has just been replaced. The default implementation
          throws; owners override it to schedule the release of `prev`.
      **/
      public dynamic function onFree( prev : DescriptorHeap ) {
          throw "Too many buffers";
      }

      /** Replaces the heap by one 1.5x bigger and notifies `onFree` with the old one. **/
      function replaceWithLargerHeap() {
          var previous = heap;
          allocHeap((size * 3) >> 1);
          onFree(previous);
      }

      /**
          Reserves `count` consecutive descriptors and returns the CPU address of the first one.
      **/
      public function alloc( count : Int ) {
          if( cursor >= limit && cursor + count > size ) {
              // Reached the end of the heap: wrap around ...
              cursor = 0;
              // ... unless the beginning is still in use, in which case grow.
              if( limit == 0 )
                  replaceWithLargerHeap();
          }
          if( cursor < limit && cursor + count >= limit )
              replaceWithLargerHeap();
          var first = cursor;
          cursor = first + count;
          return address.offset(first * stride);
      }

      inline function get_available() {
          var remaining = limit - cursor;
          return remaining > 0 ? remaining : size + remaining;
      }

      /**
          Unconditionally grows the heap, giving the old one to the supplied callback
          (which shadows the `onFree` member) and returning the new native heap.
      **/
      public inline function grow( onFree ) {
          var previous = heap;
          allocHeap((size*3)>>1);
          onFree(previous);
          return heap;
      }

      /** Forgets every allocation. **/
      public function clear() {
          limit = cursor = start = 0;
      }

      /** Frame boundary: what was the current range becomes the protected range. **/
      public function next() {
          limit = start;
          start = cursor;
      }

      /** Converts a CPU descriptor address of this heap into the matching GPU address. **/
      public inline function toGPU( address : Address ) : Address {
          return new Address(address.value + cpuToGpu);
      }
  }
  /**
      A GPU resource together with the state the driver last transitioned it to
      (see DX12Driver.transition()).
  **/
  class ResourceData {
      public var res : GpuResource;
      public var state : ResourceState;

      public function new() {}
  }
  /** Resource data for buffers, with an `uploaded` flag. **/
  class BufferData extends ResourceData {
      public var uploaded : Bool;
  }
  /** Buffer data carrying both a vertex buffer view and an index buffer view. **/
  class VertexBufferData extends BufferData {
      public var view : dx.Dx12.VertexBufferView;
      public var iview : dx.Dx12.IndexBufferView;
      public var size : Int;
  }
  /**
      Resource data for textures: native format, current clear color and an
      optional temporary buffer.
  **/
  class TextureData extends ResourceData {
      public var format : DxgiFormat;
      public var color : h3d.Vector4;
      public var tmpBuf : dx.Dx12.GpuResource;
      var clearColorChanges : Int;

      /**
          Records `c` as the new clear color.
          Returns false - leaving everything untouched - when the color has already been
          changed more than 10 times or when `c` equals the stored color; otherwise the
          change counter is incremented, the color is copied and true is returned.
      **/
      public function setClearColor( c : h3d.Vector4 ) {
          if( clearColorChanges > 10 )
              return false;
          var current = color;
          var unchanged = current.r == c.r && current.g == c.g && current.b == c.b && current.a == c.a;
          if( unchanged )
              return false;
          clearColorChanges++;
          current.load(c);
          return true;
      }
  }
  /** Location of a query (heap index + offset) and its resolved result. **/
  class QueryData {
      public var heap : Int;
      public var offset : Int;
      public var result : Float;

      public function new() {}
  }
  250. class DX12Driver extends h3d.impl.Driver {
  // ---- pipeline signature layout (byte offsets inside `pipelineSignature`) ----
  static inline var PSIGN_MATID = 0;                                 // offset 0
  static inline var PSIGN_COLOR_MASK = PSIGN_MATID + 4;              // offset 4
  static inline var PSIGN_UNUSED = PSIGN_COLOR_MASK + 1;             // offset 5
  static inline var PSIGN_STENCIL_MASK = PSIGN_UNUSED + 1;           // offset 6
  static inline var PSIGN_STENCIL_OPS = PSIGN_STENCIL_MASK + 2;      // offset 8
  static inline var PSIGN_RENDER_TARGETS = PSIGN_STENCIL_OPS + 4;    // offset 12
  static inline var PSIGN_LAYOUT = PSIGN_RENDER_TARGETS + 8;         // offset 20

  var pipelineSignature = new hl.Bytes(64);
  var adlerOut = new hl.Bytes(4);

  // ---- device / window ----
  var driver : DriverInstance;
  var hasDeviceError = false;
  var window : dx.Window;
  var onContextLost : Void -> Void;

  // ---- frames and synchronisation ----
  var frames : Array<DxFrame>;
  var frame : DxFrame;
  var fence : Fence;
  var fenceEvent : WaitEvent;

  // ---- view heaps ----
  var renderTargetViews : ManagedHeap;
  var depthStenciViews : ManagedHeap; // (sic) name kept as is, it is referenced throughout the class
  var indirectCommand : CommandSignature;

  var currentFrame : Int;
  var fenceValue : Int64 = 0;

  // ---- current render state ----
  var needPipelineFlush = false;
  var currentPass : h3d.mat.Pass;
  var currentWidth : Int;
  var currentHeight : Int;
  var currentShader : CompiledShader;
  var compiledShaders : Map<Int,CompiledShader> = new Map();
  var compiler : ShaderCompiler;
  var currentIndex : Buffer;
  var tmp : TempObjects;
  var currentRenderTargets : Array<h3d.mat.Texture> = [];
  var defaultDepth : h3d.mat.Texture;
  var depthEnabled = true;
  var curStencilRef : Int = -1;
  var rtWidth : Int;
  var rtHeight : Int;
  var frameCount : Int;
  var tsFreq : haxe.Int64;

  // ---- global configuration, read by reset() ----
  /** Initial capacity of the render target and depth stencil view heaps. **/
  public static var INITIAL_RT_COUNT = 1024;
  /** Number of DxFrame objects / back buffers. **/
  public static var BUFFER_COUNT = 2;
  /** Passed to Driver.create(); null by default. **/
  public static var DEVICE_NAME = null;
  /** When true the DEBUG init flag is passed to Driver.create(). **/
  public static var DEBUG = false;
  /** Binds the driver to the first registered dx.Window and creates every device object. **/
  public function new() {
      var allWindows = @:privateAccess dx.Window.windows;
      window = allWindows[0];
      reset();
  }
  /** Every feature is reported as supported except `Queries` and `BottomLeftCoords`. **/
  override function hasFeature(f:Feature) {
      switch( f ) {
      case Queries, BottomLeftCoords:
          return false;
      default:
          return true;
      }
  }
  /** Reports every texture format as supported; no per-format check is performed here. **/
  override function isSupportedFormat(fmt:h3d.mat.Data.TextureFormat):Bool {
      return true;
  }
  /**
      (Re)creates the device and every object that depends on it: per-frame command
      objects and descriptor caches, fence, view heaps, the default depth texture
      wrapper, the indirect draw command signature and the shader compiler.
      Ends by sizing the swap chain to the window.
  **/
  function reset() {
      var initFlags = new DriverInitFlags();
      if( DEBUG ) initFlags.set(DriverInitFlag.DEBUG);
      driver = Driver.create(window, initFlags, DEVICE_NAME);

      // One DxFrame per back buffer.
      frames = [];
      for(i in 0...BUFFER_COUNT) {
          var fr = new DxFrame();
          fr.backBuffer = new ResourceData();
          fr.allocator = new CommandAllocator(DIRECT);
          fr.commandList = new CommandList(DIRECT, fr.allocator, null);
          // Closed right away: beginFrame() calls reset() on it before recording.
          fr.commandList.close();
          fr.shaderResourceCache = new ManagedHeapArray(CBV_SRV_UAV, 1024);
          fr.samplerCache = new ManagedHeapArray(SAMPLER, 1024);
          frames.push(fr);
      }

      fence = new Fence(0, NONE);
      fenceEvent = new WaitEvent(false);
      tmp = new TempObjects();

      // View heaps: a replaced heap is released together with the current frame.
      renderTargetViews = new ManagedHeap(RTV, INITIAL_RT_COUNT);
      renderTargetViews.onFree = function(prev) frame.toRelease.push(prev);
      depthStenciViews = new ManagedHeap(DSV, INITIAL_RT_COUNT);
      depthStenciViews.onFree = function(prev) frame.toRelease.push(prev);

      // Texture wrapper around the per-frame depth buffer (its resource is set in beginFrame).
      defaultDepth = new h3d.mat.Texture(0,0, Depth24Stencil8);
      defaultDepth.t = new TextureData();
      defaultDepth.t.state = DEPTH_WRITE;
      defaultDepth.name = "defaultDepth";

      // Command signature holding a single DRAW_INDEXED argument (stride: 5 * 4 bytes).
      var argDescs = hl.CArray.alloc(IndirectArgumentDesc, 1);
      argDescs[0].type = DRAW_INDEXED;
      var signDesc = new CommandSignatureDesc();
      signDesc.byteStride = 5 * 4;
      signDesc.numArgumentDescs = 1;
      signDesc.argumentDescs = argDescs;
      indirectCommand = Driver.createCommandSignature(signDesc,null);

      tsFreq = Driver.getTimestampFrequency();
      compiler = new ShaderCompiler();
      resize(window.width, window.height);
  }
  /**
      Starts recording a new frame on the current back buffer: resets the command
      objects, performs the deferred releases, recycles temporary buffers, binds the
      back buffer as render target and installs fresh descriptor heaps.
  **/
  function beginFrame() {
      frameCount = hxd.Timer.frameCount;
      currentFrame = Driver.getCurrentBackBufferIndex();
      var previousFrame = frame;
      frame = frames[currentFrame];
      defaultDepth.t.res = frame.depthBuffer;
      frame.allocator.reset();
      frame.commandList.reset(frame.allocator, null);

      // Resources queued for release when this frame was last recorded.
      while( frame.toRelease.length > 0 )
          frame.toRelease.pop().release();
      while( frame.tmpBufToRelease.length > 0 ) {
          var pending = frame.tmpBufToRelease.pop();
          if ( pending != null )
              pending.release();
      }

      // Detach the temporary buffers of the previous frame's textures and
      // queue them for release the next time this frame comes around.
      if ( previousFrame != null ) {
          while ( previousFrame.tmpBufToNullify.length > 0 ) {
              var tex = previousFrame.tmpBufToNullify.pop();
              frame.tmpBufToRelease.push(tex.tmpBuf);
              tex.tmpBuf = null;
          }
      }

      beginQueries();

      // Merge the available list into the used one, releasing the buffers that
      // were last used more than 120 frames ago; the result becomes the available list.
      var recycled = frame.usedBuffers;
      var node = frame.availableBuffers;
      while( node != null ) {
          var following = node.next;
          if( node.lastUse < frameCount - 120 )
              node.buffer.release();
          else {
              node.next = recycled;
              recycled = node;
          }
          node = following;
      }
      frame.availableBuffers = recycled;
      frame.usedBuffers = null;

      transition(frame.backBuffer, RENDER_TARGET);
      frame.commandList.iaSetPrimitiveTopology(TRIANGLELIST);

      renderTargetViews.next();
      depthStenciViews.next();
      curStencilRef = -1;
      currentIndex = null;

      frame.backBufferView = renderTargetViews.alloc(1);
      Driver.createRenderTargetView(frame.backBuffer.res, null, frame.backBufferView);
      setRenderTarget(null);

      // Restart the descriptor heap sequences and bind the first heap of each kind.
      frame.shaderResourceCache.reset();
      frame.samplerCache.reset();
      frame.shaderResourceViews = frame.shaderResourceCache.next();
      frame.samplerViews = frame.samplerCache.next();
      var heapPair = tmp.descriptors2;
      heapPair[0] = @:privateAccess frame.shaderResourceViews.heap;
      heapPair[1] = @:privateAccess frame.samplerViews.heap;
      frame.commandList.setDescriptorHeaps(heapPair);
  }
  /**
      Clears the bound render targets (or the back buffer when none is bound) with
      `color`, and the bound depth/stencil view with `depth` / `stencil`.
      Each argument is optional; a null value leaves the matching part untouched.
  **/
  override function clear(?color:Vector4, ?depth:Float, ?stencil:Int) {
      if( color != null ) {
          var clear = tmp.clearColor;
          clear.r = color.r;
          clear.g = color.g;
          clear.b = color.b;
          clear.a = color.a;
          var count = currentRenderTargets.length;
          // clear backbuffer
          if( count == 0 )
              frame.commandList.clearRenderTargetView(frame.backBufferView, clear);
          for( i in 0...count ) {
              var tex = currentRenderTargets[i];
              if( tex != null && tex.t.setClearColor(color) ) {
                  // update texture to use another clear value
                  var old = tex.t;
                  tex.t = allocTexture(tex);
                  @:privateAccess tex.t.clearColorChanges = old.clearColorChanges;
                  // the old resource may still be referenced by recorded commands
                  frame.toRelease.push(old.res);
                  Driver.createRenderTargetView(tex.t.res, null, tmp.renderTargets[i]);
              }
              frame.commandList.clearRenderTargetView(tmp.renderTargets[i], clear);
          }
      }
      if( depth == null && stencil == null )
          return;
      frame.commandList.clearDepthStencilView(tmp.depthStencils[0], depth != null ? (stencil != null ? BOTH : DEPTH) : STENCIL, (depth:Float), stencil);
  }
  /**
      Blocks until the GPU has reached the current fence value, then increments it:
      signal the fence, arm the event on that value, wait without timeout.
  **/
  function waitGpu() {
      var target = fenceValue;
      Driver.signal(fence, target);
      fence.setEvent(target, fenceEvent);
      fenceEvent.wait(-1);
      fenceValue++;
  }
  /**
      Resizes the swap chain and recreates one depth buffer per frame.
      Does nothing when the size is unchanged. Any frame being recorded is flushed
      and the GPU is drained before the old buffers are released; a new frame is
      started at the end.
  **/
  override function resize(width:Int, height:Int) {
      if( currentWidth == width && currentHeight == height )
          return;
      rtWidth = width;
      currentWidth = width;
      rtHeight = height;
      currentHeight = height;
      @:privateAccess defaultDepth.width = width;
      @:privateAccess defaultDepth.height = height;

      if( frame != null )
          flushFrame(true);
      waitGpu();

      // Release the buffers tied to the old size.
      for( fr in frames ) {
          if( fr.backBuffer.res != null )
              fr.backBuffer.res.release();
          if( fr.depthBuffer != null )
              fr.depthBuffer.release();
      }

      Driver.resize(width, height, BUFFER_COUNT, R8G8B8A8_UNORM);
      renderTargetViews.clear();
      depthStenciViews.clear();

      for( i in 0...frames.length ) {
          var fr = frames[i];

          // Back buffer owned by the swap chain.
          fr.backBuffer.res = Driver.getBackBuffer(i);
          fr.backBuffer.res.setName("Backbuffer#"+i);
          fr.backBuffer.state = PRESENT;

          // Matching depth/stencil buffer, created in DEPTH_WRITE state.
          var heapFlags = new haxe.EnumFlags();
          var depthDesc = new ResourceDesc();
          depthDesc.dimension = TEXTURE2D;
          depthDesc.width = width;
          depthDesc.height = height;
          depthDesc.depthOrArraySize = 1;
          depthDesc.mipLevels = 1;
          depthDesc.sampleDesc.count = 1;
          depthDesc.format = D24_UNORM_S8_UINT;
          depthDesc.flags.set(ALLOW_DEPTH_STENCIL);
          tmp.heap.type = DEFAULT;
          tmp.clearValue.format = depthDesc.format;
          tmp.clearValue.depth = 1;
          tmp.clearValue.stencil= 0;
          fr.depthBuffer = Driver.createCommittedResource(tmp.heap, heapFlags, depthDesc, DEPTH_WRITE, tmp.clearValue);
          fr.depthBuffer.setName("Depthbuffer#"+i);
      }
      beginFrame();
  }
  /** Intentionally empty: frame set-up is done by beginFrame(). **/
  override function begin(frame:Int) {}
  /** The driver is considered disposed once a device error has been flagged. **/
  override function isDisposed() {
      return hasDeviceError;
  }
  /**
      Stores `onCreate(true)` as the context-lost handler and schedules
      `onCreate(false)` through haxe.Timer.delay with a delay of 1.
      `forceSoftware` is not used by this driver.
  **/
  override function init( onCreate : Bool -> Void, forceSoftware = false ) {
      onContextLost = onCreate.bind(true);
      var notifyCreated = onCreate.bind(false);
      haxe.Timer.delay(notifyCreated, 1);
  }
  /** Returns "DX12", followed by a space and the device name when `details` is set. **/
  override function getDriverName(details:Bool) {
      var name = "DX12";
      if( !details )
          return name;
      return name + (" "+Driver.getDeviceName());
  }
  /** Flags the device as being in error; isDisposed() returns true afterwards. **/
  public function forceDeviceError() {
      hasDeviceError = true;
  }
  /**
      Records a resource barrier moving `res` from its tracked state to `to` and
      updates the tracked state. No command is recorded when the state is already `to`.
  **/
  function transition( res : ResourceData, to : ResourceState ) {
      if( res.state != to ) {
          var barrier = tmp.barrier;
          barrier.resource = res.res;
          barrier.stateBefore = res.state;
          barrier.stateAfter = to;
          frame.commandList.resourceBarrier(barrier);
          res.state = to;
      }
  }
  /**
      Encodes the format of a render target on 4 bits for the pipeline signature:
      bits 2-3 hold (channel count - 1), bits 0-1 hold (format family + 1) where the
      family is 0 for 8-bit, 1 for 16F and 2 for 32F formats.
      Throws for any other texture format.
  **/
  function getRTBits( tex : h3d.mat.Texture ) {
      inline function pack(channelCount,family) {
          return ((channelCount - 1) << 2) | (family + 1);
      }
      return switch( tex.format ) {
      // 8-bit formats
      case R8: pack(1, 0);
      case RG8: pack(2, 0);
      case RGB8: pack(3, 0);
      case RGBA: pack(4, 0);
      // 16-bit float formats
      case R16F: pack(1, 1);
      case RG16F: pack(2, 1);
      case RGB16F: pack(3, 1);
      case RGBA16F: pack(4, 1);
      // 32-bit float formats
      case R32F: pack(1, 2);
      case RG32F: pack(2, 2);
      case RGB32F: pack(3, 2);
      case RGBA32F: pack(4, 2);
      default: throw "Unsupported RT format "+tex.format;
      }
  }
  /**
      Returns the depth view to bind along with `tex`.
      - `tex` null: the frame's own depth buffer is used.
      - `tex` without depth buffer: depth is disabled and null is returned.
      - otherwise the texture's depth buffer is used; its size must match the texture's.
  **/
  function getDepthViewFromTexture( tex : h3d.mat.Texture, readOnly : Bool ) {
      if( tex == null )
          return getDepthView(null, readOnly);
      var depth = tex.depthBuffer;
      if( depth == null ) {
          depthEnabled = false;
          return null;
      }
      if( depth.width != tex.width || depth.height != tex.height )
          throw "Depth size mismatch";
      return getDepthView(depth, readOnly);
  }
  /**
      Creates a depth stencil view for `depthBuffer` (or for the frame's depth buffer
      when null), stores it in slot 0 of `tmp.depthStencils`, enables depth and
      transitions the texture to DEPTH_READ / DEPTH_WRITE according to `readOnly`.
      Returns `tmp.depthStencils`.
  **/
  function getDepthView( depthBuffer : h3d.mat.Texture, readOnly : Bool ) {
      var target = depthBuffer != null ? depthBuffer.t.res : frame.depthBuffer;
      var view = depthStenciViews.alloc(1);

      var desc = new DepthStencilViewDesc();
      desc.format = D24_UNORM_S8_UINT;
      desc.viewDimension = TEXTURE2D;
      desc.mipSlice = 0;
      desc.firstArraySlice = 0;
      desc.arraySize = 1;
      if ( readOnly ) {
          desc.flags.set(READ_ONLY_DEPTH);
          desc.flags.set(READ_ONLY_STENCIL);
      }
      Driver.createDepthStencilView(target, desc, view);

      var slots = tmp.depthStencils;
      slots[0] = view;
      depthEnabled = true;
      // The frame's own depth buffer has no tracked state, only textures are transitioned.
      if ( depthBuffer != null )
          transition(depthBuffer.t, readOnly ? DEPTH_READ : DEPTH_WRITE);
      return slots;
  }
  /** Returns the texture wrapper whose resource is swapped to the current frame's depth buffer in beginFrame(). **/
  override function getDefaultDepthBuffer():h3d.mat.Texture {
      return defaultDepth;
  }
  /**
      Remembers the render target size and sets both the scissor rectangle and the
      viewport to cover the whole `w` x `h` area.
  **/
  function initViewport(w,h) {
      rtWidth = w;
      rtHeight = h;

      var scissor = tmp.rect;
      scissor.left = 0;
      scissor.top = 0;
      scissor.right = w;
      scissor.bottom = h;

      var vp = tmp.viewport;
      vp.width = w;
      vp.height = h;
      vp.maxDepth = 1;

      frame.commandList.rsSetScissorRects(1, tmp.rect);
      frame.commandList.rsSetViewports(1, tmp.viewport);
  }
  /**
      Binds a single render target (or the back buffer when `tex` is null).
      `layer` / `mipLevel` select the sub resource rendered to, `depthBinding` tells
      whether and how the depth buffer is bound. A texture that has never been cleared
      is cleared to transparent black. The pipeline signature is updated accordingly.
  **/
  override function setRenderTarget(tex:Null<h3d.mat.Texture>, layer:Int = 0, mipLevel:Int = 0, depthBinding : h3d.Engine.DepthBinding = ReadWrite) {
      var hasTex = tex != null;
      if( hasTex ) {
          if( tex.t == null ) tex.alloc();
          transition(tex.t, RENDER_TARGET);
      }
      depthEnabled = depthBinding != NotBound;

      // An explicit view description is only needed for a non default sub resource.
      var isArray = hasTex && (tex.flags.has(IsArray) || tex.flags.has(Cube));
      var viewDesc = null;
      if( layer != 0 || mipLevel != 0 || isArray ) {
          viewDesc = tmp.rtvDesc;
          viewDesc.format = tex.t.format;
          viewDesc.mipSlice = mipLevel;
          viewDesc.planeSlice = 0;
          if( isArray ) {
              viewDesc.viewDimension = TEXTURE2DARRAY;
              viewDesc.firstArraySlice = layer;
              viewDesc.arraySize = 1;
          } else
              viewDesc.viewDimension = TEXTURE2D;
      }

      if( hasTex ) {
          var view = renderTargetViews.alloc(1);
          Driver.createRenderTargetView(tex.t.res, viewDesc, view);
          tmp.renderTargets[0] = view;
      } else
          tmp.renderTargets[0] = frame.backBufferView;

      // First use of the texture as a target: give it defined contents.
      if( hasTex && !tex.flags.has(WasCleared) ) {
          tex.flags.set(WasCleared);
          var black = tmp.clearColor;
          black.r = 0;
          black.g = 0;
          black.b = 0;
          black.a = 0;
          frame.commandList.clearRenderTargetView(tmp.renderTargets[0], black);
      }

      // getDepthViewFromTexture() may turn depthEnabled off, so it has to run
      // before the pipeline signature is computed below.
      frame.commandList.omSetRenderTargets(1, tmp.renderTargets, true, depthEnabled ? getDepthViewFromTexture(tex, depthBinding == ReadOnly ) : null);

      while( currentRenderTargets.length > 0 ) currentRenderTargets.pop();
      if( hasTex ) currentRenderTargets.push(tex);

      var w = hasTex ? tex.width >> mipLevel : currentWidth;
      var h = hasTex ? tex.height >> mipLevel : currentHeight;
      if( w == 0 ) w = 1;
      if( h == 0 ) h = 1;
      initViewport(w, h);

      // NOTE(review): because of operator precedence the depth bit is only added when
      // `tex` is not null; the back buffer always gets signature 0 whatever `depthEnabled`
      // is. Kept as is - confirm whether this is intended.
      pipelineSignature.setI32(PSIGN_RENDER_TARGETS, tex == null ? 0 : getRTBits(tex) | (depthEnabled ? 0x80000000 : 0));
      needPipelineFlush = true;
  }
  /**
      Binds several render targets at once (MRT).
      Each texture is allocated if needed, gets a render target view, is transitioned
      to RENDER_TARGET and - on first use - cleared to transparent black. The depth
      buffer and the viewport are taken from the first texture.
      Returns early, leaving the targets partially set up, if allocating a texture
      raised a device error.

      @param textures      targets to bind, in slot order
      @param depthBinding  whether and how depth is bound
  **/
  override function setRenderTargets(textures:Array<h3d.mat.Texture>, depthBinding : h3d.Engine.DepthBinding = ReadWrite) {
      while( currentRenderTargets.length > textures.length )
          currentRenderTargets.pop();
      depthEnabled = depthBinding != NotBound;
      var t0 = textures[0];
      var texViews = renderTargetViews.alloc(textures.length);
      var bits = 0;
      for( i => t in textures ) {
          if ( t.t == null ) {
              t.alloc();
              if ( hasDeviceError ) return;
          }
          var view = texViews.offset(renderTargetViews.stride * i);
          Driver.createRenderTargetView(t.t.res, null, view);
          tmp.renderTargets[i] = view;
          currentRenderTargets[i] = t;
          // 4 signature bits per target
          bits |= getRTBits(t) << (i << 2);
          // The resource must be in RENDER_TARGET state before it is cleared or
          // rendered to, so the barrier is recorded ahead of the first-use clear
          // (same order as in setRenderTarget).
          transition(t.t, RENDER_TARGET);
          if ( !t.flags.has(WasCleared) ) {
              t.flags.set(WasCleared);
              var clear = tmp.clearColor;
              clear.r = 0;
              clear.g = 0;
              clear.b = 0;
              clear.a = 0;
              frame.commandList.clearRenderTargetView(tmp.renderTargets[i], clear);
          }
      }
      frame.commandList.omSetRenderTargets(textures.length, tmp.renderTargets, true, depthEnabled ? getDepthViewFromTexture(t0, depthBinding == ReadOnly) : null);
      initViewport(t0.width, t0.height);
      pipelineSignature.setI32(PSIGN_RENDER_TARGETS, bits | (depthEnabled ? 0x80000000 : 0));
      needPipelineFlush = true;
  }
  /**
      Binds `depthBuffer` alone, without any color target (depth-only rendering),
      and sizes the viewport to it.
  **/
  override function setDepth(depthBuffer : h3d.mat.Texture) {
      var depthView = getDepthView(depthBuffer, false);
      depthEnabled = true;
      frame.commandList.omSetRenderTargets(0, null, true, depthView);
      // No color target is bound any more.
      while( currentRenderTargets.length > 0 )
          currentRenderTargets.pop();
      initViewport(depthBuffer.width, depthBuffer.height);
      // Signature: depth bit only.
      pipelineSignature.setI32(PSIGN_RENDER_TARGETS, 0x80000000);
      needPipelineFlush = true;
  }
  /**
      Sets the scissor rectangle. Passing x == 0, y == 0 with negative width AND
      height restores the full render target area.
  **/
  override function setRenderZone(x:Int, y:Int, width:Int, height:Int) {
      var wholeTarget = width < 0 && height < 0 && x == 0 && y == 0;
      var scissor = tmp.rect;
      if( wholeTarget ) {
          scissor.left = 0;
          scissor.top = 0;
          scissor.right = rtWidth;
          scissor.bottom = rtHeight;
      } else {
          scissor.left = x;
          scissor.top = y;
          scissor.right = x + width;
          scissor.bottom = y + height;
      }
      frame.commandList.rsSetScissorRects(1, tmp.rect);
  }
  /**
      Reads back the first bound render target into `pixels`.
      Throws when rendering to the back buffer (no texture bound).
  **/
  override function captureRenderBuffer( pixels : hxd.Pixels ) {
      var target = currentRenderTargets[0];
      if( target != null ) {
          captureTexPixels(pixels, target, 0, 0);
          return;
      }
      throw "Can't capture main render buffer in DirectX";
  }
  /**
      Reads back a texture sub resource and returns it as newly allocated pixels.
      When `region` is given it is first clamped to the texture bounds (the argument
      itself is modified) and only that area is captured. The captured size is the
      source size shifted by `mipLevel`, with a minimum of 1x1.
  **/
  override function capturePixels(tex:h3d.mat.Texture, layer:Int, mipLevel:Int, ?region:h2d.col.IBounds):hxd.Pixels {
      var srcWidth = tex.width;
      var srcHeight = tex.height;
      var originX = 0;
      var originY = 0;
      if( region != null ) {
          if (region.xMax > tex.width) region.xMax = tex.width;
          if (region.yMax > tex.height) region.yMax = tex.height;
          if (region.xMin < 0) region.xMin = 0;
          if (region.yMin < 0) region.yMin = 0;
          srcWidth = region.width;
          srcHeight = region.height;
          originX = region.xMin;
          originY = region.yMin;
      }
      var w = srcWidth >> mipLevel;
      var h = srcHeight >> mipLevel;
      if( w == 0 ) w = 1;
      if( h == 0 ) h = 1;
      var pixels : hxd.Pixels = hxd.Pixels.alloc(w, h, tex.format);
      captureTexPixels(pixels, tex, layer, mipLevel, originX, originY);
      return pixels;
  }
  /**
      Copies a rectangle of a texture sub resource into `pixels`.
      The data goes through a temporary READBACK buffer; the current frame is
      flushed and the GPU drained before the buffer is read, and a new frame is
      started afterwards. Does nothing when `pixels` is empty.

      @param pixels    destination; its width/height define the size of the copied area
      @param tex       source texture
      @param layer     array layer / cube face
      @param mipLevel  mip level
      @param x         left of the copied area inside the sub resource
      @param y         top of the copied area inside the sub resource
  **/
  function captureTexPixels( pixels: hxd.Pixels, tex:h3d.mat.Texture, layer:Int, mipLevel:Int, x : Int = 0, y : Int = 0) {
      if( pixels.width == 0 || pixels.height == 0 )
          return;
      var totalSize : hl.BytesAccess<Int64> = new hl.Bytes(8);
      var src = new TextureCopyLocation();
      src.res = tex.t.res;
      src.subResourceIndex = mipLevel + layer * tex.mipLevels;
      var srcDesc = makeTextureDesc(tex);
      var dst = new TextureCopyLocation();
      dst.type = PLACED_FOOTPRINT;
      // Layout (row pitch, total size) of the sub resource once placed in a buffer.
      Driver.getCopyableFootprints(srcDesc, src.subResourceIndex, 1, 0, dst.placedFootprint, null, null, totalSize);
      var desc = new ResourceDesc();
      var flags = new haxe.EnumFlags();
      desc.dimension = BUFFER;
      desc.width = totalSize[0];
      desc.height = 1;
      desc.depthOrArraySize = 1;
      desc.mipLevels = 1;
      desc.sampleDesc.count = 1;
      desc.layout = ROW_MAJOR;
      tmp.heap.type = READBACK;
      var tmpBuf = Driver.createCommittedResource(tmp.heap, flags, desc, COPY_DEST, null);
      // The box is expressed with absolute coordinates: right/bottom are the far
      // edges of the area, not its width/height, so the origin has to be added.
      var box = new Box();
      box.left = x;
      box.right = x + pixels.width;
      box.top = y;
      box.bottom = y + pixels.height;
      box.back = 1;
      transition(tex.t, COPY_SOURCE);
      dst.res = tmpBuf;
      frame.commandList.copyTextureRegion(dst, 0, 0, 0, src, box);
      // Execute the copy and wait for it before mapping the buffer.
      flushFrame();
      waitGpu();
      var output = tmpBuf.map(0, null);
      var stride = hxd.Pixels.calcStride(pixels.width, tex.format);
      var rowStride = dst.placedFootprint.footprint.rowPitch;
      if( rowStride == stride )
          (pixels.bytes:hl.Bytes).blit(pixels.offset, output, 0, stride * pixels.height);
      else {
          // Rows are padded in the buffer: copy them one by one.
          for( i in 0...pixels.height )
              (pixels.bytes:hl.Bytes).blit(pixels.offset + i * stride, output, i * rowStride, stride);
      }
      tmpBuf.unmap(0,null);
      tmpBuf.release();
      beginFrame();
  }
  758. // ---- SHADERS -----
  /**
      Vertex input formats indexed by an integer; entries 0 and 1 have no format.
      NOTE(review): the index is presumably the number of float components of the input - confirm against the code using it.
  **/
  static var VERTEX_FORMATS = [
      null,
      null,
      R32G32_FLOAT,
      R32G32B32_FLOAT,
      R32G32B32A32_FLOAT
  ];
  /**
      Looks for a precompiled binary for the shader source `code`.
      1. If the source embeds a `//BIN=<base64>#` marker, the decoded payload is returned.
      2. Otherwise the shader cache, when there is one, is asked to resolve it.
      3. Otherwise null is returned.
  **/
  function getBinaryPayload( code : String ) {
      var markerPos = code.indexOf("//BIN=");
      if( markerPos >= 0 ) {
          var payloadStart = markerPos + 6; // skip the "//BIN=" marker itself
          var payloadEnd = code.indexOf("#", markerPos);
          if( payloadEnd >= 0 )
              return haxe.crypto.Base64.decode(code.substr(payloadStart, payloadEnd - payloadStart));
      }
      if( shaderCache == null )
          return null;
      return shaderCache.resolveShaderBinary(code);
  }
  /**
      Returns the compiled binary of one shader stage.
      The HLSL source is generated (prefixed by `rootStr`) and stored in `sh.code` the
      first time; a precompiled payload is used when available, otherwise the source
      is compiled with `profile`.
  **/
  function compileSource( sh : hxsl.RuntimeShader.RuntimeShaderData, profile, baseRegister, rootStr = "" ) {
      var compileArgs = [];
      var generator = new hxsl.HlslOut();
      generator.baseRegister = baseRegister;
      if ( sh.code == null ) {
          var generated = generator.run(sh.data);
          sh.code = rootStr + generated;
      }
      var precompiled = getBinaryPayload(sh.code);
      if ( precompiled == null )
          return compiler.compile(sh.code, profile, compileArgs);
      return precompiled;
  }
  /**
      Returns the HLSL source of the vertex stage followed by the fragment stage,
      separated by four line feeds. Each stage uses its own HlslOut instance.
  **/
  override function getNativeShaderCode( shader : hxsl.RuntimeShader ) {
      var vertexSource = new hxsl.HlslOut().run(shader.vertex.data);
      var fragmentSource = new hxsl.HlslOut().run(shader.fragment.data);
      return vertexSource+"\n\n\n\n"+fragmentSource;
  }
  /**
      Builds a `#define <name> ...` line describing a root signature in textual form:
      the root flags followed by one entry per root parameter (root constants or a
      descriptor table made of a single range).
      Parameters that cannot be read as a descriptor table, and UAV ranges, are
      silently skipped (any exception raised while handling a table is swallowed).

      NOTE(review): the opening quote is closed right after `RootFlags(...)`, so the
      parameter entries are emitted outside the string literal and the line ends with
      a trailing comma - confirm this is what the consumer of the define expects.
  **/
  function stringifyRootSignature( sign : RootSignatureDesc, name : String, params : hl.CArray<RootParameterConstants>, paramsCount : Int ) : String {
      var out = '#define ${name} "RootFlags(';
      if ( sign.flags.toInt() != 0 ) {
          // RootFlags: names of the set flags separated by '|'
          for ( flagName in haxe.EnumTools.getConstructors(RootSignatureFlag) ) {
              var flag = haxe.EnumTools.createByName(RootSignatureFlag, flagName);
              if ( sign.flags.has(flag) )
                  out += Std.string(flagName) + '|';
          }
          // drop the trailing separator
          out = out.substr(0, out.length - 1);
      } else
          out += '0'; // no flags
      out += ')",';

      for ( i in 0...paramsCount ) {
          var param = params[i];
          var vis = 'SHADER_VISIBILITY_${param.shaderVisibility == VERTEX ? "VERTEX" : "PIXEL"}';
          if ( param.parameterType == CONSTANTS ) {
              var reg = param.shaderRegister;
              out += 'RootConstants(num32BitConstants=${param.num32BitValues},b${reg}, visibility=${vis}),';
              continue;
          }
          try {
              // Same memory, viewed as a descriptor table parameter.
              var table = unsafeCastTo(param, RootParameterDescriptorTable);
              if( table == null || table.descriptorRanges == null ) continue;
              var range = table.descriptorRanges[0];
              var reg = range.baseShaderRegister;
              switch ( range.rangeType ) {
              case CBV:
                  out += 'DescriptorTable(CBV(b${reg}), visibility = ${vis}),';
              case SRV:
                  out += 'DescriptorTable(SRV(t${reg},numDescriptors = ${range.numDescriptors}), visibility = ${vis}),';
              case SAMPLER:
                  out += 'DescriptorTable(Sampler(s${reg}, space=${range.registerSpace}, numDescriptors = ${range.numDescriptors}), visibility = ${vis}),';
              case UAV:
                  throw "Not supported";
              }
          } catch ( e : Dynamic ) {
              continue;
          }
      }
      out += '\n';
      return out;
  }
  /**
      Reinterprets `v` as an instance of `c` without any runtime check, by storing it
      in a one element native array and reading it back through a differently typed
      view of that array. `c` is only used to select the result type.
  **/
  inline function unsafeCastTo<T,R>( v : T, c : Class<R> ) : R {
      var slot = new hl.NativeArray<T>(1);
      slot[0] = v;
      var retyped : hl.NativeArray<R> = cast slot;
      return retyped[0];
  }
  // Builds the root signature description used by both stages of `shader`.
  //
  // Root parameters are written into a fixed-size native array (`allocatedParams`
  // entries). For each stage (vertex first, then fragment) the layout is:
  //   globals constants, params (constants or CBV table), one CBV table per
  //   buffer, then one SRV table and one sampler table if the stage has textures.
  //
  // Returns an anonymous structure holding the RootSignatureDesc (`sign`), the
  // per-stage ShaderRegisters, the first register index used by the fragment
  // stage (`fragmentRegStart`), the raw parameter array with its used count, and
  // `texDescs`, the list of every descriptor range that was allocated.
  //
  // Throws "Too many globals" when the estimated size still exceeds 64 after
  // moving both params blocks to CBVs, and "ASSERT : Too many parameters" when
  // more root parameters are required than the array can hold.
  function computeRootSignature( shader : hxsl.RuntimeShader ) {
      var allocatedParams = 16;
      var params = hl.CArray.alloc(RootParameterConstants,allocatedParams);
      var paramsCount = 0, regCount = 0;
      var texDescs = [];
      var vertexParamsCBV = false;
      var fragmentParamsCBV = false;
      // Hands out the next free slot of `params`. The capacity check is done
      // BEFORE the slot is indexed so that the native array is never accessed
      // out of bounds.
      function reserveParam() : Int {
          if( paramsCount >= allocatedParams )
              throw "ASSERT : Too many parameters";
          return paramsCount++;
      }
      // Allocates a root parameter holding a single-range descriptor table and
      // returns that range so the caller can fill it in.
      function allocDescTable(vis) {
          var p = unsafeCastTo(params[reserveParam()], RootParameterDescriptorTable);
          p.parameterType = DESCRIPTOR_TABLE;
          p.numDescriptorRanges = 1;
          var rangeArr = hl.CArray.alloc(DescriptorRange,1);
          var range = rangeArr[0];
          texDescs.push(range);
          p.descriptorRanges = rangeArr;
          p.shaderVisibility = vis;
          return range;
      }
      // Allocates `size` vec4 of constants for visibility `vis`.
      // A shader register is always consumed, even when `size` is 0 (in which
      // case no root parameter is created and -1 is returned).
      // With `useCBV` the data goes through a CBV descriptor table and the
      // returned parameter index has bit 0x100 set; otherwise root constants
      // are used and the plain parameter index is returned.
      function allocConsts(size,vis,useCBV) {
          var reg = regCount++;
          if( size == 0 ) return -1;
          if( useCBV ) {
              var pid = paramsCount;
              var r = allocDescTable(vis);
              r.rangeType = CBV;
              r.numDescriptors = 1;
              r.baseShaderRegister = reg;
              r.registerSpace = 0;
              return pid | 0x100;
          }
          var pid = reserveParam();
          var p = params[pid];
          p.parameterType = CONSTANTS;
          p.shaderRegister = reg;
          p.shaderVisibility = vis;
          p.num32BitValues = size << 2; // 4 x 32-bit values per unit of size
          return pid;
      }
      // Allocates every root parameter of one shader stage and records their
      // indexes in a new ShaderRegisters.
      function allocParams( sh : hxsl.RuntimeShader.RuntimeShaderData ) {
          var vis = sh.kind == Vertex ? VERTEX : PIXEL;
          var regs = new ShaderRegisters();
          regs.globals = allocConsts(sh.globalsSize, vis, false);
          regs.params = allocConsts(sh.paramsSize, vis, sh.kind == Vertex ? vertexParamsCBV : fragmentParamsCBV);
          if( sh.bufferCount > 0 ) {
              regs.buffers = paramsCount;
              for( i in 0...sh.bufferCount )
                  allocConsts(1, vis, true);
          }
          if( sh.texturesCount > 0 ) {
              regs.texturesCount = sh.texturesCount;
              regs.textures = paramsCount;
              // look for the 2D sampler array to know how many textures are 2D
              var p = sh.textures;
              while( p != null ) {
                  switch( p.type ) {
                  case TArray( TSampler2D , SConst(n) ): regs.textures2DCount = n;
                  default:
                  }
                  p = p.next;
              }
              var r = allocDescTable(vis);
              r.rangeType = SRV;
              r.baseShaderRegister = 0;
              r.registerSpace = 0;
              r.numDescriptors = sh.texturesCount;
              regs.samplers = paramsCount;
              var r = allocDescTable(vis);
              r.rangeType = SAMPLER;
              r.baseShaderRegister = 0;
              r.registerSpace = 0;
              r.numDescriptors = sh.texturesCount;
          }
          return regs;
      }
      // Costs in units:
      // Descriptor Tables cost 1 each
      // Root CBVs cost 2 each
      // Root SRVs cost 2 each
      // Root UAVs cost 2 each
      // Root Constants cost 1 per 32-bit value
      function calcSize( sh : hxsl.RuntimeShader.RuntimeShaderData ) {
          var s = (sh.globalsSize + sh.paramsSize) << 2;
          s += sh.texturesCount;
          return s;
      }
      var totalVertex = calcSize(shader.vertex);
      var totalFragment = calcSize(shader.fragment);
      var total = totalVertex + totalFragment;
      if( total > 64 ) {
          var vertexParamSizeCost = (shader.vertex.paramsSize << 2);
          var fragmentParamSizeCost = (shader.fragment.paramsSize << 2);
          // Remove the size cost of the root constant and add one descriptor table.
          var withoutVP = total - vertexParamSizeCost + 1;
          var withoutFP = total - fragmentParamSizeCost + 1;
          // move the vertex params first when that alone is enough, or when
          // moving only the fragment params would not be enough
          if( withoutVP < 64 || withoutFP > 64 ) {
              vertexParamsCBV = true;
              total = withoutVP;
          }
          if( total > 64 ) {
              fragmentParamsCBV = true;
              total = total - fragmentParamSizeCost + 1;
          }
          if( total > 64 )
              throw "Too many globals";
      }
      var vertexRegisters = allocParams(shader.vertex);
      var fragmentRegStart = regCount;
      var fragmentRegisters = allocParams(shader.fragment);
      // final sanity check; reserveParam() already enforces the limit
      if( paramsCount > allocatedParams )
          throw "ASSERT : Too many parameters";
      var sign = new RootSignatureDesc();
      sign.flags.set(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT);
      sign.flags.set(DENY_HULL_SHADER_ROOT_ACCESS);
      sign.flags.set(DENY_DOMAIN_SHADER_ROOT_ACCESS);
      sign.flags.set(DENY_GEOMETRY_SHADER_ROOT_ACCESS);
      sign.numParameters = paramsCount;
      sign.parameters = cast params;
      return { sign : sign, fragmentRegStart : fragmentRegStart, vertexRegisters : vertexRegisters, fragmentRegisters : fragmentRegisters, params : params, paramsCount : paramsCount, texDescs : texDescs };
  }
  // Compiles both stages of `shader` and prepares everything that does not
  // depend on the render state: root signature, input layout skeleton and a
  // pipeline description pre-filled with defaults. The actual pipeline state
  // object is not created here (see the commented-out call below).
  function compileShader( shader : hxsl.RuntimeShader ) : CompiledShader {
      var root = computeRootSignature(shader);
      var compiled = new CompiledShader();
      compiled.vertexRegisters = root.vertexRegisters;
      compiled.fragmentRegisters = root.fragmentRegisters;

      // both stages are compiled against the same textual root signature
      var rootStr = stringifyRootSignature(root.sign, "ROOT_SIGNATURE", root.params, root.paramsCount);
      var vsCode = compileSource(shader.vertex, "vs_6_0", 0, rootStr);
      var psCode = compileSource(shader.fragment, "ps_6_0", root.fragmentRegStart, rootStr);

      var signSize = 0;
      var signBytes = Driver.serializeRootSignature(root.sign, 1, signSize);
      var rootSign = new RootSignature(signBytes,signSize);

      // only the vertex shader variables of kind Input take part in the layout
      var inputVars = shader.vertex.data.vars.filter(function(v) return v.kind.match(Input));
      var inputCount = inputVars.length;
      var layout = hl.CArray.alloc(InputElementDesc, inputCount);
      var format : Array<hxd.BufferFormat.BufferInput> = [];
      for( slot => v in inputVars ) {
          var stepRate = 0;
          if( v.qualifiers != null ) {
              for( q in v.qualifiers ) {
                  switch( q ) {
                  case PerInstance(k): stepRate = k;
                  default:
                  }
              }
          }
          var elt = layout[slot];
          elt.semanticName = @:privateAccess hxsl.HlslOut.semanticName(v.name).toUtf8();
          elt.inputSlot = slot;
          if( stepRate <= 0 )
              elt.inputSlotClass = PER_VERTEX_DATA;
          else {
              elt.inputSlotClass = PER_INSTANCE_DATA;
              elt.instanceDataStepRate = stepRate;
          }
          elt.alignedByteOffset = 1; // will trigger error if not set in makePipeline()
          format.push({ name : v.name, type : hxd.BufferFormat.InputFormat.fromHXSL(v.type) });
      }

      // default pipeline description, patched later for each render state
      var desc = new GraphicsPipelineStateDesc();
      desc.rootSignature = rootSign;
      desc.vs.bytecodeLength = vsCode.length;
      desc.vs.shaderBytecode = vsCode;
      desc.ps.bytecodeLength = psCode.length;
      desc.ps.shaderBytecode = psCode;
      desc.rasterizerState.fillMode = SOLID;
      desc.rasterizerState.cullMode = NONE;
      desc.primitiveTopologyType = TRIANGLE;
      desc.numRenderTargets = 1;
      desc.rtvFormats[0] = R8G8B8A8_UNORM;
      desc.dsvFormat = UNKNOWN;
      desc.sampleDesc.count = 1;
      desc.sampleMask = -1;
      desc.inputLayout.inputElementDescs = layout;
      desc.inputLayout.numElements = inputCount;
      //Driver.createGraphicsPipelineState(p);

      compiled.format = hxd.BufferFormat.make(format);
      compiled.pipeline = desc;
      compiled.rootSignature = rootSign;
      compiled.inputLayout = layout;
      compiled.inputCount = inputCount;
      compiled.shader = shader;
      return compiled;
  }
  // Detaches the native resource from `r` and queues it on the current
  // frame's release list; `r` itself is reset to the PRESENT state.
  function disposeResource( r : ResourceData ) {
      var native = r.res;
      r.res = null;
      r.state = PRESENT;
      frame.toRelease.push(native);
  }
  1029. // ----- BUFFERS
  // Creates a committed buffer resource of `size` bytes on a heap of type
  // `heapType`, in the initial resource state `state`.
  // Note: overwrites the shared `tmp.heap.type`.
  function allocGPU( size : Int, heapType, state ) {
      tmp.heap.type = heapType;
      var noHeapFlags = new haxe.EnumFlags();
      var bufDesc = new ResourceDesc();
      bufDesc.dimension = BUFFER;
      bufDesc.layout = ROW_MAJOR;
      // a buffer is described as a single row of `size` bytes
      bufDesc.width = size;
      bufDesc.height = 1;
      bufDesc.depthOrArraySize = 1;
      bufDesc.mipLevels = 1;
      bufDesc.sampleDesc.count = 1;
      return Driver.createCommittedResource(tmp.heap, noHeapFlags, bufDesc, state, null);
  }
  // Allocates the GPU side of `m`.
  // Uniform buffers are padded with calcCBVSize() and get no view, index
  // buffers get an IndexBufferView, everything else a VertexBufferView.
  // `uploaded` starts as true for buffers carrying the Dynamic flag.
  override function allocBuffer( m : h3d.Buffer ) : GPUBuffer {
      var data = new VertexBufferData();
      var byteSize = m.getMemSize();
      var isUniform = m.flags.has(UniformBuffer);
      var allocSize = isUniform ? calcCBVSize(byteSize) : byteSize;
      data.state = COPY_DEST;
      data.res = allocGPU(allocSize, DEFAULT, COMMON);
      data.size = allocSize;
      data.uploaded = m.flags.has(Dynamic);
      if( isUniform )
          return data; // no view
      if( m.flags.has(IndexBuffer) ) {
          var indexView = new IndexBufferView();
          indexView.bufferLocation = data.res.getGpuVirtualAddress();
          indexView.format = m.format.strideBytes == 4 ? R32_UINT : R16_UINT;
          indexView.sizeInBytes = byteSize;
          data.iview = indexView;
          return data;
      }
      var vertexView = new VertexBufferView();
      vertexView.bufferLocation = data.res.getGpuVirtualAddress();
      vertexView.sizeInBytes = byteSize;
      vertexView.strideInBytes = m.format.strideBytes;
      data.view = vertexView;
      return data;
  }
  // Creates the GPU buffer backing `b` and fills it with `bytes` through a
  // temporary upload buffer, then records a barrier moving it from COPY_DEST
  // to NON_PIXEL_SHADER_RESOURCE.
  override function allocInstanceBuffer(b:InstanceBuffer, bytes:haxe.io.Bytes) {
      // 5 * 4 bytes per command (presumably five 32-bit values each — TODO confirm)
      var byteCount = b.commandCount * 5 * 4;
      var gpuBuf = allocGPU(byteCount, DEFAULT, COMMON);
      var staging = allocDynamicBuffer(bytes, byteCount);
      var cmd = frame.commandList;
      cmd.copyBufferRegion(gpuBuf, 0, staging, 0, byteCount);
      b.data = gpuBuf;
      var barrier = tmp.barrier;
      barrier.resource = gpuBuf;
      barrier.stateBefore = COPY_DEST;
      barrier.stateAfter = NON_PIXEL_SHADER_RESOURCE;
      cmd.resourceBarrier(barrier);
  }
  // Queues the GPU resource of `v` for release (see disposeResource).
  override function disposeBuffer(v:Buffer) {
      var data = v.vbuf;
      disposeResource(data);
  }
  // Queues the native resource of `b` on the current frame's release list
  // and clears the reference held by the instance buffer.
  override function disposeInstanceBuffer(b:InstanceBuffer) {
      var native : GpuResource = b.data;
      frame.toRelease.push(native);
      b.data = null;
  }
  // Copies `bytesCount` bytes from `bytes` into the GPU buffer `b`, starting
  // at byte `startByte` of the destination.
  //
  // `bytes` must already point at the first byte to upload (every caller
  // offsets it by its own source position); `startByte` is ONLY a destination
  // offset and must never be applied to the source pointer.
  //
  // Buffers flagged `uploaded` go through the per-frame pool of temporary
  // buffers; the very first upload of other buffers uses a one-shot upload
  // buffer that is queued for release, after which the buffer is marked
  // `uploaded`.
  function updateBuffer( b : BufferData, bytes : hl.Bytes, startByte : Int, bytesCount : Int ) {
      var tmpBuf;
      if( b.uploaded )
          tmpBuf = allocDynamicBuffer(bytes, bytesCount);
      else {
          var size = calcCBVSize(bytesCount);
          tmpBuf = allocGPU(size, UPLOAD, GENERIC_READ);
          var ptr = tmpBuf.map(0, null);
          ptr.blit(0, bytes, 0, bytesCount);
          tmpBuf.unmap(0,null);
      }
      frame.commandList.copyBufferRegion(b.res, startByte, tmpBuf, 0, bytesCount);
      if( !b.uploaded ) {
          // the one-shot buffer is not part of the pool: release it with the frame
          frame.toRelease.push(tmpBuf);
          b.uploaded = true;
      }
  }
  // Uploads `indiceCount` indices read from `buf` at `bufPos` into the index
  // buffer `i`, starting at index `startIndice`.
  override function uploadIndexData(i:Buffer, startIndice:Int, indiceCount:Int, buf:hxd.IndexBuffer, bufPos:Int) {
      // shift converting an index count into a byte count (stride 2 -> 1, stride 4 -> 2)
      var shift = i.format.strideBytes >> 1;
      var target = i.vbuf;
      var src = hl.Bytes.getArray(buf.getNative()).offset(bufPos << shift);
      transition(target, COPY_DEST);
      updateBuffer(target, src, startIndice << shift, indiceCount << shift);
      transition(target, INDEX_BUFFER);
  }
  // Uploads `vertexCount` vertices taken from the float buffer `buf`
  // (starting at float index `bufPos`) into `b` at vertex `startVertex`.
  override function uploadBufferData(b:Buffer, startVertex:Int, vertexCount:Int, buf:hxd.FloatBuffer, bufPos:Int) {
      var stride = b.format.strideBytes;
      var target = b.vbuf;
      // bufPos counts floats: 4 bytes each
      var src = hl.Bytes.getArray(buf.getNative()).offset(bufPos<<2);
      transition(target, COPY_DEST);
      updateBuffer(target, src, startVertex * stride, vertexCount * stride);
      var finalState = b.flags.has(IndexBuffer) ? INDEX_BUFFER : VERTEX_AND_CONSTANT_BUFFER;
      transition(target, finalState);
  }
  // Same as uploadBufferData but the source is raw bytes and `bufPos` is a
  // byte offset.
  override function uploadBufferBytes(b:Buffer, startVertex:Int, vertexCount:Int, buf:haxe.io.Bytes, bufPos:Int) {
      var stride = b.format.strideBytes;
      var target = b.vbuf;
      var src = @:privateAccess buf.b.offset(bufPos);
      transition(target, COPY_DEST);
      updateBuffer(target, src, startVertex * stride, vertexCount * stride);
      var finalState = b.flags.has(IndexBuffer) ? INDEX_BUFFER : VERTEX_AND_CONSTANT_BUFFER;
      transition(target, finalState);
  }
  1122. // ------------ TEXTURES -------
  // Maps the engine format of `t` to the matching DXGI format.
  // Throws "assert" for an S3TC index outside 1-7 and
  // "Unsupported texture format ..." for any format not listed here.
  function getTextureFormat( t : h3d.mat.Texture ) : DxgiFormat {
      // block compressed formats are selected by their BC index
      function blockCompressed( index : Int ) : DxgiFormat {
          return switch( index ) {
          case 1: BC1_UNORM;
          case 2: BC2_UNORM;
          case 3: BC3_UNORM;
          case 4: BC4_UNORM;
          case 5: BC5_UNORM;
          case 6: BC6H_UF16;
          case 7: BC7_UNORM;
          default: throw "assert";
          }
      }
      return switch( t.format ) {
      // 8 bits per channel
      case R8: R8_UNORM;
      case RG8: R8G8_UNORM;
      case RGBA: R8G8B8A8_UNORM;
      case SRGB_ALPHA: R8G8B8A8_UNORM_SRGB;
      // 16 bits per channel
      case R16F: R16_FLOAT;
      case RG16F: R16G16_FLOAT;
      case RGBA16F: R16G16B16A16_FLOAT;
      case R16U: R16_UNORM;
      case RG16U: R16G16_UNORM;
      case RGBA16U: R16G16B16A16_UNORM;
      // 32 bits per channel
      case R32F: R32_FLOAT;
      case RG32F: R32G32_FLOAT;
      case RGB32F: R32G32B32_FLOAT;
      case RGBA32F: R32G32B32A32_FLOAT;
      // packed
      case RGB10A2: R10G10B10A2_UNORM;
      case RG11B10UF: R11G11B10_FLOAT;
      // compressed
      case S3TC(n): blockCompressed(n);
      default: throw "Unsupported texture format " + t.format;
      }
  }
  // Describes `t` as a 2D texture resource: size, layer count, mip count and
  // DXGI format, with a sample count of 1.
  function makeTextureDesc(t:h3d.mat.Texture) {
      var texDesc = new ResourceDesc();
      texDesc.dimension = TEXTURE2D;
      texDesc.format = getTextureFormat(t);
      texDesc.width = t.width;
      texDesc.height = t.height;
      texDesc.depthOrArraySize = t.layerCount;
      texDesc.mipLevels = t.mipLevels;
      texDesc.sampleDesc.count = 1;
      return texDesc;
  }
  // Creates the GPU resource for `t`.
  // Render targets are created in the RENDER_TARGET state with an optimized
  // clear color (the previous texture data color when available, transparent
  // black otherwise); other textures are created in COMMON and tracked as
  // COPY_DEST. Throws when a compressed texture size is not a multiple of 4.
  override function allocTexture(t:h3d.mat.Texture):Texture {
      var compressed = t.format.match(S3TC(_));
      var misaligned = (t.width & 3) != 0 || (t.height & 3) != 0;
      if( compressed && misaligned )
          throw t+" is compressed "+t.width+"x"+t.height+" but should be a 4x4 multiple";
      var isRT = t.flags.has(Target);
      var heapFlags = new haxe.EnumFlags();
      var desc = makeTextureDesc(t);
      var data = new TextureData();
      data.format = desc.format;
      tmp.heap.type = DEFAULT;
      var clear = null;
      if( isRT ) {
          // reuse prev color
          var prevData = t.t;
          var color = if( prevData == null || prevData.color == null ) new h3d.Vector4(0,0,0,0) else prevData.color;
          desc.flags.set(ALLOW_RENDER_TARGET);
          clear = tmp.clearValue;
          clear.format = desc.format;
          clear.color.r = color.r;
          clear.color.g = color.g;
          clear.color.b = color.b;
          clear.color.a = color.a;
          data.color = color;
          data.state = RENDER_TARGET;
          data.res = Driver.createCommittedResource(tmp.heap, heapFlags, desc, RENDER_TARGET, clear);
      } else {
          data.state = COPY_DEST;
          data.res = Driver.createCommittedResource(tmp.heap, heapFlags, desc, COMMON, clear);
      }
      var label = t.name == null ? "Texture#"+t.id : t.name;
      data.res.setName(label);
      t.lastFrame = frameCount;
      t.flags.unset(WasCleared);
      return data;
  }
  // Creates a depth/stencil resource of the size of `b`: typeless R24G8
  // storage, cleared as D24_UNORM_S8_UINT (depth 1, stencil 0), created in
  // the DEPTH_WRITE state.
  override function allocDepthBuffer(b:h3d.mat.Texture):Texture {
      var data = new TextureData();
      var depthDesc = new ResourceDesc();
      var heapFlags = new haxe.EnumFlags();
      depthDesc.dimension = TEXTURE2D;
      depthDesc.format = R24G8_TYPELESS;
      depthDesc.width = b.width;
      depthDesc.height = b.height;
      depthDesc.depthOrArraySize = 1;
      depthDesc.mipLevels = 1;
      depthDesc.sampleDesc.count = 1;
      depthDesc.flags.set(ALLOW_DEPTH_STENCIL);
      tmp.heap.type = DEFAULT;
      var clear = tmp.clearValue;
      clear.format = D24_UNORM_S8_UINT;
      clear.depth = 1;
      clear.stencil = 0;
      data.state = DEPTH_WRITE;
      data.res = Driver.createCommittedResource(tmp.heap, heapFlags, depthDesc, DEPTH_WRITE, clear);
      return data;
  }
  // Queues the GPU resource of `t` for release and detaches it from the texture.
  override function disposeTexture(t:h3d.mat.Texture) {
      var data = t.t;
      disposeResource(data);
      t.t = null;
  }
  // Queues the GPU resource of the depth texture `t` for release and
  // detaches it from the texture.
  override function disposeDepthBuffer(t:h3d.mat.Texture) {
      var data = t.t;
      disposeResource(data);
      t.t = null;
  }
  // Uploads a bitmap by extracting its pixels, delegating to
  // uploadTexturePixels() and disposing the extracted pixels afterwards.
  override function uploadTextureBitmap(t:h3d.mat.Texture, bmp:hxd.BitmapData, mipLevel:Int, side:Int) {
      var extracted = bmp.getPixels();
      uploadTexturePixels(t, extracted, mipLevel, side);
      extracted.dispose();
  }
  // Uploads `pixels` into sub-resource (`mipLevel`, `side`) of texture `t`.
  // The pixels are first converted in place to the texture format.
  // Throws if `mipLevel` is outside the texture mip range or if the native
  // sub-resource update fails.
  override function uploadTexturePixels(t:h3d.mat.Texture, pixels:hxd.Pixels, mipLevel:Int, side:Int) {
      pixels.convert(t.format);
      if( mipLevel >= t.mipLevels ) throw "Mip level outside texture range : " + mipLevel + " (max = " + (t.mipLevels - 1) + ")";
      tmp.heap.type = UPLOAD;
      // sub-resources are indexed mip-first inside each side/layer
      var subRes = mipLevel + side * t.mipLevels;
      var nbRes = t.mipLevels * t.layerCount;
      // Todo : optimize for video, currently allocating a new tmpBuf every frame.
      if ( t.t.tmpBuf == null ) {
          // a single intermediate buffer sized for ALL sub-resources of the texture
          var tmpSize = t.t.res.getRequiredIntermediateSize(0, nbRes).low;
          t.t.tmpBuf = allocGPU(tmpSize, UPLOAD, GENERIC_READ);
      }
      // total size taken by the `subRes` sub-resources that precede this one
      var previousSize : hl.BytesAccess<Int64> = new hl.Bytes(8);
      Driver.getCopyableFootprints(makeTextureDesc(t), 0, subRes, 0, null, null, null, previousSize);
      // round that size up to a multiple of 512 to get the offset of this
      // sub-resource inside the intermediate buffer
      var offsetAligned = ((previousSize[0] + 512 - 1) / 512) * 512;
      var upd = new SubResourceData();
      var stride = @:privateAccess pixels.stride;
      switch( t.format ) {
      case S3TC(n): stride = pixels.width * ((n == 1 || n == 4) ? 2 : 4); // "uncompressed" stride ?
      default:
      }
      upd.data = (pixels.bytes:hl.Bytes).offset(pixels.offset);
      upd.rowPitch = stride;
      upd.slicePitch = pixels.dataSize;
      transition(t.t, COPY_DEST);
      if( !Driver.updateSubResource(frame.commandList, t.t.res, t.t.tmpBuf, offsetAligned, subRes, 1, upd) )
          throw "Failed to update sub resource";
      transition(t.t, PIXEL_SHADER_RESOURCE);
      // registers the texture data in the frame's tmpBufToNullify list
      frame.tmpBufToNullify.push(t.t);
      t.flags.set(WasCleared);
  }
  // Copies the whole content of `from` into `to`.
  // Returns false when the source is not allocated, when format / size /
  // layer count differ, or when `to` could not be allocated.
  // If one of the two textures is a current render target, it is moved back
  // to the RENDER_TARGET state after the copy.
  override function copyTexture(from:h3d.mat.Texture, to:h3d.mat.Texture):Bool {
      if( from.t == null )
          return false;
      if( from.format != to.format )
          return false;
      if( from.width != to.width || from.height != to.height || from.layerCount != to.layerCount )
          return false;
      if( to.t == null ) {
          // allocate the destination while keeping the source lastFrame untouched
          var savedFrame = from.lastFrame;
          from.preventAutoDispose();
          to.alloc();
          from.lastFrame = savedFrame;
          if( from.t == null ) throw "assert";
          if( to.t == null ) return false;
      }
      transition(from.t, COPY_SOURCE);
      transition(to.t, COPY_DEST);
      var srcLoc = new TextureCopyLocation();
      var dstLoc = new TextureCopyLocation();
      srcLoc.res = from.t.res;
      dstLoc.res = to.t.res;
      frame.commandList.copyTextureRegion(dstLoc, 0, 0, 0, srcLoc, null);
      to.flags.set(WasCleared);
      for( target in currentRenderTargets ) {
          if( target != to && target != from )
              continue;
          transition(target.t, RENDER_TARGET);
          break;
      }
      return true;
  }
  1283. // ----- PIPELINE UPDATE
  // Uploads the `which` part of `buffers` for the vertex stage, then for the
  // fragment stage of the current shader.
  // `currentShader` is intentionally read again for the second call:
  // uploadBuffers() may end up changing it while reallocating textures.
  override function uploadShaderBuffers(buffers:h3d.shader.Buffers, which:h3d.shader.Buffers.BufferKind) {
      uploadBuffers(buffers, buffers.vertex, which, currentShader.shader.vertex, currentShader.vertexRegisters);
      uploadBuffers(buffers, buffers.fragment, which, currentShader.shader.fragment, currentShader.fragmentRegisters);
  }
  // Rounds `dataSize` up to the next multiple of 256 (the view must be a
  // mult of 256). Sizes that are already a multiple of 256 are unchanged.
  function calcCBVSize( dataSize : Int ) {
      return (dataSize + 0xFF) & ~0xFF;
  }
  // Returns an upload buffer containing the first `dataSize` bytes of `data`.
  // The buffer is taken from the frame's list of available buffers when one
  // of a suitable size exists (at least the wanted size, less than twice
  // it), otherwise a new one is created. Either way it ends up at the head
  // of the frame's used list, stamped with the current frame count.
  function allocDynamicBuffer( data : hl.Bytes, dataSize : Int ) {
      var wanted = calcCBVSize(dataSize);
      if( wanted == 0 ) wanted = 1;

      // first fit search through the available list
      var before : TempBuffer = null;
      var node = frame.availableBuffers;
      while( node != null && !(node.size >= wanted && node.size < wanted << 1) ) {
          before = node;
          node = node.next;
      }

      if( node != null ) {
          // unlink it from the available list
          if( before == null )
              frame.availableBuffers = node.next;
          else
              before.next = node.next;
      } else {
          var created = allocGPU(wanted, UPLOAD, GENERIC_READ);
          node = new TempBuffer();
          node.buffer = created;
          node.size = wanted;
      }

      // move it to the used list
      node.lastUse = frameCount;
      node.next = frame.usedBuffers;
      frame.usedBuffers = node;

      var gpuBuf = node.buffer;
      var mapped = gpuBuf.map(0, null);
      mapped.blit(0, data, 0, dataSize);
      gpuBuf.unmap(0,null);
      return gpuBuf;
  }
  // Binds one category (`which`) of shader inputs for a single shader stage.
  //   buffers : the complete buffers set, only used to re-upload everything
  //             when a texture reallocation changed the current shader
  //   buf     : the data of the stage being processed
  //   shader  : the runtime data of that stage
  //   regs    : root parameter indexes allocated for that stage
  function uploadBuffers( buffers : h3d.shader.Buffers, buf : h3d.shader.Buffers.ShaderBuffers, which:h3d.shader.Buffers.BufferKind, shader : hxsl.RuntimeShader.RuntimeShaderData, regs : ShaderRegisters ) {
      switch( which ) {
      case Params:
          if( shader.paramsSize > 0 ) {
              var data = hl.Bytes.getArray(buf.params.toData());
              var dataSize = shader.paramsSize << 4; // size in bytes
              // bit 0x100 marks params bound through a CBV descriptor table
              if( regs.params & 0x100 != 0 ) {
                  // update CBV
                  var srv = frame.shaderResourceViews.alloc(1);
                  var cbv = allocDynamicBuffer(data,dataSize);
                  var desc = tmp.cbvDesc;
                  desc.bufferLocation = cbv.getGpuVirtualAddress();
                  desc.sizeInBytes = calcCBVSize(dataSize);
                  Driver.createConstantBufferView(desc, srv);
                  // the low 8 bits hold the actual root parameter index
                  frame.commandList.setGraphicsRootDescriptorTable(regs.params & 0xFF, frame.shaderResourceViews.toGPU(srv));
              } else
                  frame.commandList.setGraphicsRoot32BitConstants(regs.params, dataSize >> 2, data, 0);
          }
      case Globals:
          // globals are always sent as root constants
          if( shader.globalsSize > 0 )
              frame.commandList.setGraphicsRoot32BitConstants(regs.globals, shader.globalsSize << 2, hl.Bytes.getArray(buf.globals.toData()), 0);
      case Textures:
          if( regs.texturesCount > 0 ) {
              // one SRV and one sampler descriptor per texture
              var srv = frame.shaderResourceViews.alloc(regs.texturesCount);
              var sampler = frame.samplerViews.alloc(regs.texturesCount);
              for( i in 0...regs.texturesCount ) {
                  var t = buf.tex[i];
                  // missing or disposed texture: fall back to a default one
                  if( t == null || t.isDisposed() ) {
                      if( i < regs.textures2DCount ) {
                          var color = h3d.mat.Defaults.loadingTextureColor;
                          t = h3d.mat.Texture.fromColor(color, (color >>> 24) / 255);
                      } else {
                          t = h3d.mat.Texture.defaultCubeTexture();
                      }
                  }
                  // texture without GPU data that knows how to rebuild itself
                  if( t != null && t.t == null && t.realloc != null ) {
                      var s = currentShader;
                      t.alloc();
                      t.realloc();
                      if( hasDeviceError ) return;
                      if( s != currentShader ) {
                          // realloc triggered a shader change !
                          // we need to reset the original shader and reupload everything
                          currentShader = null;
                          selectShader(s.shader);
                          uploadShaderBuffers(buffers,Globals);
                          uploadShaderBuffers(buffers,Params);
                          uploadShaderBuffers(buffers,Textures);
                          return;
                      }
                  }
                  // pick the shared view description matching the texture kind
                  var tdesc : ShaderResourceViewDesc;
                  if( t.flags.has(Cube) ) {
                      var desc = tmp.texCubeSRV;
                      desc.format = t.t.format;
                      desc.mostDetailedMip = t.startingMip;
                      tdesc = desc;
                  } else if( t.flags.has(IsArray) ) {
                      var desc = tmp.tex2DArraySRV;
                      desc.format = t.t.format;
                      desc.arraySize = t.layerCount;
                      desc.mostDetailedMip = t.startingMip;
                      tdesc = desc;
                  } else if ( t.isDepth() ) {
                      // depth textures use a fixed view format instead of t.t.format
                      var desc = tmp.tex2DSRV;
                      desc.format = R24_UNORM_X8_TYPELESS;
                      desc.mostDetailedMip = t.startingMip;
                      tdesc = desc;
                  } else {
                      var desc = tmp.tex2DSRV;
                      desc.format = t.t.format;
                      desc.mostDetailedMip = t.startingMip;
                      tdesc = desc;
                  }
                  t.lastFrame = frameCount;
                  // resource state required to sample the texture from this stage
                  var state = if ( t.isDepth() )
                      DEPTH_READ;
                  else if ( shader.kind == Fragment )
                      PIXEL_SHADER_RESOURCE
                  else
                      NON_PIXEL_SHADER_RESOURCE;
                  transition(t.t, state);
                  Driver.createShaderResourceView(t.t.res, tdesc, srv.offset(i * frame.shaderResourceViews.stride));
                  // sampler built from the texture filter / mipmap / wrap / lod bias settings
                  var desc = tmp.samplerDesc;
                  desc.filter = switch( [t.filter, t.mipMap] ) {
                  case [Nearest, None|Nearest]: MIN_MAG_MIP_POINT;
                  case [Nearest, Linear]: MIN_MAG_POINT_MIP_LINEAR;
                  case [Linear, None|Nearest]: MIN_MAG_LINEAR_MIP_POINT;
                  case [Linear, Linear]: MIN_MAG_MIP_LINEAR;
                  }
                  desc.addressU = desc.addressV = desc.addressW = switch( t.wrap ) {
                  case Clamp: CLAMP;
                  case Repeat: WRAP;
                  }
                  desc.mipLODBias = t.lodBias;
                  Driver.createSampler(desc, sampler.offset(i * frame.samplerViews.stride));
              }
              frame.commandList.setGraphicsRootDescriptorTable(regs.textures, frame.shaderResourceViews.toGPU(srv));
              frame.commandList.setGraphicsRootDescriptorTable(regs.samplers, frame.samplerViews.toGPU(sampler));
          }
      case Buffers:
          if( shader.bufferCount > 0 ) {
              // each buffer has its own descriptor table, starting at regs.buffers
              for( i in 0...shader.bufferCount ) {
                  var srv = frame.shaderResourceViews.alloc(1);
                  var b = buf.buffers[i];
                  var cbv = b.vbuf;
                  // having a vertex view means allocBuffer() did not treat it as a uniform buffer
                  if( cbv.view != null )
                      throw "Buffer was allocated without UniformBuffer flag";
                  transition(cbv, VERTEX_AND_CONSTANT_BUFFER);
                  var desc = tmp.cbvDesc;
                  desc.bufferLocation = cbv.res.getGpuVirtualAddress();
                  desc.sizeInBytes = cbv.size;
                  Driver.createConstantBufferView(desc, srv);
                  frame.commandList.setGraphicsRootDescriptorTable(regs.buffers + i, frame.shaderResourceViews.toGPU(srv));
              }
          }
      }
  }
  // Makes `shader` the current shader, compiling and caching it on first use.
  // Returns false when it was already current; otherwise sets its root
  // signature on the command list, requests a pipeline flush and returns true.
  override function selectShader( shader : hxsl.RuntimeShader ) {
      var compiled = compiledShaders.get(shader.id);
      if( compiled == null ) {
          compiled = compileShader(shader);
          compiledShaders.set(shader.id, compiled);
      }
      var changed = currentShader != compiled;
      if( changed ) {
          currentShader = compiled;
          needPipelineFlush = true;
          frame.commandList.setGraphicsRootSignature(compiled.rootSignature);
      }
      return changed;
  }
  // Records the render state of `pass` into the pipeline signature (pass
  // bits, color mask, stencil mask and ops) and requests a pipeline flush.
  // The stencil reference is not part of the signature: it is set directly
  // on the command list, only when it differs from the last one set.
  override function selectMaterial( pass : h3d.mat.Pass ) @:privateAccess {
      var sig = pipelineSignature;
      needPipelineFlush = true;
      sig.setI32(PSIGN_MATID, pass.bits);
      sig.setUI8(PSIGN_COLOR_MASK, pass.colorMask);
      var stencil = pass.stencil;
      if( stencil == null ) {
          // no stencil : clear both signature fields
          sig.setUI16(PSIGN_STENCIL_MASK, 0);
          sig.setI32(PSIGN_STENCIL_OPS, 0);
          return;
      }
      sig.setUI16(PSIGN_STENCIL_MASK, stencil.maskBits & 0xFFFF);
      sig.setI32(PSIGN_STENCIL_OPS, stencil.opBits);
      var ref = stencil.reference;
      if( curStencilRef != ref ) {
          curStencilRef = ref;
          frame.commandList.omSetStencilRef(ref);
      }
  }
  // Binds a single vertex buffer to every input of the current shader.
  // All views point to the same buffer; the per-input offset and precision
  // are stored in the pipeline signature (one byte per input), which is why
  // offsets of 256 or more are rejected.
  override function selectBuffer(buffer:Buffer) {
      var shaderData = currentShader;
      var slots = tmp.vertexViews;
      var source = buffer.vbuf.view;
      var mapping = buffer.format.resolveMapping(shaderData.format);
      var inputCount = shaderData.inputCount;
      for( slot in 0...inputCount ) {
          var view = slots[slot];
          view.bufferLocation = source.bufferLocation;
          view.sizeInBytes = source.sizeInBytes;
          view.strideInBytes = source.strideInBytes;
          var input = mapping[slot];
          if( input.offset >= 256 ) throw "assert";
          pipelineSignature.setUI8(PSIGN_LAYOUT + slot, input.offset | input.precision.toInt());
      }
      needPipelineFlush = true;
      frame.commandList.iaSetVertexBuffers(0, inputCount, slots[0]);
  }
  // Binds several vertex buffers at once: each entry of the resolved mapping
  // selects, through its bufferIndex, which of `buffers` feeds that input.
  // Offsets and precisions go into the pipeline signature as in selectBuffer.
  override function selectMultiBuffers(formats:hxd.BufferFormat.MultiFormat,buffers:Array<h3d.Buffer>) {
      var slots = tmp.vertexViews;
      var mapping = formats.resolveMapping(currentShader.format);
      var count = mapping.length;
      for( slot in 0...count ) {
          var input = mapping[slot];
          var source = @:privateAccess buffers[input.bufferIndex].vbuf.view;
          var view = slots[slot];
          view.bufferLocation = source.bufferLocation;
          view.sizeInBytes = source.sizeInBytes;
          view.strideInBytes = source.strideInBytes;
          if( input.offset >= 256 ) throw "assert";
          pipelineSignature.setUI8(PSIGN_LAYOUT + slot, input.offset | input.precision.toInt());
      }
      needPipelineFlush = true;
      frame.commandList.iaSetVertexBuffers(0, count, slots[0]);
  }
  // Lookup tables converting integer codes into Dx12 enum values.
  // makePipeline() indexes them with the values decoded from the pass bits
  // and the stencil op bits, so the order of the entries is significant
  // (presumably it mirrors the constructor order of the engine-side enums
  // — TODO confirm against h3d.mat).
  static var CULL : Array<CullMode> = [NONE,BACK,FRONT,NONE];
  static var BLEND_OP : Array<BlendOp> = [ADD,SUBTRACT,REV_SUBTRACT,MIN,MAX];
  // used for both the depth test and the stencil test
  static var COMP : Array<ComparisonFunc> = [ALWAYS, NEVER, EQUAL, NOT_EQUAL, GREATER, GREATER_EQUAL, LESS, LESS_EQUAL];
  // color blend factors
  static var BLEND : Array<Blend> = [
      ONE,ZERO,SRC_ALPHA,SRC_COLOR,DEST_ALPHA,DEST_COLOR,INV_SRC_ALPHA,INV_SRC_COLOR,INV_DEST_ALPHA,INV_DEST_COLOR,
      SRC1_COLOR,SRC1_ALPHA,INV_SRC1_COLOR,INV_SRC1_ALPHA,SRC_ALPHA_SAT
  ];
  // alpha blend factors : same indexes as BLEND, with every *_COLOR entry
  // replaced by its *_ALPHA counterpart
  static var BLEND_ALPHA : Array<Blend> = [
      ONE,ZERO,SRC_ALPHA,SRC_ALPHA,DEST_ALPHA,DEST_ALPHA,INV_SRC_ALPHA,INV_SRC_ALPHA,INV_DEST_ALPHA,INV_DEST_ALPHA,
      SRC1_ALPHA,SRC1_ALPHA,INV_SRC1_ALPHA,INV_SRC1_ALPHA,SRC_ALPHA_SAT,
  ];
  static var STENCIL_OP : Array<StencilOp> = [KEEP, ZERO, REPLACE, INCR_SAT, INCR, DECR_SAT, DECR, INVERT];
  // Creates a pipeline state object for `shader` matching the state currently
  // stored in `pipelineSignature` (material bits, color mask, stencil, input
  // layout) and the current render targets / depth settings.
  // The pipeline description stored in the shader is patched in place and
  // then handed to the driver.
  function makePipeline( shader : CompiledShader ) {
      var p = shader.pipeline;
      // read back what selectMaterial() stored in the signature
      var passBits = pipelineSignature.getI32(PSIGN_MATID);
      var colorMask = pipelineSignature.getUI8(PSIGN_COLOR_MASK);
      var stencilMask = pipelineSignature.getUI16(PSIGN_STENCIL_MASK);
      var stencilOp = pipelineSignature.getI32(PSIGN_STENCIL_OPS);
      var csrc = Pass.getBlendSrc(passBits);
      var cdst = Pass.getBlendDst(passBits);
      var asrc = Pass.getBlendAlphaSrc(passBits);
      var adst = Pass.getBlendAlphaDst(passBits);
      var cop = Pass.getBlendOp(passBits);
      var aop = Pass.getBlendAlphaOp(passBits);
      var dw = Pass.getDepthWrite(passBits);
      var cmp = Pass.getDepthTest(passBits);
      var cull = Pass.getCulling(passBits);
      var wire = Pass.getWireframe(passBits);
      // wireframe forces culling index 0 (CULL[0] is NONE)
      if( wire != 0 ) cull = 0;
      var rtCount = currentRenderTargets.length;
      // always describe at least one render target
      if( rtCount == 0 ) rtCount = 1;
      p.numRenderTargets = rtCount;
      p.rasterizerState.cullMode = CULL[cull];
      p.rasterizerState.fillMode = wire == 0 ? SOLID : WIREFRAME;
      // depth test index 0 (COMP[0] is ALWAYS) disables the depth test
      p.depthStencilDesc.depthEnable = cmp != 0;
      p.depthStencilDesc.depthWriteMask = dw == 0 || !depthEnabled ? ZERO : ALL;
      p.depthStencilDesc.depthFunc = COMP[cmp];
      var bl = p.blendState;
      for( i in 0...rtCount ) {
          // the same blend setup is applied to every render target
          var t = bl.renderTargets[i];
          // blending is off only for source index 0 with destination index 1
          // (BLEND[0] is ONE, BLEND[1] is ZERO)
          t.blendEnable = csrc != 0 || cdst != 1;
          t.srcBlend = BLEND[csrc];
          t.dstBlend = BLEND[cdst];
          t.srcBlendAlpha = BLEND_ALPHA[asrc];
          t.dstBlendAlpha = BLEND_ALPHA[adst];
          t.blendOp = BLEND_OP[cop];
          t.blendOpAlpha = BLEND_OP[aop];
          t.renderTargetWriteMask = colorMask;
          // `t` is redeclared : from here on it is the render target texture
          var t = currentRenderTargets[i];
          p.rtvFormats[i] = t == null ? R8G8B8A8_UNORM : t.t.format;
      }
      p.dsvFormat = depthEnabled ? D24_UNORM_S8_UINT : UNKNOWN;
      for( i in 0...shader.inputCount ) {
          var d = shader.inputLayout[i];
          // one byte per input : precision in the two low bits, byte offset in the others
          var offset = pipelineSignature.getUI8(PSIGN_LAYOUT + i);
          d.alignedByteOffset = offset & ~3;
          d.format = @:privateAccess switch( [shader.format.inputs[i].type, new hxd.BufferFormat.Precision(offset&3)] ) {
          case [DFloat, F32]: R32_FLOAT;
          case [DFloat, F16]: R16_FLOAT;
          case [DFloat, S8]: R8_SNORM;
          case [DFloat, U8]: R8_UNORM;
          case [DVec2, F32]: R32G32_FLOAT;
          case [DVec2, F16]: R16G16_FLOAT;
          case [DVec2, S8]: R8G8_SNORM;
          case [DVec2, U8]: R8G8_UNORM;
          case [DVec3, F32]: R32G32B32_FLOAT;
          case [DVec3, F16]: R16G16B16A16_FLOAT; // padding
          case [DVec3, S8]: R8G8B8A8_SNORM; // padding
          case [DVec3, U8]: R8G8B8A8_UNORM; // padding
          case [DVec4, F32]: R32G32B32A32_FLOAT;
          case [DVec4, F16]: R16G16B16A16_FLOAT;
          case [DVec4, S8]: R8G8B8A8_SNORM;
          case [DVec4, U8]: R8G8B8A8_UNORM;
          case [DBytes4, _]: R8G8B8A8_UINT;
          default: throw "assert";
          };
      }
      // stencil is enabled as soon as a mask or an operation is set
      var stencil = stencilMask != 0 || stencilOp != 0;
      var st = p.depthStencilDesc;
      st.stencilEnable = stencil;
      if( stencil ) {
          var front = st.frontFace;
          var back = st.backFace;
          // read mask in the low byte, write mask in the high byte
          st.stencilReadMask = stencilMask & 0xFF;
          st.stencilWriteMask = stencilMask >> 8;
          front.stencilFunc = COMP[Stencil.getFrontTest(stencilOp)];
          front.stencilPassOp = STENCIL_OP[Stencil.getFrontPass(stencilOp)];
          front.stencilFailOp = STENCIL_OP[Stencil.getFrontSTfail(stencilOp)];
          front.stencilDepthFailOp = STENCIL_OP[Stencil.getFrontDPfail(stencilOp)];
          back.stencilFunc = COMP[Stencil.getBackTest(stencilOp)];
          back.stencilPassOp = STENCIL_OP[Stencil.getBackPass(stencilOp)];
          back.stencilFailOp = STENCIL_OP[Stencil.getBackSTfail(stencilOp)];
          back.stencilDepthFailOp = STENCIL_OP[Stencil.getBackDPfail(stencilOp)];
      }
      return Driver.createGraphicsPipelineState(p);
  }
  /**
      Binds the pipeline state object matching the current `pipelineSignature`
      on the frame's command list, creating and caching it on first use.

      Does nothing unless `needPipelineFlush` is set. Cached pipelines are kept
      per shader in `currentShader.pipelines`, keyed by a digest of the
      signature bytes; every key maps to a small array (bucket) of
      `CachedPipeline` entries so that two signatures producing the same digest
      can coexist.
  **/
  function flushPipeline() {
      if( !needPipelineFlush ) return;
      needPipelineFlush = false;
      var signature = pipelineSignature;
      // Only the fixed header plus one layout byte per shader input is significant.
      var signatureSize = PSIGN_LAYOUT + currentShader.inputCount;
      adlerOut.setI32(0, 0);
      hl.Format.digest(adlerOut, signature, signatureSize, 3);
      var hash = adlerOut.getI32(0);
      var pipes = currentShader.pipelines.get(hash);
      if( pipes == null ) {
          pipes = new hl.NativeArray(1);
          currentShader.pipelines.set(hash, pipes);
      }
      // Scan the bucket: stop at the first free slot, or reuse an exact match.
      var insert = -1;
      for( i in 0...pipes.length ) {
          var p = pipes[i];
          if( p == null ) {
              insert = i;
              break;
          }
          if( p.size == signatureSize && p.bytes.compare(0, signature, 0, signatureSize) == 0 ) {
              frame.commandList.setPipelineState(p.pipeline);
              return;
          }
      }
      if( insert < 0 ) {
          // Bucket is full (digest collision): grow it by one slot and append.
          // The previous code used `insert` (-1) both as the blit length and as
          // the store index, which is out of bounds.
          insert = pipes.length;
          var pipes2 = new hl.NativeArray(insert + 1);
          pipes2.blit(0, pipes, 0, insert);
          currentShader.pipelines.set(hash, pipes2);
          pipes = pipes2;
      }
      var cp = new CachedPipeline();
      // Keep a private copy of the signature: `pipelineSignature` is mutable state.
      cp.bytes = signature.sub(0, signatureSize);
      cp.size = signatureSize;
      cp.pipeline = makePipeline(currentShader);
      pipes[insert] = cp;
      frame.commandList.setPipelineState(cp.pipeline);
  }
  1644. // QUERIES
  // Number of query slots per query heap: used as the heap's `count` when it is
  // created, as the roll-over threshold for `queryHeapOffset`, and as the
  // per-heap stride when locating a result in the readback buffer.
  1645. static inline var QUERY_COUNT = 128;
  /**
      Creates a new query object.
      Only `TimeStamp` queries are handled; any other kind throws "Not implemented".
  **/
  override function allocQuery( queryKind : QueryKind ) : Query {
      return switch( queryKind ) {
          case TimeStamp: new Query();
          default: throw "Not implemented";
      }
  }
  /**
      Releases a query. This driver has no per-query cleanup to perform here,
      so the body is intentionally empty.
  **/
  override function deleteQuery( q : Query ) {
  }
  /**
      Start of a query: intentionally a no-op, the command is recorded in `endQuery`.
  **/
  override function beginQuery( q : Query ) {
  }
  /**
      Records a TIMESTAMP query on the frame's command list and queues `q` so
      that its result can be read back later.

      The query is written to the next free slot of the frame's current query
      heap; the heap is created on demand. Creating a heap also drops the
      frame's readback buffer so that it gets reallocated afterwards.
  **/
  override function endQuery( q : Query ) {
      var heapIndex = frame.queryCurrentHeap;
      var heap = frame.queryHeaps[heapIndex];
      if( heap == null ) {
          var desc = new QueryHeapDesc();
          desc.type = TIMESTAMP;
          desc.count = QUERY_COUNT;
          heap = Driver.createQueryHeap(desc);
          frame.queryHeaps[heapIndex] = heap;
          // The readback buffer is sized from the number of heaps: discard the old one.
          var readback = frame.queryBuffer;
          if( readback != null ) {
              readback.release();
              frame.queryBuffer = null;
          }
      }
      var slot = frame.queryHeapOffset;
      frame.queryHeapOffset = slot + 1;
      q.offset = slot;
      q.heap = heapIndex;
      frame.commandList.endQuery(heap, TIMESTAMP, slot);
      frame.queriesPending.push(q);
      // Heap exhausted: continue in the next one.
      if( frame.queryHeapOffset == QUERY_COUNT ) {
          frame.queryHeapOffset = 0;
          frame.queryCurrentHeap++;
      }
  }
  /**
      Tells whether the result of `q` has been read back.
      `beginQueries` sets `q.heap` to -1 once `q.result` has been filled in.
  **/
  override function queryResultAvailable( q : Query ) {
      var resolved = q.heap < 0;
      return resolved;
  }
  /**
      Returns the value stored in `q.result` (filled in by `beginQueries`).
  **/
  override function queryResult( q : Query ) {
      var value = q.result;
      return value;
  }
  /**
      Reads back the results of the frame's pending queries.

      Maps the frame's query readback buffer, converts the 64-bit value stored
      for each pending query into `q.result`, marks the query as resolved
      (`q.heap = -1`) and empties the pending list. Does nothing when there is
      no readback buffer or no pending query.
  **/
  function beginQueries() {
      var readback = frame.queryBuffer;
      var pending = frame.queriesPending;
      if( readback == null || pending.length == 0 )
          return;
      var ptr : hl.BytesAccess<Int64> = readback.map(0, null);
      var q = pending.pop();
      while( q != null ) {
          if( q.heap >= 0 ) {
              // One 64-bit entry per query slot, heaps laid out back to back.
              var v = ptr[q.heap * QUERY_COUNT + q.offset];
              // Whole and fractional parts are divided separately by tsFreq,
              // then scaled by 1e9 (presumably ticks -> nanoseconds; confirm tsFreq units).
              var whole = (v / tsFreq).low;
              var frac = (v % tsFreq).low / tsFreq.low;
              q.result = (whole + frac) * 1e9;
              q.heap = -1;
          }
          q = pending.pop();
      }
      readback.unmap(0, null);
  }
  /**
      Records the commands that copy the results of every query issued this
      frame from the query heaps into the frame's readback buffer, then resets
      the frame's query cursor.

      The readback buffer is allocated on demand with room for `QUERY_COUNT`
      8-byte entries per existing heap. Results are laid out heap after heap,
      which is the layout `beginQueries` uses to locate them.
  **/
  function flushQueries() {
      // Slots used in the heap currently being filled (0 when that heap is untouched).
      var partial = frame.queryHeapOffset;
      if( partial > 0 )
          frame.queryCurrentHeap++;
      var heapCount = frame.queryCurrentHeap;
      if( heapCount == 0 )
          return;
      if( frame.queryBuffer == null )
          frame.queryBuffer = allocGPU(frame.queryHeaps.length * QUERY_COUNT * 8, READBACK, COPY_DEST);
      var position = 0;
      for( i in 0...heapCount ) {
          // Every heap is full except possibly the last one. When the number of
          // queries is an exact multiple of QUERY_COUNT, endQuery has already
          // rolled the offset back to 0, so the last heap is full as well: it
          // must not be resolved with a count of 0.
          var count = (i == heapCount - 1 && partial > 0) ? partial : QUERY_COUNT;
          frame.commandList.resolveQueryData(frame.queryHeaps[i], TIMESTAMP, 0, count, frame.queryBuffer, position);
          position += count * 8;
      }
      frame.queryCurrentHeap = 0;
      frame.queryHeapOffset = 0;
  }
  1717. // --- DRAW etc.
  /**
      Issues an indexed draw of `ntriangles` triangles starting at `startIndex`
      in `ibuf`, as a single instance.

      The pipeline state is flushed first and the index buffer is only rebound
      when it differs from the one currently set.
  **/
  override function draw( ibuf : Buffer, startIndex : Int, ntriangles : Int ) {
      flushPipeline();
      var cmd = frame.commandList;
      if( currentIndex != ibuf ) {
          currentIndex = ibuf;
          cmd.iaSetIndexBuffer(ibuf.vbuf.iview);
      }
      var indexCount = ntriangles * 3;
      cmd.drawIndexedInstanced(indexCount, 1, startIndex, 0, 0);
      flushResources();
  }
  /**
      Issues an instanced indexed draw described by `commands`.

      When `commands.data` is set, the draw is executed indirectly from that
      buffer using `indirectCommand`; otherwise a direct instanced draw is
      recorded from the counts stored in `commands`.
  **/
  override function drawInstanced(ibuf:Buffer, commands:InstanceBuffer) {
      flushPipeline();
      var cmd = frame.commandList;
      if( currentIndex != ibuf ) {
          currentIndex = ibuf;
          cmd.iaSetIndexBuffer(ibuf.vbuf.iview);
      }
      if( commands.data == null )
          cmd.drawIndexedInstanced(commands.indexCount, commands.commandCount, commands.startIndex, 0, 0);
      else
          cmd.executeIndirect(indirectCommand, commands.commandCount, commands.data, 0, null, 0);
      flushResources();
  }
  /**
      Switches the frame to fresh descriptor heaps when the current ones are
      running low (fewer than 128 shader resource views or fewer than 64
      sampler views available), and binds the new pair on the command list.
  **/
  function flushResources() {
      if( frame.shaderResourceViews.available >= 128 && frame.samplerViews.available >= 64 )
          return;
      var views = frame.shaderResourceCache.next();
      frame.shaderResourceViews = views;
      var samplers = frame.samplerCache.next();
      frame.samplerViews = samplers;
      var heaps = tmp.descriptors2;
      heaps[0] = @:privateAccess views.heap;
      heaps[1] = @:privateAccess samplers.heap;
      frame.commandList.setDescriptorHeaps(heaps);
  }
  /**
      Submits the frame's command list and signals the fence for it.

      Query results are resolved first, then the command list is closed and
      executed, the current shader is cleared, driver messages are flushed and
      the fence is signaled with a newly assigned value that is also stored on
      the frame. `onResize` is not used by this body.
  **/
  function flushFrame( onResize : Bool = false ) {
      flushQueries();
      var cmd = frame.commandList;
      cmd.close();
      cmd.execute();
      currentShader = null;
      Driver.flushMessages();
      var value = fenceValue++;
      frame.fenceValue = value;
      Driver.signal(fence, value);
  }
  /**
      Presents the current back buffer and starts the next frame.

      The back buffer is transitioned to PRESENT, the frame is flushed and
      presented using the window's vsync setting, then the driver waits for the
      frame at the new back buffer index before beginning it. If a device error
      was flagged, the driver is disposed, reset, and `onContextLost` is invoked.
  **/
  override function present() {
      transition(frame.backBuffer, PRESENT);
      flushFrame();
      Driver.present(window.vsync);
      var nextIndex = Driver.getCurrentBackBufferIndex();
      waitForFrame(nextIndex);
      beginFrame();
      if( !hasDeviceError )
          return;
      Sys.println("----------- OnContextLost ----------");
      hasDeviceError = false;
      dispose();
      reset();
      onContextLost();
  }
  /**
      Blocks until the fence has reached the value recorded for `frames[index]`.
      Returns immediately when the fence is already at or past that value.
  **/
  function waitForFrame( index : Int ) {
      var target = frames[index].fenceValue;
      if( fence.getValue() >= target )
          return;
      fence.setEvent(target, fenceEvent);
      fenceEvent.wait(-1);
  }
  1780. }
  1781. #end