// DX12Driver.hx
  1. package h3d.impl;
  2. #if (hldx && dx12)
  3. import h3d.impl.Driver;
  4. import dx.Dx12;
  5. import haxe.Int64;
  6. import h3d.mat.Pass;
  7. import h3d.mat.Stencil;
  8. private typedef Driver = Dx12;
  9. class TempBuffer {
  10. public var next : TempBuffer;
  11. public var buffer : GpuResource;
  12. public var size : Int;
  13. public var lastUse : Int;
  14. public function new() {
  15. }
  16. public inline function count() {
  17. var b = this;
  18. var k = 0;
  19. while( b != null ) {
  20. k++;
  21. b = b.next;
  22. }
  23. return k;
  24. }
  25. }
  26. class ManagedHeapArray {
  27. var heaps : Array<ManagedHeap>;
  28. var type : DescriptorHeapType;
  29. var size : Int;
  30. var cursor : Int;
  31. public function new(type,size) {
  32. this.type = type;
  33. this.size = size;
  34. heaps = [];
  35. }
  36. public function reset() {
  37. cursor = 0;
  38. }
  39. public function next() {
  40. var h = heaps[cursor++];
  41. if( h == null ) {
  42. h = new ManagedHeap(type, size);
  43. heaps.push(h);
  44. } else
  45. h.clear();
  46. return h;
  47. }
  48. }
/**
	Result of a `BumpAllocator` allocation: the backing resource, a CPU pointer
	to the start of the allocated range, and the range's offset and size in bytes.
**/
@:struct class BumpAllocation {
	// GPU resource the range was carved from.
	public var resource : GpuResource = null;
	// CPU pointer already offset to the start of the range.
	public var cpuAdress : hl.Bytes = null;
	// Byte offset of the range inside `resource`.
	public var offset : Int = 0;
	// Size of the range in bytes (after alignment rounding).
	public var byteSize : Int = 0;
	public function new() {
	}
}
  57. class BumpAllocator {
  58. var resource : GpuResource;
  59. var capacity : Int;
  60. var cpuAdress : hl.Bytes;
  61. var heap : HeapProperties;
  62. var offset : Int = 0;
  63. var next : BumpAllocator;
  64. public function new( size : Int ) {
  65. this.capacity = size;
  66. heap = new HeapProperties();
  67. var desc = new ResourceDesc();
  68. var flags = new haxe.EnumFlags();
  69. desc.dimension = BUFFER;
  70. desc.width = capacity;
  71. desc.height = 1;
  72. desc.depthOrArraySize = 1;
  73. desc.mipLevels = 1;
  74. desc.sampleDesc.count = 1;
  75. desc.layout = ROW_MAJOR;
  76. heap.type = UPLOAD;
  77. resource = Driver.createCommittedResource(heap, flags, desc, GENERIC_READ, null);
  78. cpuAdress = resource.map(0, null);
  79. }
  80. public function reset() {
  81. offset = 0;
  82. if ( next != null) {
  83. next.release();
  84. next = null;
  85. }
  86. }
  87. public function release() {
  88. resource.release();
  89. resource = null;
  90. offset = 0;
  91. capacity = 0;
  92. heap = null;
  93. cpuAdress = null;
  94. if ( next != null) {
  95. next.release();
  96. next = null;
  97. }
  98. }
  99. public inline function alloc( size : Int, alignment = 256, ?allocation : BumpAllocation ) {
  100. var sz = size & ~(alignment - 1);
  101. if( sz != size ) sz += alignment;
  102. if ( allocation == null )
  103. allocation = new BumpAllocation();
  104. return tryAlloc(sz, alignment, allocation);
  105. }
  106. function tryAlloc( size, alignment = 256, allocation : BumpAllocation ) {
  107. var offsetAligned = offset & ~(alignment - 1);
  108. if( offsetAligned != offset ) offsetAligned += alignment;
  109. var newOffset = size + offsetAligned;
  110. if ( newOffset > capacity ) {
  111. if ( next == null )
  112. next = new BumpAllocator(hxd.Math.imax(h3d.impl.DX12Driver.INITIAL_BUMP_ALLOCATOR_SIZE, size));
  113. return next.tryAlloc(size, alignment, allocation);
  114. }
  115. allocation.byteSize = size;
  116. allocation.offset = offsetAligned;
  117. allocation.cpuAdress = cpuAdress.offset(offsetAligned);
  118. allocation.resource = resource;
  119. offset = newOffset;
  120. return allocation;
  121. }
  122. }
  123. class DxFrame {
  124. public var backBuffer : ResourceData;
  125. public var backBufferView : Address;
  126. public var depthBuffer : GpuResource;
  127. public var allocator : CommandAllocator;
  128. public var commandList : CommandList;
  129. public var fenceValue : Int64;
  130. public var toRelease : Array<Resource> = [];
  131. public var tmpBufToNullify : Array<Texture> = [];
  132. public var tmpBufToRelease : Array<dx.Dx12.GpuResource> = [];
  133. public var shaderResourceViews : ManagedHeap;
  134. public var samplerViews : ManagedHeap;
  135. public var shaderResourceCache : ManagedHeapArray;
  136. public var samplerCache : ManagedHeapArray;
  137. public var availableBuffers : TempBuffer;
  138. public var usedBuffers : TempBuffer;
  139. public var queryHeaps : Array<QueryHeap> = [];
  140. public var queriesPending : Array<Query> = [];
  141. public var queryCurrentHeap : Int;
  142. public var queryHeapOffset : Int;
  143. public var queryBuffer : GpuResource;
  144. public var bumpAllocator : BumpAllocator;
  145. public function new() {
  146. }
  147. }
  148. class ShaderRegisters {
  149. public var globals : Int;
  150. public var params : Int;
  151. public var buffers : Int;
  152. public var textures : Int;
  153. public var samplers : Int;
  154. public var texturesCount : Int;
  155. public var texturesTypes : Array<hxsl.Ast.Type>;
  156. public var bufferTypes : Array<hxsl.Ast.BufferKind>;
  157. public var srv : Address;
  158. public var samplersView : Address;
  159. public var lastHeapCount : Int;
  160. public var lastTextures : Array<Texture> = [];
  161. public var lastTexturesBits : Array<Int>= [];
  162. public function new() {
  163. }
  164. }
  165. class CompiledShader {
  166. public var vertexRegisters : ShaderRegisters;
  167. public var fragmentRegisters : ShaderRegisters;
  168. public var format : hxd.BufferFormat;
  169. public var pipeline : GraphicsPipelineStateDesc;
  170. public var pipelines : PipelineCache<GraphicsPipelineState> = new PipelineCache();
  171. public var rootSignature : RootSignature;
  172. public var inputLayout : hl.CArray<InputElementDesc>;
  173. public var inputCount : Int;
  174. public var shader : hxsl.RuntimeShader;
  175. public var isCompute : Bool;
  176. public var computePipeline : ComputePipelineState;
  177. public function new() {
  178. }
  179. }
/**
	Packed argument block grouping a resource, its SRV and sampler descriptions,
	and the two destination descriptor addresses.
	NOTE(review): presumably passed to a native helper as one struct, so the field order matters. Confirm before reordering.
**/
@:struct class SrvArgs {
	public var res : GpuResource;
	@:packed public var resourceDesc : Tex2DSRV;
	@:packed public var samplerDesc : SamplerDesc;
	public var srvAddr : Address;
	public var samplerAddr : Address;
}
  187. @:struct class TempObjects {
  188. public var renderTargets : hl.BytesAccess<Address>;
  189. public var depthStencils : hl.BytesAccess<Address>;
  190. public var vertexViews : hl.CArray<VertexBufferView>;
  191. public var descriptors2 : hl.NativeArray<DescriptorHeap>;
  192. public var barriers : hl.CArray<ResourceBarrier>;
  193. public var resourcesToTransition : Array<ResourceData>;
  194. public var maxBarriers : Int;
  195. public var barrierCount : Int;
  196. @:packed public var heap(default,null) : HeapProperties;
  197. @:packed public var barrier(default,null) : ResourceBarrier;
  198. @:packed public var clearColor(default,null) : ClearColor;
  199. @:packed public var clearValue(default,null) : ClearValue;
  200. @:packed public var viewport(default,null) : Viewport;
  201. @:packed public var rect(default,null) : Rect;
  202. @:packed public var bufferSRV(default,null) : BufferSRV;
  203. @:packed public var samplerDesc(default,null) : SamplerDesc;
  204. @:packed public var cbvDesc(default,null) : ConstantBufferViewDesc;
  205. @:packed public var rtvDesc(default,null) : RenderTargetViewDesc;
  206. @:packed public var uavDesc(default,null) : UAVBufferViewDesc;
  207. @:packed public var wtexDesc(default,null) : UAVTextureViewDesc;
  208. @:packed public var subResourceData(default, null) : SubResourceData;
  209. @:packed public var bumpAllocation(default,null) : BumpAllocation;
  210. public var pass : h3d.mat.Pass;
  211. public function new() {
  212. renderTargets = new hl.Bytes(8 * 8);
  213. depthStencils = new hl.Bytes(8);
  214. vertexViews = hl.CArray.alloc(VertexBufferView, 16);
  215. maxBarriers = 100;
  216. barriers = hl.CArray.alloc( ResourceBarrier, maxBarriers );
  217. resourcesToTransition = new Array<ResourceData>();
  218. resourcesToTransition.resize(maxBarriers);
  219. barrierCount = 0;
  220. pass = new h3d.mat.Pass("default");
  221. pass.stencil = new h3d.mat.Stencil();
  222. bufferSRV.dimension = BUFFER;
  223. bufferSRV.flags = RAW;
  224. bufferSRV.shader4ComponentMapping = ShaderComponentMapping.DEFAULT;
  225. samplerDesc.comparisonFunc = NEVER;
  226. samplerDesc.maxLod = 1e30;
  227. descriptors2 = new hl.NativeArray(2);
  228. uavDesc.viewDimension = BUFFER;
  229. barrier.subResource = -1; // all
  230. }
  231. }
  232. class ManagedHeap {
  233. public var stride(default,null) : Int;
  234. var size : Int;
  235. var cursor : Int;
  236. var type : DescriptorHeapType;
  237. var heap : DescriptorHeap;
  238. var address : Address;
  239. var cpuToGpu : Int64;
  240. public var available(get,never) : Int;
  241. public function new(type,size=8) {
  242. this.type = type;
  243. this.stride = Driver.getDescriptorHandleIncrementSize(type);
  244. allocHeap(size);
  245. }
  246. function allocHeap( size : Int ) {
  247. var desc = new DescriptorHeapDesc();
  248. desc.type = type;
  249. desc.numDescriptors = size;
  250. if( type == CBV_SRV_UAV || type == SAMPLER )
  251. desc.flags = SHADER_VISIBLE;
  252. heap = new DescriptorHeap(desc);
  253. cursor = 0;
  254. this.size = size;
  255. address = heap.getHandle(false);
  256. cpuToGpu = desc.flags == SHADER_VISIBLE ? ( heap.getHandle(true).value - address.value ) : 0;
  257. }
  258. public dynamic function onFree( prev : DescriptorHeap ) {
  259. throw "Too many buffers";
  260. }
  261. public function alloc( count : Int ) {
  262. if( cursor + count > size ) {
  263. cursor = 0;
  264. var prev = heap;
  265. allocHeap((size * 3) >> 1);
  266. onFree(prev);
  267. }
  268. var pos = cursor;
  269. cursor += count;
  270. return address.offset(pos * stride);
  271. }
  272. inline function get_available() {
  273. return size - cursor;
  274. }
  275. public function clear() {
  276. cursor = 0;
  277. }
  278. public inline function toGPU( address : Address ) : Address {
  279. return new Address(address.value + cpuToGpu);
  280. }
  281. }
/**
	GPU resource together with its tracked state.
	`state` is the state recorded by the last flushed barrier; `targetState` is the
	state requested through `transition()` (the two differ while a barrier is pending).
**/
class ResourceData {
	public var res : GpuResource;
	public var state : ResourceState;
	public var targetState : ResourceState;
	public function new() {
	}
}
/** Resource data for a buffer, with an `uploaded` flag. **/
class BufferData extends ResourceData {
	public var uploaded : Bool;
}
/** Buffer data carrying both a vertex buffer view and an index buffer view, plus its size. **/
class VertexBufferData extends BufferData {
	public var view : dx.Dx12.VertexBufferView;
	public var iview : dx.Dx12.IndexBufferView;
	public var size : Int;
}
/**
	Temporary upload state attached to a texture.
	NOTE(review): `lastMipMapUploadPerSide` presumably stores, per side, the last uploaded mip level. Confirm against the upload code.
**/
class TextureUploadBuffer {
	public var tmpBuf : dx.Dx12.GpuResource;
	public var lastMipMapUploadPerSide : hl.Bytes;
	public function new() {
	}
}
  303. class TextureData extends ResourceData {
  304. public var format : DxgiFormat;
  305. public var color : h3d.Vector4;
  306. public var uploadBuffer : TextureUploadBuffer;
  307. var clearColorChanges : Int;
  308. public function setClearColor( c : h3d.Vector4 ) {
  309. var color = color;
  310. if( clearColorChanges > 10 || (color.r == c.r && color.g == c.g && color.b == c.b && color.a == c.a) )
  311. return false;
  312. clearColorChanges++;
  313. color.load(c);
  314. return true;
  315. }
  316. }
/** Location of a query (heap index and offset inside it) and its result value. **/
class QueryData {
	public var heap : Int;
	public var offset : Int;
	public var result : Float;
	public function new() {
	}
}
  324. class DX12Driver extends h3d.impl.Driver {
	var pipelineBuilder = new PipelineCache.PipelineBuilder();
	var driver : DriverInstance;
	// Reported by isDisposed(); set by forceDeviceError().
	var hasDeviceError = false;
	var window : dx.Window;
	var onContextLost : Void -> Void;
	// One DxFrame per back buffer; `frame` is the one currently being recorded.
	var frames : Array<DxFrame>;
	var frame : DxFrame;
	// Fence, event and counter used by waitGpu().
	var fence : Fence;
	var fenceEvent : WaitEvent;
	// CPU-only descriptor heaps for render target and depth stencil views.
	var renderTargetViews : ManagedHeap;
	var depthStenciViews : ManagedHeap;
	var indirectCommand : CommandSignature;
	var currentFrame : Int;
	var fenceValue : Int64 = 0;
	var currentPass : h3d.mat.Pass;
	// Size last applied by resize().
	var currentWidth : Int;
	var currentHeight : Int;
	var currentShader : CompiledShader;
	var compiledShaders : Map<Int,CompiledShader> = new Map();
	var compiler : ShaderCompiler;
	var currentIndex : Buffer;
	// Reusable scratch structures.
	var tmp : TempObjects;
	var currentRenderTargets : Array<h3d.mat.Texture> = [];
	var defaultDepth : h3d.mat.Texture;
	var depthEnabled = true;
	var curStencilRef : Int = -1;
	var lastRtvDesc : RenderTargetViewDesc;
	// Size of the current render target / viewport.
	var rtWidth : Int;
	var rtHeight : Int;
	var frameCount : Int;
	// Value returned by Driver.getTimestampFrequency().
	var tsFreq : haxe.Int64;
	var heapCount : Int;
	// Initial descriptor count of the RTV and DSV heaps.
	public static var INITIAL_RT_COUNT = 1024;
	// Initial (and minimum chained) bump allocator size, in bytes.
	public static var INITIAL_BUMP_ALLOCATOR_SIZE = 2 * 1024 * 1024;
	// Number of frames / back buffers.
	public static var BUFFER_COUNT = #if console 3 #else 2 #end;
	// Passed to Driver.create() as device name.
	public static var DEVICE_NAME = null;
	public static var DEBUG = false; // requires dxil.dll when set to true
	/** Binds the driver to the first registered `dx.Window` and performs the initial `reset()`. **/
	public function new() {
		window = @:privateAccess dx.Window.windows[0];
		reset();
	}
  366. override function hasFeature(f:Feature) {
  367. return switch(f) {
  368. case Queries, BottomLeftCoords:
  369. false;
  370. default:
  371. true;
  372. };
  373. }
	/** Always returns true: no texture format is rejected here. **/
	override function isSupportedFormat(fmt:h3d.mat.Data.TextureFormat):Bool {
		return true;
	}
  377. function reset() {
  378. var flags = new DriverInitFlags();
  379. if( DEBUG ) flags.set(DriverInitFlag.DEBUG);
  380. driver = Driver.create(window, flags, DEVICE_NAME);
  381. frames = [];
  382. for(i in 0...BUFFER_COUNT) {
  383. var f = new DxFrame();
  384. f.backBuffer = new ResourceData();
  385. f.allocator = new CommandAllocator(DIRECT);
  386. f.commandList = new CommandList(DIRECT, f.allocator, null);
  387. f.commandList.close();
  388. f.shaderResourceCache = new ManagedHeapArray(CBV_SRV_UAV, 1024);
  389. f.samplerCache = new ManagedHeapArray(SAMPLER, 1024);
  390. if ( f.bumpAllocator != null )
  391. f.bumpAllocator.release();
  392. f.bumpAllocator = new BumpAllocator(INITIAL_BUMP_ALLOCATOR_SIZE);
  393. frames.push(f);
  394. }
  395. fence = new Fence(0, NONE);
  396. fenceEvent = new WaitEvent(false);
  397. tmp = new TempObjects();
  398. renderTargetViews = new ManagedHeap(RTV, INITIAL_RT_COUNT);
  399. depthStenciViews = new ManagedHeap(DSV, INITIAL_RT_COUNT);
  400. renderTargetViews.onFree = function(prev) frame.toRelease.push(prev);
  401. depthStenciViews.onFree = function(prev) frame.toRelease.push(prev);
  402. if ( h3d.Engine.getCurrent() != null ) {
  403. defaultDepth = new h3d.mat.Texture(0,0, Depth24Stencil8);
  404. defaultDepth.t = new TextureData();
  405. defaultDepth.t.state = defaultDepth.t.targetState = DEPTH_WRITE;
  406. defaultDepth.name = "defaultDepth";
  407. }
  408. var desc = new CommandSignatureDesc();
  409. var adesc = hl.CArray.alloc(IndirectArgumentDesc, 1);
  410. desc.byteStride = 5 * 4;
  411. desc.numArgumentDescs = 1;
  412. desc.argumentDescs = adesc;
  413. adesc[0].type = DRAW_INDEXED;
  414. indirectCommand = Driver.createCommandSignature(desc,null);
  415. tsFreq = Driver.getTimestampFrequency();
  416. compiler = new ShaderCompiler();
  417. resize(window.width, window.height);
  418. }
  419. function beginFrame() {
  420. frameCount = hxd.Timer.frameCount;
  421. heapCount++;
  422. currentFrame = Driver.getCurrentBackBufferIndex();
  423. var prevFrame = frame;
  424. frame = frames[currentFrame];
  425. defaultDepth.t.res = frame.depthBuffer;
  426. frame.allocator.reset();
  427. frame.commandList.reset(frame.allocator, null);
  428. frame.bumpAllocator.reset();
  429. while( frame.toRelease.length > 0 )
  430. frame.toRelease.pop().release();
  431. while( frame.tmpBufToRelease.length > 0 ) {
  432. var tmpBuf = frame.tmpBufToRelease.pop();
  433. if ( tmpBuf != null )
  434. tmpBuf.release();
  435. }
  436. if ( prevFrame != null ) {
  437. while ( prevFrame.tmpBufToNullify.length > 0 ) {
  438. var t = prevFrame.tmpBufToNullify.pop();
  439. t.uploadBuffer = null;
  440. }
  441. }
  442. beginQueries();
  443. var used = frame.usedBuffers;
  444. var b = frame.availableBuffers;
  445. var prev = null;
  446. while( b != null ) {
  447. if( b.lastUse < frameCount - 120 ) {
  448. b.buffer.release();
  449. b = b.next;
  450. } else {
  451. var n = b.next;
  452. b.next = used;
  453. used = b;
  454. b = n;
  455. }
  456. }
  457. frame.availableBuffers = used;
  458. frame.usedBuffers = null;
  459. transition(frame.backBuffer, RENDER_TARGET);
  460. frame.commandList.iaSetPrimitiveTopology(TRIANGLELIST);
  461. renderTargetViews.clear();
  462. depthStenciViews.clear();
  463. curStencilRef = -1;
  464. currentIndex = null;
  465. frame.backBufferView = renderTargetViews.alloc(1);
  466. Driver.createRenderTargetView(frame.backBuffer.res, null, frame.backBufferView);
  467. setRenderTarget(null);
  468. frame.shaderResourceCache.reset();
  469. frame.samplerCache.reset();
  470. frame.shaderResourceViews = frame.shaderResourceCache.next();
  471. frame.samplerViews = frame.samplerCache.next();
  472. var arr = tmp.descriptors2;
  473. arr[0] = @:privateAccess frame.shaderResourceViews.heap;
  474. arr[1] = @:privateAccess frame.samplerViews.heap;
  475. frame.commandList.setDescriptorHeaps(arr);
  476. }
  477. override function clear(?color:Vector4, ?depth:Float, ?stencil:Int) {
  478. if( color != null ) {
  479. var clear = tmp.clearColor;
  480. clear.r = color.r;
  481. clear.g = color.g;
  482. clear.b = color.b;
  483. clear.a = color.a;
  484. var count = currentRenderTargets.length;
  485. var needRebind = false;
  486. for( i in 0...count ) {
  487. var tex = currentRenderTargets[i];
  488. if( tex != null && tex.t.setClearColor(color) ) {
  489. needRebind = true;
  490. // update texture to use another clear value
  491. var prev = tex.t;
  492. tex.t = allocTexture(tex);
  493. @:privateAccess tex.t.clearColorChanges = prev.clearColorChanges;
  494. frame.toRelease.push(prev.res);
  495. Driver.createRenderTargetView(tex.t.res, lastRtvDesc, tmp.renderTargets[i]);
  496. }
  497. tex.flags.set(WasCleared);
  498. frame.commandList.clearRenderTargetView(tmp.renderTargets[i], clear);
  499. }
  500. if ( needRebind )
  501. frame.commandList.omSetRenderTargets(count, tmp.renderTargets, true, depthEnabled ? getDepthViewFromTexture(currentRenderTargets[0], currentRenderTargets[0].depthBuffer.t.state & DEPTH_WRITE == COMMON ) : null);
  502. // clear backbuffer
  503. if( count == 0 )
  504. frame.commandList.clearRenderTargetView(frame.backBufferView, clear);
  505. }
  506. if( depth != null || stencil != null )
  507. frame.commandList.clearDepthStencilView(tmp.depthStencils[0], depth != null ? (stencil != null ? BOTH : DEPTH) : STENCIL, (depth:Float), stencil);
  508. }
	/**
		Blocks until the GPU reaches a newly signaled fence value:
		signals the fence, waits (without timeout) on the event tied to that value,
		then increments the value for the next call.
	**/
	function waitGpu() {
		Driver.signal(fence, fenceValue);
		fence.setEvent(fenceValue, fenceEvent);
		fenceEvent.wait(-1);
		fenceValue++;
	}
  515. override function resize(width:Int, height:Int) {
  516. if( defaultDepth == null || (currentWidth == width && currentHeight == height) )
  517. return;
  518. currentWidth = rtWidth = width;
  519. currentHeight = rtHeight = height;
  520. @:privateAccess defaultDepth.width = width;
  521. @:privateAccess defaultDepth.height = height;
  522. if( frame != null )
  523. flushFrame(true);
  524. waitGpu();
  525. for( f in frames ) {
  526. if( f.backBuffer.res != null )
  527. f.backBuffer.res.release();
  528. if( f.depthBuffer != null )
  529. f.depthBuffer.release();
  530. }
  531. Driver.resize(width, height, BUFFER_COUNT, R8G8B8A8_UNORM);
  532. renderTargetViews.clear();
  533. depthStenciViews.clear();
  534. for( i => f in frames ) {
  535. f.backBuffer.res = Driver.getBackBuffer(i);
  536. f.backBuffer.res.setName("Backbuffer#"+i);
  537. f.backBuffer.state = f.backBuffer.targetState = PRESENT;
  538. var desc = new ResourceDesc();
  539. var flags = new haxe.EnumFlags();
  540. desc.dimension = TEXTURE2D;
  541. desc.width = width;
  542. desc.height = height;
  543. desc.depthOrArraySize = 1;
  544. desc.mipLevels = 1;
  545. desc.sampleDesc.count = 1;
  546. desc.format = D24_UNORM_S8_UINT;
  547. desc.flags.set(ALLOW_DEPTH_STENCIL);
  548. tmp.heap.type = DEFAULT;
  549. tmp.clearValue.format = desc.format;
  550. tmp.clearValue.depth = 1;
  551. tmp.clearValue.stencil= 0;
  552. f.depthBuffer = Driver.createCommittedResource(tmp.heap, flags, desc, DEPTH_WRITE, tmp.clearValue);
  553. f.depthBuffer.setName("Depthbuffer#"+i);
  554. }
  555. beginFrame();
  556. }
	/** Intentionally empty: this override does nothing with the `frame` argument. **/
	override function begin(frame:Int) {
	}
	/** Returns true once a device error has been flagged (see `forceDeviceError`). **/
	override function isDisposed() {
		return hasDeviceError;
	}
  562. override function init( onCreate : Bool -> Void, forceSoftware = false ) {
  563. onContextLost = onCreate.bind(true);
  564. haxe.Timer.delay(onCreate.bind(false), 1);
  565. }
  566. override function getDriverName(details:Bool) {
  567. var desc = "DX12";
  568. if( details ) desc += " "+Driver.getDeviceName();
  569. return desc;
  570. }
	/** Flags the device as failed; `isDisposed()` returns true afterwards. **/
	public function forceDeviceError() {
		hasDeviceError = true;
	}
  574. function transition( res : ResourceData, to : ResourceState ) {
  575. if( res.targetState == to )
  576. return;
  577. // Cancel transition
  578. if ( res.state == to ) {
  579. var found = false;
  580. for (i in 0...tmp.barrierCount) {
  581. if (tmp.resourcesToTransition[i] == res) {
  582. tmp.barrierCount -= 1;
  583. for (j in i...tmp.barrierCount) {
  584. tmp.resourcesToTransition[j] = tmp.resourcesToTransition[j + 1];
  585. }
  586. found = true;
  587. break;
  588. }
  589. }
  590. if (!found)
  591. throw "Resource not found";
  592. res.targetState = to;
  593. return;
  594. }
  595. if( tmp.maxBarriers == tmp.barrierCount) {
  596. flushTransitions();
  597. tmp.maxBarriers += 100;
  598. tmp.barriers = hl.CArray.alloc(ResourceBarrier, tmp.maxBarriers);
  599. tmp.resourcesToTransition = new Array<ResourceData>();
  600. tmp.resourcesToTransition.resize(tmp.maxBarriers);
  601. }
  602. // If state is different from targetState, a barrier has already been requested so we just have to update the targetState
  603. if (res.state == res.targetState)
  604. tmp.resourcesToTransition[tmp.barrierCount++] = res;
  605. res.targetState = to;
  606. }
  607. function flushTransitions() {
  608. if (tmp.barrierCount > 0) {
  609. var totalBarrier = 0;
  610. for (i in 0...tmp.barrierCount) {
  611. var res = tmp.resourcesToTransition[i];
  612. // Resource has been disposed
  613. if (res.res == null)
  614. continue;
  615. var b = tmp.barriers[totalBarrier];
  616. b.resource = res.res;
  617. b.stateBefore = res.state;
  618. b.stateAfter = res.targetState;
  619. res.state = res.targetState;
  620. totalBarrier++;
  621. }
  622. if (totalBarrier > 0)
  623. #if (hldx >= version("1.15.0"))
  624. frame.commandList.resourceBarriers(tmp.barriers, totalBarrier);
  625. #else
  626. for (i in 0...totalBarrier)
  627. frame.commandList.resourceBarrier(tmp.barriers[i]);
  628. #end
  629. tmp.barrierCount = 0;
  630. }
  631. }
  /**
      Returns the depth-stencil view to bind alongside `tex`.

      - `tex == null`: delegates to getDepthView(null, readOnly).
      - `tex` without a depth buffer: disables depth and returns null.
      - otherwise the depth buffer must have exactly the size of `tex`.

      Throws "Depth size mismatch" when the sizes differ.
  **/
  function getDepthViewFromTexture( tex : h3d.mat.Texture, readOnly : Bool ) {
      if( tex == null )
          return getDepthView(null, readOnly);
      var depth = tex.depthBuffer;
      if( depth == null ) {
          depthEnabled = false;
          return null;
      }
      if( depth.width != tex.width || depth.height != tex.height )
          throw "Depth size mismatch";
      return getDepthView(depth, readOnly);
  }
  /**
      Creates a depth-stencil view for `depthBuffer` (or for the frame's own
      depth buffer when null), stores it in slot 0 of `tmp.depthStencils` and
      returns that array. Also sets `depthEnabled` to true.

      When `readOnly` is set, the view is flagged read-only for both depth and
      stencil. A custom depth buffer whose current state has neither DEPTH_READ
      nor DEPTH_WRITE set gets a transition queued to the matching depth state.
  **/
  function getDepthView( depthBuffer : h3d.mat.Texture, readOnly : Bool ) {
      var useFrameDepth = depthBuffer == null;
      var resource = useFrameDepth ? frame.depthBuffer : depthBuffer.t.res;
      var handle = depthStenciViews.alloc(1);
      var desc = new DepthStencilViewDesc();
      desc.arraySize = 1;
      desc.mipSlice = 0;
      desc.firstArraySlice = 0;
      desc.format = useFrameDepth ? D24_UNORM_S8_UINT : toDxgiDepthFormat(depthBuffer.format);
      desc.viewDimension = TEXTURE2D;
      if( readOnly ) {
          desc.flags.set(READ_ONLY_DEPTH);
          desc.flags.set(READ_ONLY_STENCIL);
      }
      Driver.createDepthStencilView(resource, desc, handle);
      var views = tmp.depthStencils;
      views[0] = handle;
      depthEnabled = true;
      if( !useFrameDepth ) {
          var data = depthBuffer.t;
          // Only queue a transition when the resource is in no depth state at all
          if( data.state & ( DEPTH_READ | DEPTH_WRITE ) == COMMON )
              transition(data, readOnly ? DEPTH_READ : DEPTH_WRITE);
      }
      return views;
  }
  /** Returns the driver's `defaultDepth` texture. **/
  override function getDefaultDepthBuffer() : h3d.mat.Texture {
      return defaultDepth;
  }
  /**
      Records `w` x `h` as the current render target size and sets both the
      viewport and the scissor rectangle on the command list to cover it.
  **/
  function initViewport(w,h) {
      rtWidth = w;
      rtHeight = h;

      var vp = tmp.viewport;
      vp.width = w;
      vp.height = h;
      vp.maxDepth = 1;

      var scissor = tmp.rect;
      scissor.top = 0;
      scissor.left = 0;
      scissor.right = w;
      scissor.bottom = h;

      var cmd = frame.commandList;
      cmd.rsSetScissorRects(1, scissor);
      cmd.rsSetViewports(1, vp);
  }
  /**
      Binds a single render target: `tex`, or the frame's back buffer when
      `tex` is null.

      - Allocates `tex` on demand and queues its transition to RENDER_TARGET.
        If the allocation left the device in error, returns without binding
        anything (same bail-out as setRenderTargets()).
      - `layer` / `mipLevel` select the sub-resource of `tex`; they are ignored
        for the back buffer, for which no explicit view description is built.
      - A texture that was never cleared is cleared to transparent black first.
      - `depthBinding` selects whether a depth view is bound and whether it is
        read-only; getDepthViewFromTexture() may still disable depth.

      Finally resets the viewport/scissor to the target size and informs the
      pipeline builder.
  **/
  override function setRenderTarget(tex:Null<h3d.mat.Texture>, layer:Int = 0, mipLevel:Int = 0, depthBinding : h3d.Engine.DepthBinding = ReadWrite) {
      if( tex != null ) {
          if( tex.t == null ) {
              tex.alloc();
              // Allocation failed because of a device error: tex.t is not usable
              if( hasDeviceError ) return;
          }
          transition(tex.t, RENDER_TARGET);
      }
      depthEnabled = depthBinding != NotBound;
      var isArr = tex != null && (tex.flags.has(IsArray) || tex.flags.has(Cube));
      var desc = null;
      // An explicit view description is only needed (and only possible) for a
      // texture target; without a texture there is no format to read.
      if( tex != null && (layer != 0 || mipLevel != 0 || isArr) ) {
          desc = tmp.rtvDesc;
          desc.format = tex.t.format;
          if( isArr ) {
              desc.viewDimension = TEXTURE2DARRAY;
              desc.mipSlice = mipLevel;
              desc.firstArraySlice = layer;
              desc.arraySize = 1;
              desc.planeSlice = 0;
          } else {
              desc.viewDimension = TEXTURE2D;
              desc.mipSlice = mipLevel;
              desc.planeSlice = 0;
          }
      }
      lastRtvDesc = desc;
      if( tex != null ) {
          var texView = renderTargetViews.alloc(1);
          Driver.createRenderTargetView(tex.t.res, desc, texView);
          tmp.renderTargets[0] = texView;
      } else {
          tmp.renderTargets[0] = frame.backBufferView;
      }
      // The RENDER_TARGET barrier must be recorded before clearing/binding
      flushTransitions();
      if( tex != null && !tex.flags.has(WasCleared) ) {
          tex.flags.set(WasCleared);
          var clear = tmp.clearColor;
          clear.r = 0;
          clear.g = 0;
          clear.b = 0;
          clear.a = 0;
          frame.commandList.clearRenderTargetView(tmp.renderTargets[0], clear);
      }
      frame.commandList.omSetRenderTargets(1, tmp.renderTargets, true, depthEnabled ? getDepthViewFromTexture(tex, depthBinding == ReadOnly ) : null);
      while( currentRenderTargets.length > 0 ) currentRenderTargets.pop();
      if( tex != null ) currentRenderTargets.push(tex);
      // Size of the selected mip, never smaller than 1x1
      var w = tex == null ? currentWidth : tex.width >> mipLevel;
      var h = tex == null ? currentHeight : tex.height >> mipLevel;
      if( w == 0 ) w = 1;
      if( h == 0 ) h = 1;
      initViewport(w, h);
      pipelineBuilder.setRenderTarget(tex, depthEnabled);
  }
  /**
      Maps a Heaps depth pixel format to its DXGI format.
      A null format maps to the raw value 0; both Depth24 and Depth24Stencil8
      map to D24_UNORM_S8_UINT.

      Throws "Unsupported depth format ..." for any other format.
  **/
  function toDxgiDepthFormat( format : hxd.PixelFormat ) {
      if( format == null )
          return cast 0;
      switch( format ) {
      case Depth16: return D16_UNORM;
      case Depth24Stencil8, Depth24: return D24_UNORM_S8_UINT;
      case Depth32: return D32_FLOAT;
      default: throw "Unsupported depth format "+ format;
      }
  }
  /**
      Binds several textures at once as render targets (MRT).

      Each texture is allocated on demand, gets a render target view, is queued
      for a transition to RENDER_TARGET and, if it was never cleared, is
      cleared to transparent black. Returns early, without binding anything,
      if an allocation leaves the device in error.

      The depth view (when `depthBinding` is not NotBound) and the viewport
      size are taken from the first texture.
  **/
  override function setRenderTargets(textures:Array<h3d.mat.Texture>, depthBinding : h3d.Engine.DepthBinding = ReadWrite) {
      var count = textures.length;
      // Drop previously bound targets beyond the new count
      if( currentRenderTargets.length > count )
          currentRenderTargets.resize(count);
      depthEnabled = depthBinding != NotBound;
      lastRtvDesc = null;
      var first = textures[0];
      var viewsBase = renderTargetViews.alloc(count);
      for( idx in 0...count ) {
          var target = textures[idx];
          if( target.t == null ) {
              target.alloc();
              if( hasDeviceError ) return;
          }
          var handle = viewsBase.offset(renderTargetViews.stride * idx);
          Driver.createRenderTargetView(target.t.res, null, handle);
          tmp.renderTargets[idx] = handle;
          currentRenderTargets[idx] = target;
          transition(target.t, RENDER_TARGET);
          if( target.flags.has(WasCleared) )
              continue;
          target.flags.set(WasCleared);
          var black = tmp.clearColor;
          black.r = 0;
          black.g = 0;
          black.b = 0;
          black.a = 0;
          // The barrier has to be recorded before the clear
          flushTransitions();
          frame.commandList.clearRenderTargetView(tmp.renderTargets[idx], black);
      }
      flushTransitions();
      var depthViews = depthEnabled ? getDepthViewFromTexture(first, depthBinding == ReadOnly) : null;
      frame.commandList.omSetRenderTargets(count, tmp.renderTargets, true, depthViews);
      initViewport(first.width, first.height);
      pipelineBuilder.setRenderTargets(textures, depthEnabled);
  }
  /**
      Binds `depthBuffer` as a writable depth target with no color target,
      empties the list of current render targets and resizes the viewport to
      the depth buffer.
  **/
  override function setDepth(depthBuffer : h3d.mat.Texture) {
      var depthViews = getDepthView(depthBuffer, false);
      depthEnabled = true;
      flushTransitions();
      frame.commandList.omSetRenderTargets(0, null, true, depthViews);
      // No color target is bound anymore
      currentRenderTargets.resize(0);
      initViewport(depthBuffer.width, depthBuffer.height);
      pipelineBuilder.setDepth(depthBuffer);
  }
  /**
      Sets the scissor rectangle. A zero origin combined with a negative width
      and a negative height resets it to the whole current render target.
  **/
  override function setRenderZone(x:Int, y:Int, width:Int, height:Int) {
      var scissor = tmp.rect;
      var reset = x == 0 && y == 0 && width < 0 && height < 0;
      if( reset ) {
          scissor.left = 0;
          scissor.top = 0;
          scissor.right = rtWidth;
          scissor.bottom = rtHeight;
      } else {
          scissor.left = x;
          scissor.top = y;
          scissor.right = x + width;
          scissor.bottom = y + height;
      }
      frame.commandList.rsSetScissorRects(1, scissor);
  }
  /**
      Reads layer 0 / mip 0 of the first current render target into `pixels`.
      Throws when no texture target is bound.
  **/
  override function captureRenderBuffer( pixels : hxd.Pixels ) {
      var target = currentRenderTargets[0];
      if( target != null ) {
          captureTexPixels(pixels, target, 0, 0);
          return;
      }
      throw "Can't capture main render buffer in DirectX";
  }
  /**
      Reads back one layer/mip of `tex` into freshly allocated pixels.

      When `region` is given it is clamped in place to the texture bounds and
      only that area is captured; otherwise the whole texture is. The captured
      size is shifted right by `mipLevel` and never smaller than 1x1.
  **/
  override function capturePixels(tex:h3d.mat.Texture, layer:Int, mipLevel:Int, ?region:h2d.col.IBounds):hxd.Pixels {
      var srcWidth = tex.width;
      var srcHeight = tex.height;
      var originX = 0;
      var originY = 0;
      if( region != null ) {
          // Clamp the requested region to the texture (mutates the caller's bounds)
          if( region.xMax > tex.width ) region.xMax = tex.width;
          if( region.yMax > tex.height ) region.yMax = tex.height;
          if( region.xMin < 0 ) region.xMin = 0;
          if( region.yMin < 0 ) region.yMin = 0;
          srcWidth = region.width;
          srcHeight = region.height;
          originX = region.xMin;
          originY = region.yMin;
      }
      var w = srcWidth >> mipLevel;
      var h = srcHeight >> mipLevel;
      if( w == 0 ) w = 1;
      if( h == 0 ) h = 1;
      var pixels = hxd.Pixels.alloc(w, h, tex.format);
      captureTexPixels(pixels, tex, layer, mipLevel, originX, originY);
      return pixels;
  }
  /**
      Copies a `pixels.width` x `pixels.height` area of one sub-resource of
      `tex` into `pixels`, going through a temporary READBACK buffer.

      `x` / `y` are the top-left corner of the source area inside the selected
      sub-resource (NOTE(review): capturePixels() passes region coordinates
      that were not shifted by the mip level; confirm the intended unit).

      This flushes the current frame and blocks until the GPU is idle, then
      begins a new frame. Does nothing for empty `pixels`.
  **/
  function captureTexPixels( pixels: hxd.Pixels, tex:h3d.mat.Texture, layer:Int, mipLevel:Int, x : Int = 0, y : Int = 0) {
      if( pixels.width == 0 || pixels.height == 0 )
          return;
      var totalSize : hl.BytesAccess<Int64> = new hl.Bytes(8);
      var src = new TextureCopyLocation();
      src.res = tex.t.res;
      src.subResourceIndex = mipLevel + layer * tex.mipLevels;
      var srcDesc = makeTextureDesc(tex);
      var dst = new TextureCopyLocation();
      dst.type = PLACED_FOOTPRINT;
      // Layout (row pitch, total size) of the sub-resource once placed in a buffer
      Driver.getCopyableFootprints(srcDesc, src.subResourceIndex, 1, 0, dst.placedFootprint, null, null, totalSize);
      var desc = new ResourceDesc();
      var flags = new haxe.EnumFlags();
      desc.dimension = BUFFER;
      desc.width = totalSize[0];
      desc.height = 1;
      desc.depthOrArraySize = 1;
      desc.mipLevels = 1;
      desc.sampleDesc.count = 1;
      desc.layout = ROW_MAJOR;
      tmp.heap.type = READBACK;
      var tmpBuf = Driver.createCommittedResource(tmp.heap, flags, desc, COPY_DEST, null);
      // right/bottom are edge coordinates, not sizes: they must include the origin
      var box = new Box();
      box.left = x;
      box.right = x + pixels.width;
      box.top = y;
      box.bottom = y + pixels.height;
      box.back = 1;
      transition(tex.t, COPY_SOURCE);
      flushTransitions();
      dst.res = tmpBuf;
      frame.commandList.copyTextureRegion(dst, 0, 0, 0, src, box);
      // Submit and wait so that the buffer content is available on the CPU
      flushFrame();
      waitGpu();
      var output = tmpBuf.map(0, null);
      var stride = hxd.Pixels.calcStride(pixels.width, tex.format);
      var rowStride = dst.placedFootprint.footprint.rowPitch;
      if( rowStride == stride )
          (pixels.bytes:hl.Bytes).blit(pixels.offset, output, 0, stride * pixels.height);
      else {
          // Rows in the buffer are padded to the footprint pitch: copy them one by one
          for( i in 0...pixels.height )
              (pixels.bytes:hl.Bytes).blit(pixels.offset + i * stride, output, i * rowStride, stride);
      }
      tmpBuf.unmap(0,null);
      tmpBuf.release();
      beginFrame();
  }
  // ---- SHADERS -----
  // NOTE(review): looks like a lookup indexed by number of float components
  // (entries 0 and 1 have no format) - confirm against the code that reads it.
  static var VERTEX_FORMATS = [
      null,
      null,
      R32G32_FLOAT,
      R32G32B32_FLOAT,
      R32G32B32A32_FLOAT
  ];
  /**
      Looks for a precompiled binary for the shader source `code`.

      If the source embeds a `//BIN=` marker followed later by a `#`, the text
      in between is decoded as Base64 and returned. Otherwise the lookup is
      delegated to `shaderCache` when one is set. Returns null if nothing is
      found.
  **/
  function getBinaryPayload( code : String ) {
      var marker = "//BIN=";
      var markerPos = code.indexOf(marker);
      if( markerPos >= 0 ) {
          var endPos = code.indexOf("#", markerPos);
          if( endPos >= 0 ) {
              var dataPos = markerPos + marker.length;
              return haxe.crypto.Base64.decode(code.substr(dataPos, endPos - dataPos));
          }
      }
      return shaderCache == null ? null : shaderCache.resolveShaderBinary(code);
  }
  /**
      Returns the compiled bytecode of one shader stage.

      The HLSL source is generated on first use (prefixed with `rootStr`) and
      stored in `sh.code`. A binary found by getBinaryPayload() is used as is;
      otherwise the source is compiled for `profile`.
  **/
  function compileSource( sh : hxsl.RuntimeShader.RuntimeShaderData, profile, baseRegister, rootStr = "" ) {
      var generator = new hxsl.HlslOut();
      generator.baseRegister = baseRegister;
      if( sh.code == null )
          sh.code = rootStr + generator.run(sh.data);
      var cached = getBinaryPayload(sh.code);
      return cached == null ? compiler.compile(sh.code, profile, []) : cached;
  }
  /**
      Returns the generated HLSL source of `shader`: the vertex stage alone in
      Compute mode, otherwise the vertex and fragment sources separated by
      blank lines.
  **/
  override function getNativeShaderCode( shader : hxsl.RuntimeShader ) {
      var vsSource = new hxsl.HlslOut().run(shader.vertex.data);
      if( shader.mode == Compute )
          return vsSource;
      var psSource = new hxsl.HlslOut().run(shader.fragment.data);
      return vsSource+"\n\n\n\n"+psSource;
  }
  /**
      Builds the text of a `#define <name> ...` line describing the root
      signature `sign`, listing its flags followed by the first `paramsCount`
      entries of `params`.

      Root constants are emitted as `RootConstants(...)`; descriptor tables are
      described from their first range only. Tables without ranges, and any
      parameter whose description raises an exception, are skipped.
  **/
  function stringifyRootSignature( sign : RootSignatureDesc, name : String, params : hl.CArray<RootParameterDescriptorTable>, paramsCount : Int ) : String {
      var out = '#define ${name} "RootFlags(';
      if( sign.flags.toInt() != 0 ) {
          // RootFlags
          for( flagName in haxe.EnumTools.getConstructors(RootSignatureFlag) )
              if( sign.flags.has(haxe.EnumTools.createByName(RootSignatureFlag, flagName)) )
                  out += Std.string(flagName) + '|';
          // Drop the trailing separator
          out = out.substr(0, out.length - 1);
      } else
          out += '0'; // no flags
      out += ')",';
      for( i in 0...paramsCount ) {
          var param = params[i];
          var vis = "SHADER_VISIBILITY_"+switch( param.shaderVisibility ) { case VERTEX: "VERTEX"; case PIXEL: "PIXEL"; default: "ALL"; };
          if( param.parameterType == CONSTANTS ) {
              // The slot actually holds a root constants parameter
              var consts = unsafeCastTo(param, RootParameterConstants);
              var constReg = consts.shaderRegister;
              out += 'RootConstants(num32BitConstants=${consts.num32BitValues},b${constReg}, visibility=${vis}),';
              continue;
          }
          try {
              if( param.descriptorRanges == null ) continue;
              var range = param.descriptorRanges[0];
              var reg = range.baseShaderRegister;
              switch( range.rangeType ) {
              case CBV:
                  out += 'DescriptorTable(CBV(b${reg}), visibility = ${vis}),';
              case SRV:
                  out += 'DescriptorTable(SRV(t${reg},numDescriptors = ${range.numDescriptors}), visibility = ${vis}),';
              case SAMPLER:
                  out += 'DescriptorTable(Sampler(s${reg}, space=${range.registerSpace}, numDescriptors = ${range.numDescriptors}), visibility = ${vis}),';
              case UAV:
                  out += 'UAV(u${reg}, visibility = ${vis}),';
              }
          } catch( e : Dynamic ) {
              // Best effort: leave out parameters that cannot be described
              continue;
          }
      }
      out += '\n';
      return out;
  }
  /**
      Reinterprets `v` as an `R` without any runtime check.
      `c` is not read; it only lets the caller name the target type.
  **/
  inline function unsafeCastTo<T,R>( v : T, c : Class<R> ) : R {
      #if (hl_ver < version("1.14.0"))
      // Older HashLink: round-trip through a one-element native array
      var holder = new hl.NativeArray<T>(1);
      holder[0] = v;
      return (cast holder : hl.NativeArray<R>)[0];
      #else
      return hl.Api.unsafeCast(v);
      #end
  }
  /**
      Builds the root signature description for `shader` together with the
      register mapping of each of its stages.

      Globals and params are stored as root constants as long as the estimated
      root signature size stays within 64 units; beyond that, vertex params,
      then fragment params, then globals are moved to CBV descriptor tables.

      Returns `{ sign, registers, params, paramsCount, texDescs }` where
      `registers` holds, per stage, the first register index (`start`) and its
      ShaderRegisters.

      Throws "ASSERT : Too many parameters" as soon as more root parameters
      than allocated would be needed, and "Too many params. ..." when the
      signature cannot be brought under the size limit.
  **/
  function computeRootSignature( shader : hxsl.RuntimeShader ) {
      var allocatedParams = 16;
      var params = hl.CArray.alloc(RootParameterDescriptorTable,allocatedParams);
      var paramsCount = 0, regCount = 0;
      var texDescs = [];
      var globalsParamsCBV = false;
      var vertexParamsCBV = false;
      var fragmentParamsCBV = false;
      // shader.fragment must not be touched for compute shaders
      var isCompute = shader.mode == Compute;

      // Reserves the next root parameter slot, failing before `params` is written out of bounds.
      function reserveParam() {
          if( paramsCount >= allocatedParams )
              throw "ASSERT : Too many parameters";
          return paramsCount++;
      }

      // Appends a descriptor table with a single range and returns that range.
      function allocDescTable(vis) {
          var p = params[reserveParam()];
          p.parameterType = DESCRIPTOR_TABLE;
          p.numDescriptorRanges = 1;
          var rangeArr = hl.CArray.alloc(DescriptorRange,1);
          var range = rangeArr[0];
          texDescs.push(range);
          p.descriptorRanges = rangeArr;
          p.shaderVisibility = vis;
          return range;
      }

      // Consumes one register. Returns -1 when `size` is 0, the parameter index
      // for root constants, or the parameter index with bit 0x100 set when the
      // data lives in a descriptor table of the given range `type`.
      function allocConsts(size,vis,type) {
          var reg = regCount++;
          if( size == 0 ) return -1;
          if( type != null ) {
              var pid = paramsCount;
              var r = allocDescTable(vis);
              r.rangeType = type;
              r.numDescriptors = 1;
              r.baseShaderRegister = reg;
              r.registerSpace = 0;
              return pid | 0x100;
          }
          var pid = reserveParam();
          var p = unsafeCastTo(params[pid], RootParameterConstants);
          p.parameterType = CONSTANTS;
          p.shaderRegister = reg;
          p.shaderVisibility = vis;
          p.num32BitValues = size << 2;
          return pid;
      }

      // Allocates every root parameter of one shader stage.
      function allocParams( sh : hxsl.RuntimeShader.RuntimeShaderData ) {
          var vis = switch( sh.kind ) {
          case Vertex: VERTEX;
          case Fragment: PIXEL;
          default: ALL;
          }
          var regs = new ShaderRegisters();
          regs.globals = allocConsts(sh.globalsSize, vis, globalsParamsCBV ? CBV : null);
          regs.params = allocConsts(sh.paramsSize, vis, (sh.kind == Fragment ? fragmentParamsCBV : vertexParamsCBV) ? CBV : null);
          regs.buffers = paramsCount;
          if( sh.bufferCount > 0 ) {
              regs.bufferTypes = [];
              var p = sh.buffers;
              while( p != null ) {
                  var kind = switch( p.type ) {
                  case TBuffer(_,_,kind): kind;
                  default: throw "assert";
                  }
                  regs.bufferTypes.push(kind);
                  allocConsts(1, vis, switch( kind ) {
                  case Uniform: CBV;
                  case RW: UAV;
                  default: throw "assert";
                  });
                  p = p.next;
              }
          }
          if( sh.texturesCount > 0 ) {
              regs.texturesCount = 0;
              regs.texturesTypes = [];
              for( v in sh.data.vars ) {
                  switch( v.type ) {
                  case TArray(t = TSampler(_) | TRWTexture(_), SConst(n)):
                      for( i in 0...n )
                          regs.texturesTypes.push(t);
                      if( t.match(TSampler(_)) )
                          regs.texturesCount += n;
                      else {
                          // Each writable texture gets its own UAV table
                          for( i in 0...n )
                              allocConsts(1, vis, UAV);
                      }
                  default:
                  }
              }
              if( regs.texturesCount > 0 ) {
                  regs.textures = paramsCount;
                  var r = allocDescTable(vis);
                  r.rangeType = SRV;
                  r.baseShaderRegister = 0;
                  r.registerSpace = 0;
                  r.numDescriptors = regs.texturesCount;
                  regs.samplers = paramsCount;
                  var r = allocDescTable(vis);
                  r.rangeType = SAMPLER;
                  r.baseShaderRegister = 0;
                  r.registerSpace = 0;
                  r.numDescriptors = regs.texturesCount;
              }
          }
          return regs;
      }

      // Costs in units:
      // Descriptor Tables cost 1 each
      // Root CBVs cost 2 each
      // Root SRVs cost 2 each
      // Root UAVs cost 2 each
      // Root Constants cost 1 per 32-bit value
      function calcSize( sh : hxsl.RuntimeShader.RuntimeShaderData ) {
          var s = (sh.globalsSize + sh.paramsSize) << 2;
          // 1 descriptor table for all textures and 1 descriptor table for all samplers
          s += ( sh.texturesCount > 0 ) ? 2 : 0;
          // Counted as 1 descriptor table for all buffers.
          // NOTE(review): allocParams() allocates one table per buffer (and per
          // writable texture), so this may underestimate - confirm.
          s += ( sh.bufferCount > 0 ) ? 1 : 0;
          return s;
      }

      var totalVertex = calcSize(shader.vertex);
      var totalFragment = isCompute ? 0 : calcSize(shader.fragment);
      var total = totalVertex + totalFragment;
      if( total > 64 ) {
          var vertexParamSizeCost = (shader.vertex.paramsSize << 2);
          var fragmentParamSizeCost = isCompute ? 0 : (shader.fragment.paramsSize << 2);
          var fragmentGlobalsSizeCost = isCompute ? 0 : (shader.fragment.globalsSize << 2);
          // Remove the size cost of the root constant and add one descriptor table.
          var withoutVP = total - vertexParamSizeCost + 1;
          var withoutFP = total - fragmentParamSizeCost + 1;
          if( withoutVP <= 64 || ( withoutFP > 64 && withoutVP > 64 ) ) {
              vertexParamsCBV = true;
              total = withoutVP;
          }
          // There are no fragment params to move for a compute shader
          if( total > 64 && !isCompute ) {
              fragmentParamsCBV = true;
              total = total - fragmentParamSizeCost + 1;
          }
          if( total > 64 ) {
              globalsParamsCBV = true;
              // One replacement descriptor table per stage
              var withoutGlobal = total - (shader.vertex.globalsSize << 2) - fragmentGlobalsSizeCost + (isCompute ? 1 : 2);
              if( withoutGlobal > 64 )
                  throw "Too many params. Should not be possible if every params fall into descriptor table.";
          }
      }

      var regs = [];
      for( s in shader.getShaders() )
          regs.push({ start : regCount, registers : allocParams(s) });

      var sign = new RootSignatureDesc();
      if( isCompute ) {
          sign.flags.set(DENY_PIXEL_SHADER_ROOT_ACCESS);
          sign.flags.set(DENY_VERTEX_SHADER_ROOT_ACCESS);
      } else {
          sign.flags.set(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT);
      }
      sign.flags.set(DENY_HULL_SHADER_ROOT_ACCESS);
      sign.flags.set(DENY_DOMAIN_SHADER_ROOT_ACCESS);
      sign.flags.set(DENY_GEOMETRY_SHADER_ROOT_ACCESS);
      #if !xbogdk
      sign.flags.set(DENY_AMPLIFICATION_SHADER_ROOT_ACCESS);
      sign.flags.set(DENY_MESH_SHADER_ROOT_ACCESS);
      #end
      sign.numParameters = paramsCount;
      sign.parameters = cast params;
      return { sign : sign, registers : regs, params : params, paramsCount : paramsCount, texDescs : texDescs };
  }
  /**
      Compiles `shader` and wraps the result in a CompiledShader.

      Compute shaders get their pipeline state created right away. For graphics
      shaders only a template pipeline description and the input layout are
      prepared; the creation call itself is left commented out here.
  **/
  function compileShader( shader : hxsl.RuntimeShader ) : CompiledShader {
      var isCompute = shader.mode == Compute;
      var res = computeRootSignature(shader);
      var c = new CompiledShader();
      var rootStr = stringifyRootSignature(res.sign, "ROOT_SIGNATURE", res.params, res.paramsCount);
      var vs = isCompute ? null : compileSource(shader.vertex, "vs_6_0", 0, rootStr);
      // Fragment registers start after the vertex ones
      var ps = isCompute ? null : compileSource(shader.fragment, "ps_6_0", res.registers[1].start, rootStr);
      var cs = isCompute ? compileSource(shader.compute, "cs_6_0", 0, rootStr) : null;

      var signSize = 0;
      var signBytes = Driver.serializeRootSignature(res.sign, 1, signSize);
      var sign = new RootSignature(signBytes,signSize);
      c.rootSignature = sign;
      c.shader = shader;
      c.vertexRegisters = res.registers[0].registers;

      if( isCompute ) {
          c.isCompute = true;
          var desc = new ComputePipelineStateDesc();
          desc.rootSignature = sign;
          desc.cs.shaderBytecode = cs;
          desc.cs.bytecodeLength = cs.length;
          c.computePipeline = Driver.createComputePipelineState(desc);
          return c;
      }

      c.fragmentRegisters = res.registers[1].registers;

      // Vertex inputs, in declaration order
      var inputs = [];
      for( v in shader.vertex.data.vars )
          if( v.kind.match(Input) )
              inputs.push(v);

      var inputLayout = hl.CArray.alloc(InputElementDesc, inputs.length);
      var format : Array<hxd.BufferFormat.BufferInput> = [];
      for( slot in 0...inputs.length ) {
          var v = inputs[slot];
          var elt = inputLayout[slot];
          var stepRate = 0;
          if( v.qualifiers != null )
              for( q in v.qualifiers )
                  switch( q ) {
                  case PerInstance(k): stepRate = k;
                  default:
                  }
          elt.semanticName = @:privateAccess hxsl.HlslOut.semanticName(v.name).toUtf8();
          elt.inputSlot = slot;
          format.push({ name : v.name, type : hxd.BufferFormat.InputFormat.fromHXSL(v.type) });
          if( stepRate > 0 ) {
              elt.inputSlotClass = PER_INSTANCE_DATA;
              elt.instanceDataStepRate = stepRate;
          } else
              elt.inputSlotClass = PER_VERTEX_DATA;
          elt.alignedByteOffset = 1; // will trigger error if not set in makePipeline()
      }

      var p = new GraphicsPipelineStateDesc();
      p.rootSignature = sign;
      p.vs.bytecodeLength = vs.length;
      p.vs.shaderBytecode = vs;
      p.ps.bytecodeLength = ps.length;
      p.ps.shaderBytecode = ps;
      p.rasterizerState.fillMode = SOLID;
      p.rasterizerState.cullMode = NONE;
      p.primitiveTopologyType = TRIANGLE;
      p.numRenderTargets = 1;
      p.rtvFormats[0] = R8G8B8A8_UNORM;
      p.dsvFormat = UNKNOWN;
      p.sampleDesc.count = 1;
      p.sampleMask = -1;
      p.inputLayout.inputElementDescs = inputLayout;
      p.inputLayout.numElements = inputs.length;
      //Driver.createGraphicsPipelineState(p);

      c.format = hxd.BufferFormat.make(format);
      c.pipeline = p;
      c.inputLayout = inputLayout;
      c.inputCount = inputs.length;
      return c;
  }
  /**
      Queues the native resource of `r` on the current frame's release list and
      detaches it from `r`, resetting both tracked states to PRESENT.
  **/
  function disposeResource( r : ResourceData ) {
      var native = r.res;
      frame.toRelease.push(native);
      // A null `res` marks the entry as disposed for flushTransitions()
      r.res = null;
      r.targetState = PRESENT;
      r.state = PRESENT;
  }
  1214. // ----- BUFFERS
  /**
      Creates a committed buffer resource of `size` bytes on a heap of type
      `heapType`, in the initial state `state`. `uav` additionally allows
      unordered access.
  **/
  function allocGPU( size : Int, heapType, state, uav=false ) {
      var bufferDesc = new ResourceDesc();
      bufferDesc.dimension = BUFFER;
      bufferDesc.width = size;
      bufferDesc.height = 1;
      bufferDesc.depthOrArraySize = 1;
      bufferDesc.mipLevels = 1;
      bufferDesc.sampleDesc.count = 1;
      bufferDesc.layout = ROW_MAJOR;
      if( uav )
          bufferDesc.flags.set(ALLOW_UNORDERED_ACCESS);
      tmp.heap.type = heapType;
      var heapFlags = new haxe.EnumFlags();
      return Driver.createCommittedResource(tmp.heap, heapFlags, bufferDesc, state, null);
  }
  /**
      Allocates the GPU resource backing `m`.

      Uniform and read-write buffers are sized through calcCBVSize(). Index
      buffers get an index view (32-bit when the stride is 4 bytes, 16-bit
      otherwise), plain buffers a vertex view, uniform buffers no view.
  **/
  override function allocBuffer( m : h3d.Buffer ) : GPUBuffer {
      var isUniform = m.flags.has(UniformBuffer);
      var isReadWrite = m.flags.has(ReadWriteBuffer);
      var size = m.getMemSize();
      var bufSize = (isUniform || isReadWrite) ? calcCBVSize(size) : size;

      var buf = new VertexBufferData();
      buf.state = buf.targetState = COPY_DEST;
      buf.res = allocGPU(bufSize, DEFAULT, COMMON, isReadWrite);

      if( isUniform ) {
          // no view
      } else if( m.flags.has(IndexBuffer) ) {
          var indexView = new IndexBufferView();
          indexView.bufferLocation = buf.res.getGpuVirtualAddress();
          indexView.format = m.format.strideBytes == 4 ? R32_UINT : R16_UINT;
          indexView.sizeInBytes = size;
          buf.iview = indexView;
      } else {
          var vertexView = new VertexBufferView();
          vertexView.bufferLocation = buf.res.getGpuVirtualAddress();
          vertexView.sizeInBytes = size;
          vertexView.strideInBytes = m.format.strideBytes;
          buf.view = vertexView;
      }

      buf.size = bufSize;
      buf.uploaded = m.flags.has(Dynamic);
      return buf;
  }
  /**
      Creates the GPU buffer of `b` and records a copy of `bytes` into it.
      The size is 5 32-bit values (20 bytes) per command.
  **/
  override function allocInstanceBuffer(b:InstanceBuffer, bytes:haxe.io.Bytes) {
      var byteSize = b.commandCount * 5 * 4;
      var gpuBuf = new VertexBufferData();
      gpuBuf.state = gpuBuf.targetState = COPY_DEST;
      gpuBuf.res = allocGPU(byteSize, DEFAULT, COMMON);
      // Stage the data, then copy it to the GPU resource
      var staging = allocDynamicBuffer(bytes, byteSize);
      frame.commandList.copyBufferRegion(gpuBuf.res, 0, staging.resource, staging.offset, byteSize);
      b.data = gpuBuf;
  }
  /** Releases the GPU resource of `v` through disposeResource(). **/
  override function disposeBuffer( v : Buffer ) {
      disposeResource(v.vbuf);
  }
  /**
      Queues the native resource of `b` on the current frame's release list and
      clears `b.data`.
  **/
  override function disposeInstanceBuffer( b : InstanceBuffer ) {
      var native : GpuResource = b.data.res;
      frame.toRelease.push(native);
      // disposeResource(b.data);
      b.data = null;
  }
  /**
      Stages `bytesCount` bytes from `bytes` with allocDynamicBuffer() and
      records their copy into `b` at byte offset `startByte`.
  **/
  function updateBuffer( b : BufferData, bytes : hl.Bytes, startByte : Int, bytesCount : Int ) {
      var staging = allocDynamicBuffer(bytes, bytesCount);
      var cmd = frame.commandList;
      cmd.copyBufferRegion(b.res, startByte, staging.resource, staging.offset, bytesCount);
  }
  /**
      Uploads `indiceCount` indices from `buf` (starting at `bufPos`) into the
      index buffer `i` at index `startIndice`, then queues its transition back
      to INDEX_BUFFER.
  **/
  override function uploadIndexData(i:Buffer, startIndice:Int, indiceCount:Int, buf:hxd.IndexBuffer, bufPos:Int) {
      // Index counts are turned into byte counts with this shift (stride in bytes >> 1)
      var shift = i.format.strideBytes >> 1;
      var target = i.vbuf;
      transition(target, COPY_DEST);
      flushTransitions();
      var source = hl.Bytes.getArray(buf.getNative()).offset(bufPos << shift);
      updateBuffer(target, source, startIndice << shift, indiceCount << shift);
      transition(target, INDEX_BUFFER);
  }
  /**
      Uploads `vertexCount` vertices of float data from `buf` (starting at
      float index `bufPos`) into `b` at vertex `startVertex`, then queues the
      transition of `b` back to the state matching its usage flags.
  **/
  override function uploadBufferData(b:Buffer, startVertex:Int, vertexCount:Int, buf:hxd.FloatBuffer, bufPos:Int) {
      var stride = b.format.strideBytes;
      // 4 bytes per float
      var source = hl.Bytes.getArray(buf.getNative()).offset(bufPos<<2);
      transition(b.vbuf, COPY_DEST);
      flushTransitions();
      updateBuffer(b.vbuf, source, startVertex * stride, vertexCount * stride);
      if( b.flags.has(IndexBuffer) )
          transition(b.vbuf, INDEX_BUFFER);
      else if( b.flags.has(ReadWriteBuffer) )
          transition(b.vbuf, UNORDERED_ACCESS);
      else
          transition(b.vbuf, VERTEX_AND_CONSTANT_BUFFER);
  }
  /**
      Uploads `vertexCount` vertices of raw data from `buf` (starting at byte
      `bufPos`) into `b` at vertex `startVertex`, then queues the transition of
      `b` back to the state matching its usage flags.
  **/
  override function uploadBufferBytes(b:Buffer, startVertex:Int, vertexCount:Int, buf:haxe.io.Bytes, bufPos:Int) {
      var stride = b.format.strideBytes;
      transition(b.vbuf, COPY_DEST);
      flushTransitions();
      var source = @:privateAccess buf.b.offset(bufPos);
      updateBuffer(b.vbuf, source, startVertex * stride, vertexCount * stride);
      if( b.flags.has(IndexBuffer) )
          transition(b.vbuf, INDEX_BUFFER);
      else if( b.flags.has(ReadWriteBuffer) )
          transition(b.vbuf, UNORDERED_ACCESS);
      else
          transition(b.vbuf, VERTEX_AND_CONSTANT_BUFFER);
  }
  1295. // ------------ TEXTURES -------
  /**
      Maps the pixel format of `t` to the matching DXGI format.

      Throws "assert" for an S3TC variant outside 1..7 and
      "Unsupported texture format ..." for any other unmapped format.
  **/
  function getTextureFormat( t : h3d.mat.Texture ) : DxgiFormat {
      var fmt = t.format;
      return switch( fmt ) {
      // 8-bit normalized
      case R8: R8_UNORM;
      case RG8: R8G8_UNORM;
      case RGBA: R8G8B8A8_UNORM;
      case SRGB_ALPHA: R8G8B8A8_UNORM_SRGB;
      // 16-bit normalized
      case R16U: R16_UNORM;
      case RG16U: R16G16_UNORM;
      case RGBA16U: R16G16B16A16_UNORM;
      // 16-bit float
      case R16F: R16_FLOAT;
      case RG16F: R16G16_FLOAT;
      case RGBA16F: R16G16B16A16_FLOAT;
      // 32-bit float
      case R32F: R32_FLOAT;
      case RG32F: R32G32_FLOAT;
      case RGB32F: R32G32B32_FLOAT;
      case RGBA32F: R32G32B32A32_FLOAT;
      // packed
      case RGB10A2: R10G10B10A2_UNORM;
      case RG11B10UF: R11G11B10_FLOAT;
      // block compressed
      case S3TC(1): BC1_UNORM;
      case S3TC(2): BC2_UNORM;
      case S3TC(3): BC3_UNORM;
      case S3TC(4): BC4_UNORM;
      case S3TC(5): BC5_UNORM;
      case S3TC(6): BC6H_UF16;
      case S3TC(7): BC7_UNORM;
      case S3TC(_): throw "assert";
      default: throw "Unsupported texture format " + t.format;
      }
  }
  /**
      Builds the 2D texture resource description of `t`: size, layer count,
      mip count and DXGI format, with a sample count of 1.
  **/
  function makeTextureDesc( t : h3d.mat.Texture ) {
      var texDesc = new ResourceDesc();
      texDesc.dimension = TEXTURE2D;
      texDesc.width = t.width;
      texDesc.height = t.height;
      texDesc.depthOrArraySize = t.layerCount;
      texDesc.mipLevels = t.mipLevels;
      texDesc.sampleDesc.count = 1;
      texDesc.format = getTextureFormat(t);
      return texDesc;
  }
  /**
      Creates the GPU resource of `t`.

      Render targets are created in the RENDER_TARGET state with an optimized
      clear value (the previous clear color when there is one, transparent
      black otherwise). Other textures are created in COMMON and tracked as
      COPY_DEST. `Writable` textures additionally allow unordered access.
      The WasCleared flag of `t` is reset.

      Throws when an S3TC texture has a size that is not a multiple of 4x4.
  **/
  override function allocTexture(t:h3d.mat.Texture):Texture {
      var compressed = t.format.match(S3TC(_));
      if( compressed && ((t.width | t.height) & 3) != 0 )
          throw t+" is compressed "+t.width+"x"+t.height+" but should be a 4x4 multiple";
      var isRT = t.flags.has(Target);
      var flags = new haxe.EnumFlags();
      var desc = makeTextureDesc(t);
      var td = new TextureData();
      td.format = desc.format;
      tmp.heap.type = DEFAULT;
      var clear = null;
      if( isRT ) {
          var prev = t.t;
          var color = (prev == null || prev.color == null) ? new h3d.Vector4(0,0,0,0) : prev.color; // reuse prev color
          desc.flags.set(ALLOW_RENDER_TARGET);
          clear = tmp.clearValue;
          clear.format = desc.format;
          clear.color.r = color.r;
          clear.color.g = color.g;
          clear.color.b = color.b;
          clear.color.a = color.a;
          td.color = color;
      }
      if( t.flags.has(Writable) )
          desc.flags.set(ALLOW_UNORDERED_ACCESS);
      td.state = td.targetState = isRT ? RENDER_TARGET : COPY_DEST;
      td.res = Driver.createCommittedResource(tmp.heap, flags, desc, isRT ? RENDER_TARGET : COMMON, clear);
      var debugName = t.name == null ? "Texture#"+t.id : t.name;
      td.res.setName(debugName);
      t.lastFrame = frameCount;
      t.flags.unset(WasCleared);
      return td;
  }
  /**
      Creates the GPU resource of the depth buffer `b`: a single-mip 2D texture
      in the DEPTH_WRITE state whose optimized clear value is depth 1 /
      stencil 0.
  **/
  override function allocDepthBuffer(b:h3d.mat.Texture):Texture {
      var depthDesc = new ResourceDesc();
      depthDesc.dimension = TEXTURE2D;
      depthDesc.width = b.width;
      depthDesc.height = b.height;
      depthDesc.depthOrArraySize = 1;
      depthDesc.mipLevels = 1;
      depthDesc.sampleDesc.count = 1;
      depthDesc.format = toDxgiDepthFormat(b.format);
      depthDesc.flags.set(ALLOW_DEPTH_STENCIL);
      #if console
      depthDesc.flags = new haxe.EnumFlags<ResourceFlag>( depthDesc.flags.toInt() | 0x00800000 ); // FORCE_TEXTURE_COMPATIBILITY
      #end

      tmp.heap.type = DEFAULT;
      var clearValue = tmp.clearValue;
      clearValue.format = depthDesc.format;
      clearValue.depth = 1;
      clearValue.stencil= 0;

      var heapFlags = new haxe.EnumFlags();
      var td = new TextureData();
      td.state = td.targetState = DEPTH_WRITE;
      td.res = Driver.createCommittedResource(tmp.heap, heapFlags, depthDesc, DEPTH_WRITE, clearValue);
      return td;
  }
  /** Releases the GPU resource of `t` and clears `t.t`. **/
  override function disposeTexture( t : h3d.mat.Texture ) {
      var data = t.t;
      disposeResource(data);
      t.t = null;
  }
  /** Releases the GPU resource of the depth buffer `t` and clears `t.t`. **/
  override function disposeDepthBuffer( t : h3d.mat.Texture ) {
      var data = t.t;
      disposeResource(data);
      t.t = null;
  }
  /**
      Uploads `bmp` into one mip level / side of `t` by going through its
      pixels, which are disposed once uploaded.
  **/
  override function uploadTextureBitmap(t:h3d.mat.Texture, bmp:hxd.BitmapData, mipLevel:Int, side:Int) {
      var bmpPixels = bmp.getPixels();
      uploadTexturePixels(t, bmpPixels, mipLevel, side);
      // The pixels were only extracted for this upload
      bmpPixels.dispose();
  }
  /**
      Uploads `pixels` into one sub-resource (`mipLevel` of `side`) of `t`.

      The pixels are converted in place to the texture format, staged in the
      frame's bump allocator and copied with the texture in the COPY_DEST
      state; a transition to PIXEL_SHADER_RESOURCE is then queued and the
      texture is flagged WasCleared.

      Throws when `mipLevel` is outside the texture's mip range or when the
      sub-resource update fails.
  **/
  override function uploadTexturePixels(t:h3d.mat.Texture, pixels:hxd.Pixels, mipLevel:Int, side:Int) {
      pixels.convert(t.format);
      if( mipLevel >= t.mipLevels ) throw "Mip level outside texture range : " + mipLevel + " (max = " + (t.mipLevels - 1) + ")";
      var stride = @:privateAccess pixels.stride;
      switch( t.format ) {
      case S3TC(n): stride = pixels.width * ((n == 1 || n == 4) ? 2 : 4); // "uncompressed" stride ?
      default:
      }
      var upd = tmp.subResourceData;
      upd.data = (pixels.bytes:hl.Bytes).offset(pixels.offset);
      upd.rowPitch = stride;
      upd.slicePitch = pixels.dataSize;
      // Sub-resources are ordered mip-major within each side
      var subRes = mipLevel + side * t.mipLevels;
      var tmpSize = t.t.res.getRequiredIntermediateSize(subRes, 1).low;
      var allocation = frame.bumpAllocator.alloc(tmpSize, 512, tmp.bumpAllocation);
      transition(t.t, COPY_DEST);
      flushTransitions();
      if( !Driver.updateSubResource(frame.commandList, t.t.res, allocation.resource, allocation.offset, subRes, 1, upd) )
          throw "Failed to update sub resource";
      transition(t.t, PIXEL_SHADER_RESOURCE);
      t.flags.set(WasCleared);
  }
  // Copies `from` into `to` on the GPU.
  //
  // Returns false when the source has no GPU data, when the two textures do
  // not share format / size / layer count, or when `to` could not be
  // allocated. Returns true once the copy has been recorded.
  override function copyTexture(from:h3d.mat.Texture, to:h3d.mat.Texture):Bool {
      if( from.t == null )
          return false;
      if( from.format != to.format || from.width != to.width || from.height != to.height || from.layerCount != to.layerCount )
          return false;
      if( to.t == null ) {
          // allocate the destination while keeping the source's lastFrame untouched
          var savedFrame = from.lastFrame;
          from.preventAutoDispose();
          to.alloc();
          from.lastFrame = savedFrame;
          if( from.t == null ) throw "assert";
          if( to.t == null ) return false;
      }
      transition(from.t, COPY_SOURCE);
      transition(to.t, COPY_DEST);
      flushTransitions();
      var dstLoc = new TextureCopyLocation();
      dstLoc.res = to.t.res;
      var srcLoc = new TextureCopyLocation();
      srcLoc.res = from.t.res;
      frame.commandList.copyTextureRegion(dstLoc, 0, 0, 0, srcLoc, null);
      to.flags.set(WasCleared);
      // put the first bound render target involved in the copy back in RENDER_TARGET state
      for( rt in currentRenderTargets ) {
          if( rt != to && rt != from )
              continue;
          transition(rt.t, RENDER_TARGET);
          break;
      }
      return true;
  }
  1459. // ----- PIPELINE UPDATE
  // Uploads the `which` kind of shader data for the current shader: always
  // for the vertex registers, and for the fragment registers unless the
  // current shader is a compute shader.
  override function uploadShaderBuffers(buffers:h3d.shader.Buffers, which:h3d.shader.Buffers.BufferKind) {
      uploadBuffers(buffers, buffers.vertex, which, currentShader.shader.vertex, currentShader.vertexRegisters);
      // currentShader is re-read on purpose: the call above is allowed to touch it
      if( currentShader.isCompute )
          return;
      uploadBuffers(buffers, buffers.fragment, which, currentShader.shader.fragment, currentShader.fragmentRegisters);
  }
  // Rounds `dataSize` up to the next multiple of 256 (a value that is
  // already a multiple of 256 is returned unchanged).
  function calcCBVSize( dataSize : Int ) {
      // the view must be a mult of 256
      return (dataSize + 0xFF) & ~0xFF;
  }
  // Takes `dataSize` bytes from the frame bump allocator, copies `data`
  // into the allocation's CPU address and returns the allocation.
  function allocDynamicBuffer( data : hl.Bytes, dataSize : Int ) : BumpAllocation {
      var dyn = frame.bumpAllocator.alloc(dataSize, tmp.bumpAllocation);
      dyn.cpuAdress.blit(0, data, 0, dataSize);
      return dyn;
  }
  // Tells whether the texture descriptors recorded in `regs` are out of date:
  // either the descriptor heap counter moved since they were written, or one
  // of the bound textures (handle or bits) differs from what was recorded.
  function hasBuffersTexturesChanged ( buf : h3d.shader.Buffers.ShaderBuffers, regs : ShaderRegisters ) : Bool {
      if( regs.lastHeapCount != heapCount )
          return true;
      for( i in 0...regs.texturesCount ) {
          var tex = buf.tex[i];
          // a missing texture is recorded as a null handle with bits -1
          if( regs.lastTextures[i] != ( tex != null ? tex.t : null ) )
              return true;
          if( regs.lastTexturesBits[i] != ( tex != null ? tex.bits : -1 ) )
              return true;
      }
      return false;
  }
  // Ring buffer of pending SRV/sampler creation requests; allocated with
  // 256 entries on the first createSRV call.
  var srvRingBuf : hl.CArray<SrvArgs>;
  // Index of the slot the next createSRV call fills (wraps with & 0xFF).
  var srvHead : Int = 1;
  // Index of the last slot handled by processSRV (wraps with & 0xFF).
  var srvTail : Int = 0;
  // Set by the first createSRV call, when the ring buffer is allocated
  // (and, outside of console builds, the worker thread is created).
  var srvThreadLaunched : Bool = false;
  // Distance from srvTail to srvHead, modulo 256.
  // The callers treat 1 as "nothing queued" and 0 as "ring buffer full".
  inline function computeSRVBufferDistance() : Int {
      return (srvHead - srvTail) & 0xFF;
  }
  // Handles the request following srvTail: creates its shader resource view
  // and its sampler, then advances srvTail onto it.
  inline function processSRV() {
      var next = (srvTail + 1) & 0xFF;
      var pending = srvRingBuf[next];
      Driver.createShaderResourceView(pending.res, pending.resourceDesc, pending.srvAddr);
      Driver.createSampler(pending.samplerDesc, pending.samplerAddr);
      // the tail moves only once both descriptors have been written
      srvTail = next;
  }
  // Endless loop draining the SRV ring buffer; started from createSRV via
  // sys.thread.Thread.create on non-console builds.
  function runThread() {
      while( true ) {
          // Check if ring buffer is empty
          if( computeSRVBufferDistance() == 1 ) {
              Sys.sleep(0);
              continue;
          }
          processSRV();
      }
  }
  // Queues the creation of the shader resource view and the sampler of
  // texture `t` at the given descriptor addresses.
  //
  // The request is written into the ring buffer slot at srvHead. On console
  // builds it is processed right away by processSRV; otherwise runThread
  // picks it up. The first call allocates the ring buffer (and starts the
  // thread on non-console builds).
  function createSRV( t : h3d.mat.Texture, srvAddr : Address, samplerAddr : Address ) {
      if( !srvThreadLaunched ) {
          srvThreadLaunched = true;
          srvRingBuf = hl.CArray.alloc(SrvArgs, 256);
          #if !console
          sys.thread.Thread.create(runThread);
          #end
      }

      // Check if ring buffer is full
      while( computeSRVBufferDistance() == 0 ) {}

      var slot = srvRingBuf[srvHead];

      // --- resource view description, depending on the texture kind
      if( t.flags.has(Cube) ) {
          var cubeDesc = unsafeCastTo(slot.resourceDesc, TexCubeSRV);
          cubeDesc.format = t.t.format;
          cubeDesc.dimension = TEXTURECUBE;
          cubeDesc.shader4ComponentMapping = ShaderComponentMapping.DEFAULT;
          cubeDesc.mostDetailedMip = t.startingMip;
          cubeDesc.mipLevels = -1;
          cubeDesc.resourceMinLODClamp = 0;
      } else if( t.flags.has(IsArray) ) {
          var arrayDesc = unsafeCastTo(slot.resourceDesc, Tex2DArraySRV);
          arrayDesc.format = t.t.format;
          arrayDesc.dimension = TEXTURE2DARRAY;
          arrayDesc.shader4ComponentMapping = ShaderComponentMapping.DEFAULT;
          arrayDesc.mostDetailedMip = t.startingMip;
          arrayDesc.mipLevels = -1;
          arrayDesc.firstArraySlice = 0;
          arrayDesc.arraySize = t.layerCount;
          arrayDesc.planeSlice = 0;
          arrayDesc.resourceMinLODClamp = 0;
      } else if( t.isDepth() ) {
          var depthDesc = slot.resourceDesc;
          // depth textures are viewed through a color format chosen from the texture format
          depthDesc.format = switch( t.format ) {
          case Depth16: R16_UNORM;
          case Depth24, Depth24Stencil8: R24_UNORM_X8_TYPELESS;
          case Depth32: R32_FLOAT;
          default: throw "Unsupported depth format "+ t.format;
          }
          depthDesc.dimension = TEXTURE2D;
          depthDesc.shader4ComponentMapping = ShaderComponentMapping.DEFAULT;
          depthDesc.mostDetailedMip = t.startingMip;
          depthDesc.mipLevels = -1;
          depthDesc.planeSlice = 0;
          depthDesc.resourceMinLODClamp = 0;
      } else {
          var texDesc = slot.resourceDesc;
          texDesc.format = t.t.format;
          texDesc.dimension = TEXTURE2D;
          texDesc.shader4ComponentMapping = ShaderComponentMapping.DEFAULT;
          texDesc.mostDetailedMip = t.startingMip;
          texDesc.mipLevels = -1;
          texDesc.planeSlice = 0;
          texDesc.resourceMinLODClamp = 0;
      }

      // --- sampler description, from the texture filter / mipmap / wrap settings
      var sampler = slot.samplerDesc;
      sampler.comparisonFunc = NEVER;
      sampler.maxLod = 1e30;
      sampler.filter = switch( [t.filter, t.mipMap] ) {
      case [Nearest, None|Nearest]: MIN_MAG_MIP_POINT;
      case [Nearest, Linear]: MIN_MAG_POINT_MIP_LINEAR;
      case [Linear, None|Nearest]: MIN_MAG_LINEAR_MIP_POINT;
      case [Linear, Linear]: MIN_MAG_MIP_LINEAR;
      }
      sampler.addressU = sampler.addressV = sampler.addressW = switch( t.wrap ) {
      case Clamp: CLAMP;
      case Repeat: WRAP;
      }
      sampler.mipLODBias = t.lodBias;

      // --- request targets
      slot.res = t.t.res;
      slot.srvAddr = srvAddr;
      slot.samplerAddr = samplerAddr;

      // publish the slot
      srvHead = (srvHead + 1) & 0xFF;
      #if console
      processSRV();
      #end
  }
  // Binds one kind of shader data (`which`) for one shader stage.
  //
  //  - buffers : the whole shader buffers set, only used to re-upload
  //              everything when a texture realloc changed the current shader
  //  - buf     : the data of the stage being uploaded
  //  - which   : Params / Globals / Textures / Buffers
  //  - shader  : runtime data of the stage (sizes and counts)
  //  - regs    : root signature registers and cached descriptor state of the stage
  //
  // Throws on a missing texture that has no fallback, on a read-write texture
  // whose layout or flags do not match the shader, and on buffers allocated
  // without the flag the shader requires.
  function uploadBuffers( buffers : h3d.shader.Buffers, buf : h3d.shader.Buffers.ShaderBuffers, which:h3d.shader.Buffers.BufferKind, shader : hxsl.RuntimeShader.RuntimeShaderData, regs : ShaderRegisters ) {
      switch( which ) {
      case Params:
          // sizes are expressed in 16 bytes units, hence the << 4
          if( shader.paramsSize > 0 )
              uploadRootConstants(regs.params, hl.Bytes.getArray(buf.params.toData()), shader.paramsSize << 4);
      case Globals:
          if( shader.globalsSize > 0 )
              uploadRootConstants(regs.globals, hl.Bytes.getArray(buf.globals.toData()), shader.globalsSize << 4);
      case Textures:
          if( shader.texturesCount > 0 ) {
              // resource state sampled textures must be in; it only depends on the stage
              var state = if( shader.kind == Fragment )
                  PIXEL_SHADER_RESOURCE;
              else
                  NON_PIXEL_SHADER_RESOURCE;
              if ( hasBuffersTexturesChanged(buf, regs) ) {
                  regs.lastHeapCount = heapCount;
                  regs.srv = frame.shaderResourceViews.alloc(shader.texturesCount);
                  regs.samplersView = frame.samplerViews.alloc(shader.texturesCount);
                  if ( regs.lastTextures.length < shader.texturesCount ) {
                      regs.lastTextures.resize(shader.texturesCount);
                      regs.lastTexturesBits.resize(shader.texturesCount);
                  }
                  // read-write textures get their own root entries, after the buffers
                  var regIndex = regs.buffers + shader.bufferCount;
                  // sampled textures are packed contiguously in regs.srv / regs.samplersView
                  var outIndex = 0;
                  for( i in 0...shader.texturesCount ) {
                      var t = buf.tex[i];
                      var pt = regs.texturesTypes[i];
                      if( t == null || t.isDisposed() ) {
                          // fall back on a default texture for plain (non array) samplers
                          switch ( pt ) {
                          case TSampler(TCube, false):
                              t = h3d.mat.Texture.defaultCubeTexture();
                          case TSampler(_, false):
                              var color = h3d.mat.Defaults.loadingTextureColor;
                              t = h3d.mat.Texture.fromColor(color, (color >>> 24) / 255);
                          default:
                              throw "Missing texture";
                          }
                      }
                      if( t != null && t.t == null && t.realloc != null ) {
                          var s = currentShader;
                          t.alloc();
                          t.realloc();
                          if( hasDeviceError ) return;
                          if( s != currentShader ) {
                              // realloc triggered a shader change !
                              // we need to reset the original shader and reupload everything
                              currentShader = null;
                              selectShader(s.shader);
                              uploadShaderBuffers(buffers,Globals);
                              uploadShaderBuffers(buffers,Params);
                              uploadShaderBuffers(buffers,Textures);
                              return;
                          }
                      }
                      // record what the caller bound (not the fallback) so that
                      // hasBuffersTexturesChanged compares against the same values
                      regs.lastTextures[i] = buf.tex[i] != null ? buf.tex[i].t : null;
                      regs.lastTexturesBits[i] = buf.tex[i] != null ? buf.tex[i].bits : -1;
                      switch( pt ) {
                      case TRWTexture(dim,arr,chans):
                          var tdim : hxsl.Ast.TexDimension = t.flags.has(Cube) ? TCube : T2D;
                          if( (arr != t.flags.has(IsArray)) || dim != tdim )
                              throw "Texture format does not match: "+t+"["+t.format+"] should be "+hxsl.Ast.Tools.toString(pt);
                          var srv = frame.shaderResourceViews.alloc(1);
                          if( !t.flags.has(Writable) )
                              throw "Texture was allocated without Writable flag";
                          transition(t.t, UNORDERED_ACCESS);
                          var desc = tmp.wtexDesc;
                          desc.format = cast getTextureFormat(t);
                          desc.viewDimension = switch( [dim,arr] ) {
                          case [T1D, false]: TEXTURE1D;
                          case [T2D, false]: TEXTURE2D;
                          case [T3D, false]: TEXTURE3D;
                          case [T1D, true]: TEXTURE1DARRAY;
                          case [T2D, true]: TEXTURE2DARRAY;
                          default: throw "Unsupported RWTexture "+t;
                          }
                          desc.mipSlice = 0;
                          desc.planeSlice = 0;
                          if( arr ) {
                              desc.firstArraySlice = 0;
                              desc.arraySize = 1;
                          }
                          Driver.createUnorderedAccessView(t.t.res, null, desc, srv);
                          if( currentShader.isCompute )
                              frame.commandList.setComputeRootDescriptorTable(regIndex++, frame.shaderResourceViews.toGPU(srv));
                          else
                              frame.commandList.setGraphicsRootDescriptorTable(regIndex++, frame.shaderResourceViews.toGPU(srv));
                          // a read-write texture does not consume a sampled texture slot
                          continue;
                      default:
                      }
                      t.lastFrame = frameCount;
                      transition(t.t, state);
                      createSRV(t, regs.srv.offset(outIndex * frame.shaderResourceViews.stride), regs.samplersView.offset(outIndex * frame.samplerViews.stride));
                      outIndex++;
                  }
              }
              else {
                  // descriptors are still valid: only make sure the resources are in the right state
                  for( i in 0...regs.texturesCount ) {
                      var t = buf.tex[i];
                      if (t == null || t.t == null)
                          continue;
                      transition(t.t, state);
                  }
              }
              if( regs.texturesCount > 0 ) {
                  if( currentShader.isCompute ) {
                      frame.commandList.setComputeRootDescriptorTable(regs.textures, frame.shaderResourceViews.toGPU(regs.srv));
                      frame.commandList.setComputeRootDescriptorTable(regs.samplers, frame.samplerViews.toGPU(regs.samplersView));
                  } else {
                      frame.commandList.setGraphicsRootDescriptorTable(regs.textures, frame.shaderResourceViews.toGPU(regs.srv));
                      frame.commandList.setGraphicsRootDescriptorTable(regs.samplers, frame.samplerViews.toGPU(regs.samplersView));
                  }
              }
          }
      case Buffers:
          if( shader.bufferCount > 0 ) {
              for( i in 0...shader.bufferCount ) {
                  var srv = frame.shaderResourceViews.alloc(1);
                  var b = buf.buffers[i];
                  var cbv = b.vbuf;
                  switch( regs.bufferTypes[i] ) {
                  case Uniform:
                      if( cbv.view != null )
                          throw "Buffer was allocated without UniformBuffer flag";
                      transition(cbv, VERTEX_AND_CONSTANT_BUFFER);
                      var desc = tmp.cbvDesc;
                      desc.bufferLocation = cbv.res.getGpuVirtualAddress();
                      desc.sizeInBytes = cbv.size;
                      Driver.createConstantBufferView(desc, srv);
                  case RW:
                      if( !b.flags.has(ReadWriteBuffer) )
                          throw "Buffer was allocated without ReadWriteBuffer flag";
                      transition(cbv, UNORDERED_ACCESS);
                      var desc = tmp.uavDesc;
                      desc.numElements = b.vertices;
                      desc.structureSizeInBytes = b.format.strideBytes;
                      Driver.createUnorderedAccessView(cbv.res, null, desc, srv);
                  default:
                      throw "assert";
                  }
                  if( currentShader.isCompute )
                      frame.commandList.setComputeRootDescriptorTable(regs.buffers + i, frame.shaderResourceViews.toGPU(srv));
                  else
                      frame.commandList.setGraphicsRootDescriptorTable(regs.buffers + i, frame.shaderResourceViews.toGPU(srv));
              }
          }
      }
  }

  // Binds `dataSize` bytes of constants for the root register `reg`.
  // When bit 0x100 of `reg` is set, the data is copied into a dynamic buffer
  // and bound through a constant buffer view on root index `reg & 0xFF`;
  // otherwise it is set directly as 32 bit root constants on index `reg`.
  function uploadRootConstants( reg : Int, data : hl.Bytes, dataSize : Int ) {
      if( reg & 0x100 != 0 ) {
          // update CBV
          var srv = frame.shaderResourceViews.alloc(1);
          var alloc = allocDynamicBuffer(data,dataSize);
          var desc = tmp.cbvDesc;
          desc.bufferLocation = alloc.resource.getGpuVirtualAddress() + alloc.offset;
          desc.sizeInBytes = alloc.byteSize;
          Driver.createConstantBufferView(desc, srv);
          if( currentShader.isCompute )
              frame.commandList.setComputeRootDescriptorTable(reg & 0xFF, frame.shaderResourceViews.toGPU(srv));
          else
              frame.commandList.setGraphicsRootDescriptorTable(reg & 0xFF, frame.shaderResourceViews.toGPU(srv));
      } else if( currentShader.isCompute )
          frame.commandList.setComputeRoot32BitConstants(reg, dataSize >> 2, data, 0);
      else
          frame.commandList.setGraphicsRoot32BitConstants(reg, dataSize >> 2, data, 0);
  }
  // Makes `shader` the current shader, compiling and caching it on first use.
  // Returns false when it was already current, true otherwise.
  override function selectShader( shader : hxsl.RuntimeShader ) {
      var compiled = compiledShaders.get(shader.id);
      if( compiled == null ) {
          compiled = compileShader(shader);
          compiledShaders.set(shader.id, compiled);
      }
      if( compiled == currentShader )
          return false;
      currentShader = compiled;
      pipelineBuilder.setShader(shader);
      if( !compiled.isCompute ) {
          frame.commandList.setGraphicsRootSignature(compiled.rootSignature);
          return true;
      }
      // compute shaders also get their pipeline state bound here
      frame.commandList.setComputeRootSignature(compiled.rootSignature);
      frame.commandList.setPipelineState(compiled.computePipeline);
      return true;
  }
  // Forwards the pass to the pipeline builder and updates the stencil
  // reference on the command list when the pass stencil uses a new one.
  override function selectMaterial( pass : h3d.mat.Pass ) @:privateAccess {
      pipelineBuilder.selectMaterial(pass);
      var stencil = pass.stencil;
      if( stencil == null || stencil.reference == curStencilRef )
          return;
      curStencilRef = stencil.reference;
      frame.commandList.omSetStencilRef(stencil.reference);
  }
  // Binds a single vertex buffer for every input of the current shader:
  // each input gets a copy of the buffer view, and its mapping is given to
  // the pipeline builder.
  override function selectBuffer(buffer:Buffer) {
      var vbuf = buffer.vbuf;
      var srcView = vbuf.view;
      var views = tmp.vertexViews;
      var mapping = buffer.format.resolveMapping(currentShader.format);
      for( i in 0...currentShader.inputCount ) {
          var input = mapping[i];
          var dst = views[i];
          dst.bufferLocation = srcView.bufferLocation;
          dst.sizeInBytes = srcView.sizeInBytes;
          dst.strideInBytes = srcView.strideInBytes;
          if( input.offset >= 256 ) throw "assert";
          pipelineBuilder.setBuffer(i, input, dst.strideInBytes);
      }
      flushTransitions();
      frame.commandList.iaSetVertexBuffers(0, currentShader.inputCount, views[0]);
  }
  // Binds several vertex buffers: each entry of the resolved mapping picks
  // its source buffer through `bufferIndex` and copies that buffer's view.
  override function selectMultiBuffers(formats:hxd.BufferFormat.MultiFormat,buffers:Array<h3d.Buffer>) {
      var views = tmp.vertexViews;
      var mapping = formats.resolveMapping(currentShader.format);
      var count = mapping.length;
      for( i in 0...count ) {
          var input = mapping[i];
          var srcView = @:privateAccess buffers[input.bufferIndex].vbuf.view;
          var dst = views[i];
          dst.bufferLocation = srcView.bufferLocation;
          dst.sizeInBytes = srcView.sizeInBytes;
          dst.strideInBytes = srcView.strideInBytes;
          pipelineBuilder.setBuffer(i, input, dst.strideInBytes);
      }
      frame.commandList.iaSetVertexBuffers(0, mapping.length, views[0]);
  }
  // Lookup tables used by makePipeline: each one is indexed with the
  // getIndex() of a pass / stencil enum value and gives the DX12 value.

  // indexed by pass.culling
  static var CULL : Array<CullMode> = [NONE,BACK,FRONT,NONE];
  // indexed by pass.blendOp / pass.blendAlphaOp
  static var BLEND_OP : Array<BlendOp> = [ADD,SUBTRACT,REV_SUBTRACT,MIN,MAX];
  // indexed by pass.depthTest and by the stencil front/back tests
  static var COMP : Array<ComparisonFunc> = [ALWAYS, NEVER, EQUAL, NOT_EQUAL, GREATER, GREATER_EQUAL, LESS, LESS_EQUAL];
  // indexed by pass.blendSrc / pass.blendDst
  static var BLEND : Array<Blend> = [
      ONE,ZERO,SRC_ALPHA,SRC_COLOR,DEST_ALPHA,DEST_COLOR,INV_SRC_ALPHA,INV_SRC_COLOR,INV_DEST_ALPHA,INV_DEST_COLOR,
      SRC1_COLOR,SRC1_ALPHA,INV_SRC1_COLOR,INV_SRC1_ALPHA,SRC_ALPHA_SAT
  ];
  // indexed by pass.blendAlphaSrc / pass.blendAlphaDst: same layout as BLEND
  // with every *_COLOR entry replaced by its *_ALPHA counterpart
  static var BLEND_ALPHA : Array<Blend> = [
      ONE,ZERO,SRC_ALPHA,SRC_ALPHA,DEST_ALPHA,DEST_ALPHA,INV_SRC_ALPHA,INV_SRC_ALPHA,INV_DEST_ALPHA,INV_DEST_ALPHA,
      SRC1_ALPHA,SRC1_ALPHA,INV_SRC1_ALPHA,INV_SRC1_ALPHA,SRC_ALPHA_SAT,
  ];
  // indexed by the stencil front/back pass, stencil-fail and depth-fail operations
  static var STENCIL_OP : Array<StencilOp> = [KEEP, ZERO, REPLACE, INCR_SAT, INCR, DECR_SAT, DECR, INVERT];
  // Fills the pipeline description of `shader` from the current pass, depth
  // properties, render targets and vertex inputs, then creates and returns
  // the matching graphics pipeline state.
  function makePipeline( shader : CompiledShader ) {
      var desc = shader.pipeline;
      var pass = pipelineBuilder.getCurrentPass();
      var depthProps = pipelineBuilder.getDepthProps();

      // wireframe rendering turns culling off on the pass itself
      if( pass.wireframe ) pass.culling = None;

      // with no render target bound, a single target is still described
      var targetCount = currentRenderTargets.length;
      if( targetCount == 0 ) targetCount = 1;
      desc.numRenderTargets = targetCount;

      // --- rasterizer and depth
      desc.rasterizerState.cullMode = CULL[pass.culling.getIndex()];
      desc.rasterizerState.fillMode = pass.wireframe ? WIREFRAME : SOLID;
      desc.depthStencilDesc.depthEnable = pass.depthTest != Always;
      desc.depthStencilDesc.depthWriteMask = !pass.depthWrite || !depthEnabled ? ZERO : ALL;
      desc.depthStencilDesc.depthFunc = COMP[pass.depthTest.getIndex()];
      desc.rasterizerState.depthBias = Std.int(depthProps.bias);
      desc.rasterizerState.slopeScaledDepthBias = depthProps.slopeScaledBias;
      desc.rasterizerState.depthClipEnable = depthProps.clamp;

      // --- blending: the same pass settings are applied to every target
      var blend = desc.blendState;
      for( i in 0...targetCount ) {
          var rtBlend = blend.renderTargets[i];
          rtBlend.blendEnable = pass.blendSrc != One || pass.blendDst != Zero;
          rtBlend.srcBlend = BLEND[pass.blendSrc.getIndex()];
          rtBlend.dstBlend = BLEND[pass.blendDst.getIndex()];
          rtBlend.srcBlendAlpha = BLEND_ALPHA[pass.blendAlphaSrc.getIndex()];
          rtBlend.dstBlendAlpha = BLEND_ALPHA[pass.blendAlphaDst.getIndex()];
          rtBlend.blendOp = BLEND_OP[pass.blendOp.getIndex()];
          rtBlend.blendOpAlpha = BLEND_OP[pass.blendAlphaOp.getIndex()];
          rtBlend.renderTargetWriteMask = pass.colorMask;
          var target = currentRenderTargets[i];
          desc.rtvFormats[i] = target == null ? R8G8B8A8_UNORM : target.t.format;
      }
      desc.dsvFormat = toDxgiDepthFormat(depthProps.format);
      // unused render target slots are explicitly reset
      for ( i in targetCount...8 )
          desc.rtvFormats[i] = DxgiFormat.UNKNOWN;

      // --- vertex input layout
      for( i in 0...shader.inputCount ) {
          var element = shader.inputLayout[i];
          var input = pipelineBuilder.getBufferInput(i);
          element.alignedByteOffset = input.offset;
          element.format = @:privateAccess switch( [shader.format.inputs[i].type, input.precision] ) {
          case [DFloat, F32]: R32_FLOAT;
          case [DFloat, F16]: R16_FLOAT;
          case [DFloat, S8]: R8_SNORM;
          case [DFloat, U8]: R8_UNORM;
          case [DVec2, F32]: R32G32_FLOAT;
          case [DVec2, F16]: R16G16_FLOAT;
          case [DVec2, S8]: R8G8_SNORM;
          case [DVec2, U8]: R8G8_UNORM;
          case [DVec3, F32]: R32G32B32_FLOAT;
          case [DVec3, F16]: R16G16B16A16_FLOAT; // padding
          case [DVec3, S8]: R8G8B8A8_SNORM; // padding
          case [DVec3, U8]: R8G8B8A8_UNORM; // padding
          case [DVec4, F32]: R32G32B32A32_FLOAT;
          case [DVec4, F16]: R16G16B16A16_FLOAT;
          case [DVec4, S8]: R8G8B8A8_SNORM;
          case [DVec4, U8]: R8G8B8A8_UNORM;
          case [DBytes4, _]: R8G8B8A8_UINT;
          default: throw "assert";
          };
      }

      // --- stencil
      var stencil = pass.stencil;
      var dsDesc = desc.depthStencilDesc;
      dsDesc.stencilEnable = stencil != null;
      if( stencil != null ) {
          dsDesc.stencilReadMask = stencil.readMask;
          dsDesc.stencilWriteMask = stencil.writeMask;

          var frontFace = dsDesc.frontFace;
          frontFace.stencilFunc = COMP[stencil.frontTest.getIndex()];
          frontFace.stencilPassOp = STENCIL_OP[stencil.frontPass.getIndex()];
          frontFace.stencilFailOp = STENCIL_OP[stencil.frontSTfail.getIndex()];
          frontFace.stencilDepthFailOp = STENCIL_OP[stencil.frontDPfail.getIndex()];

          var backFace = dsDesc.backFace;
          backFace.stencilFunc = COMP[stencil.backTest.getIndex()];
          backFace.stencilPassOp = STENCIL_OP[stencil.backPass.getIndex()];
          backFace.stencilFailOp = STENCIL_OP[stencil.backSTfail.getIndex()];
          backFace.stencilDepthFailOp = STENCIL_OP[stencil.backDPfail.getIndex()];
      }
      return Driver.createGraphicsPipelineState(desc);
  }
  // When the pipeline builder asks for a flush, looks up the pipeline state
  // for the current shader (creating it on first use) and binds it.
  function flushPipeline() {
      if( pipelineBuilder.needFlush ) {
          var entry = pipelineBuilder.lookup(currentShader.pipelines, currentShader.inputCount);
          if( entry.pipeline == null )
              entry.pipeline = makePipeline(currentShader);
          frame.commandList.setPipelineState(entry.pipeline);
      }
  }
  1931. // QUERIES
  // Number of timestamp queries held by each query heap (see endQuery).
  static inline var QUERY_COUNT = 128;
  // Creates a query object. Only TimeStamp queries are supported, any other
  // kind throws "Not implemented".
  override function allocQuery( queryKind : QueryKind ) : Query {
      if( queryKind == TimeStamp )
          return new Query();
      throw "Not implemented";
  }
  // Intentionally empty: this driver does nothing when a query is deleted.
  override function deleteQuery( q : Query ) {
  }
  // Intentionally empty: the timestamp is recorded by endQuery.
  override function beginQuery( q : Query ) {
  }
  // Records a timestamp for `q` in the current query heap of the frame and
  // queues the query so that its result gets read back later.
  // The heap is created on first use; once a heap holds QUERY_COUNT queries
  // the frame moves on to the next heap.
  override function endQuery( q : Query ) {
      var heapIndex = frame.queryCurrentHeap;
      var heap = frame.queryHeaps[heapIndex];
      if( heap == null ) {
          var heapDesc = new QueryHeapDesc();
          heapDesc.type = TIMESTAMP;
          heapDesc.count = QUERY_COUNT;
          heap = Driver.createQueryHeap(heapDesc);
          frame.queryHeaps[heapIndex] = heap;
          // the readback buffer is sized from the number of heaps: drop it so it gets reallocated
          if( frame.queryBuffer != null ) {
              frame.queryBuffer.release();
              frame.queryBuffer = null;
          }
      }
      q.offset = frame.queryHeapOffset++;
      q.heap = heapIndex;
      frame.commandList.endQuery(heap, TIMESTAMP, q.offset);
      frame.queriesPending.push(q);
      if( frame.queryHeapOffset == QUERY_COUNT ) {
          frame.queryHeapOffset = 0;
          frame.queryCurrentHeap++;
      }
  }
  // A query result is available once beginQueries has read it back, which
  // is flagged by a negative heap index.
  override function queryResultAvailable( q : Query ) {
      var stillPending = q.heap >= 0;
      return !stillPending;
  }
  // Returns the value stored on the query by beginQueries.
  override function queryResult( q : Query ) {
      var value = q.result;
      return value;
  }
  // Reads back the results of the pending queries of the frame from the
  // readback buffer, and marks each of them as available (heap = -1).
  // Does nothing when there is no readback buffer or no pending query.
  function beginQueries() {
      if( frame.queryBuffer == null || frame.queriesPending.length == 0 )
          return;
      var mapped : hl.BytesAccess<Int64> = frame.queryBuffer.map(0, null);
      var q = frame.queriesPending.pop();
      while( q != null ) {
          if( q.heap >= 0 ) {
              var position = q.heap * QUERY_COUNT + q.offset;
              var v = mapped[position];
              // ticks divided by tsFreq (whole part + remainder), then scaled by 1e9
              q.result = ((v / tsFreq).low + (v % tsFreq).low / tsFreq.low) * 1e9;
              q.heap = -1;
          }
          q = frame.queriesPending.pop();
      }
      frame.queryBuffer.unmap(0, null);
  }
  // Resolves every timestamp recorded this frame into the readback buffer
  // (8 bytes per query, heaps laid out one after the other), then resets the
  // heap counters of the frame. The readback buffer is allocated on demand.
  function flushQueries() {
      // Number of queries recorded in the last heap in use. When endQuery
      // filled a heap exactly, it already reset queryHeapOffset to 0 and moved
      // on to the next heap: the last heap in use is then a full one, and must
      // be resolved with QUERY_COUNT entries instead of 0.
      var lastCount = QUERY_COUNT;
      if( frame.queryHeapOffset > 0 ) {
          // the current heap is partially used: count it in
          lastCount = frame.queryHeapOffset;
          frame.queryCurrentHeap++;
      }
      if( frame.queryCurrentHeap == 0 )
          return;
      if( frame.queryBuffer == null )
          frame.queryBuffer = allocGPU(frame.queryHeaps.length * QUERY_COUNT * 8, READBACK, COPY_DEST);
      var position = 0;
      var lastHeap = frame.queryCurrentHeap - 1;
      for( i in 0...frame.queryCurrentHeap ) {
          var count = i < lastHeap ? QUERY_COUNT : lastCount;
          frame.commandList.resolveQueryData(frame.queryHeaps[i], TIMESTAMP, 0, count, frame.queryBuffer, position);
          position += count * 8;
      }
      frame.queryCurrentHeap = 0;
      frame.queryHeapOffset = 0;
  }
  2004. // --- DRAW etc.
  // Draws `ntriangles` indexed triangles starting at `startIndex`, binding
  // the index buffer first when it is not the current one.
  override function draw( ibuf : Buffer, startIndex : Int, ntriangles : Int ) {
      flushPipeline();
      if( currentIndex != ibuf ) {
          currentIndex = ibuf;
          frame.commandList.iaSetIndexBuffer(ibuf.vbuf.iview);
      }
      var indexCount = ntriangles * 3;
      frame.commandList.drawIndexedInstanced(indexCount, 1, startIndex, 0, 0);
      flushResources();
  }
  // Instanced draw. When the commands carry GPU data the draw is issued
  // through executeIndirect (with an optional count buffer), otherwise
  // through a direct indexed instanced draw.
  override function drawInstanced(ibuf:Buffer, commands:InstanceBuffer) {
      flushPipeline();
      if( currentIndex != ibuf ) {
          currentIndex = ibuf;
          frame.commandList.iaSetIndexBuffer(ibuf.vbuf.iview);
      }
      if( commands.data == null ) {
          frame.commandList.drawIndexedInstanced(commands.indexCount, commands.commandCount, commands.startIndex, 0, 0);
      } else {
          // both argument buffers must be in INDIRECT_ARGUMENT state before the call
          transition(commands.data, INDIRECT_ARGUMENT);
          if( commands.countBuffer != null )
              transition(commands.countBuffer, INDIRECT_ARGUMENT);
          flushTransitions();
          frame.commandList.executeIndirect(indirectCommand, commands.commandCount, commands.data.res, 0, commands.countBuffer != null ? commands.countBuffer.res : null, 0);
      }
      flushResources();
  }
  // Switches to the next descriptor heaps of the frame when the current ones
  // are running low (less than 128 resource views or 64 samplers left), and
  // bumps heapCount so cached descriptors get rebuilt.
  function flushResources() {
      var runningLow = frame.shaderResourceViews.available < 128 || frame.samplerViews.available < 64;
      if( !runningLow )
          return;
      frame.shaderResourceViews = frame.shaderResourceCache.next();
      frame.samplerViews = frame.samplerCache.next();
      heapCount++;
      var heaps = tmp.descriptors2;
      heaps[0] = @:privateAccess frame.shaderResourceViews.heap;
      heaps[1] = @:privateAccess frame.samplerViews.heap;
      frame.commandList.setDescriptorHeaps(heaps);
  }
  // Spins until the SRV ring buffer has been fully processed
  // (a distance of 1 means nothing is queued).
  function flushSRV() {
      while( true ) {
          if( computeSRVBufferDistance() == 1 )
              break;
      }
  }
  // Submits the frame: resolves the queries, closes the command list, waits
  // for the queued SRV creations, executes the command list and signals the
  // fence with a new value stored on the frame.
  function flushFrame( onResize : Bool = false ) {
      flushQueries();
      frame.commandList.close();
      // every queued descriptor must be written before the GPU runs the list
      flushSRV();
      frame.commandList.execute();
      currentShader = null;
      Driver.flushMessages();
      var signalValue = fenceValue++;
      frame.fenceValue = signalValue;
      Driver.signal(fence, signalValue);
  }
  // Presents the back buffer: submits the frame, waits for the frame that
  // owns the next back buffer, begins it, and runs the context-lost sequence
  // when a device error was flagged.
  override function present() {
      transition(frame.backBuffer, PRESENT);
      flushTransitions();
      flushFrame();
      Driver.present(window.vsync);
      waitForFrame(Driver.getCurrentBackBufferIndex());
      beginFrame();
      if( !hasDeviceError )
          return;
      Sys.println("----------- OnContextLost ----------");
      hasDeviceError = false;
      dispose();
      reset();
      onContextLost();
  }
  // Blocks until the fence has reached the value recorded for frame `index`.
  function waitForFrame( index : Int ) {
      var target = frames[index].fenceValue;
      if( fence.getValue() >= target )
          return;
      fence.setEvent(target, fenceEvent);
      fenceEvent.wait(-1);
  }
  // Dispatches the current compute shader over an (x, y, z) group grid.
  override function computeDispatch( x : Int = 1, y : Int = 1, z : Int = 1 ) {
      // pending transitions are flushed before the dispatch is recorded
      flushTransitions();
      var cmd = frame.commandList;
      cmd.dispatch(x, y, z);
      flushResources();
  }
  2082. }
  2083. #end