direct3d12_gpu.c 80 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024
  1. #define WIN32_LEAN_AND_MEAN
  2. #include <iron_global.h>
  3. #include <stdbool.h>
  4. #include <malloc.h>
  5. #include <math.h>
  6. #include <assert.h>
  7. #include <d3d12.h>
  8. #include <dxgi.h>
  9. #include <dxgi1_4.h>
  10. #include <iron_gpu.h>
  11. #include <iron_system.h>
  12. #include <iron_math.h>
  13. #include <backends/windows_system.h>
  14. bool gpu_transpose_mat = false;
  15. static ID3D12Device *device = NULL;
  16. static ID3D12CommandQueue *queue;
  17. static IDXGISwapChain *window_swapchain;
  18. static ID3D12RootSignature *root_signature = NULL;
  19. static ID3D12CommandAllocator *command_allocator;
  20. static ID3D12GraphicsCommandList *command_list;
  21. static gpu_pipeline_t *current_pipeline;
  22. static D3D12_VIEWPORT current_viewport;
  23. static D3D12_RECT current_scissor;
  24. static gpu_buffer_t *current_vb;
  25. static gpu_buffer_t *current_ib;
  26. static D3D12_CPU_DESCRIPTOR_HANDLE target_descriptors[GPU_MAX_TEXTURES];
  27. static D3D12_CPU_DESCRIPTOR_HANDLE depth_handle;
  28. static D3D12_CPU_DESCRIPTOR_HANDLE *current_depth_handle;
  29. static gpu_texture_t *current_textures[GPU_MAX_TEXTURES] = {
  30. NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  31. NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
  32. };
  33. static bool window_vsync;
  34. static ID3D12DescriptorHeap *srv_heap;
  35. static int srv_heap_index = 0;
  36. static UINT64 fence_value;
  37. static ID3D12Fence *fence;
  38. static HANDLE fence_event;
  39. static UINT64 frame_fence_values[GPU_FRAMEBUFFER_COUNT] = {0, 0};
  40. static bool resized = false;
  41. static ID3D12Resource *readback_buffer = NULL;
  42. static int readback_buffer_size = 0;
  43. static ID3D12Resource *upload_buffer = NULL;
  44. static int upload_buffer_size = 0;
  45. static ID3D12Resource *resources_to_destroy[256];
  46. static int resources_to_destroy_count = 0;
  47. static D3D12_BLEND convert_blend_factor(gpu_blending_factor_t factor) {
  48. switch (factor) {
  49. case GPU_BLEND_ONE:
  50. return D3D12_BLEND_ONE;
  51. case GPU_BLEND_ZERO:
  52. return D3D12_BLEND_ZERO;
  53. case GPU_BLEND_SOURCE_ALPHA:
  54. return D3D12_BLEND_SRC_ALPHA;
  55. case GPU_BLEND_DEST_ALPHA:
  56. return D3D12_BLEND_DEST_ALPHA;
  57. case GPU_BLEND_INV_SOURCE_ALPHA:
  58. return D3D12_BLEND_INV_SRC_ALPHA;
  59. case GPU_BLEND_INV_DEST_ALPHA:
  60. return D3D12_BLEND_INV_DEST_ALPHA;
  61. }
  62. }
  63. static D3D12_CULL_MODE convert_cull_mode(gpu_cull_mode_t cull_mode) {
  64. switch (cull_mode) {
  65. case GPU_CULL_MODE_CLOCKWISE:
  66. return D3D12_CULL_MODE_FRONT;
  67. case GPU_CULL_MODE_COUNTERCLOCKWISE:
  68. return D3D12_CULL_MODE_BACK;
  69. default:
  70. return D3D12_CULL_MODE_NONE;
  71. }
  72. }
  73. static D3D12_COMPARISON_FUNC convert_compare_mode(gpu_compare_mode_t compare) {
  74. switch (compare) {
  75. default:
  76. case GPU_COMPARE_MODE_ALWAYS:
  77. return D3D12_COMPARISON_FUNC_ALWAYS;
  78. case GPU_COMPARE_MODE_NEVER:
  79. return D3D12_COMPARISON_FUNC_NEVER;
  80. case GPU_COMPARE_MODE_EQUAL:
  81. return D3D12_COMPARISON_FUNC_EQUAL;
  82. case GPU_COMPARE_MODE_LESS:
  83. return D3D12_COMPARISON_FUNC_LESS;
  84. }
  85. }
  86. static DXGI_FORMAT convert_format(gpu_texture_format_t format) {
  87. switch (format) {
  88. case GPU_TEXTURE_FORMAT_RGBA128:
  89. return DXGI_FORMAT_R32G32B32A32_FLOAT;
  90. case GPU_TEXTURE_FORMAT_RGBA64:
  91. return DXGI_FORMAT_R16G16B16A16_FLOAT;
  92. case GPU_TEXTURE_FORMAT_R32:
  93. return DXGI_FORMAT_R32_FLOAT;
  94. case GPU_TEXTURE_FORMAT_R16:
  95. return DXGI_FORMAT_R16_FLOAT;
  96. case GPU_TEXTURE_FORMAT_R8:
  97. return DXGI_FORMAT_R8_UNORM;
  98. case GPU_TEXTURE_FORMAT_D32:
  99. return DXGI_FORMAT_D32_FLOAT;
  100. case GPU_TEXTURE_FORMAT_RGBA32:
  101. default:
  102. return DXGI_FORMAT_R8G8B8A8_UNORM;
  103. }
  104. }
  105. static D3D12_RESOURCE_STATES convert_texture_state(gpu_texture_state_t state) {
  106. switch (state) {
  107. case GPU_TEXTURE_STATE_SHADER_RESOURCE:
  108. return D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
  109. case GPU_TEXTURE_STATE_RENDER_TARGET:
  110. return D3D12_RESOURCE_STATE_RENDER_TARGET;
  111. case GPU_TEXTURE_STATE_RENDER_TARGET_DEPTH:
  112. return D3D12_RESOURCE_STATE_DEPTH_WRITE;
  113. case GPU_TEXTURE_STATE_PRESENT:
  114. return D3D12_RESOURCE_STATE_PRESENT;
  115. }
  116. }
  117. static void wait_for_fence(ID3D12Fence *fence, UINT64 completion_value, HANDLE wait_event) {
  118. if (fence->lpVtbl->GetCompletedValue(fence) < completion_value) {
  119. fence->lpVtbl->SetEventOnCompletion(fence, completion_value, wait_event);
  120. WaitForSingleObject(wait_event, INFINITE);
  121. }
  122. }
  123. static void _gpu_barrier(ID3D12Resource *r, D3D12_RESOURCE_STATES state_before, D3D12_RESOURCE_STATES state_after) {
  124. D3D12_RESOURCE_BARRIER barrier = {
  125. .Transition.pResource = r,
  126. .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
  127. .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
  128. .Transition.StateBefore = state_before,
  129. .Transition.StateAfter = state_after,
  130. .Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES,
  131. };
  132. command_list->lpVtbl->ResourceBarrier(command_list, 1, &barrier);
  133. }
  134. void gpu_barrier(gpu_texture_t *render_target, gpu_texture_state_t state_after) {
  135. if (render_target->state == state_after) {
  136. return;
  137. }
  138. _gpu_barrier(render_target->impl.image, convert_texture_state(render_target->state), convert_texture_state(state_after));
  139. render_target->state = state_after;
  140. }
  141. void gpu_destroy() {
  142. gpu_wait();
  143. for (int i = 0; i < GPU_FRAMEBUFFER_COUNT; ++i) {
  144. gpu_texture_destroy(&framebuffers[i]);
  145. }
  146. if (framebuffer_depth.width > 0) {
  147. gpu_texture_destroy(&framebuffer_depth);
  148. }
  149. if (readback_buffer != NULL) {
  150. readback_buffer->lpVtbl->Release(readback_buffer);
  151. }
  152. if (upload_buffer != NULL) {
  153. upload_buffer->lpVtbl->Release(upload_buffer);
  154. }
  155. command_list->lpVtbl->Release(command_list);
  156. command_allocator->lpVtbl->Release(command_allocator);
  157. window_swapchain->lpVtbl->Release(window_swapchain);
  158. queue->lpVtbl->Release(queue);
  159. root_signature->lpVtbl->Release(root_signature);
  160. srv_heap->lpVtbl->Release(srv_heap);
  161. fence->lpVtbl->Release(fence);
  162. CloseHandle(fence_event);
  163. device->lpVtbl->Release(device);
  164. }
  165. void gpu_render_target_init2(gpu_texture_t *render_target, int width, int height, gpu_texture_format_t format, int framebuffer_index) {
  166. render_target->width = width;
  167. render_target->height = height;
  168. render_target->format = format;
  169. render_target->state = (framebuffer_index >= 0) ? GPU_TEXTURE_STATE_PRESENT : GPU_TEXTURE_STATE_SHADER_RESOURCE;
  170. render_target->buffer = NULL;
  171. DXGI_FORMAT dxgi_format = convert_format(format);
  172. D3D12_CLEAR_VALUE clear_value;
  173. clear_value.Format = dxgi_format;
  174. clear_value.Color[0] = 0.0f;
  175. clear_value.Color[1] = 0.0f;
  176. clear_value.Color[2] = 0.0f;
  177. clear_value.Color[3] = 0.0f;
  178. clear_value.DepthStencil.Depth = 1.0f;
  179. D3D12_HEAP_PROPERTIES heap_properties = {
  180. .Type = D3D12_HEAP_TYPE_DEFAULT,
  181. .CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
  182. .MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN,
  183. .CreationNodeMask = 1,
  184. .VisibleNodeMask = 1,
  185. };
  186. D3D12_RESOURCE_DESC resource_desc = {
  187. .Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D,
  188. .Alignment = 0,
  189. .Width = width,
  190. .Height = height,
  191. .DepthOrArraySize = 1,
  192. .MipLevels = 1,
  193. .Format = dxgi_format,
  194. .SampleDesc.Count = 1,
  195. .SampleDesc.Quality = 0,
  196. .Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN,
  197. .Flags = format == GPU_TEXTURE_FORMAT_D32 ? D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL : D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET,
  198. };
  199. D3D12_DESCRIPTOR_HEAP_DESC heap_desc = {
  200. .NumDescriptors = 1,
  201. .Type = format == GPU_TEXTURE_FORMAT_D32 ? D3D12_DESCRIPTOR_HEAP_TYPE_DSV : D3D12_DESCRIPTOR_HEAP_TYPE_RTV,
  202. .Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE,
  203. };
  204. device->lpVtbl->CreateDescriptorHeap(device, &heap_desc, &IID_ID3D12DescriptorHeap, &render_target->impl.rtv_descriptor_heap);
  205. if (framebuffer_index >= 0) {
  206. window_swapchain->lpVtbl->GetBuffer(window_swapchain, framebuffer_index, &IID_ID3D12Resource, &render_target->impl.image);
  207. }
  208. else {
  209. device->lpVtbl->CreateCommittedResource(device, &heap_properties, D3D12_HEAP_FLAG_NONE, &resource_desc, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE,
  210. &clear_value, &IID_ID3D12Resource, &render_target->impl.image);
  211. }
  212. D3D12_RENDER_TARGET_VIEW_DESC view_desc = {
  213. .Format = dxgi_format,
  214. .ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D,
  215. .Texture2D.MipSlice = 0,
  216. .Texture2D.PlaneSlice = 0,
  217. };
  218. D3D12_CPU_DESCRIPTOR_HANDLE handle;
  219. render_target->impl.rtv_descriptor_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(render_target->impl.rtv_descriptor_heap, &handle);
  220. if (format == GPU_TEXTURE_FORMAT_D32) {
  221. device->lpVtbl->CreateDepthStencilView(device, render_target->impl.image, NULL, handle);
  222. }
  223. else {
  224. device->lpVtbl->CreateRenderTargetView(device, render_target->impl.image, &view_desc, handle);
  225. }
  226. D3D12_DESCRIPTOR_HEAP_DESC descriptor_heap_desc = {
  227. .NumDescriptors = 1,
  228. .Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
  229. .NodeMask = 0,
  230. .Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE,
  231. };
  232. device->lpVtbl->CreateDescriptorHeap(device, &descriptor_heap_desc, &IID_ID3D12DescriptorHeap, &render_target->impl.srv_descriptor_heap);
  233. D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {
  234. .ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D,
  235. .Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING,
  236. .Format = format == GPU_TEXTURE_FORMAT_D32 ? DXGI_FORMAT_R32_FLOAT : dxgi_format,
  237. .Texture2D.MipLevels = 1,
  238. .Texture2D.MostDetailedMip = 0,
  239. .Texture2D.ResourceMinLODClamp = 0.0f,
  240. };
  241. render_target->impl.srv_descriptor_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(render_target->impl.srv_descriptor_heap, &handle);
  242. device->lpVtbl->CreateShaderResourceView(device, render_target->impl.image, &srv_desc, handle);
  243. }
  244. void gpu_init_internal(int depth_buffer_bits, bool vsync) {
  245. window_vsync = vsync;
  246. #ifdef _DEBUG
  247. ID3D12Debug *debug_controller = NULL;
  248. if (D3D12GetDebugInterface(&IID_ID3D12Debug, &debug_controller) == S_OK) {
  249. debug_controller->lpVtbl->EnableDebugLayer(debug_controller);
  250. }
  251. #endif
  252. D3D12CreateDevice(NULL, D3D_FEATURE_LEVEL_11_0, &IID_ID3D12Device, &device);
  253. // Root signature
  254. ID3DBlob *root_blob;
  255. ID3DBlob *error_blob;
  256. D3D12_ROOT_PARAMETER parameters[2] = {};
  257. D3D12_DESCRIPTOR_RANGE range = {
  258. .RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV,
  259. .NumDescriptors = (UINT)GPU_MAX_TEXTURES,
  260. .BaseShaderRegister = 0,
  261. .RegisterSpace = 0,
  262. .OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND,
  263. };
  264. parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
  265. parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  266. parameters[0].DescriptorTable.NumDescriptorRanges = 1;
  267. parameters[0].DescriptorTable.pDescriptorRanges = &range;
  268. parameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
  269. parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  270. parameters[1].Descriptor.ShaderRegister = 0;
  271. parameters[1].Descriptor.RegisterSpace = 0;
  272. D3D12_STATIC_SAMPLER_DESC samplers[GPU_MAX_TEXTURES];
  273. for (int i = 0; i < GPU_MAX_TEXTURES; ++i) {
  274. samplers[i].ShaderRegister = i;
  275. samplers[i].Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR;
  276. samplers[i].AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP;
  277. samplers[i].AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP;
  278. samplers[i].AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP;
  279. samplers[i].MipLODBias = 0;
  280. samplers[i].MaxAnisotropy = 16;
  281. samplers[i].ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER;
  282. samplers[i].BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK;
  283. samplers[i].MinLOD = 0.0f;
  284. samplers[i].MaxLOD = D3D12_FLOAT32_MAX;
  285. samplers[i].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  286. samplers[i].RegisterSpace = 0;
  287. }
  288. D3D12_ROOT_SIGNATURE_DESC root_signature_desc = {
  289. .NumParameters = 2,
  290. .pParameters = parameters,
  291. .NumStaticSamplers = GPU_MAX_TEXTURES,
  292. .pStaticSamplers = samplers,
  293. .Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT,
  294. };
  295. D3D12SerializeRootSignature(&root_signature_desc, D3D_ROOT_SIGNATURE_VERSION_1, &root_blob, &error_blob);
  296. device->lpVtbl->CreateRootSignature(device, 0, root_blob->lpVtbl->GetBufferPointer(root_blob), root_blob->lpVtbl->GetBufferSize(root_blob), &IID_ID3D12RootSignature, &root_signature);
  297. D3D12_COMMAND_QUEUE_DESC queue_desc = {
  298. .Flags = D3D12_COMMAND_QUEUE_FLAG_NONE,
  299. .Type = D3D12_COMMAND_LIST_TYPE_DIRECT,
  300. };
  301. device->lpVtbl->CreateCommandQueue(device, &queue_desc, &IID_ID3D12CommandQueue, &queue);
  302. HWND hwnd = iron_windows_window_handle();
  303. DXGI_SWAP_CHAIN_DESC swapchain_desc = {
  304. .BufferCount = GPU_FRAMEBUFFER_COUNT,
  305. .BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM,
  306. .BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT,
  307. .BufferDesc.Width = iron_window_width(),
  308. .BufferDesc.Height = iron_window_height(),
  309. .OutputWindow = hwnd,
  310. .SampleDesc.Count = 1,
  311. .SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD,
  312. .Windowed = true,
  313. };
  314. IDXGIFactory4 *dxgi_factory = NULL;
  315. CreateDXGIFactory1(&IID_IDXGIFactory4, &dxgi_factory);
  316. dxgi_factory->lpVtbl->CreateSwapChain(dxgi_factory, (IUnknown *)queue, &swapchain_desc, &window_swapchain);
  317. fence_value = 0;
  318. fence_event = CreateEvent(NULL, FALSE, FALSE, NULL);
  319. device->lpVtbl->CreateFence(device, 0, D3D12_FENCE_FLAG_NONE, &IID_ID3D12Fence, &fence);
  320. gpu_create_framebuffers(depth_buffer_bits);
  321. D3D12_DESCRIPTOR_HEAP_DESC heap_desc = {
  322. .NumDescriptors = GPU_CONSTANT_BUFFER_MULTIPLE,
  323. .Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
  324. .Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE,
  325. };
  326. device->lpVtbl->CreateDescriptorHeap(device, &heap_desc, &IID_ID3D12DescriptorHeap, &srv_heap);
  327. device->lpVtbl->CreateCommandAllocator(device, D3D12_COMMAND_LIST_TYPE_DIRECT, &IID_ID3D12CommandAllocator, &command_allocator);
  328. device->lpVtbl->CreateCommandList(device, 0, D3D12_COMMAND_LIST_TYPE_DIRECT, command_allocator, NULL, &IID_ID3D12CommandList, &command_list);
  329. }
  330. void gpu_begin_internal(gpu_texture_t **targets, int count, gpu_texture_t *depth_buffer, unsigned flags, unsigned color, float depth) {
  331. for (int i = 0; i < current_render_targets_count; ++i) {
  332. current_render_targets[i]->impl.rtv_descriptor_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(current_render_targets[i]->impl.rtv_descriptor_heap, &target_descriptors[i]);
  333. }
  334. if (depth_buffer != NULL) {
  335. depth_buffer->impl.rtv_descriptor_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(depth_buffer->impl.rtv_descriptor_heap, &depth_handle);
  336. current_depth_handle = &depth_handle;
  337. }
  338. else {
  339. current_depth_handle = NULL;
  340. }
  341. command_list->lpVtbl->OMSetRenderTargets(command_list, current_render_targets_count, &target_descriptors[0], false, current_depth_handle);
  342. gpu_texture_t *target = current_render_targets[0];
  343. gpu_viewport(0, 0, target->width, target->height);
  344. gpu_scissor(0, 0, target->width, target->height);
  345. if (flags & GPU_CLEAR_COLOR) {
  346. float clear_color[] = {((color & 0x00ff0000) >> 16) / 255.0f,
  347. ((color & 0x0000ff00) >> 8) / 255.0f,
  348. (color & 0x000000ff) / 255.0f,
  349. ((color & 0xff000000) >> 24) / 255.0f};
  350. D3D12_CPU_DESCRIPTOR_HANDLE handle;
  351. target->impl.rtv_descriptor_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(target->impl.rtv_descriptor_heap, &handle);
  352. command_list->lpVtbl->ClearRenderTargetView(command_list, handle, clear_color, 0, NULL);
  353. }
  354. if (flags & GPU_CLEAR_DEPTH && depth_buffer != NULL) {
  355. D3D12_CPU_DESCRIPTOR_HANDLE handle;
  356. depth_buffer->impl.rtv_descriptor_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(depth_buffer->impl.rtv_descriptor_heap, &handle);
  357. command_list->lpVtbl->ClearDepthStencilView(command_list, handle, D3D12_CLEAR_FLAG_DEPTH, depth, 0, 0, NULL);
  358. }
  359. }
  360. void gpu_end_internal() {
  361. for (int i = 0; i < current_render_targets_count; ++i) {
  362. gpu_barrier(current_render_targets[i],
  363. current_render_targets[i] == &framebuffers[framebuffer_index] ? GPU_TEXTURE_STATE_PRESENT : GPU_TEXTURE_STATE_SHADER_RESOURCE);
  364. }
  365. current_render_targets_count = 0;
  366. }
  367. void gpu_wait() {
  368. wait_for_fence(fence, fence_value, fence_event);
  369. }
  370. void gpu_execute_and_wait() {
  371. command_list->lpVtbl->Close(command_list);
  372. ID3D12CommandList *command_lists[] = {(ID3D12CommandList *)command_list};
  373. queue->lpVtbl->ExecuteCommandLists(queue, 1, command_lists);
  374. queue->lpVtbl->Signal(queue, fence, ++fence_value);
  375. gpu_wait();
  376. command_allocator->lpVtbl->Reset(command_allocator);
  377. command_list->lpVtbl->Reset(command_list, command_allocator, NULL);
  378. if (gpu_in_use) {
  379. command_list->lpVtbl->OMSetRenderTargets(command_list, current_render_targets_count, &target_descriptors[0], false, current_depth_handle);
  380. command_list->lpVtbl->SetPipelineState(command_list, current_pipeline->impl.pso);
  381. command_list->lpVtbl->SetGraphicsRootSignature(command_list, root_signature);
  382. command_list->lpVtbl->IASetVertexBuffers(command_list, 0, 1, (D3D12_VERTEX_BUFFER_VIEW *)&current_vb->impl.vertex_buffer_view);
  383. command_list->lpVtbl->IASetIndexBuffer(command_list, (D3D12_INDEX_BUFFER_VIEW *)&current_ib->impl.index_buffer_view);
  384. command_list->lpVtbl->RSSetViewports(command_list, 1, &current_viewport);
  385. command_list->lpVtbl->RSSetScissorRects(command_list, 1, &current_scissor);
  386. }
  387. }
  388. void gpu_present_internal() {
  389. gpu_execute_and_wait();
  390. window_swapchain->lpVtbl->Present(window_swapchain, window_vsync, 0);
  391. queue->lpVtbl->Signal(queue, fence, ++fence_value);
  392. frame_fence_values[framebuffer_index] = fence_value;
  393. framebuffer_index = (framebuffer_index + 1) % GPU_FRAMEBUFFER_COUNT;
  394. wait_for_fence(fence, frame_fence_values[framebuffer_index], fence_event);
  395. if (resized) {
  396. framebuffer_index = 0;
  397. for (int i = 0; i < GPU_FRAMEBUFFER_COUNT; ++i) {
  398. gpu_texture_destroy(&framebuffers[i]);
  399. }
  400. if (framebuffer_depth.width > 0) {
  401. gpu_texture_destroy(&framebuffer_depth);
  402. }
  403. window_swapchain->lpVtbl->ResizeBuffers(window_swapchain, GPU_FRAMEBUFFER_COUNT, iron_window_width(), iron_window_height(), DXGI_FORMAT_R8G8B8A8_UNORM, 0);
  404. for (int i = 0; i < GPU_FRAMEBUFFER_COUNT; ++i) {
  405. gpu_render_target_init2(&framebuffers[i], iron_window_width(), iron_window_height(), GPU_TEXTURE_FORMAT_RGBA32, i);
  406. }
  407. if (framebuffer_depth.width > 0) {
  408. gpu_render_target_init2(&framebuffer_depth, iron_window_width(), iron_window_height(), GPU_TEXTURE_FORMAT_D32, -1);
  409. }
  410. resized = false;
  411. }
  412. while (resources_to_destroy_count > 0) {
  413. resources_to_destroy_count--;
  414. ID3D12Resource *r = resources_to_destroy[resources_to_destroy_count];
  415. r->lpVtbl->Release(r);
  416. }
  417. }
  418. void gpu_resize_internal(int width, int height) {
  419. if (fence_value == 0) {
  420. return;
  421. }
  422. resized = true;
  423. }
  424. bool gpu_raytrace_supported() {
  425. D3D12_FEATURE_DATA_D3D12_OPTIONS5 options;
  426. if (device->lpVtbl->CheckFeatureSupport(device, D3D12_FEATURE_D3D12_OPTIONS5, &options, sizeof(options)) == S_OK) {
  427. return options.RaytracingTier >= D3D12_RAYTRACING_TIER_1_0;
  428. }
  429. return false;
  430. }
  431. void gpu_set_constant_buffer(gpu_buffer_t *buffer, int offset, size_t size) {
  432. command_list->lpVtbl->SetGraphicsRootConstantBufferView(command_list, 1, buffer->impl.buffer->lpVtbl->GetGPUVirtualAddress(buffer->impl.buffer) + offset);
  433. }
  434. void gpu_internal_set_textures() {
  435. UINT srv_step = device->lpVtbl->GetDescriptorHandleIncrementSize(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
  436. if (srv_heap_index + GPU_MAX_TEXTURES > GPU_CONSTANT_BUFFER_MULTIPLE) {
  437. srv_heap_index = 0;
  438. }
  439. D3D12_CPU_DESCRIPTOR_HANDLE cpu_base;
  440. D3D12_GPU_DESCRIPTOR_HANDLE gpu_base;
  441. srv_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(srv_heap, &cpu_base);
  442. srv_heap->lpVtbl->GetGPUDescriptorHandleForHeapStart(srv_heap, &gpu_base);
  443. cpu_base.ptr += srv_heap_index * srv_step;
  444. gpu_base.ptr += srv_heap_index * srv_step;
  445. for (int i = 0; i < GPU_MAX_TEXTURES; ++i) {
  446. if (current_textures[i] != NULL) {
  447. D3D12_CPU_DESCRIPTOR_HANDLE source_cpu;
  448. ID3D12DescriptorHeap *source_heap = current_textures[i]->impl.srv_descriptor_heap;
  449. source_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(source_heap, &source_cpu);
  450. device->lpVtbl->CopyDescriptorsSimple(device, 1, cpu_base, source_cpu, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
  451. cpu_base.ptr += srv_step;
  452. srv_heap_index++;
  453. }
  454. }
  455. ID3D12DescriptorHeap *heaps[] = {srv_heap};
  456. command_list->lpVtbl->SetDescriptorHeaps(command_list, 1, heaps);
  457. command_list->lpVtbl->SetGraphicsRootDescriptorTable(command_list, 0, gpu_base);
  458. }
  459. void gpu_draw_internal() {
  460. gpu_internal_set_textures();
  461. command_list->lpVtbl->IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
  462. command_list->lpVtbl->DrawIndexedInstanced(command_list, current_ib->count, 1, 0, 0, 0);
  463. }
  464. void gpu_viewport(int x, int y, int width, int height) {
  465. current_viewport = (D3D12_VIEWPORT){
  466. .TopLeftX = (float)x,
  467. .TopLeftY = (float)y,
  468. .Width = (float)width,
  469. .Height = (float)height,
  470. .MinDepth = 0.0f,
  471. .MaxDepth = 1.0f,
  472. };
  473. command_list->lpVtbl->RSSetViewports(command_list, 1, &current_viewport);
  474. }
  475. void gpu_scissor(int x, int y, int width, int height) {
  476. current_scissor = (D3D12_RECT){
  477. .left = x,
  478. .top = y,
  479. .right = x + width,
  480. .bottom = y + height,
  481. };
  482. command_list->lpVtbl->RSSetScissorRects(command_list, 1, &current_scissor);
  483. }
  484. void gpu_disable_scissor() {
  485. current_scissor = (D3D12_RECT){
  486. .left = 0,
  487. .top = 0,
  488. .right = current_render_targets[0]->width,
  489. .bottom = current_render_targets[0]->height,
  490. };
  491. command_list->lpVtbl->RSSetScissorRects(command_list, 1, &current_scissor);
  492. }
  493. void gpu_set_pipeline(gpu_pipeline_t *pipeline) {
  494. current_pipeline = pipeline;
  495. command_list->lpVtbl->SetPipelineState(command_list, pipeline->impl.pso);
  496. command_list->lpVtbl->SetGraphicsRootSignature(command_list, root_signature);
  497. for (int i = 0; i < GPU_MAX_TEXTURES; ++i) {
  498. current_textures[i] = NULL;
  499. }
  500. }
  501. void gpu_set_vertex_buffer(gpu_buffer_t *buffer) {
  502. current_vb = buffer;
  503. command_list->lpVtbl->IASetVertexBuffers(command_list, 0, 1, (D3D12_VERTEX_BUFFER_VIEW *)&buffer->impl.vertex_buffer_view);
  504. }
  505. void gpu_set_index_buffer(gpu_buffer_t *buffer) {
  506. current_ib = buffer;
  507. command_list->lpVtbl->IASetIndexBuffer(command_list, (D3D12_INDEX_BUFFER_VIEW *)&buffer->impl.index_buffer_view);
  508. }
  509. void gpu_get_render_target_pixels(gpu_texture_t *render_target, uint8_t *data) {
  510. D3D12_RESOURCE_DESC desc;
  511. render_target->impl.image->lpVtbl->GetDesc(render_target->impl.image, &desc);
  512. DXGI_FORMAT dxgi_format = desc.Format;
  513. int format_size = gpu_texture_format_size(render_target->format);
  514. int row_pitch = render_target->width * format_size;
  515. int align = row_pitch % D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
  516. if (align != 0) {
  517. row_pitch = row_pitch + (D3D12_TEXTURE_DATA_PITCH_ALIGNMENT - align);
  518. }
  519. int new_readback_buffer_size = row_pitch * render_target->height;
  520. if (new_readback_buffer_size < (2048 * 2048 * 4)) {
  521. new_readback_buffer_size = (2048 * 2048 * 4);
  522. }
  523. if (readback_buffer_size < new_readback_buffer_size) {
  524. readback_buffer_size = new_readback_buffer_size;
  525. if (readback_buffer != NULL) {
  526. readback_buffer->lpVtbl->Release(readback_buffer);
  527. }
  528. D3D12_HEAP_PROPERTIES heap_properties = {
  529. .Type = D3D12_HEAP_TYPE_READBACK,
  530. .CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
  531. .MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN,
  532. .CreationNodeMask = 1,
  533. .VisibleNodeMask = 1,
  534. };
  535. D3D12_RESOURCE_DESC resource_desc = {
  536. .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
  537. .Alignment = 0,
  538. .Width = readback_buffer_size,
  539. .Height = 1,
  540. .DepthOrArraySize = 1,
  541. .MipLevels = 1,
  542. .Format = DXGI_FORMAT_UNKNOWN,
  543. .SampleDesc.Count = 1,
  544. .SampleDesc.Quality = 0,
  545. .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
  546. .Flags = D3D12_RESOURCE_FLAG_NONE,
  547. };
  548. device->lpVtbl->CreateCommittedResource(device, &heap_properties, D3D12_HEAP_FLAG_NONE, &resource_desc, D3D12_RESOURCE_STATE_COMMON, NULL,
  549. &IID_ID3D12Resource, &readback_buffer);
  550. }
  551. // Copy render target to readback buffer
  552. D3D12_RESOURCE_BARRIER barrier = {
  553. .Transition.pResource = render_target->impl.image,
  554. .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
  555. .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
  556. .Transition.StateBefore = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE,
  557. .Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE,
  558. .Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES,
  559. };
  560. command_list->lpVtbl->ResourceBarrier(command_list, 1, &barrier);
  561. D3D12_TEXTURE_COPY_LOCATION source = {
  562. .pResource = render_target->impl.image,
  563. .Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX,
  564. .SubresourceIndex = 0,
  565. };
  566. D3D12_TEXTURE_COPY_LOCATION dest = {
  567. .pResource = readback_buffer,
  568. .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
  569. .PlacedFootprint.Offset = 0,
  570. .PlacedFootprint.Footprint.Format = dxgi_format,
  571. .PlacedFootprint.Footprint.Width = render_target->width,
  572. .PlacedFootprint.Footprint.Height = render_target->height,
  573. .PlacedFootprint.Footprint.Depth = 1,
  574. .PlacedFootprint.Footprint.RowPitch = row_pitch,
  575. };
  576. command_list->lpVtbl->CopyTextureRegion(command_list , &dest, 0, 0, 0, &source, NULL);
  577. barrier = (D3D12_RESOURCE_BARRIER){
  578. .Transition.pResource = render_target->impl.image,
  579. .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
  580. .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
  581. .Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE,
  582. .Transition.StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE,
  583. .Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES,
  584. };
  585. command_list->lpVtbl->ResourceBarrier(command_list, 1, &barrier);
  586. gpu_execute_and_wait();
  587. // Read buffer
  588. void *p;
  589. readback_buffer->lpVtbl->Map(readback_buffer, 0, NULL, &p);
  590. memcpy(data, p, render_target->width * render_target->height * format_size);
  591. readback_buffer->lpVtbl->Unmap(readback_buffer, 0, NULL);
  592. }
  593. void gpu_set_texture(int unit, gpu_texture_t *texture) {
  594. current_textures[unit] = texture;
  595. }
  596. void gpu_pipeline_destroy(gpu_pipeline_t *pipe) {
  597. if (pipe->impl.pso != NULL) {
  598. pipe->impl.pso->lpVtbl->Release(pipe->impl.pso);
  599. pipe->impl.pso = NULL;
  600. }
  601. }
  602. void gpu_pipeline_compile(gpu_pipeline_t *pipe) {
  603. int vertex_attribute_count = pipe->input_layout->size;
  604. D3D12_INPUT_ELEMENT_DESC *vertex_desc = (D3D12_INPUT_ELEMENT_DESC *)alloca(sizeof(D3D12_INPUT_ELEMENT_DESC) * vertex_attribute_count);
  605. ZeroMemory(vertex_desc, sizeof(D3D12_INPUT_ELEMENT_DESC) * vertex_attribute_count);
  606. for (int i = 0; i < pipe->input_layout->size; ++i) {
  607. vertex_desc[i].SemanticName = "TEXCOORD";
  608. vertex_desc[i].SemanticIndex = i;
  609. vertex_desc[i].InputSlot = 0;
  610. vertex_desc[i].AlignedByteOffset = (i == 0) ? 0 : D3D12_APPEND_ALIGNED_ELEMENT;
  611. vertex_desc[i].InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
  612. vertex_desc[i].InstanceDataStepRate = 0;
  613. switch (pipe->input_layout->elements[i].data) {
  614. case GPU_VERTEX_DATA_F32_1X:
  615. vertex_desc[i].Format = DXGI_FORMAT_R32_FLOAT;
  616. break;
  617. case GPU_VERTEX_DATA_F32_2X:
  618. vertex_desc[i].Format = DXGI_FORMAT_R32G32_FLOAT;
  619. break;
  620. case GPU_VERTEX_DATA_F32_3X:
  621. vertex_desc[i].Format = DXGI_FORMAT_R32G32B32_FLOAT;
  622. break;
  623. case GPU_VERTEX_DATA_F32_4X:
  624. vertex_desc[i].Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
  625. break;
  626. case GPU_VERTEX_DATA_I16_2X_NORM:
  627. vertex_desc[i].Format = DXGI_FORMAT_R16G16_SNORM;
  628. break;
  629. case GPU_VERTEX_DATA_I16_4X_NORM:
  630. vertex_desc[i].Format = DXGI_FORMAT_R16G16B16A16_SNORM;
  631. break;
  632. default:
  633. break;
  634. }
  635. }
  636. const D3D12_DEPTH_STENCILOP_DESC default_stencil_op = {
  637. D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP, D3D12_COMPARISON_FUNC_NEVER
  638. };
  639. D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {
  640. .VS.BytecodeLength = pipe->vertex_shader->impl.length,
  641. .VS.pShaderBytecode = pipe->vertex_shader->impl.data,
  642. .PS.BytecodeLength = pipe->fragment_shader->impl.length,
  643. .PS.pShaderBytecode = pipe->fragment_shader->impl.data,
  644. .pRootSignature = root_signature,
  645. .NumRenderTargets = pipe->color_attachment_count,
  646. .DSVFormat = DXGI_FORMAT_UNKNOWN,
  647. .InputLayout.NumElements = vertex_attribute_count,
  648. .InputLayout.pInputElementDescs = vertex_desc,
  649. .RasterizerState.FillMode = D3D12_FILL_MODE_SOLID,
  650. .RasterizerState.CullMode = convert_cull_mode(pipe->cull_mode),
  651. .RasterizerState.FrontCounterClockwise = FALSE,
  652. .RasterizerState.DepthBias = D3D12_DEFAULT_DEPTH_BIAS,
  653. .RasterizerState.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP,
  654. .RasterizerState.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS,
  655. .RasterizerState.DepthClipEnable = TRUE,
  656. .RasterizerState.MultisampleEnable = FALSE,
  657. .RasterizerState.AntialiasedLineEnable = FALSE,
  658. .RasterizerState.ForcedSampleCount = 0,
  659. .RasterizerState.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF,
  660. .BlendState.AlphaToCoverageEnable = FALSE,
  661. .BlendState.IndependentBlendEnable = FALSE,
  662. .DepthStencilState.DepthEnable = TRUE,
  663. .DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL,
  664. .DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS,
  665. .DepthStencilState.StencilEnable = FALSE,
  666. .DepthStencilState.StencilReadMask = D3D12_DEFAULT_STENCIL_READ_MASK,
  667. .DepthStencilState.StencilWriteMask = D3D12_DEFAULT_STENCIL_WRITE_MASK,
  668. .DepthStencilState.DepthEnable = pipe->depth_mode != GPU_COMPARE_MODE_ALWAYS,
  669. .DepthStencilState.DepthWriteMask = pipe->depth_write ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO,
  670. .DepthStencilState.DepthFunc = convert_compare_mode(pipe->depth_mode),
  671. .DepthStencilState.StencilEnable = false,
  672. .DSVFormat = DXGI_FORMAT_D32_FLOAT,
  673. .SampleDesc.Count = 1,
  674. .SampleMask = 0xFFFFFFFF,
  675. .PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE,
  676. .DepthStencilState.FrontFace = default_stencil_op,
  677. .DepthStencilState.BackFace = default_stencil_op,
  678. };
  679. for (int i = 0; i < pipe->color_attachment_count; ++i) {
  680. psoDesc.RTVFormats[i] = convert_format(pipe->color_attachment[i]);
  681. }
  682. psoDesc.BlendState.IndependentBlendEnable = true;
  683. for (UINT i = 0; i < D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; ++i) {
  684. psoDesc.BlendState.RenderTarget[i].BlendEnable = pipe->blend_source != GPU_BLEND_ONE || pipe->blend_destination != GPU_BLEND_ZERO ||
  685. pipe->alpha_blend_source != GPU_BLEND_ONE || pipe->alpha_blend_destination != GPU_BLEND_ZERO;
  686. psoDesc.BlendState.RenderTarget[i].SrcBlend = convert_blend_factor(pipe->blend_source);
  687. psoDesc.BlendState.RenderTarget[i].DestBlend = convert_blend_factor(pipe->blend_destination);
  688. psoDesc.BlendState.RenderTarget[i].BlendOp = D3D12_BLEND_OP_ADD;
  689. psoDesc.BlendState.RenderTarget[i].SrcBlendAlpha = convert_blend_factor(pipe->alpha_blend_source);
  690. psoDesc.BlendState.RenderTarget[i].DestBlendAlpha = convert_blend_factor(pipe->alpha_blend_destination);
  691. psoDesc.BlendState.RenderTarget[i].BlendOpAlpha = D3D12_BLEND_OP_ADD;
  692. psoDesc.BlendState.RenderTarget[i].RenderTargetWriteMask =
  693. (((pipe->color_write_mask_red[i] ? D3D12_COLOR_WRITE_ENABLE_RED : 0) |
  694. (pipe->color_write_mask_green[i] ? D3D12_COLOR_WRITE_ENABLE_GREEN : 0)) |
  695. (pipe->color_write_mask_blue[i] ? D3D12_COLOR_WRITE_ENABLE_BLUE : 0)) |
  696. (pipe->color_write_mask_alpha[i] ? D3D12_COLOR_WRITE_ENABLE_ALPHA : 0);
  697. }
  698. device->lpVtbl->CreateGraphicsPipelineState(device, &psoDesc, &IID_ID3D12PipelineState, &pipe->impl.pso);
  699. }
  700. void gpu_shader_init(gpu_shader_t *shader, const void *_data, size_t length, gpu_shader_type_t type) {
  701. uint8_t *data = (uint8_t *)_data;
  702. shader->impl.length = (int)length;
  703. shader->impl.data = (uint8_t *)malloc(shader->impl.length);
  704. memcpy(shader->impl.data, data, shader->impl.length);
  705. }
  706. void gpu_shader_destroy(gpu_shader_t *shader) {
  707. free(shader->impl.data);
  708. }
  709. void gpu_texture_init_from_bytes(gpu_texture_t *texture, void *data, int width, int height, gpu_texture_format_t format) {
  710. texture->width = width;
  711. texture->height = height;
  712. texture->format = format;
  713. texture->state = GPU_TEXTURE_STATE_SHADER_RESOURCE;
  714. texture->buffer = NULL;
  715. texture->impl.rtv_descriptor_heap = NULL;
  716. DXGI_FORMAT dxgi_format = convert_format(format);
  717. int format_size = gpu_texture_format_size(format);
  718. D3D12_HEAP_PROPERTIES heap_properties = {
  719. .Type = D3D12_HEAP_TYPE_DEFAULT,
  720. .CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
  721. .MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN,
  722. .CreationNodeMask = 1,
  723. .VisibleNodeMask = 1,
  724. };
  725. D3D12_RESOURCE_DESC resource_desc = {
  726. .Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D,
  727. .Alignment = 0,
  728. .Width = texture->width,
  729. .Height = texture->height,
  730. .DepthOrArraySize = 1,
  731. .MipLevels = 1,
  732. .Format = dxgi_format,
  733. .SampleDesc.Count = 1,
  734. .SampleDesc.Quality = 0,
  735. .Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN,
  736. .Flags = D3D12_RESOURCE_FLAG_NONE,
  737. };
  738. device->lpVtbl->CreateCommittedResource(device, &heap_properties, D3D12_HEAP_FLAG_NONE, &resource_desc,
  739. D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, NULL, &IID_ID3D12Resource, &texture->impl.image);
  740. D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint;
  741. UINT64 _upload_size;
  742. device->lpVtbl->GetCopyableFootprints(device, &resource_desc, 0, 1, 0, &footprint, NULL, NULL, &_upload_size);
  743. int new_upload_buffer_size = _upload_size;
  744. if (new_upload_buffer_size < (1024 * 1024 * 4)) {
  745. new_upload_buffer_size = (1024 * 1024 * 4);
  746. }
  747. if (upload_buffer_size < new_upload_buffer_size) {
  748. upload_buffer_size = new_upload_buffer_size;
  749. if (upload_buffer != NULL) {
  750. upload_buffer->lpVtbl->Release(upload_buffer);
  751. }
  752. D3D12_HEAP_PROPERTIES heap_properties_upload = {
  753. .Type = D3D12_HEAP_TYPE_UPLOAD,
  754. .CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
  755. .MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN,
  756. .CreationNodeMask = 1,
  757. .VisibleNodeMask = 1,
  758. };
  759. D3D12_RESOURCE_DESC resource_desc_upload = {
  760. .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
  761. .Alignment = 0,
  762. .Width = _upload_size,
  763. .Height = 1,
  764. .DepthOrArraySize = 1,
  765. .MipLevels = 1,
  766. .Format = DXGI_FORMAT_UNKNOWN,
  767. .SampleDesc.Count = 1,
  768. .SampleDesc.Quality = 0,
  769. .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
  770. .Flags = D3D12_RESOURCE_FLAG_NONE,
  771. };
  772. device->lpVtbl->CreateCommittedResource(device, &heap_properties_upload, D3D12_HEAP_FLAG_NONE, &resource_desc_upload,
  773. D3D12_RESOURCE_STATE_GENERIC_READ, NULL, &IID_ID3D12Resource, &upload_buffer);
  774. }
  775. int stride = (int)ceilf(_upload_size / (float)(height * D3D12_TEXTURE_DATA_PITCH_ALIGNMENT)) * D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
  776. BYTE *pixel;
  777. upload_buffer->lpVtbl->Map(upload_buffer, 0, NULL, (void **)&pixel);
  778. for (int y = 0; y < texture->height; ++y) {
  779. memcpy(&pixel[y * stride], &((uint8_t *)data)[y * texture->width * format_size], texture->width * format_size);
  780. }
  781. upload_buffer->lpVtbl->Unmap(upload_buffer, 0, NULL);
  782. D3D12_DESCRIPTOR_HEAP_DESC descriptor_heap_desc = {
  783. .NumDescriptors = 1,
  784. .Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
  785. .NodeMask = 0,
  786. .Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE,
  787. };
  788. device->lpVtbl->CreateDescriptorHeap(device, &descriptor_heap_desc, &IID_ID3D12DescriptorHeap, &texture->impl.srv_descriptor_heap);
  789. D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {
  790. .ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D,
  791. .Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING,
  792. .Format = dxgi_format,
  793. .Texture2D.MipLevels = 1,
  794. .Texture2D.MostDetailedMip = 0,
  795. .Texture2D.ResourceMinLODClamp = 0.0f,
  796. };
  797. D3D12_CPU_DESCRIPTOR_HANDLE handle;
  798. texture->impl.srv_descriptor_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(texture->impl.srv_descriptor_heap, &handle);
  799. device->lpVtbl->CreateShaderResourceView(device, texture->impl.image, &srv_desc, handle);
  800. D3D12_RESOURCE_BARRIER barrier = {
  801. .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
  802. .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
  803. .Transition.pResource = texture->impl.image,
  804. .Transition.StateBefore = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE,
  805. .Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST,
  806. .Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES,
  807. };
  808. command_list->lpVtbl->ResourceBarrier(command_list, 1, &barrier);
  809. D3D12_TEXTURE_COPY_LOCATION source = {
  810. .pResource = upload_buffer,
  811. .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
  812. .PlacedFootprint = footprint,
  813. };
  814. D3D12_TEXTURE_COPY_LOCATION destination = {
  815. .pResource = texture->impl.image,
  816. .Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX,
  817. .SubresourceIndex = 0,
  818. };
  819. command_list->lpVtbl->CopyTextureRegion(command_list, &destination, 0, 0, 0, &source, NULL);
  820. barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST,
  821. barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE,
  822. command_list->lpVtbl->ResourceBarrier(command_list, 1, &barrier);
  823. gpu_execute_and_wait(); ////
  824. }
  825. void gpu_texture_destroy(gpu_texture_t *render_target) {
  826. if (render_target->impl.image != NULL) {
  827. render_target->impl.image->lpVtbl->Release(render_target->impl.image);
  828. }
  829. if (render_target->impl.rtv_descriptor_heap != NULL) {
  830. render_target->impl.rtv_descriptor_heap->lpVtbl->Release(render_target->impl.rtv_descriptor_heap);
  831. }
  832. if (render_target->impl.srv_descriptor_heap != NULL) {
  833. render_target->impl.srv_descriptor_heap->lpVtbl->Release(render_target->impl.srv_descriptor_heap);
  834. }
  835. }
  836. void gpu_render_target_init(gpu_texture_t *target, int width, int height, gpu_texture_format_t format) {
  837. gpu_render_target_init2(target, width, height, format, -1);
  838. }
  839. void _gpu_buffer_init(ID3D12Resource **buffer, int size, D3D12_HEAP_TYPE heap_type) {
  840. if (*buffer != NULL) {
  841. assert(resources_to_destroy_count < 256);
  842. resources_to_destroy[resources_to_destroy_count] = *buffer;
  843. resources_to_destroy_count++;
  844. }
  845. D3D12_HEAP_PROPERTIES heap_properties = {
  846. .Type = heap_type,
  847. .CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
  848. .MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN,
  849. .CreationNodeMask = 1,
  850. .VisibleNodeMask = 1,
  851. };
  852. D3D12_RESOURCE_DESC resource_desc = {
  853. .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
  854. .Alignment = 0,
  855. .Width = size,
  856. .Height = 1,
  857. .DepthOrArraySize = 1,
  858. .MipLevels = 1,
  859. .Format = DXGI_FORMAT_UNKNOWN,
  860. .SampleDesc.Count = 1,
  861. .SampleDesc.Quality = 0,
  862. .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
  863. .Flags = D3D12_RESOURCE_FLAG_NONE,
  864. };
  865. device->lpVtbl->CreateCommittedResource(device, &heap_properties, D3D12_HEAP_FLAG_NONE, &resource_desc,
  866. heap_type == D3D12_HEAP_TYPE_UPLOAD ? D3D12_RESOURCE_STATE_GENERIC_READ : D3D12_RESOURCE_STATE_COMMON, NULL, &IID_ID3D12Resource, buffer);
  867. }
  868. void gpu_vertex_buffer_init(gpu_buffer_t *buffer, int count, gpu_vertex_structure_t *structure) {
  869. buffer->count = count;
  870. buffer->stride = 0;
  871. for (int i = 0; i < structure->size; ++i) {
  872. buffer->stride += gpu_vertex_data_size(structure->elements[i].data);
  873. }
  874. buffer->impl.vertex_buffer_view.SizeInBytes = buffer->stride * buffer->count;
  875. buffer->impl.vertex_buffer_view.StrideInBytes = buffer->stride;
  876. buffer->impl.buffer = NULL;
  877. }
  878. void *gpu_vertex_buffer_lock(gpu_buffer_t *buffer) {
  879. _gpu_buffer_init(&buffer->impl.buffer, buffer->stride * buffer->count, D3D12_HEAP_TYPE_UPLOAD);
  880. D3D12_RANGE range = {
  881. .Begin = 0,
  882. .End = buffer->count * buffer->stride,
  883. };
  884. void *p;
  885. buffer->impl.buffer->lpVtbl->Map(buffer->impl.buffer, 0, &range, &p);
  886. return p;
  887. }
  888. void gpu_vertex_buffer_unlock(gpu_buffer_t *buffer) {
  889. D3D12_RANGE range = {
  890. .Begin = 0,
  891. .End = buffer->count * buffer->stride,
  892. };
  893. buffer->impl.buffer->lpVtbl->Unmap(buffer->impl.buffer, 0, &range);
  894. ID3D12Resource *upload_buffer = buffer->impl.buffer;
  895. _gpu_buffer_init(&buffer->impl.buffer, buffer->stride * buffer->count, D3D12_HEAP_TYPE_DEFAULT);
  896. _gpu_barrier(buffer->impl.buffer, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_COPY_DEST);
  897. command_list->lpVtbl->CopyBufferRegion(command_list, buffer->impl.buffer, 0, upload_buffer, 0, buffer->stride * buffer->count);
  898. _gpu_barrier(buffer->impl.buffer, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER);
  899. buffer->impl.vertex_buffer_view.BufferLocation = buffer->impl.buffer->lpVtbl->GetGPUVirtualAddress(buffer->impl.buffer);
  900. }
  901. void gpu_index_buffer_init(gpu_buffer_t *buffer, int count) {
  902. buffer->count = count;
  903. buffer->impl.index_buffer_view.SizeInBytes = count * 4;
  904. buffer->impl.index_buffer_view.Format = DXGI_FORMAT_R32_UINT;
  905. buffer->impl.buffer = NULL;
  906. }
  907. void *gpu_index_buffer_lock(gpu_buffer_t *buffer) {
  908. _gpu_buffer_init(&buffer->impl.buffer, buffer->count * 4, D3D12_HEAP_TYPE_UPLOAD);
  909. D3D12_RANGE range = {
  910. .Begin = 0,
  911. .End = buffer->count * 4,
  912. };
  913. void *p;
  914. buffer->impl.buffer->lpVtbl->Map(buffer->impl.buffer, 0, &range, &p);
  915. return p;
  916. }
  917. void gpu_index_buffer_unlock(gpu_buffer_t *buffer) {
  918. D3D12_RANGE range = {
  919. .Begin = 0,
  920. .End = buffer->count * 4,
  921. };
  922. buffer->impl.buffer->lpVtbl->Unmap(buffer->impl.buffer, 0, &range);
  923. ID3D12Resource *upload_buffer = buffer->impl.buffer;
  924. _gpu_buffer_init(&buffer->impl.buffer, buffer->count * 4, D3D12_HEAP_TYPE_DEFAULT);
  925. _gpu_barrier(buffer->impl.buffer, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_COPY_DEST);
  926. command_list->lpVtbl->CopyBufferRegion(command_list, buffer->impl.buffer, 0, upload_buffer, 0, buffer->count * 4);
  927. _gpu_barrier(buffer->impl.buffer, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_INDEX_BUFFER);
  928. buffer->impl.index_buffer_view.BufferLocation = buffer->impl.buffer->lpVtbl->GetGPUVirtualAddress(buffer->impl.buffer);
  929. }
  930. void gpu_constant_buffer_init(gpu_buffer_t *buffer, int size) {
  931. buffer->count = size;
  932. buffer->data = NULL;
  933. buffer->impl.buffer = NULL;
  934. _gpu_buffer_init(&buffer->impl.buffer, size, D3D12_HEAP_TYPE_UPLOAD);
  935. }
  936. void gpu_constant_buffer_destroy(gpu_buffer_t *buffer) {
  937. buffer->impl.buffer->lpVtbl->Release(buffer->impl.buffer);
  938. }
  939. void gpu_constant_buffer_lock(gpu_buffer_t *buffer, int start, int count) {
  940. buffer->impl.last_start = start;
  941. buffer->impl.last_count = count;
  942. D3D12_RANGE range = {
  943. .Begin = start,
  944. .End = start + count,
  945. };
  946. uint8_t *p;
  947. buffer->impl.buffer->lpVtbl->Map(buffer->impl.buffer, 0, &range, (void **)&p);
  948. buffer->data = &p[start];
  949. }
  950. void gpu_constant_buffer_unlock(gpu_buffer_t *buffer) {
  951. D3D12_RANGE range = {
  952. .Begin = buffer->impl.last_start,
  953. .End = buffer->impl.last_start + buffer->impl.last_count,
  954. };
  955. buffer->impl.buffer->lpVtbl->Unmap(buffer->impl.buffer, 0, &range);
  956. buffer->data = NULL;
  957. }
  958. void gpu_buffer_destroy(gpu_buffer_t *buffer) {
  959. buffer->impl.buffer->lpVtbl->Release(buffer->impl.buffer);
  960. buffer->impl.buffer = NULL;
  961. }
  962. static const wchar_t *hit_group_name = L"hitgroup";
  963. static const wchar_t *raygen_shader_name = L"raygeneration";
  964. static const wchar_t *closesthit_shader_name = L"closesthit";
  965. static const wchar_t *miss_shader_name = L"miss";
  966. typedef struct inst {
  967. iron_matrix4x4_t m;
  968. int i;
  969. } inst_t;
  970. static ID3D12Device5 *dxrDevice = NULL;
  971. static ID3D12GraphicsCommandList4 *dxrCommandList = NULL;
  972. static ID3D12RootSignature *dxrRootSignature = NULL;
  973. static ID3D12DescriptorHeap *descriptorHeap = NULL;
  974. static gpu_raytrace_acceleration_structure_t *accel;
  975. static gpu_raytrace_pipeline_t *pipeline;
  976. static gpu_texture_t *output = NULL;
  977. static D3D12_CPU_DESCRIPTOR_HANDLE outputCpuDescriptor;
  978. static D3D12_GPU_DESCRIPTOR_HANDLE outputDescriptorHandle;
  979. static D3D12_GPU_DESCRIPTOR_HANDLE vbgpuDescriptorHandle;
  980. static D3D12_GPU_DESCRIPTOR_HANDLE ibgpuDescriptorHandle;
  981. static D3D12_GPU_DESCRIPTOR_HANDLE tex0gpuDescriptorHandle;
  982. static D3D12_GPU_DESCRIPTOR_HANDLE tex1gpuDescriptorHandle;
  983. static D3D12_GPU_DESCRIPTOR_HANDLE tex2gpuDescriptorHandle;
  984. static D3D12_GPU_DESCRIPTOR_HANDLE texenvgpuDescriptorHandle;
  985. static D3D12_GPU_DESCRIPTOR_HANDLE texsobolgpuDescriptorHandle;
  986. static D3D12_GPU_DESCRIPTOR_HANDLE texscramblegpuDescriptorHandle;
  987. static D3D12_GPU_DESCRIPTOR_HANDLE texrankgpuDescriptorHandle;
  988. static int descriptorsAllocated = 0;
  989. static UINT descriptorSize;
  990. static gpu_buffer_t *vb[16];
  991. static gpu_buffer_t *vb_last[16];
  992. static gpu_buffer_t *ib[16];
  993. static int vb_count = 0;
  994. static int vb_count_last = 0;
  995. static inst_t instances[1024];
  996. static int instances_count = 0;
  997. void gpu_raytrace_pipeline_init(gpu_raytrace_pipeline_t *pipeline, void *ray_shader, int ray_shader_size, gpu_buffer_t *constant_buffer) {
  998. output = NULL;
  999. descriptorsAllocated = 0;
  1000. pipeline->_constant_buffer = constant_buffer;
  1001. // Descriptor heap
  1002. // Allocate a heap for 3 descriptors:
  1003. // 2 - bottom and top level acceleration structure
  1004. // 1 - raytracing output texture SRV
  1005. D3D12_DESCRIPTOR_HEAP_DESC descriptor_heap_desc = {
  1006. .NumDescriptors = 12,
  1007. .Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
  1008. .Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE,
  1009. .NodeMask = 0,
  1010. };
  1011. if (descriptorHeap != NULL) {
  1012. descriptorHeap->lpVtbl->Release(descriptorHeap);
  1013. }
  1014. device->lpVtbl->CreateDescriptorHeap(device, &descriptor_heap_desc, &IID_ID3D12DescriptorHeap, &descriptorHeap);
  1015. descriptorSize = device->lpVtbl->GetDescriptorHandleIncrementSize(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
  1016. // Device
  1017. if (dxrDevice != NULL) {
  1018. dxrDevice->lpVtbl->Release(dxrDevice);
  1019. }
  1020. device->lpVtbl->QueryInterface(device, &IID_ID3D12Device5, &dxrDevice);
  1021. if (dxrCommandList != NULL) {
  1022. dxrCommandList->lpVtbl->Release(dxrCommandList);
  1023. }
  1024. command_list->lpVtbl->QueryInterface(command_list , &IID_ID3D12GraphicsCommandList4, &dxrCommandList);
  1025. // Root signatures
  1026. // This is a root signature that is shared across all raytracing shaders invoked during a DispatchRays() call.
  1027. D3D12_DESCRIPTOR_RANGE UAVDescriptor = {
  1028. .RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV,
  1029. .NumDescriptors = 1,
  1030. .BaseShaderRegister = 0,
  1031. .OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND,
  1032. };
  1033. D3D12_DESCRIPTOR_RANGE SRVDescriptorA = {
  1034. .RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV,
  1035. .NumDescriptors = 1,
  1036. .BaseShaderRegister = 1,
  1037. .OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND,
  1038. };
  1039. D3D12_DESCRIPTOR_RANGE SRVDescriptorB = {
  1040. .RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV,
  1041. .NumDescriptors = 1,
  1042. .BaseShaderRegister = 2,
  1043. .OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND,
  1044. };
  1045. D3D12_DESCRIPTOR_RANGE SRVDescriptor0 = {
  1046. .RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV,
  1047. .NumDescriptors = 1,
  1048. .BaseShaderRegister = 3,
  1049. .OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND,
  1050. };
  1051. D3D12_DESCRIPTOR_RANGE SRVDescriptor1 = {
  1052. .RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV,
  1053. .NumDescriptors = 1,
  1054. .BaseShaderRegister = 4,
  1055. .OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND,
  1056. };
  1057. D3D12_DESCRIPTOR_RANGE SRVDescriptor2 = {
  1058. .RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV,
  1059. .NumDescriptors = 1,
  1060. .BaseShaderRegister = 5,
  1061. .OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND,
  1062. };
  1063. D3D12_DESCRIPTOR_RANGE SRVDescriptorEnv = {
  1064. .RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV,
  1065. .NumDescriptors = 1,
  1066. .BaseShaderRegister = 6,
  1067. .OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND,
  1068. };
  1069. D3D12_DESCRIPTOR_RANGE SRVDescriptorSobol = {
  1070. .RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV,
  1071. .NumDescriptors = 1,
  1072. .BaseShaderRegister = 7,
  1073. .OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND,
  1074. };
  1075. D3D12_DESCRIPTOR_RANGE SRVDescriptorScramble = {
  1076. .RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV,
  1077. .NumDescriptors = 1,
  1078. .BaseShaderRegister = 8,
  1079. .OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND,
  1080. };
  1081. D3D12_DESCRIPTOR_RANGE SRVDescriptorRank = {
  1082. .RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV,
  1083. .NumDescriptors = 1,
  1084. .BaseShaderRegister = 9,
  1085. .OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND,
  1086. };
  1087. D3D12_ROOT_PARAMETER rootParameters[12] = {};
  1088. // Output view
  1089. rootParameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
  1090. rootParameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  1091. rootParameters[0].DescriptorTable.NumDescriptorRanges = 1;
  1092. rootParameters[0].DescriptorTable.pDescriptorRanges = &UAVDescriptor;
  1093. // Acceleration structure
  1094. rootParameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV;
  1095. rootParameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  1096. rootParameters[1].Descriptor.ShaderRegister = 0;
  1097. // Constant buffer
  1098. rootParameters[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
  1099. rootParameters[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  1100. rootParameters[2].DescriptorTable.NumDescriptorRanges = 1;
  1101. rootParameters[2].DescriptorTable.pDescriptorRanges = &SRVDescriptorA;
  1102. rootParameters[3].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
  1103. rootParameters[3].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  1104. rootParameters[3].DescriptorTable.NumDescriptorRanges = 1;
  1105. rootParameters[3].DescriptorTable.pDescriptorRanges = &SRVDescriptorB;
  1106. rootParameters[4].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
  1107. rootParameters[4].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  1108. rootParameters[4].Descriptor.ShaderRegister = 0;
  1109. rootParameters[5].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
  1110. rootParameters[5].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  1111. rootParameters[5].DescriptorTable.NumDescriptorRanges = 1;
  1112. rootParameters[5].DescriptorTable.pDescriptorRanges = &SRVDescriptor0;
  1113. rootParameters[6].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
  1114. rootParameters[6].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  1115. rootParameters[6].DescriptorTable.NumDescriptorRanges = 1;
  1116. rootParameters[6].DescriptorTable.pDescriptorRanges = &SRVDescriptor1;
  1117. rootParameters[7].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
  1118. rootParameters[7].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  1119. rootParameters[7].DescriptorTable.NumDescriptorRanges = 1;
  1120. rootParameters[7].DescriptorTable.pDescriptorRanges = &SRVDescriptor2;
  1121. rootParameters[8].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
  1122. rootParameters[8].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  1123. rootParameters[8].DescriptorTable.NumDescriptorRanges = 1;
  1124. rootParameters[8].DescriptorTable.pDescriptorRanges = &SRVDescriptorEnv;
  1125. rootParameters[9].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
  1126. rootParameters[9].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  1127. rootParameters[9].DescriptorTable.NumDescriptorRanges = 1;
  1128. rootParameters[9].DescriptorTable.pDescriptorRanges = &SRVDescriptorSobol;
  1129. rootParameters[10].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
  1130. rootParameters[10].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  1131. rootParameters[10].DescriptorTable.NumDescriptorRanges = 1;
  1132. rootParameters[10].DescriptorTable.pDescriptorRanges = &SRVDescriptorScramble;
  1133. rootParameters[11].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
  1134. rootParameters[11].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  1135. rootParameters[11].DescriptorTable.NumDescriptorRanges = 1;
  1136. rootParameters[11].DescriptorTable.pDescriptorRanges = &SRVDescriptorRank;
  1137. D3D12_ROOT_SIGNATURE_DESC dxrRootSignatureDesc = {
  1138. .NumParameters = ARRAYSIZE(rootParameters),
  1139. .pParameters = rootParameters,
  1140. };
  1141. ID3DBlob *blob = NULL;
  1142. ID3DBlob *error = NULL;
  1143. D3D12SerializeRootSignature(&dxrRootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1, &blob, &error);
  1144. if (dxrRootSignature != NULL) {
  1145. dxrRootSignature->lpVtbl->Release(dxrRootSignature);
  1146. }
  1147. device->lpVtbl->CreateRootSignature(device, 1, blob->lpVtbl->GetBufferPointer(blob), blob->lpVtbl->GetBufferSize(blob), &IID_ID3D12RootSignature,
  1148. &dxrRootSignature);
  1149. // Pipeline
  1150. D3D12_STATE_OBJECT_DESC raytracingPipeline = {
  1151. .Type = D3D12_STATE_OBJECT_TYPE_RAYTRACING_PIPELINE,
  1152. };
  1153. D3D12_SHADER_BYTECODE shaderBytecode = {
  1154. .pShaderBytecode = ray_shader,
  1155. .BytecodeLength = ray_shader_size,
  1156. };
  1157. D3D12_DXIL_LIBRARY_DESC dxilLibrary = {
  1158. .DXILLibrary = shaderBytecode,
  1159. };
  1160. D3D12_EXPORT_DESC exports[3] = {0};
  1161. exports[0].Name = raygen_shader_name;
  1162. exports[1].Name = closesthit_shader_name;
  1163. exports[2].Name = miss_shader_name;
  1164. dxilLibrary.pExports = exports;
  1165. dxilLibrary.NumExports = 3;
  1166. D3D12_HIT_GROUP_DESC hitGroup = {
  1167. .ClosestHitShaderImport = closesthit_shader_name,
  1168. .HitGroupExport = hit_group_name,
  1169. .Type = D3D12_HIT_GROUP_TYPE_TRIANGLES,
  1170. };
  1171. D3D12_RAYTRACING_SHADER_CONFIG shaderConfig = {
  1172. .MaxPayloadSizeInBytes = 10 * sizeof(float), // float4 color
  1173. .MaxAttributeSizeInBytes = 8 * sizeof(float), // float2 barycentrics
  1174. };
  1175. D3D12_RAYTRACING_PIPELINE_CONFIG pipelineConfig = {
  1176. .MaxTraceRecursionDepth = 1, // ~ primary rays only
  1177. };
  1178. D3D12_STATE_SUBOBJECT subobjects[5] = {};
  1179. subobjects[0].Type = D3D12_STATE_SUBOBJECT_TYPE_DXIL_LIBRARY;
  1180. subobjects[0].pDesc = &dxilLibrary;
  1181. subobjects[1].Type = D3D12_STATE_SUBOBJECT_TYPE_HIT_GROUP;
  1182. subobjects[1].pDesc = &hitGroup;
  1183. subobjects[2].Type = D3D12_STATE_SUBOBJECT_TYPE_RAYTRACING_SHADER_CONFIG;
  1184. subobjects[2].pDesc = &shaderConfig;
  1185. subobjects[3].Type = D3D12_STATE_SUBOBJECT_TYPE_GLOBAL_ROOT_SIGNATURE;
  1186. subobjects[3].pDesc = &dxrRootSignature;
  1187. subobjects[4].Type = D3D12_STATE_SUBOBJECT_TYPE_RAYTRACING_PIPELINE_CONFIG;
  1188. subobjects[4].pDesc = &pipelineConfig;
  1189. raytracingPipeline.NumSubobjects = 5;
  1190. raytracingPipeline.pSubobjects = subobjects;
  1191. dxrDevice->lpVtbl->CreateStateObject(dxrDevice, &raytracingPipeline, &IID_ID3D12StateObject, &pipeline->impl.dxr_state);
  1192. // Shader tables
  1193. // Get shader identifiers
  1194. ID3D12StateObjectProperties *stateObjectProps = NULL;
  1195. pipeline->impl.dxr_state->lpVtbl->QueryInterface(pipeline->impl.dxr_state , &IID_ID3D12StateObjectProperties, &stateObjectProps);
  1196. const void *rayGenShaderId = stateObjectProps->lpVtbl->GetShaderIdentifier(stateObjectProps, raygen_shader_name);
  1197. const void *missShaderId = stateObjectProps->lpVtbl->GetShaderIdentifier(stateObjectProps, miss_shader_name);
  1198. const void *hitGroupShaderId = stateObjectProps->lpVtbl->GetShaderIdentifier(stateObjectProps, hit_group_name);
  1199. UINT shaderIdSize = D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES;
  1200. int align = D3D12_RAYTRACING_SHADER_RECORD_BYTE_ALIGNMENT;
  1201. // Ray gen shader table
  1202. {
  1203. UINT size = shaderIdSize + constant_buffer->count;
  1204. UINT shaderRecordSize = (size + (align - 1)) & ~(align - 1);
  1205. D3D12_RESOURCE_DESC bufferDesc = {
  1206. .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
  1207. .Width = shaderRecordSize,
  1208. .Height = 1,
  1209. .DepthOrArraySize = 1,
  1210. .MipLevels = 1,
  1211. .Format = DXGI_FORMAT_UNKNOWN,
  1212. .SampleDesc.Count = 1,
  1213. .SampleDesc.Quality = 0,
  1214. .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
  1215. };
  1216. D3D12_HEAP_PROPERTIES uploadHeapProperties = {
  1217. .Type = D3D12_HEAP_TYPE_UPLOAD,
  1218. .CreationNodeMask = 1,
  1219. .VisibleNodeMask = 1,
  1220. };
  1221. device->lpVtbl->CreateCommittedResource(device, &uploadHeapProperties, D3D12_HEAP_FLAG_NONE, &bufferDesc, D3D12_RESOURCE_STATE_GENERIC_READ, NULL,
  1222. &IID_ID3D12Resource, &pipeline->impl.raygen_shader_table);
  1223. D3D12_RANGE rstRange = {
  1224. .Begin = 0,
  1225. .End = 0,
  1226. };
  1227. uint8_t *byteDest;
  1228. pipeline->impl.raygen_shader_table->lpVtbl->Map(pipeline->impl.raygen_shader_table, 0, &rstRange, (void **)(&byteDest));
  1229. D3D12_RANGE cbRange = {
  1230. .Begin = 0,
  1231. .End = constant_buffer->count,
  1232. };
  1233. void *constantBufferData;
  1234. constant_buffer->impl.buffer->lpVtbl->Map(constant_buffer->impl.buffer, 0, &cbRange, (void **)&constantBufferData);
  1235. memcpy(byteDest, rayGenShaderId, size);
  1236. memcpy(byteDest + size, constantBufferData, constant_buffer->count);
  1237. pipeline->impl.raygen_shader_table->lpVtbl->Unmap(pipeline->impl.raygen_shader_table, 0, NULL);
  1238. }
  1239. // Miss shader table
  1240. {
  1241. UINT size = shaderIdSize;
  1242. UINT shaderRecordSize = (size + (align - 1)) & ~(align - 1);
  1243. D3D12_RESOURCE_DESC bufferDesc = {
  1244. .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
  1245. .Width = shaderRecordSize,
  1246. .Height = 1,
  1247. .DepthOrArraySize = 1,
  1248. .MipLevels = 1,
  1249. .Format = DXGI_FORMAT_UNKNOWN,
  1250. .SampleDesc.Count = 1,
  1251. .SampleDesc.Quality = 0,
  1252. .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
  1253. };
  1254. D3D12_HEAP_PROPERTIES uploadHeapProperties = {
  1255. .Type = D3D12_HEAP_TYPE_UPLOAD,
  1256. .CreationNodeMask = 1,
  1257. .VisibleNodeMask = 1,
  1258. };
  1259. device->lpVtbl->CreateCommittedResource(device, &uploadHeapProperties, D3D12_HEAP_FLAG_NONE, &bufferDesc, D3D12_RESOURCE_STATE_GENERIC_READ, NULL,
  1260. &IID_ID3D12Resource, &pipeline->impl.miss_shader_table);
  1261. D3D12_RANGE mstRange = {
  1262. .Begin = 0,
  1263. .End = 0,
  1264. };
  1265. uint8_t *byteDest;
  1266. pipeline->impl.miss_shader_table->lpVtbl->Map(pipeline->impl.miss_shader_table, 0, &mstRange, (void **)(&byteDest));
  1267. memcpy(byteDest, missShaderId, size);
  1268. pipeline->impl.miss_shader_table->lpVtbl->Unmap(pipeline->impl.miss_shader_table, 0, NULL);
  1269. }
  1270. // Hit group shader table
  1271. {
  1272. UINT size = shaderIdSize;
  1273. UINT shaderRecordSize = (size + (align - 1)) & ~(align - 1);
  1274. D3D12_RESOURCE_DESC bufferDesc = {
  1275. .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
  1276. .Width = shaderRecordSize,
  1277. .Height = 1,
  1278. .DepthOrArraySize = 1,
  1279. .MipLevels = 1,
  1280. .Format = DXGI_FORMAT_UNKNOWN,
  1281. .SampleDesc.Count = 1,
  1282. .SampleDesc.Quality = 0,
  1283. .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
  1284. };
  1285. D3D12_HEAP_PROPERTIES uploadHeapProperties = {
  1286. .Type = D3D12_HEAP_TYPE_UPLOAD,
  1287. .CreationNodeMask = 1,
  1288. .VisibleNodeMask = 1,
  1289. };
  1290. device->lpVtbl->CreateCommittedResource(device, &uploadHeapProperties, D3D12_HEAP_FLAG_NONE, &bufferDesc, D3D12_RESOURCE_STATE_GENERIC_READ, NULL,
  1291. &IID_ID3D12Resource, &pipeline->impl.hitgroup_shader_table);
  1292. D3D12_RANGE hstRange = {
  1293. .Begin = 0,
  1294. .End = 0,
  1295. };
  1296. uint8_t *byteDest;
  1297. pipeline->impl.hitgroup_shader_table->lpVtbl->Map(pipeline->impl.hitgroup_shader_table, 0, &hstRange, (void **)(&byteDest));
  1298. memcpy(byteDest, hitGroupShaderId, size);
  1299. pipeline->impl.hitgroup_shader_table->lpVtbl->Unmap(pipeline->impl.hitgroup_shader_table, 0, NULL);
  1300. }
  1301. // Output descriptor
  1302. D3D12_CPU_DESCRIPTOR_HANDLE handle;
  1303. descriptorHeap->lpVtbl->GetCPUDescriptorHandleForHeapStart(descriptorHeap, &handle);
  1304. outputCpuDescriptor.ptr = handle.ptr + (INT64)(descriptorsAllocated) * (UINT64)(descriptorSize);
  1305. descriptorHeap->lpVtbl->GetGPUDescriptorHandleForHeapStart(descriptorHeap, &handle);
  1306. int descriptorHeapIndex = descriptorsAllocated++;
  1307. outputDescriptorHandle.ptr = handle.ptr + (INT64)(descriptorHeapIndex) * (UINT64)(descriptorSize);
  1308. }
  1309. void gpu_raytrace_pipeline_destroy(gpu_raytrace_pipeline_t *pipeline) {
  1310. pipeline->impl.dxr_state->lpVtbl->Release(pipeline->impl.dxr_state);
  1311. pipeline->impl.raygen_shader_table->lpVtbl->Release(pipeline->impl.raygen_shader_table);
  1312. pipeline->impl.miss_shader_table->lpVtbl->Release(pipeline->impl.miss_shader_table);
  1313. pipeline->impl.hitgroup_shader_table->lpVtbl->Release(pipeline->impl.hitgroup_shader_table);
  1314. }
  1315. UINT create_srv_vb(gpu_buffer_t *vb, UINT numElements, UINT elementSize) {
  1316. D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {
  1317. .ViewDimension = D3D12_SRV_DIMENSION_BUFFER,
  1318. .Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING,
  1319. .Buffer.NumElements = numElements,
  1320. };
  1321. if (elementSize == 0) {
  1322. srvDesc.Format = DXGI_FORMAT_R32_TYPELESS;
  1323. srvDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW;
  1324. srvDesc.Buffer.StructureByteStride = 0;
  1325. }
  1326. else {
  1327. srvDesc.Format = DXGI_FORMAT_UNKNOWN;
  1328. srvDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE;
  1329. srvDesc.Buffer.StructureByteStride = elementSize;
  1330. }
  1331. D3D12_CPU_DESCRIPTOR_HANDLE handle;
  1332. descriptorHeap->lpVtbl->GetCPUDescriptorHandleForHeapStart(descriptorHeap, &handle);
  1333. D3D12_CPU_DESCRIPTOR_HANDLE cpuDescriptor = {
  1334. .ptr = handle.ptr + (INT64)(descriptorsAllocated) * (UINT64)(descriptorSize),
  1335. };
  1336. UINT descriptorIndex = descriptorsAllocated++;
  1337. device->lpVtbl->CreateShaderResourceView(device, vb->impl.buffer, &srvDesc, cpuDescriptor);
  1338. descriptorHeap->lpVtbl->GetGPUDescriptorHandleForHeapStart(descriptorHeap, &handle);
  1339. vbgpuDescriptorHandle.ptr = handle.ptr + (INT64)(descriptorIndex) * (UINT64)(descriptorSize);
  1340. return descriptorIndex;
  1341. }
  1342. UINT create_srv_ib(gpu_buffer_t *ib, UINT numElements, UINT elementSize) {
  1343. D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {
  1344. .ViewDimension = D3D12_SRV_DIMENSION_BUFFER,
  1345. .Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING,
  1346. .Buffer.NumElements = numElements,
  1347. };
  1348. if (elementSize == 0) {
  1349. srvDesc.Format = DXGI_FORMAT_R32_TYPELESS;
  1350. srvDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW;
  1351. srvDesc.Buffer.StructureByteStride = 0;
  1352. }
  1353. else {
  1354. srvDesc.Format = DXGI_FORMAT_UNKNOWN;
  1355. srvDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE;
  1356. srvDesc.Buffer.StructureByteStride = elementSize;
  1357. }
  1358. D3D12_CPU_DESCRIPTOR_HANDLE handle;
  1359. descriptorHeap->lpVtbl->GetCPUDescriptorHandleForHeapStart(descriptorHeap, &handle);
  1360. D3D12_CPU_DESCRIPTOR_HANDLE cpuDescriptor = {
  1361. .ptr = handle.ptr + (INT64)(descriptorsAllocated) * (UINT64)(descriptorSize),
  1362. };
  1363. UINT descriptorIndex = descriptorsAllocated++;
  1364. descriptorHeap->lpVtbl->GetGPUDescriptorHandleForHeapStart(descriptorHeap, &handle);
  1365. device->lpVtbl->CreateShaderResourceView(device, ib->impl.buffer, &srvDesc, cpuDescriptor);
  1366. ibgpuDescriptorHandle.ptr = handle.ptr + (INT64)(descriptorIndex) * (UINT64)(descriptorSize);
  1367. return descriptorIndex;
  1368. }
  1369. void gpu_raytrace_acceleration_structure_init(gpu_raytrace_acceleration_structure_t *accel) {
  1370. vb_count = 0;
  1371. instances_count = 0;
  1372. }
  1373. void gpu_raytrace_acceleration_structure_add(gpu_raytrace_acceleration_structure_t *accel, gpu_buffer_t *_vb, gpu_buffer_t *_ib,
  1374. iron_matrix4x4_t _transform) {
  1375. int vb_i = -1;
  1376. for (int i = 0; i < vb_count; ++i) {
  1377. if (_vb == vb[i]) {
  1378. vb_i = i;
  1379. break;
  1380. }
  1381. }
  1382. if (vb_i == -1) {
  1383. vb_i = vb_count;
  1384. vb[vb_count] = _vb;
  1385. ib[vb_count] = _ib;
  1386. vb_count++;
  1387. }
  1388. inst_t inst = { .i = vb_i, .m = _transform };
  1389. instances[instances_count] = inst;
  1390. instances_count++;
  1391. }
  1392. void _gpu_raytrace_acceleration_structure_destroy_bottom(gpu_raytrace_acceleration_structure_t *accel) {
  1393. for (int i = 0; i < vb_count_last; ++i) {
  1394. accel->impl.bottom_level_accel[i]->lpVtbl->Release(accel->impl.bottom_level_accel[i]);
  1395. }
  1396. }
  1397. void _gpu_raytrace_acceleration_structure_destroy_top(gpu_raytrace_acceleration_structure_t *accel) {
  1398. accel->impl.top_level_accel->lpVtbl->Release(accel->impl.top_level_accel);
  1399. }
  1400. void gpu_raytrace_acceleration_structure_build(gpu_raytrace_acceleration_structure_t *accel, gpu_buffer_t *_vb_full, gpu_buffer_t *_ib_full) {
  1401. bool build_bottom = false;
  1402. for (int i = 0; i < 16; ++i) {
  1403. if (vb_last[i] != vb[i]) {
  1404. build_bottom = true;
  1405. }
  1406. vb_last[i] = vb[i];
  1407. }
  1408. if (vb_count_last > 0) {
  1409. if (build_bottom) {
  1410. _gpu_raytrace_acceleration_structure_destroy_bottom(accel);
  1411. }
  1412. _gpu_raytrace_acceleration_structure_destroy_top(accel);
  1413. }
  1414. vb_count_last = vb_count;
  1415. if (vb_count == 0) {
  1416. return;
  1417. }
  1418. descriptorsAllocated = 1; // 1 descriptor already allocated in gpu_raytrace_pipeline_init
  1419. #ifdef is_forge
  1420. create_srv_ib(_ib_full, _ib_full->count, 0);
  1421. create_srv_vb(_vb_full, _vb_full->count, vb[0]->stride);
  1422. #else
  1423. create_srv_ib(ib[0], ib[0]->count, 0);
  1424. create_srv_vb(vb[0], vb[0]->count, vb[0]->stride);
  1425. #endif
  1426. // Reset the command list for the acceleration structure construction
  1427. command_list->lpVtbl->Reset(command_list, command_allocator, NULL);
  1428. // Get required sizes for an acceleration structure
  1429. D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS topLevelInputs = {
  1430. .DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY,
  1431. .Flags = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_TRACE,
  1432. .NumDescs = 1,
  1433. .Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL,
  1434. };
  1435. D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO topLevelPrebuildInfo = {0};
  1436. dxrDevice->lpVtbl->GetRaytracingAccelerationStructurePrebuildInfo(dxrDevice, &topLevelInputs, &topLevelPrebuildInfo);
  1437. UINT64 scratch_size = topLevelPrebuildInfo.ScratchDataSizeInBytes;
  1438. // Bottom AS
  1439. D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS bottomLevelInputs[16];
  1440. D3D12_RAYTRACING_GEOMETRY_DESC geometryDescs[16];
  1441. if (build_bottom) {
  1442. for (int i = 0; i < vb_count; ++i) {
  1443. D3D12_RAYTRACING_GEOMETRY_DESC geometryDesc = {
  1444. .Type = D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES,
  1445. .Triangles.IndexBuffer = ib[i]->impl.buffer->lpVtbl->GetGPUVirtualAddress(ib[i]->impl.buffer),
  1446. .Triangles.IndexCount = ib[i]->count,
  1447. .Triangles.IndexFormat = DXGI_FORMAT_R32_UINT,
  1448. .Triangles.Transform3x4 = 0,
  1449. .Triangles.VertexFormat = DXGI_FORMAT_R16G16B16A16_SNORM,
  1450. .Triangles.VertexCount = vb[i]->count,
  1451. };
  1452. D3D12_RESOURCE_DESC desc;
  1453. vb[i]->impl.buffer->lpVtbl->GetDesc(vb[i]->impl.buffer, &desc);
  1454. geometryDesc.Triangles.VertexBuffer.StartAddress = vb[i]->impl.buffer->lpVtbl->GetGPUVirtualAddress(vb[i]->impl.buffer);
  1455. geometryDesc.Triangles.VertexBuffer.StrideInBytes = desc.Width / vb[i]->count;
  1456. geometryDesc.Flags = D3D12_RAYTRACING_GEOMETRY_FLAG_OPAQUE;
  1457. geometryDescs[i] = geometryDesc;
  1458. D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO bottomLevelPrebuildInfo = {0};
  1459. D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS inputs = {
  1460. .DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY,
  1461. .NumDescs = 1,
  1462. .Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL,
  1463. .pGeometryDescs = &geometryDescs[i],
  1464. .Flags = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_TRACE,
  1465. };
  1466. dxrDevice->lpVtbl->GetRaytracingAccelerationStructurePrebuildInfo(dxrDevice, &inputs, &bottomLevelPrebuildInfo);
  1467. bottomLevelInputs[i] = inputs;
  1468. UINT64 blSize = bottomLevelPrebuildInfo.ScratchDataSizeInBytes;
  1469. if (scratch_size < blSize) {
  1470. scratch_size = blSize;
  1471. }
  1472. // Allocate resources for acceleration structures
  1473. // The resources that will contain acceleration structures must be created in the state D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE,
  1474. // and must have resource flag D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS.
  1475. {
  1476. D3D12_RESOURCE_DESC bufferDesc = {
  1477. .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
  1478. .Width = bottomLevelPrebuildInfo.ResultDataMaxSizeInBytes,
  1479. .Height = 1,
  1480. .DepthOrArraySize = 1,
  1481. .MipLevels = 1,
  1482. .Format = DXGI_FORMAT_UNKNOWN,
  1483. .SampleDesc.Count = 1,
  1484. .SampleDesc.Quality = 0,
  1485. .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
  1486. .Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
  1487. };
  1488. D3D12_HEAP_PROPERTIES uploadHeapProperties = {
  1489. .Type = D3D12_HEAP_TYPE_DEFAULT,
  1490. .CreationNodeMask = 1,
  1491. .VisibleNodeMask = 1,
  1492. };
  1493. device->lpVtbl->CreateCommittedResource(dxrDevice, &uploadHeapProperties, D3D12_HEAP_FLAG_NONE, &bufferDesc, D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE, NULL,
  1494. &IID_ID3D12Resource, &accel->impl.bottom_level_accel[i]);
  1495. }
  1496. }
  1497. }
  1498. // Create scratch memory
  1499. ID3D12Resource *scratchResource;
  1500. {
  1501. D3D12_RESOURCE_DESC bufferDesc = {
  1502. .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
  1503. .Width = scratch_size,
  1504. .Height = 1,
  1505. .DepthOrArraySize = 1,
  1506. .MipLevels = 1,
  1507. .Format = DXGI_FORMAT_UNKNOWN,
  1508. .SampleDesc.Count = 1,
  1509. .SampleDesc.Quality = 0,
  1510. .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
  1511. .Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
  1512. };
  1513. D3D12_HEAP_PROPERTIES uploadHeapProperties = {
  1514. .Type = D3D12_HEAP_TYPE_DEFAULT,
  1515. .CreationNodeMask = 1,
  1516. .VisibleNodeMask = 1,
  1517. };
  1518. device->lpVtbl->CreateCommittedResource(dxrDevice, &uploadHeapProperties, D3D12_HEAP_FLAG_NONE, &bufferDesc, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, NULL,
  1519. &IID_ID3D12Resource, &scratchResource);
  1520. }
  1521. // Bottom AS
  1522. if (build_bottom) {
  1523. for (int i = 0; i < vb_count; ++i) {
  1524. // Bottom Level Acceleration Structure desc
  1525. D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC bottomLevelBuildDesc = {
  1526. .Inputs = bottomLevelInputs[i],
  1527. .ScratchAccelerationStructureData = scratchResource->lpVtbl->GetGPUVirtualAddress(scratchResource),
  1528. .DestAccelerationStructureData = accel->impl.bottom_level_accel[i]->lpVtbl->GetGPUVirtualAddress(accel->impl.bottom_level_accel[i]),
  1529. };
  1530. // Build acceleration structure
  1531. dxrCommandList->lpVtbl->BuildRaytracingAccelerationStructure(dxrCommandList, &bottomLevelBuildDesc, 0, NULL);
  1532. }
  1533. }
  1534. // Top AS
  1535. {
  1536. D3D12_RESOURCE_DESC bufferDesc = {
  1537. .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
  1538. .Width = topLevelPrebuildInfo.ResultDataMaxSizeInBytes,
  1539. .Height = 1,
  1540. .DepthOrArraySize = 1,
  1541. .MipLevels = 1,
  1542. .Format = DXGI_FORMAT_UNKNOWN,
  1543. .SampleDesc.Count = 1,
  1544. .SampleDesc.Quality = 0,
  1545. .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
  1546. .Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
  1547. };
  1548. D3D12_HEAP_PROPERTIES uploadHeapProperties = {
  1549. .Type = D3D12_HEAP_TYPE_DEFAULT,
  1550. .CreationNodeMask = 1,
  1551. .VisibleNodeMask = 1,
  1552. };
  1553. device->lpVtbl->CreateCommittedResource(device, &uploadHeapProperties, D3D12_HEAP_FLAG_NONE, &bufferDesc, D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE, NULL,
  1554. &IID_ID3D12Resource, &accel->impl.top_level_accel);
  1555. }
  1556. // Create an instance desc for the bottom-level acceleration structure
  1557. D3D12_RESOURCE_DESC bufferDesc = {
  1558. .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
  1559. .Width = sizeof(D3D12_RAYTRACING_INSTANCE_DESC) * instances_count,
  1560. .Height = 1,
  1561. .DepthOrArraySize = 1,
  1562. .MipLevels = 1,
  1563. .Format = DXGI_FORMAT_UNKNOWN,
  1564. .SampleDesc.Count = 1,
  1565. .SampleDesc.Quality = 0,
  1566. .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
  1567. };
  1568. D3D12_HEAP_PROPERTIES uploadHeapProperties = {
  1569. .Type = D3D12_HEAP_TYPE_UPLOAD,
  1570. .CreationNodeMask = 1,
  1571. .VisibleNodeMask = 1,
  1572. };
  1573. ID3D12Resource *instanceDescs;
  1574. device->lpVtbl->CreateCommittedResource(device, &uploadHeapProperties, D3D12_HEAP_FLAG_NONE, &bufferDesc, D3D12_RESOURCE_STATE_GENERIC_READ, NULL,
  1575. &IID_ID3D12Resource, &instanceDescs);
  1576. void *mappedData;
  1577. instanceDescs->lpVtbl->Map(instanceDescs, 0, NULL, &mappedData);
  1578. for (int i = 0; i < instances_count; ++i) {
  1579. D3D12_RAYTRACING_INSTANCE_DESC instanceDesc = {0};
  1580. instanceDesc.Transform[0][0] = instances[i].m.m[0];
  1581. instanceDesc.Transform[0][1] = instances[i].m.m[1];
  1582. instanceDesc.Transform[0][2] = instances[i].m.m[2];
  1583. instanceDesc.Transform[0][3] = instances[i].m.m[3];
  1584. instanceDesc.Transform[1][0] = instances[i].m.m[4];
  1585. instanceDesc.Transform[1][1] = instances[i].m.m[5];
  1586. instanceDesc.Transform[1][2] = instances[i].m.m[6];
  1587. instanceDesc.Transform[1][3] = instances[i].m.m[7];
  1588. instanceDesc.Transform[2][0] = instances[i].m.m[8];
  1589. instanceDesc.Transform[2][1] = instances[i].m.m[9];
  1590. instanceDesc.Transform[2][2] = instances[i].m.m[10];
  1591. instanceDesc.Transform[2][3] = instances[i].m.m[11];
  1592. int ib_off = 0;
  1593. for (int j = 0; j < instances[i].i; ++j) {
  1594. ib_off += ib[j]->count * 4;
  1595. }
  1596. instanceDesc.InstanceID = ib_off;
  1597. instanceDesc.InstanceMask = 1;
  1598. instanceDesc.AccelerationStructure =
  1599. accel->impl.bottom_level_accel[instances[i].i]->lpVtbl->GetGPUVirtualAddress(accel->impl.bottom_level_accel[instances[i].i]);
  1600. memcpy((uint8_t *)mappedData + i * sizeof(D3D12_RAYTRACING_INSTANCE_DESC), &instanceDesc, sizeof(D3D12_RAYTRACING_INSTANCE_DESC));
  1601. }
  1602. instanceDescs->lpVtbl->Unmap(instanceDescs, 0, NULL);
  1603. // Top Level Acceleration Structure desc
  1604. D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC topLevelBuildDesc = {0};
  1605. topLevelInputs.InstanceDescs = instanceDescs->lpVtbl->GetGPUVirtualAddress(instanceDescs);
  1606. topLevelBuildDesc.Inputs = topLevelInputs;
  1607. topLevelBuildDesc.DestAccelerationStructureData = accel->impl.top_level_accel->lpVtbl->GetGPUVirtualAddress(accel->impl.top_level_accel);
  1608. topLevelBuildDesc.ScratchAccelerationStructureData = scratchResource->lpVtbl->GetGPUVirtualAddress(scratchResource);
  1609. D3D12_RESOURCE_BARRIER barrier = {
  1610. .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
  1611. .UAV.pResource = accel->impl.bottom_level_accel[0],
  1612. };
  1613. command_list->lpVtbl->ResourceBarrier(command_list, 1, &barrier);
  1614. dxrCommandList->lpVtbl->BuildRaytracingAccelerationStructure(dxrCommandList, &topLevelBuildDesc, 0, NULL);
  1615. gpu_execute_and_wait();
  1616. scratchResource->lpVtbl->Release(scratchResource);
  1617. instanceDescs->lpVtbl->Release(instanceDescs);
  1618. }
  1619. void gpu_raytrace_acceleration_structure_destroy(gpu_raytrace_acceleration_structure_t *accel) {
  1620. // accel->impl.bottom_level_accel->Release();
  1621. // accel->impl.top_level_accel->Release();
  1622. }
  1623. void gpu_raytrace_set_textures(gpu_texture_t *texpaint0, gpu_texture_t *texpaint1, gpu_texture_t *texpaint2, gpu_texture_t *texenv, gpu_texture_t *texsobol, gpu_texture_t *texscramble, gpu_texture_t *texrank) {
  1624. D3D12_CPU_DESCRIPTOR_HANDLE handle;
  1625. descriptorHeap->lpVtbl->GetCPUDescriptorHandleForHeapStart(descriptorHeap, &handle);
  1626. D3D12_CPU_DESCRIPTOR_HANDLE cpuDescriptor = {0};
  1627. cpuDescriptor.ptr = handle.ptr + 5 * (UINT64)(descriptorSize);
  1628. D3D12_CPU_DESCRIPTOR_HANDLE sourceCpu;
  1629. texpaint0->impl.srv_descriptor_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(texpaint0->impl.srv_descriptor_heap, &sourceCpu);
  1630. device->lpVtbl->CopyDescriptorsSimple(device, 1, cpuDescriptor, sourceCpu, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
  1631. D3D12_GPU_DESCRIPTOR_HANDLE ghandle;
  1632. descriptorHeap->lpVtbl->GetGPUDescriptorHandleForHeapStart(descriptorHeap, &ghandle);
  1633. tex0gpuDescriptorHandle.ptr = ghandle.ptr + 5 * (UINT64)(descriptorSize);
  1634. descriptorHeap->lpVtbl->GetCPUDescriptorHandleForHeapStart(descriptorHeap, &handle);
  1635. cpuDescriptor.ptr = handle.ptr + 6 * (UINT64)(descriptorSize);
  1636. texpaint1->impl.srv_descriptor_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(texpaint1->impl.srv_descriptor_heap, &sourceCpu);
  1637. device->lpVtbl->CopyDescriptorsSimple(device, 1, cpuDescriptor, sourceCpu, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
  1638. descriptorHeap->lpVtbl->GetGPUDescriptorHandleForHeapStart(descriptorHeap, &ghandle);
  1639. tex1gpuDescriptorHandle.ptr = ghandle.ptr + 6 * (UINT64)(descriptorSize);
  1640. descriptorHeap->lpVtbl->GetCPUDescriptorHandleForHeapStart(descriptorHeap, &handle);
  1641. cpuDescriptor.ptr = handle.ptr + 7 * (UINT64)(descriptorSize);
  1642. texpaint2->impl.srv_descriptor_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(texpaint2->impl.srv_descriptor_heap, &sourceCpu);
  1643. device->lpVtbl->CopyDescriptorsSimple(device, 1, cpuDescriptor, sourceCpu, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
  1644. descriptorHeap->lpVtbl->GetGPUDescriptorHandleForHeapStart(descriptorHeap, &ghandle);
  1645. tex2gpuDescriptorHandle.ptr = ghandle.ptr + 7 * (UINT64)(descriptorSize);
  1646. descriptorHeap->lpVtbl->GetCPUDescriptorHandleForHeapStart(descriptorHeap, &handle);
  1647. cpuDescriptor.ptr = handle.ptr + 8 * (UINT64)(descriptorSize);
  1648. texenv->impl.srv_descriptor_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(texenv->impl.srv_descriptor_heap, &sourceCpu);
  1649. device->lpVtbl->CopyDescriptorsSimple(device, 1, cpuDescriptor, sourceCpu, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
  1650. descriptorHeap->lpVtbl->GetGPUDescriptorHandleForHeapStart(descriptorHeap, &ghandle);
  1651. texenvgpuDescriptorHandle.ptr = ghandle.ptr + 8 * (UINT64)(descriptorSize);
  1652. descriptorHeap->lpVtbl->GetCPUDescriptorHandleForHeapStart(descriptorHeap, &handle);
  1653. cpuDescriptor.ptr = handle.ptr + 9 * (UINT64)(descriptorSize);
  1654. texsobol->impl.srv_descriptor_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(texsobol->impl.srv_descriptor_heap, &sourceCpu);
  1655. device->lpVtbl->CopyDescriptorsSimple(device, 1, cpuDescriptor, sourceCpu, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
  1656. descriptorHeap->lpVtbl->GetGPUDescriptorHandleForHeapStart(descriptorHeap, &ghandle);
  1657. texsobolgpuDescriptorHandle.ptr = ghandle.ptr + 9 * (UINT64)(descriptorSize);
  1658. descriptorHeap->lpVtbl->GetCPUDescriptorHandleForHeapStart(descriptorHeap, &handle);
  1659. cpuDescriptor.ptr = handle.ptr + 10 * (UINT64)(descriptorSize);
  1660. texscramble->impl.srv_descriptor_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(texscramble->impl.srv_descriptor_heap , &sourceCpu);
  1661. device->lpVtbl->CopyDescriptorsSimple(device, 1, cpuDescriptor, sourceCpu, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
  1662. descriptorHeap->lpVtbl->GetGPUDescriptorHandleForHeapStart(descriptorHeap, &ghandle);
  1663. texscramblegpuDescriptorHandle.ptr = ghandle.ptr + 10 * (UINT64)(descriptorSize);
  1664. descriptorHeap->lpVtbl->GetCPUDescriptorHandleForHeapStart(descriptorHeap, &handle);
  1665. cpuDescriptor.ptr = handle.ptr + 11 * (UINT64)(descriptorSize);
  1666. texrank->impl.srv_descriptor_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(texrank->impl.srv_descriptor_heap , &sourceCpu);
  1667. device->lpVtbl->CopyDescriptorsSimple(device, 1, cpuDescriptor, sourceCpu, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
  1668. descriptorHeap->lpVtbl->GetGPUDescriptorHandleForHeapStart(descriptorHeap, &ghandle);
  1669. texrankgpuDescriptorHandle.ptr = ghandle.ptr + 11 * (UINT64)(descriptorSize);
  1670. }
  1671. void gpu_raytrace_set_acceleration_structure(gpu_raytrace_acceleration_structure_t *_accel) {
  1672. accel = _accel;
  1673. }
  1674. void gpu_raytrace_set_pipeline(gpu_raytrace_pipeline_t *_pipeline) {
  1675. pipeline = _pipeline;
  1676. }
  1677. void gpu_raytrace_set_target(gpu_texture_t *_output) {
  1678. if (_output != output) {
  1679. _output->impl.image->lpVtbl->Release(_output->impl.image);
  1680. _output->impl.rtv_descriptor_heap->lpVtbl->Release(_output->impl.rtv_descriptor_heap);
  1681. _output->impl.srv_descriptor_heap->lpVtbl->Release(_output->impl.srv_descriptor_heap);
  1682. D3D12_HEAP_PROPERTIES heap_properties = {
  1683. .Type = D3D12_HEAP_TYPE_DEFAULT,
  1684. .CreationNodeMask = 1,
  1685. .VisibleNodeMask = 1,
  1686. };
  1687. D3D12_RESOURCE_DESC desc = {
  1688. .Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D,
  1689. .Width = _output->width,
  1690. .Height = _output->height,
  1691. .DepthOrArraySize = 1,
  1692. .MipLevels = 1,
  1693. .Format = DXGI_FORMAT_R16G16B16A16_FLOAT,
  1694. .SampleDesc.Count = 1,
  1695. .SampleDesc.Quality = 0,
  1696. .Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN,
  1697. .Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
  1698. };
  1699. D3D12_CLEAR_VALUE clear_value;
  1700. clear_value.Format = DXGI_FORMAT_R16G16B16A16_FLOAT;
  1701. clear_value.Color[0] = 0.0f;
  1702. clear_value.Color[1] = 0.0f;
  1703. clear_value.Color[2] = 0.0f;
  1704. clear_value.Color[3] = 0.0f;
  1705. device->lpVtbl->CreateCommittedResource(device, &heap_properties, D3D12_HEAP_FLAG_NONE, &desc,
  1706. D3D12_RESOURCE_STATE_COMMON, &clear_value, &IID_ID3D12Resource, &_output->impl.image);
  1707. D3D12_RENDER_TARGET_VIEW_DESC view = {
  1708. .Format = DXGI_FORMAT_R16G16B16A16_FLOAT,
  1709. .ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D,
  1710. .Texture2D.MipSlice = 0,
  1711. .Texture2D.PlaneSlice = 0,
  1712. };
  1713. D3D12_DESCRIPTOR_HEAP_DESC heap_desc = {
  1714. .NumDescriptors = 1,
  1715. .Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV,
  1716. .Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE,
  1717. };
  1718. device->lpVtbl->CreateDescriptorHeap(device, &heap_desc, &IID_ID3D12DescriptorHeap, &_output->impl.rtv_descriptor_heap);
  1719. D3D12_CPU_DESCRIPTOR_HANDLE handle;
  1720. _output->impl.rtv_descriptor_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(_output->impl.rtv_descriptor_heap, &handle);
  1721. device->lpVtbl->CreateRenderTargetView(device, _output->impl.image, &view, handle);
  1722. D3D12_DESCRIPTOR_HEAP_DESC descriptor_heap_desc = {
  1723. .NumDescriptors = 1,
  1724. .Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
  1725. .NodeMask = 0,
  1726. .Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE,
  1727. };
  1728. device->lpVtbl->CreateDescriptorHeap(device, &descriptor_heap_desc, &IID_ID3D12DescriptorHeap, &_output->impl.srv_descriptor_heap);
  1729. D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {
  1730. .ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D,
  1731. .Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING,
  1732. .Format = DXGI_FORMAT_R16G16B16A16_FLOAT,
  1733. .Texture2D.MipLevels = 1,
  1734. .Texture2D.MostDetailedMip = 0,
  1735. .Texture2D.ResourceMinLODClamp = 0.0f,
  1736. };
  1737. _output->impl.srv_descriptor_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(_output->impl.srv_descriptor_heap, &handle);
  1738. device->lpVtbl->CreateShaderResourceView(device, _output->impl.image, &srv_desc,
  1739. handle);
  1740. D3D12_UNORDERED_ACCESS_VIEW_DESC UAVDesc = {
  1741. .ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D,
  1742. };
  1743. device->lpVtbl->CreateUnorderedAccessView(device, _output->impl.image, NULL, &UAVDesc, outputCpuDescriptor);
  1744. }
  1745. output = _output;
  1746. }
  1747. void gpu_raytrace_dispatch_rays() {
  1748. command_list->lpVtbl->SetComputeRootSignature(command_list, dxrRootSignature);
  1749. // Bind the heaps, acceleration structure and dispatch rays
  1750. command_list->lpVtbl->SetDescriptorHeaps(command_list, 1, &descriptorHeap);
  1751. command_list->lpVtbl->SetComputeRootDescriptorTable(command_list, 0, outputDescriptorHandle);
  1752. command_list->lpVtbl->SetComputeRootShaderResourceView(command_list, 1, accel->impl.top_level_accel->lpVtbl->GetGPUVirtualAddress(accel->impl.top_level_accel));
  1753. command_list->lpVtbl->SetComputeRootDescriptorTable(command_list, 2, ibgpuDescriptorHandle);
  1754. command_list->lpVtbl->SetComputeRootDescriptorTable(command_list, 3, vbgpuDescriptorHandle);
  1755. command_list->lpVtbl->SetComputeRootConstantBufferView(command_list, 4, pipeline->_constant_buffer->impl.buffer->lpVtbl->GetGPUVirtualAddress(pipeline->_constant_buffer->impl.buffer));
  1756. command_list->lpVtbl->SetComputeRootDescriptorTable(command_list, 5, tex0gpuDescriptorHandle);
  1757. command_list->lpVtbl->SetComputeRootDescriptorTable(command_list, 6, tex1gpuDescriptorHandle);
  1758. command_list->lpVtbl->SetComputeRootDescriptorTable(command_list, 7, tex2gpuDescriptorHandle);
  1759. command_list->lpVtbl->SetComputeRootDescriptorTable(command_list, 8, texenvgpuDescriptorHandle);
  1760. command_list->lpVtbl->SetComputeRootDescriptorTable(command_list, 9, texsobolgpuDescriptorHandle);
  1761. command_list->lpVtbl->SetComputeRootDescriptorTable(command_list, 10, texscramblegpuDescriptorHandle);
  1762. command_list->lpVtbl->SetComputeRootDescriptorTable(command_list, 11, texrankgpuDescriptorHandle);
  1763. // Since each shader table has only one shader record, the stride is same as the size.
  1764. D3D12_DISPATCH_RAYS_DESC dispatchDesc = {0};
  1765. D3D12_RESOURCE_DESC desc;
  1766. pipeline->impl.hitgroup_shader_table->lpVtbl->GetDesc(pipeline->impl.hitgroup_shader_table, &desc);
  1767. dispatchDesc.HitGroupTable.StartAddress = pipeline->impl.hitgroup_shader_table->lpVtbl->GetGPUVirtualAddress(pipeline->impl.hitgroup_shader_table);
  1768. dispatchDesc.HitGroupTable.SizeInBytes = desc.Width;
  1769. dispatchDesc.HitGroupTable.StrideInBytes = dispatchDesc.HitGroupTable.SizeInBytes;
  1770. dispatchDesc.MissShaderTable.StartAddress = pipeline->impl.miss_shader_table->lpVtbl->GetGPUVirtualAddress(pipeline->impl.miss_shader_table);
  1771. pipeline->impl.miss_shader_table->lpVtbl->GetDesc(pipeline->impl.miss_shader_table, &desc);
  1772. dispatchDesc.MissShaderTable.SizeInBytes = desc.Width;
  1773. dispatchDesc.MissShaderTable.StrideInBytes = dispatchDesc.MissShaderTable.SizeInBytes;
  1774. dispatchDesc.RayGenerationShaderRecord.StartAddress = pipeline->impl.raygen_shader_table->lpVtbl->GetGPUVirtualAddress(pipeline->impl.raygen_shader_table);
  1775. pipeline->impl.raygen_shader_table->lpVtbl->GetDesc(pipeline->impl.raygen_shader_table, &desc);
  1776. dispatchDesc.RayGenerationShaderRecord.SizeInBytes = desc.Width;
  1777. dispatchDesc.Width = output->width;
  1778. dispatchDesc.Height = output->height;
  1779. dispatchDesc.Depth = 1;
  1780. dxrCommandList->lpVtbl->SetPipelineState1(dxrCommandList, pipeline->impl.dxr_state);
  1781. dxrCommandList->lpVtbl->DispatchRays(dxrCommandList, &dispatchDesc);
  1782. }