direct3d12_gpu.c 84 KB


  1. #define WIN32_LEAN_AND_MEAN
  2. #include <assert.h>
  3. #include <backends/windows_system.h>
  4. #include <d3d12.h>
  5. #include <dxgi.h>
  6. #include <dxgi1_4.h>
  7. #include <iron_global.h>
  8. #include <iron_gpu.h>
  9. #include <iron_math.h>
  10. #include <iron_system.h>
  11. #include <malloc.h>
  12. #include <math.h>
  13. #include <stdbool.h>
  14. bool gpu_transpose_mat = false; // NOTE(review): not referenced in the visible part of this file; presumably read by shared gpu code - confirm
  15. static ID3D12Device *device = NULL; // created by D3D12CreateDevice in gpu_init_internal
  16. static ID3D12CommandQueue *queue; // direct command queue created in gpu_init_internal
  17. static IDXGISwapChain *window_swapchain; // swap chain for the window returned by iron_windows_window_handle()
  18. static ID3D12RootSignature *root_signature = NULL; // built once by create_root_signature()
  19. static ID3D12CommandAllocator *command_allocator; // reset together with command_list in gpu_execute_and_wait
  20. static ID3D12GraphicsCommandList *command_list; // the single command list every function in this file records into
  21. static D3D12_VIEWPORT current_viewport; // last viewport set; re-applied after the command list is reset
  22. static D3D12_RECT current_scissor; // last scissor rect set; re-applied after the command list is reset
  23. static gpu_buffer_t *current_vb; // last buffer passed to gpu_set_vertex_buffer
  24. static gpu_buffer_t *current_ib; // last buffer passed to gpu_set_index_buffer
  25. static D3D12_CPU_DESCRIPTOR_HANDLE target_descriptors[GPU_MAX_TEXTURES]; // RTV handles filled in gpu_begin_internal
  26. static D3D12_CPU_DESCRIPTOR_HANDLE depth_handle; // DSV handle of the current depth buffer, filled in gpu_begin_internal
  27. static D3D12_CPU_DESCRIPTOR_HANDLE *current_depth_handle; // &depth_handle, or NULL when no depth buffer is bound
  28. static bool window_vsync; // passed as the sync interval argument to Present
  29. static ID3D12DescriptorHeap *sampler_heap; // shader-visible sampler heap holding a single sampler
  30. static ID3D12DescriptorHeap *srv_heap; // shader-visible CBV/SRV/UAV heap with GPU_CONSTANT_BUFFER_MULTIPLE descriptors
  31. static int srv_heap_index = 0; // next free slot in srv_heap; wrapped back to 0 in gpu_internal_set_textures
  32. static UINT64 fence_value; // last value signaled on `fence`
  33. static ID3D12Fence *fence;
  34. static HANDLE fence_event; // auto-reset event used by wait_for_fence
  35. static UINT64 frame_fence_values[GPU_FRAMEBUFFER_COUNT] = {0, 0}; // fence value signaled after presenting each framebuffer
  36. static bool resized = false; // set by gpu_resize_internal, handled and cleared in gpu_present_internal
  37. static ID3D12Resource *readback_buffer = NULL; // grown on demand in gpu_get_render_target_pixels
  38. static int readback_buffer_size = 0; // size in bytes of readback_buffer
  39. static ID3D12Resource *upload_buffer = NULL; // released in gpu_destroy; allocation is not visible in this part of the file
  40. static int upload_buffer_size = 0;
  41. static ID3D12Resource *resources_to_destroy[512]; // deferred releases, drained in gpu_present_internal
  42. static int resources_to_destroy_count = 0; // number of valid entries in resources_to_destroy
  43. static char device_name[256]; // NOTE(review): not referenced in the visible part of this file
  44. static D3D12_BLEND convert_blend_factor(gpu_blend_t factor) {
  45. switch (factor) {
  46. case GPU_BLEND_ONE:
  47. return D3D12_BLEND_ONE;
  48. case GPU_BLEND_ZERO:
  49. return D3D12_BLEND_ZERO;
  50. case GPU_BLEND_SOURCE_ALPHA:
  51. return D3D12_BLEND_SRC_ALPHA;
  52. case GPU_BLEND_DEST_ALPHA:
  53. return D3D12_BLEND_DEST_ALPHA;
  54. case GPU_BLEND_INV_SOURCE_ALPHA:
  55. return D3D12_BLEND_INV_SRC_ALPHA;
  56. case GPU_BLEND_INV_DEST_ALPHA:
  57. return D3D12_BLEND_INV_DEST_ALPHA;
  58. }
  59. }
  60. static D3D12_CULL_MODE convert_cull_mode(gpu_cull_mode_t cull_mode) {
  61. switch (cull_mode) {
  62. case GPU_CULL_MODE_CLOCKWISE:
  63. return D3D12_CULL_MODE_FRONT;
  64. case GPU_CULL_MODE_COUNTER_CLOCKWISE:
  65. return D3D12_CULL_MODE_BACK;
  66. default:
  67. return D3D12_CULL_MODE_NONE;
  68. }
  69. }
  70. static D3D12_COMPARISON_FUNC convert_compare_mode(gpu_compare_mode_t compare) {
  71. switch (compare) {
  72. default:
  73. case GPU_COMPARE_MODE_ALWAYS:
  74. return D3D12_COMPARISON_FUNC_ALWAYS;
  75. case GPU_COMPARE_MODE_NEVER:
  76. return D3D12_COMPARISON_FUNC_NEVER;
  77. case GPU_COMPARE_MODE_EQUAL:
  78. return D3D12_COMPARISON_FUNC_EQUAL;
  79. case GPU_COMPARE_MODE_LESS:
  80. return D3D12_COMPARISON_FUNC_LESS;
  81. }
  82. }
  83. static DXGI_FORMAT convert_format(gpu_texture_format_t format) {
  84. switch (format) {
  85. case GPU_TEXTURE_FORMAT_RGBA128:
  86. return DXGI_FORMAT_R32G32B32A32_FLOAT;
  87. case GPU_TEXTURE_FORMAT_RGBA64:
  88. return DXGI_FORMAT_R16G16B16A16_FLOAT;
  89. case GPU_TEXTURE_FORMAT_R32:
  90. return DXGI_FORMAT_R32_FLOAT;
  91. case GPU_TEXTURE_FORMAT_R16:
  92. return DXGI_FORMAT_R16_FLOAT;
  93. case GPU_TEXTURE_FORMAT_R8:
  94. return DXGI_FORMAT_R8_UNORM;
  95. case GPU_TEXTURE_FORMAT_D32:
  96. return DXGI_FORMAT_D32_FLOAT;
  97. case GPU_TEXTURE_FORMAT_RGBA32:
  98. default:
  99. return DXGI_FORMAT_R8G8B8A8_UNORM;
  100. }
  101. }
  102. static D3D12_RESOURCE_STATES convert_texture_state(gpu_texture_state_t state) {
  103. switch (state) {
  104. case GPU_TEXTURE_STATE_SHADER_RESOURCE:
  105. return D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
  106. case GPU_TEXTURE_STATE_RENDER_TARGET:
  107. return D3D12_RESOURCE_STATE_RENDER_TARGET;
  108. case GPU_TEXTURE_STATE_RENDER_TARGET_DEPTH:
  109. return D3D12_RESOURCE_STATE_DEPTH_WRITE;
  110. case GPU_TEXTURE_STATE_PRESENT:
  111. return D3D12_RESOURCE_STATE_PRESENT;
  112. }
  113. }
  114. static void wait_for_fence(ID3D12Fence *fence, UINT64 completion_value, HANDLE wait_event) {
  115. if (fence->lpVtbl->GetCompletedValue(fence) < completion_value) {
  116. fence->lpVtbl->SetEventOnCompletion(fence, completion_value, wait_event);
  117. WaitForSingleObject(wait_event, INFINITE);
  118. }
  119. }
  120. static void _gpu_barrier(ID3D12Resource *r, D3D12_RESOURCE_STATES state_before, D3D12_RESOURCE_STATES state_after) {
  121. D3D12_RESOURCE_BARRIER barrier = {
  122. .Transition.pResource = r,
  123. .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
  124. .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
  125. .Transition.StateBefore = state_before,
  126. .Transition.StateAfter = state_after,
  127. .Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES,
  128. };
  129. command_list->lpVtbl->ResourceBarrier(command_list, 1, &barrier);
  130. }
  131. void gpu_barrier(gpu_texture_t *render_target, gpu_texture_state_t state_after) {
  132. if (render_target->state == state_after) {
  133. return;
  134. }
  135. _gpu_barrier(render_target->impl.image, convert_texture_state(render_target->state), convert_texture_state(state_after));
  136. render_target->state = state_after;
  137. }
  138. void gpu_destroy() {
  139. wait_for_fence(fence, fence_value, fence_event);
  140. for (int i = 0; i < GPU_FRAMEBUFFER_COUNT; ++i) {
  141. gpu_texture_destroy_internal(&framebuffers[i]);
  142. }
  143. if (framebuffer_depth.width > 0) {
  144. gpu_texture_destroy_internal(&framebuffer_depth);
  145. }
  146. if (readback_buffer != NULL) {
  147. readback_buffer->lpVtbl->Release(readback_buffer);
  148. }
  149. if (upload_buffer != NULL) {
  150. upload_buffer->lpVtbl->Release(upload_buffer);
  151. }
  152. command_list->lpVtbl->Release(command_list);
  153. command_allocator->lpVtbl->Release(command_allocator);
  154. window_swapchain->lpVtbl->Release(window_swapchain);
  155. queue->lpVtbl->Release(queue);
  156. root_signature->lpVtbl->Release(root_signature);
  157. srv_heap->lpVtbl->Release(srv_heap);
  158. fence->lpVtbl->Release(fence);
  159. CloseHandle(fence_event);
  160. device->lpVtbl->Release(device);
  161. }
  162. void gpu_render_target_init2(gpu_texture_t *render_target, int width, int height, gpu_texture_format_t format, int framebuffer_index) {
  163. render_target->width = width;
  164. render_target->height = height;
  165. render_target->format = format;
  166. render_target->state = (framebuffer_index >= 0) ? GPU_TEXTURE_STATE_PRESENT : GPU_TEXTURE_STATE_SHADER_RESOURCE;
  167. render_target->buffer = NULL;
  168. render_target->impl.has_storage_bit = false;
  169. DXGI_FORMAT dxgi_format = convert_format(format);
  170. D3D12_CLEAR_VALUE clear_value;
  171. clear_value.Format = dxgi_format;
  172. clear_value.Color[0] = 0.0f;
  173. clear_value.Color[1] = 0.0f;
  174. clear_value.Color[2] = 0.0f;
  175. clear_value.Color[3] = 0.0f;
  176. clear_value.DepthStencil.Depth = 1.0f;
  177. D3D12_HEAP_PROPERTIES heap_properties = {
  178. .Type = D3D12_HEAP_TYPE_DEFAULT,
  179. .CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
  180. .MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN,
  181. .CreationNodeMask = 1,
  182. .VisibleNodeMask = 1,
  183. };
  184. D3D12_RESOURCE_DESC resource_desc = {
  185. .Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D,
  186. .Alignment = 0,
  187. .Width = width,
  188. .Height = height,
  189. .DepthOrArraySize = 1,
  190. .MipLevels = 1,
  191. .Format = dxgi_format,
  192. .SampleDesc.Count = 1,
  193. .SampleDesc.Quality = 0,
  194. .Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN,
  195. .Flags = format == GPU_TEXTURE_FORMAT_D32 ? D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL : D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET,
  196. };
  197. D3D12_DESCRIPTOR_HEAP_DESC heap_desc = {
  198. .NumDescriptors = 1,
  199. .Type = format == GPU_TEXTURE_FORMAT_D32 ? D3D12_DESCRIPTOR_HEAP_TYPE_DSV : D3D12_DESCRIPTOR_HEAP_TYPE_RTV,
  200. .Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE,
  201. };
  202. device->lpVtbl->CreateDescriptorHeap(device, &heap_desc, &IID_ID3D12DescriptorHeap, &render_target->impl.rtv_descriptor_heap);
  203. if (framebuffer_index >= 0) {
  204. window_swapchain->lpVtbl->GetBuffer(window_swapchain, framebuffer_index, &IID_ID3D12Resource, &render_target->impl.image);
  205. }
  206. else {
  207. device->lpVtbl->CreateCommittedResource(device, &heap_properties, D3D12_HEAP_FLAG_NONE, &resource_desc, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE,
  208. &clear_value, &IID_ID3D12Resource, &render_target->impl.image);
  209. }
  210. D3D12_RENDER_TARGET_VIEW_DESC view_desc = {
  211. .Format = dxgi_format,
  212. .ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D,
  213. .Texture2D.MipSlice = 0,
  214. .Texture2D.PlaneSlice = 0,
  215. };
  216. D3D12_CPU_DESCRIPTOR_HANDLE handle;
  217. render_target->impl.rtv_descriptor_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(render_target->impl.rtv_descriptor_heap, &handle);
  218. if (format == GPU_TEXTURE_FORMAT_D32) {
  219. device->lpVtbl->CreateDepthStencilView(device, render_target->impl.image, NULL, handle);
  220. }
  221. else {
  222. device->lpVtbl->CreateRenderTargetView(device, render_target->impl.image, &view_desc, handle);
  223. }
  224. D3D12_DESCRIPTOR_HEAP_DESC descriptor_heap_desc = {
  225. .NumDescriptors = 1,
  226. .Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
  227. .NodeMask = 0,
  228. .Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE,
  229. };
  230. device->lpVtbl->CreateDescriptorHeap(device, &descriptor_heap_desc, &IID_ID3D12DescriptorHeap, &render_target->impl.srv_descriptor_heap);
  231. D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {
  232. .ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D,
  233. .Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING,
  234. .Format = format == GPU_TEXTURE_FORMAT_D32 ? DXGI_FORMAT_R32_FLOAT : dxgi_format,
  235. .Texture2D.MipLevels = 1,
  236. .Texture2D.MostDetailedMip = 0,
  237. .Texture2D.ResourceMinLODClamp = 0.0f,
  238. };
  239. render_target->impl.srv_descriptor_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(render_target->impl.srv_descriptor_heap, &handle);
  240. device->lpVtbl->CreateShaderResourceView(device, render_target->impl.image, &srv_desc, handle);
  241. }
  242. void create_root_signature(bool linear_sampling) {
  243. ID3DBlob *root_blob;
  244. ID3DBlob *error_blob;
  245. D3D12_ROOT_PARAMETER parameters[3] = {0};
  246. D3D12_DESCRIPTOR_RANGE range = {
  247. .RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV,
  248. .NumDescriptors = (UINT)GPU_MAX_TEXTURES,
  249. .BaseShaderRegister = 0,
  250. .RegisterSpace = 0,
  251. .OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND,
  252. };
  253. parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
  254. parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  255. parameters[0].DescriptorTable.NumDescriptorRanges = 1;
  256. parameters[0].DescriptorTable.pDescriptorRanges = &range;
  257. parameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
  258. parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  259. parameters[1].Descriptor.ShaderRegister = 0;
  260. parameters[1].Descriptor.RegisterSpace = 0;
  261. D3D12_DESCRIPTOR_RANGE sampler_range = {
  262. .RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER,
  263. .NumDescriptors = 1,
  264. .BaseShaderRegister = 0,
  265. .RegisterSpace = 0,
  266. .OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND,
  267. };
  268. parameters[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
  269. parameters[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  270. parameters[2].DescriptorTable.NumDescriptorRanges = 1;
  271. parameters[2].DescriptorTable.pDescriptorRanges = &sampler_range;
  272. D3D12_ROOT_SIGNATURE_DESC root_signature_desc = {
  273. .NumParameters = 3,
  274. .pParameters = parameters,
  275. .Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT,
  276. };
  277. D3D12SerializeRootSignature(&root_signature_desc, D3D_ROOT_SIGNATURE_VERSION_1, &root_blob, &error_blob);
  278. device->lpVtbl->CreateRootSignature(device, 0, root_blob->lpVtbl->GetBufferPointer(root_blob), root_blob->lpVtbl->GetBufferSize(root_blob),
  279. &IID_ID3D12RootSignature, &root_signature);
  280. }
  281. void gpu_init_internal(int depth_buffer_bits, bool vsync) {
  282. window_vsync = vsync;
  283. #ifdef _DEBUG
  284. ID3D12Debug *debug_controller = NULL;
  285. if (D3D12GetDebugInterface(&IID_ID3D12Debug, &debug_controller) == S_OK) {
  286. debug_controller->lpVtbl->EnableDebugLayer(debug_controller);
  287. }
  288. #endif
  289. D3D12CreateDevice(NULL, D3D_FEATURE_LEVEL_11_0, &IID_ID3D12Device, &device);
  290. create_root_signature(true);
  291. D3D12_COMMAND_QUEUE_DESC queue_desc = {
  292. .Flags = D3D12_COMMAND_QUEUE_FLAG_NONE,
  293. .Type = D3D12_COMMAND_LIST_TYPE_DIRECT,
  294. };
  295. device->lpVtbl->CreateCommandQueue(device, &queue_desc, &IID_ID3D12CommandQueue, &queue);
  296. HWND hwnd = iron_windows_window_handle();
  297. DXGI_SWAP_CHAIN_DESC swapchain_desc = {
  298. .BufferCount = GPU_FRAMEBUFFER_COUNT,
  299. .BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM,
  300. .BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT,
  301. .BufferDesc.Width = iron_window_width(),
  302. .BufferDesc.Height = iron_window_height(),
  303. .OutputWindow = hwnd,
  304. .SampleDesc.Count = 1,
  305. .SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD,
  306. .Windowed = true,
  307. };
  308. IDXGIFactory4 *dxgi_factory = NULL;
  309. CreateDXGIFactory1(&IID_IDXGIFactory4, &dxgi_factory);
  310. dxgi_factory->lpVtbl->CreateSwapChain(dxgi_factory, (IUnknown *)queue, &swapchain_desc, &window_swapchain);
  311. fence_value = 0;
  312. fence_event = CreateEvent(NULL, FALSE, FALSE, NULL);
  313. device->lpVtbl->CreateFence(device, 0, D3D12_FENCE_FLAG_NONE, &IID_ID3D12Fence, &fence);
  314. gpu_create_framebuffers(depth_buffer_bits);
  315. D3D12_DESCRIPTOR_HEAP_DESC heap_desc = {
  316. .NumDescriptors = GPU_CONSTANT_BUFFER_MULTIPLE,
  317. .Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
  318. .Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE,
  319. };
  320. device->lpVtbl->CreateDescriptorHeap(device, &heap_desc, &IID_ID3D12DescriptorHeap, &srv_heap);
  321. D3D12_DESCRIPTOR_HEAP_DESC sampler_heap_desc = {
  322. .NumDescriptors = 1,
  323. .Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
  324. .Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE,
  325. };
  326. device->lpVtbl->CreateDescriptorHeap(device, &sampler_heap_desc, &IID_ID3D12DescriptorHeap, &sampler_heap);
  327. gpu_use_linear_sampling(true);
  328. device->lpVtbl->CreateCommandAllocator(device, D3D12_COMMAND_LIST_TYPE_DIRECT, &IID_ID3D12CommandAllocator, &command_allocator);
  329. device->lpVtbl->CreateCommandList(device, 0, D3D12_COMMAND_LIST_TYPE_DIRECT, command_allocator, NULL, &IID_ID3D12CommandList, &command_list);
  330. }
  331. void gpu_begin_internal(gpu_clear_t flags, unsigned color, float depth) {
  332. for (int i = 0; i < current_render_targets_count; ++i) {
  333. current_render_targets[i]->impl.rtv_descriptor_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(current_render_targets[i]->impl.rtv_descriptor_heap,
  334. &target_descriptors[i]);
  335. }
  336. if (current_depth_buffer != NULL) {
  337. current_depth_buffer->impl.rtv_descriptor_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(current_depth_buffer->impl.rtv_descriptor_heap,
  338. &depth_handle);
  339. current_depth_handle = &depth_handle;
  340. }
  341. else {
  342. current_depth_handle = NULL;
  343. }
  344. command_list->lpVtbl->OMSetRenderTargets(command_list, current_render_targets_count, &target_descriptors[0], false, current_depth_handle);
  345. gpu_texture_t *target = current_render_targets[0];
  346. gpu_viewport(0, 0, target->width, target->height);
  347. gpu_scissor(0, 0, target->width, target->height);
  348. if (flags & GPU_CLEAR_COLOR) {
  349. float clear_color[] = {((color & 0x00ff0000) >> 16) / 255.0f, ((color & 0x0000ff00) >> 8) / 255.0f, (color & 0x000000ff) / 255.0f,
  350. ((color & 0xff000000) >> 24) / 255.0f};
  351. D3D12_CPU_DESCRIPTOR_HANDLE handle;
  352. target->impl.rtv_descriptor_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(target->impl.rtv_descriptor_heap, &handle);
  353. command_list->lpVtbl->ClearRenderTargetView(command_list, handle, clear_color, 0, NULL);
  354. }
  355. if (flags & GPU_CLEAR_DEPTH && current_depth_buffer != NULL) {
  356. D3D12_CPU_DESCRIPTOR_HANDLE handle;
  357. current_depth_buffer->impl.rtv_descriptor_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(current_depth_buffer->impl.rtv_descriptor_heap, &handle);
  358. command_list->lpVtbl->ClearDepthStencilView(command_list, handle, D3D12_CLEAR_FLAG_DEPTH, depth, 0, 0, NULL);
  359. }
  360. }
  361. void gpu_end_internal() {
  362. for (int i = 0; i < current_render_targets_count; ++i) {
  363. gpu_barrier(current_render_targets[i],
  364. current_render_targets[i] == &framebuffers[framebuffer_index] ? GPU_TEXTURE_STATE_PRESENT : GPU_TEXTURE_STATE_SHADER_RESOURCE);
  365. }
  366. current_render_targets_count = 0;
  367. }
  368. void gpu_execute_and_wait() {
  369. command_list->lpVtbl->Close(command_list);
  370. ID3D12CommandList *command_lists[] = {(ID3D12CommandList *)command_list};
  371. queue->lpVtbl->ExecuteCommandLists(queue, 1, command_lists);
  372. queue->lpVtbl->Signal(queue, fence, ++fence_value);
  373. wait_for_fence(fence, fence_value, fence_event);
  374. command_allocator->lpVtbl->Reset(command_allocator);
  375. command_list->lpVtbl->Reset(command_list, command_allocator, NULL);
  376. if (gpu_in_use) {
  377. command_list->lpVtbl->OMSetRenderTargets(command_list, current_render_targets_count, &target_descriptors[0], false, current_depth_handle);
  378. command_list->lpVtbl->SetPipelineState(command_list, current_pipeline->impl.pipeline);
  379. command_list->lpVtbl->SetGraphicsRootSignature(command_list, root_signature);
  380. command_list->lpVtbl->IASetVertexBuffers(command_list, 0, 1, (D3D12_VERTEX_BUFFER_VIEW *)&current_vb->impl.vertex_buffer_view);
  381. command_list->lpVtbl->IASetIndexBuffer(command_list, (D3D12_INDEX_BUFFER_VIEW *)&current_ib->impl.index_buffer_view);
  382. command_list->lpVtbl->RSSetViewports(command_list, 1, &current_viewport);
  383. command_list->lpVtbl->RSSetScissorRects(command_list, 1, &current_scissor);
  384. }
  385. }
  386. void gpu_present_internal() {
  387. gpu_execute_and_wait();
  388. window_swapchain->lpVtbl->Present(window_swapchain, window_vsync, 0);
  389. queue->lpVtbl->Signal(queue, fence, ++fence_value);
  390. frame_fence_values[framebuffer_index] = fence_value;
  391. framebuffer_index = (framebuffer_index + 1) % GPU_FRAMEBUFFER_COUNT;
  392. wait_for_fence(fence, frame_fence_values[framebuffer_index], fence_event);
  393. if (resized) {
  394. framebuffer_index = 0;
  395. for (int i = 0; i < GPU_FRAMEBUFFER_COUNT; ++i) {
  396. gpu_texture_destroy_internal(&framebuffers[i]);
  397. }
  398. if (framebuffer_depth.width > 0) {
  399. gpu_texture_destroy_internal(&framebuffer_depth);
  400. }
  401. window_swapchain->lpVtbl->ResizeBuffers(window_swapchain, GPU_FRAMEBUFFER_COUNT, iron_window_width(), iron_window_height(), DXGI_FORMAT_R8G8B8A8_UNORM,
  402. 0);
  403. for (int i = 0; i < GPU_FRAMEBUFFER_COUNT; ++i) {
  404. gpu_render_target_init2(&framebuffers[i], iron_window_width(), iron_window_height(), GPU_TEXTURE_FORMAT_RGBA32, i);
  405. }
  406. if (framebuffer_depth.width > 0) {
  407. gpu_render_target_init2(&framebuffer_depth, iron_window_width(), iron_window_height(), GPU_TEXTURE_FORMAT_D32, -1);
  408. }
  409. resized = false;
  410. }
  411. while (resources_to_destroy_count > 0) {
  412. resources_to_destroy_count--;
  413. ID3D12Resource *r = resources_to_destroy[resources_to_destroy_count];
  414. r->lpVtbl->Release(r);
  415. }
  416. }
  417. void gpu_resize_internal(int width, int height) {
  418. if (fence_value == 0) {
  419. return;
  420. }
  421. resized = true;
  422. }
  423. bool gpu_raytrace_supported() {
  424. D3D12_FEATURE_DATA_D3D12_OPTIONS5 options;
  425. if (device->lpVtbl->CheckFeatureSupport(device, D3D12_FEATURE_D3D12_OPTIONS5, &options, sizeof(options)) == S_OK) {
  426. return options.RaytracingTier >= D3D12_RAYTRACING_TIER_1_0;
  427. }
  428. return false;
  429. }
  430. void gpu_set_constant_buffer(gpu_buffer_t *buffer, int offset, size_t size) {
  431. command_list->lpVtbl->SetGraphicsRootConstantBufferView(command_list, 1, buffer->impl.buffer->lpVtbl->GetGPUVirtualAddress(buffer->impl.buffer) + offset);
  432. }
  433. void gpu_internal_set_textures() {
  434. UINT srv_step = device->lpVtbl->GetDescriptorHandleIncrementSize(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
  435. if (srv_heap_index + GPU_MAX_TEXTURES > GPU_CONSTANT_BUFFER_MULTIPLE) {
  436. srv_heap_index = 0;
  437. }
  438. D3D12_CPU_DESCRIPTOR_HANDLE cpu_base;
  439. D3D12_GPU_DESCRIPTOR_HANDLE gpu_base;
  440. srv_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(srv_heap, &cpu_base);
  441. srv_heap->lpVtbl->GetGPUDescriptorHandleForHeapStart(srv_heap, &gpu_base);
  442. cpu_base.ptr += srv_heap_index * srv_step;
  443. gpu_base.ptr += srv_heap_index * srv_step;
  444. for (int i = 0; i < GPU_MAX_TEXTURES; ++i) {
  445. if (current_textures[i] != NULL) {
  446. D3D12_CPU_DESCRIPTOR_HANDLE source_cpu;
  447. ID3D12DescriptorHeap *source_heap = current_textures[i]->impl.srv_descriptor_heap;
  448. source_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(source_heap, &source_cpu);
  449. device->lpVtbl->CopyDescriptorsSimple(device, 1, cpu_base, source_cpu, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
  450. cpu_base.ptr += srv_step;
  451. srv_heap_index++;
  452. }
  453. }
  454. ID3D12DescriptorHeap *heaps[] = {srv_heap, sampler_heap};
  455. command_list->lpVtbl->SetDescriptorHeaps(command_list, 2, heaps);
  456. command_list->lpVtbl->SetGraphicsRootDescriptorTable(command_list, 0, gpu_base);
  457. D3D12_GPU_DESCRIPTOR_HANDLE sampler_gpu_base;
  458. sampler_heap->lpVtbl->GetGPUDescriptorHandleForHeapStart(sampler_heap, &sampler_gpu_base);
  459. command_list->lpVtbl->SetGraphicsRootDescriptorTable(command_list, 2, sampler_gpu_base);
  460. }
  461. void gpu_draw_internal() {
  462. gpu_internal_set_textures();
  463. command_list->lpVtbl->IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
  464. command_list->lpVtbl->DrawIndexedInstanced(command_list, current_ib->count, 1, 0, 0, 0);
  465. }
  466. void gpu_viewport(int x, int y, int width, int height) {
  467. current_viewport = (D3D12_VIEWPORT){
  468. .TopLeftX = (float)x,
  469. .TopLeftY = (float)y,
  470. .Width = (float)width,
  471. .Height = (float)height,
  472. .MinDepth = 0.0f,
  473. .MaxDepth = 1.0f,
  474. };
  475. command_list->lpVtbl->RSSetViewports(command_list, 1, &current_viewport);
  476. }
  477. void gpu_scissor(int x, int y, int width, int height) {
  478. current_scissor = (D3D12_RECT){
  479. .left = x,
  480. .top = y,
  481. .right = x + width,
  482. .bottom = y + height,
  483. };
  484. command_list->lpVtbl->RSSetScissorRects(command_list, 1, &current_scissor);
  485. }
  486. void gpu_disable_scissor() {
  487. current_scissor = (D3D12_RECT){
  488. .left = 0,
  489. .top = 0,
  490. .right = current_render_targets[0]->width,
  491. .bottom = current_render_targets[0]->height,
  492. };
  493. command_list->lpVtbl->RSSetScissorRects(command_list, 1, &current_scissor);
  494. }
  495. void gpu_set_pipeline_internal(gpu_pipeline_t *pipeline) {
  496. command_list->lpVtbl->SetPipelineState(command_list, pipeline->impl.pipeline);
  497. command_list->lpVtbl->SetGraphicsRootSignature(command_list, root_signature);
  498. }
  499. void gpu_set_vertex_buffer(gpu_buffer_t *buffer) {
  500. current_vb = buffer;
  501. command_list->lpVtbl->IASetVertexBuffers(command_list, 0, 1, (D3D12_VERTEX_BUFFER_VIEW *)&buffer->impl.vertex_buffer_view);
  502. }
  503. void gpu_set_index_buffer(gpu_buffer_t *buffer) {
  504. current_ib = buffer;
  505. command_list->lpVtbl->IASetIndexBuffer(command_list, (D3D12_INDEX_BUFFER_VIEW *)&buffer->impl.index_buffer_view);
  506. }
  507. void gpu_get_render_target_pixels(gpu_texture_t *render_target, uint8_t *data) {
  508. D3D12_RESOURCE_DESC desc;
  509. render_target->impl.image->lpVtbl->GetDesc(render_target->impl.image, &desc);
  510. DXGI_FORMAT dxgi_format = desc.Format;
  511. int format_size = gpu_texture_format_size(render_target->format);
  512. int packed_row_size = render_target->width * format_size;
  513. int row_pitch = packed_row_size;
  514. int align = row_pitch % D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
  515. if (align != 0) {
  516. row_pitch = row_pitch + (D3D12_TEXTURE_DATA_PITCH_ALIGNMENT - align);
  517. }
  518. int new_readback_buffer_size = row_pitch * render_target->height;
  519. if (new_readback_buffer_size < (2048 * 2048 * 4)) {
  520. new_readback_buffer_size = (2048 * 2048 * 4);
  521. }
  522. if (readback_buffer_size < new_readback_buffer_size) {
  523. readback_buffer_size = new_readback_buffer_size;
  524. if (readback_buffer != NULL) {
  525. readback_buffer->lpVtbl->Release(readback_buffer);
  526. }
  527. D3D12_HEAP_PROPERTIES heap_properties = {
  528. .Type = D3D12_HEAP_TYPE_READBACK,
  529. .CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
  530. .MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN,
  531. .CreationNodeMask = 1,
  532. .VisibleNodeMask = 1,
  533. };
  534. D3D12_RESOURCE_DESC resource_desc = {
  535. .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
  536. .Alignment = 0,
  537. .Width = readback_buffer_size,
  538. .Height = 1,
  539. .DepthOrArraySize = 1,
  540. .MipLevels = 1,
  541. .Format = DXGI_FORMAT_UNKNOWN,
  542. .SampleDesc.Count = 1,
  543. .SampleDesc.Quality = 0,
  544. .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
  545. .Flags = D3D12_RESOURCE_FLAG_NONE,
  546. };
  547. device->lpVtbl->CreateCommittedResource(device, &heap_properties, D3D12_HEAP_FLAG_NONE, &resource_desc, D3D12_RESOURCE_STATE_COPY_DEST, NULL,
  548. &IID_ID3D12Resource, &readback_buffer);
  549. }
  550. // Copy render target to readback buffer
  551. D3D12_RESOURCE_BARRIER barrier = {
  552. .Transition.pResource = render_target->impl.image,
  553. .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
  554. .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
  555. .Transition.StateBefore = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE,
  556. .Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE,
  557. .Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES,
  558. };
  559. command_list->lpVtbl->ResourceBarrier(command_list, 1, &barrier);
  560. D3D12_TEXTURE_COPY_LOCATION source = {
  561. .pResource = render_target->impl.image,
  562. .Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX,
  563. .SubresourceIndex = 0,
  564. };
  565. D3D12_TEXTURE_COPY_LOCATION dest = {
  566. .pResource = readback_buffer,
  567. .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
  568. .PlacedFootprint.Offset = 0,
  569. .PlacedFootprint.Footprint.Format = dxgi_format,
  570. .PlacedFootprint.Footprint.Width = render_target->width,
  571. .PlacedFootprint.Footprint.Height = render_target->height,
  572. .PlacedFootprint.Footprint.Depth = 1,
  573. .PlacedFootprint.Footprint.RowPitch = row_pitch,
  574. };
  575. command_list->lpVtbl->CopyTextureRegion(command_list, &dest, 0, 0, 0, &source, NULL);
  576. barrier = (D3D12_RESOURCE_BARRIER){
  577. .Transition.pResource = render_target->impl.image,
  578. .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
  579. .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
  580. .Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE,
  581. .Transition.StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE,
  582. .Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES,
  583. };
  584. command_list->lpVtbl->ResourceBarrier(command_list, 1, &barrier);
  585. gpu_execute_and_wait();
  586. void *p;
  587. readback_buffer->lpVtbl->Map(readback_buffer, 0, NULL, &p);
  588. if (packed_row_size == row_pitch) {
  589. memcpy(data, p, render_target->width * render_target->height * format_size);
  590. }
  591. else {
  592. uint8_t *src = (uint8_t *)p;
  593. uint8_t *dst = data;
  594. for (int y = 0; y < render_target->height; y++) {
  595. memcpy(dst, src, packed_row_size);
  596. src += row_pitch;
  597. dst += packed_row_size;
  598. }
  599. }
  600. readback_buffer->lpVtbl->Unmap(readback_buffer, 0, NULL);
  601. }
  602. void gpu_set_texture(int unit, gpu_texture_t *texture) {
  603. current_textures[unit] = texture;
  604. }
  605. void gpu_use_linear_sampling(bool b) {
  606. D3D12_SAMPLER_DESC sampler_desc = {
  607. .Filter = b ? D3D12_FILTER_MIN_MAG_MIP_LINEAR : D3D12_FILTER_MIN_MAG_MIP_POINT,
  608. .AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP,
  609. .AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP,
  610. .AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP,
  611. .MipLODBias = 0,
  612. .MaxAnisotropy = 16,
  613. .ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER,
  614. .BorderColor = {0.0f, 0.0f, 0.0f, 0.0f},
  615. .MinLOD = 0.0f,
  616. .MaxLOD = D3D12_FLOAT32_MAX,
  617. };
  618. D3D12_CPU_DESCRIPTOR_HANDLE sampler_handle;
  619. sampler_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(sampler_heap, &sampler_handle);
  620. device->lpVtbl->CreateSampler(device, &sampler_desc, sampler_handle);
  621. }
  622. void gpu_pipeline_destroy_internal(gpu_pipeline_t *pipe) {
  623. if (pipe->impl.pipeline != NULL) {
  624. pipe->impl.pipeline->lpVtbl->Release(pipe->impl.pipeline);
  625. pipe->impl.pipeline = NULL;
  626. }
  627. }
  628. void gpu_pipeline_compile(gpu_pipeline_t *pipe) {
  629. int vertex_attribute_count = pipe->input_layout->size;
  630. D3D12_INPUT_ELEMENT_DESC *vertex_desc = (D3D12_INPUT_ELEMENT_DESC *)alloca(sizeof(D3D12_INPUT_ELEMENT_DESC) * vertex_attribute_count);
  631. ZeroMemory(vertex_desc, sizeof(D3D12_INPUT_ELEMENT_DESC) * vertex_attribute_count);
  632. for (int i = 0; i < pipe->input_layout->size; ++i) {
  633. vertex_desc[i].SemanticName = "TEXCOORD";
  634. vertex_desc[i].SemanticIndex = i;
  635. vertex_desc[i].InputSlot = 0;
  636. vertex_desc[i].AlignedByteOffset = (i == 0) ? 0 : D3D12_APPEND_ALIGNED_ELEMENT;
  637. vertex_desc[i].InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
  638. vertex_desc[i].InstanceDataStepRate = 0;
  639. switch (pipe->input_layout->elements[i].data) {
  640. case GPU_VERTEX_DATA_F32_1X:
  641. vertex_desc[i].Format = DXGI_FORMAT_R32_FLOAT;
  642. break;
  643. case GPU_VERTEX_DATA_F32_2X:
  644. vertex_desc[i].Format = DXGI_FORMAT_R32G32_FLOAT;
  645. break;
  646. case GPU_VERTEX_DATA_F32_3X:
  647. vertex_desc[i].Format = DXGI_FORMAT_R32G32B32_FLOAT;
  648. break;
  649. case GPU_VERTEX_DATA_F32_4X:
  650. vertex_desc[i].Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
  651. break;
  652. case GPU_VERTEX_DATA_I16_2X_NORM:
  653. vertex_desc[i].Format = DXGI_FORMAT_R16G16_SNORM;
  654. break;
  655. case GPU_VERTEX_DATA_I16_4X_NORM:
  656. vertex_desc[i].Format = DXGI_FORMAT_R16G16B16A16_SNORM;
  657. break;
  658. default:
  659. break;
  660. }
  661. }
  662. const D3D12_DEPTH_STENCILOP_DESC default_stencil_op = {D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP, D3D12_COMPARISON_FUNC_NEVER};
  663. D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {
  664. .VS.BytecodeLength = pipe->vertex_shader->impl.length,
  665. .VS.pShaderBytecode = pipe->vertex_shader->impl.data,
  666. .PS.BytecodeLength = pipe->fragment_shader->impl.length,
  667. .PS.pShaderBytecode = pipe->fragment_shader->impl.data,
  668. .pRootSignature = root_signature,
  669. .NumRenderTargets = pipe->color_attachment_count,
  670. .InputLayout.NumElements = vertex_attribute_count,
  671. .InputLayout.pInputElementDescs = vertex_desc,
  672. .RasterizerState.FillMode = D3D12_FILL_MODE_SOLID,
  673. .RasterizerState.CullMode = convert_cull_mode(pipe->cull_mode),
  674. .RasterizerState.FrontCounterClockwise = FALSE,
  675. .RasterizerState.DepthBias = D3D12_DEFAULT_DEPTH_BIAS,
  676. .RasterizerState.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP,
  677. .RasterizerState.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS,
  678. .RasterizerState.DepthClipEnable = TRUE,
  679. .RasterizerState.MultisampleEnable = FALSE,
  680. .RasterizerState.AntialiasedLineEnable = FALSE,
  681. .RasterizerState.ForcedSampleCount = 0,
  682. .RasterizerState.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF,
  683. .BlendState.AlphaToCoverageEnable = FALSE,
  684. .BlendState.IndependentBlendEnable = FALSE,
  685. .DepthStencilState.StencilEnable = FALSE,
  686. .DepthStencilState.StencilReadMask = D3D12_DEFAULT_STENCIL_READ_MASK,
  687. .DepthStencilState.StencilWriteMask = D3D12_DEFAULT_STENCIL_WRITE_MASK,
  688. .DepthStencilState.DepthEnable = pipe->depth_mode != GPU_COMPARE_MODE_ALWAYS,
  689. .DepthStencilState.DepthWriteMask = pipe->depth_write ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO,
  690. .DepthStencilState.DepthFunc = convert_compare_mode(pipe->depth_mode),
  691. .DSVFormat = DXGI_FORMAT_D32_FLOAT,
  692. .SampleDesc.Count = 1,
  693. .SampleMask = 0xFFFFFFFF,
  694. .PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE,
  695. .DepthStencilState.FrontFace = default_stencil_op,
  696. .DepthStencilState.BackFace = default_stencil_op,
  697. };
  698. for (int i = 0; i < pipe->color_attachment_count; ++i) {
  699. psoDesc.RTVFormats[i] = convert_format(pipe->color_attachment[i]);
  700. }
  701. psoDesc.BlendState.IndependentBlendEnable = true;
  702. for (UINT i = 0; i < D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; ++i) {
  703. psoDesc.BlendState.RenderTarget[i].BlendEnable = pipe->blend_source != GPU_BLEND_ONE || pipe->blend_destination != GPU_BLEND_ZERO ||
  704. pipe->alpha_blend_source != GPU_BLEND_ONE || pipe->alpha_blend_destination != GPU_BLEND_ZERO;
  705. psoDesc.BlendState.RenderTarget[i].SrcBlend = convert_blend_factor(pipe->blend_source);
  706. psoDesc.BlendState.RenderTarget[i].DestBlend = convert_blend_factor(pipe->blend_destination);
  707. psoDesc.BlendState.RenderTarget[i].BlendOp = D3D12_BLEND_OP_ADD;
  708. psoDesc.BlendState.RenderTarget[i].SrcBlendAlpha = convert_blend_factor(pipe->alpha_blend_source);
  709. psoDesc.BlendState.RenderTarget[i].DestBlendAlpha = convert_blend_factor(pipe->alpha_blend_destination);
  710. psoDesc.BlendState.RenderTarget[i].BlendOpAlpha = D3D12_BLEND_OP_ADD;
  711. psoDesc.BlendState.RenderTarget[i].RenderTargetWriteMask =
  712. (((pipe->color_write_mask_red[i] ? D3D12_COLOR_WRITE_ENABLE_RED : 0) | (pipe->color_write_mask_green[i] ? D3D12_COLOR_WRITE_ENABLE_GREEN : 0)) |
  713. (pipe->color_write_mask_blue[i] ? D3D12_COLOR_WRITE_ENABLE_BLUE : 0)) |
  714. (pipe->color_write_mask_alpha[i] ? D3D12_COLOR_WRITE_ENABLE_ALPHA : 0);
  715. }
  716. device->lpVtbl->CreateGraphicsPipelineState(device, &psoDesc, &IID_ID3D12PipelineState, &pipe->impl.pipeline);
  717. }
  718. void gpu_shader_init(gpu_shader_t *shader, const void *_data, size_t length, gpu_shader_type_t type) {
  719. uint8_t *data = (uint8_t *)_data;
  720. shader->impl.length = (int)length;
  721. shader->impl.data = (uint8_t *)malloc(shader->impl.length);
  722. memcpy(shader->impl.data, data, shader->impl.length);
  723. }
  724. void gpu_shader_destroy(gpu_shader_t *shader) {
  725. free(shader->impl.data);
  726. }
  727. void gpu_texture_init_from_bytes(gpu_texture_t *texture, void *data, int width, int height, gpu_texture_format_t format) {
  728. texture->width = width;
  729. texture->height = height;
  730. texture->format = format;
  731. texture->state = GPU_TEXTURE_STATE_SHADER_RESOURCE;
  732. texture->buffer = NULL;
  733. texture->impl.rtv_descriptor_heap = NULL;
  734. DXGI_FORMAT dxgi_format = convert_format(format);
  735. int format_size = gpu_texture_format_size(format);
  736. D3D12_HEAP_PROPERTIES heap_properties = {
  737. .Type = D3D12_HEAP_TYPE_DEFAULT,
  738. .CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
  739. .MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN,
  740. .CreationNodeMask = 1,
  741. .VisibleNodeMask = 1,
  742. };
  743. D3D12_RESOURCE_DESC resource_desc = {
  744. .Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D,
  745. .Alignment = 0,
  746. .Width = texture->width,
  747. .Height = texture->height,
  748. .DepthOrArraySize = 1,
  749. .MipLevels = 1,
  750. .Format = dxgi_format,
  751. .SampleDesc.Count = 1,
  752. .SampleDesc.Quality = 0,
  753. .Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN,
  754. .Flags = D3D12_RESOURCE_FLAG_NONE,
  755. };
  756. device->lpVtbl->CreateCommittedResource(device, &heap_properties, D3D12_HEAP_FLAG_NONE, &resource_desc, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, NULL,
  757. &IID_ID3D12Resource, &texture->impl.image);
  758. D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint;
  759. UINT64 upload_size;
  760. device->lpVtbl->GetCopyableFootprints(device, &resource_desc, 0, 1, 0, &footprint, NULL, NULL, &upload_size);
  761. int new_upload_buffer_size = upload_size;
  762. if (new_upload_buffer_size < (1024 * 1024 * 4)) {
  763. new_upload_buffer_size = (1024 * 1024 * 4);
  764. }
  765. if (upload_buffer_size < new_upload_buffer_size) {
  766. upload_buffer_size = new_upload_buffer_size;
  767. if (upload_buffer != NULL) {
  768. upload_buffer->lpVtbl->Release(upload_buffer);
  769. }
  770. D3D12_HEAP_PROPERTIES heap_properties_upload = {
  771. .Type = D3D12_HEAP_TYPE_UPLOAD,
  772. .CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
  773. .MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN,
  774. .CreationNodeMask = 1,
  775. .VisibleNodeMask = 1,
  776. };
  777. D3D12_RESOURCE_DESC resource_desc_upload = {
  778. .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
  779. .Alignment = 0,
  780. .Width = upload_buffer_size,
  781. .Height = 1,
  782. .DepthOrArraySize = 1,
  783. .MipLevels = 1,
  784. .Format = DXGI_FORMAT_UNKNOWN,
  785. .SampleDesc.Count = 1,
  786. .SampleDesc.Quality = 0,
  787. .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
  788. .Flags = D3D12_RESOURCE_FLAG_NONE,
  789. };
  790. device->lpVtbl->CreateCommittedResource(device, &heap_properties_upload, D3D12_HEAP_FLAG_NONE, &resource_desc_upload, D3D12_RESOURCE_STATE_GENERIC_READ,
  791. NULL, &IID_ID3D12Resource, &upload_buffer);
  792. }
  793. BYTE *pixel;
  794. upload_buffer->lpVtbl->Map(upload_buffer, 0, NULL, (void **)&pixel);
  795. UINT row_pitch = footprint.Footprint.RowPitch;
  796. for (int y = 0; y < texture->height; ++y) {
  797. memcpy(pixel + y * row_pitch, ((uint8_t *)data) + y * width * format_size, width * format_size);
  798. }
  799. upload_buffer->lpVtbl->Unmap(upload_buffer, 0, NULL);
  800. D3D12_DESCRIPTOR_HEAP_DESC descriptor_heap_desc = {
  801. .NumDescriptors = 1,
  802. .Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
  803. .NodeMask = 0,
  804. .Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE,
  805. };
  806. device->lpVtbl->CreateDescriptorHeap(device, &descriptor_heap_desc, &IID_ID3D12DescriptorHeap, &texture->impl.srv_descriptor_heap);
  807. D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {
  808. .ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D,
  809. .Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING,
  810. .Format = dxgi_format,
  811. .Texture2D.MipLevels = 1,
  812. .Texture2D.MostDetailedMip = 0,
  813. .Texture2D.ResourceMinLODClamp = 0.0f,
  814. };
  815. D3D12_CPU_DESCRIPTOR_HANDLE handle;
  816. texture->impl.srv_descriptor_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(texture->impl.srv_descriptor_heap, &handle);
  817. device->lpVtbl->CreateShaderResourceView(device, texture->impl.image, &srv_desc, handle);
  818. D3D12_RESOURCE_BARRIER barrier = {
  819. .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
  820. .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
  821. .Transition.pResource = texture->impl.image,
  822. .Transition.StateBefore = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE,
  823. .Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST,
  824. .Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES,
  825. };
  826. command_list->lpVtbl->ResourceBarrier(command_list, 1, &barrier);
  827. D3D12_TEXTURE_COPY_LOCATION source = {
  828. .pResource = upload_buffer,
  829. .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
  830. .PlacedFootprint = footprint,
  831. };
  832. D3D12_TEXTURE_COPY_LOCATION destination = {
  833. .pResource = texture->impl.image,
  834. .Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX,
  835. .SubresourceIndex = 0,
  836. };
  837. command_list->lpVtbl->CopyTextureRegion(command_list, &destination, 0, 0, 0, &source, NULL);
  838. barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST, barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE,
  839. command_list->lpVtbl->ResourceBarrier(command_list, 1, &barrier);
  840. gpu_execute_and_wait(); ////
  841. }
  842. void gpu_texture_destroy_internal(gpu_texture_t *render_target) {
  843. if (render_target->impl.image != NULL) {
  844. render_target->impl.image->lpVtbl->Release(render_target->impl.image);
  845. }
  846. if (render_target->impl.rtv_descriptor_heap != NULL) {
  847. render_target->impl.rtv_descriptor_heap->lpVtbl->Release(render_target->impl.rtv_descriptor_heap);
  848. }
  849. if (render_target->impl.srv_descriptor_heap != NULL) {
  850. render_target->impl.srv_descriptor_heap->lpVtbl->Release(render_target->impl.srv_descriptor_heap);
  851. }
  852. }
  853. void gpu_render_target_init(gpu_texture_t *target, int width, int height, gpu_texture_format_t format) {
  854. gpu_render_target_init2(target, width, height, format, -1);
  855. }
  856. void _gpu_buffer_init(ID3D12Resource **buffer, int size, D3D12_HEAP_TYPE heap_type) {
  857. if (*buffer != NULL) {
  858. assert(resources_to_destroy_count < 512);
  859. resources_to_destroy[resources_to_destroy_count] = *buffer;
  860. resources_to_destroy_count++;
  861. }
  862. D3D12_HEAP_PROPERTIES heap_properties = {
  863. .Type = heap_type,
  864. .CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
  865. .MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN,
  866. .CreationNodeMask = 1,
  867. .VisibleNodeMask = 1,
  868. };
  869. D3D12_RESOURCE_DESC resource_desc = {
  870. .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
  871. .Alignment = 0,
  872. .Width = size,
  873. .Height = 1,
  874. .DepthOrArraySize = 1,
  875. .MipLevels = 1,
  876. .Format = DXGI_FORMAT_UNKNOWN,
  877. .SampleDesc.Count = 1,
  878. .SampleDesc.Quality = 0,
  879. .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
  880. .Flags = D3D12_RESOURCE_FLAG_NONE,
  881. };
  882. device->lpVtbl->CreateCommittedResource(device, &heap_properties, D3D12_HEAP_FLAG_NONE, &resource_desc,
  883. heap_type == D3D12_HEAP_TYPE_UPLOAD ? D3D12_RESOURCE_STATE_GENERIC_READ : D3D12_RESOURCE_STATE_COMMON, NULL,
  884. &IID_ID3D12Resource, buffer);
  885. }
  886. void gpu_vertex_buffer_init(gpu_buffer_t *buffer, int count, gpu_vertex_structure_t *structure) {
  887. buffer->count = count;
  888. buffer->stride = 0;
  889. for (int i = 0; i < structure->size; ++i) {
  890. buffer->stride += gpu_vertex_data_size(structure->elements[i].data);
  891. }
  892. buffer->impl.vertex_buffer_view.SizeInBytes = buffer->stride * buffer->count;
  893. buffer->impl.vertex_buffer_view.StrideInBytes = buffer->stride;
  894. buffer->impl.buffer = NULL;
  895. }
  896. void *gpu_vertex_buffer_lock(gpu_buffer_t *buffer) {
  897. _gpu_buffer_init(&buffer->impl.buffer, buffer->stride * buffer->count, D3D12_HEAP_TYPE_UPLOAD);
  898. D3D12_RANGE range = {
  899. .Begin = 0,
  900. .End = buffer->count * buffer->stride,
  901. };
  902. void *p;
  903. buffer->impl.buffer->lpVtbl->Map(buffer->impl.buffer, 0, &range, &p);
  904. return p;
  905. }
  906. void gpu_vertex_buffer_unlock(gpu_buffer_t *buffer) {
  907. D3D12_RANGE range = {
  908. .Begin = 0,
  909. .End = buffer->count * buffer->stride,
  910. };
  911. buffer->impl.buffer->lpVtbl->Unmap(buffer->impl.buffer, 0, &range);
  912. ID3D12Resource *upload_buffer = buffer->impl.buffer;
  913. _gpu_buffer_init(&buffer->impl.buffer, buffer->stride * buffer->count, D3D12_HEAP_TYPE_DEFAULT);
  914. _gpu_barrier(buffer->impl.buffer, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_COPY_DEST);
  915. command_list->lpVtbl->CopyBufferRegion(command_list, buffer->impl.buffer, 0, upload_buffer, 0, buffer->stride * buffer->count);
  916. _gpu_barrier(buffer->impl.buffer, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER);
  917. buffer->impl.vertex_buffer_view.BufferLocation = buffer->impl.buffer->lpVtbl->GetGPUVirtualAddress(buffer->impl.buffer);
  918. }
  919. void gpu_index_buffer_init(gpu_buffer_t *buffer, int count) {
  920. buffer->count = count;
  921. buffer->impl.index_buffer_view.SizeInBytes = count * 4;
  922. buffer->impl.index_buffer_view.Format = DXGI_FORMAT_R32_UINT;
  923. buffer->impl.buffer = NULL;
  924. }
  925. void *gpu_index_buffer_lock(gpu_buffer_t *buffer) {
  926. _gpu_buffer_init(&buffer->impl.buffer, buffer->count * 4, D3D12_HEAP_TYPE_UPLOAD);
  927. D3D12_RANGE range = {
  928. .Begin = 0,
  929. .End = buffer->count * 4,
  930. };
  931. void *p;
  932. buffer->impl.buffer->lpVtbl->Map(buffer->impl.buffer, 0, &range, &p);
  933. return p;
  934. }
  935. void gpu_index_buffer_unlock(gpu_buffer_t *buffer) {
  936. D3D12_RANGE range = {
  937. .Begin = 0,
  938. .End = buffer->count * 4,
  939. };
  940. buffer->impl.buffer->lpVtbl->Unmap(buffer->impl.buffer, 0, &range);
  941. ID3D12Resource *upload_buffer = buffer->impl.buffer;
  942. _gpu_buffer_init(&buffer->impl.buffer, buffer->count * 4, D3D12_HEAP_TYPE_DEFAULT);
  943. _gpu_barrier(buffer->impl.buffer, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_COPY_DEST);
  944. command_list->lpVtbl->CopyBufferRegion(command_list, buffer->impl.buffer, 0, upload_buffer, 0, buffer->count * 4);
  945. _gpu_barrier(buffer->impl.buffer, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_INDEX_BUFFER);
  946. buffer->impl.index_buffer_view.BufferLocation = buffer->impl.buffer->lpVtbl->GetGPUVirtualAddress(buffer->impl.buffer);
  947. }
  948. void gpu_constant_buffer_init(gpu_buffer_t *buffer, int size) {
  949. buffer->count = size;
  950. buffer->data = NULL;
  951. buffer->impl.buffer = NULL;
  952. _gpu_buffer_init(&buffer->impl.buffer, size, D3D12_HEAP_TYPE_UPLOAD);
  953. }
  954. void gpu_constant_buffer_lock(gpu_buffer_t *buffer, int start, int count) {
  955. buffer->impl.last_start = start;
  956. buffer->impl.last_count = count;
  957. D3D12_RANGE range = {
  958. .Begin = start,
  959. .End = start + count,
  960. };
  961. uint8_t *p;
  962. buffer->impl.buffer->lpVtbl->Map(buffer->impl.buffer, 0, &range, (void **)&p);
  963. buffer->data = &p[start];
  964. }
  965. void gpu_constant_buffer_unlock(gpu_buffer_t *buffer) {
  966. D3D12_RANGE range = {
  967. .Begin = buffer->impl.last_start,
  968. .End = buffer->impl.last_start + buffer->impl.last_count,
  969. };
  970. buffer->impl.buffer->lpVtbl->Unmap(buffer->impl.buffer, 0, &range);
  971. buffer->data = NULL;
  972. }
  973. void gpu_buffer_destroy_internal(gpu_buffer_t *buffer) {
  974. buffer->impl.buffer->lpVtbl->Release(buffer->impl.buffer);
  975. buffer->impl.buffer = NULL;
  976. }
  977. char *gpu_device_name() {
  978. IDXGIFactory *factory;
  979. CreateDXGIFactory(&IID_IDXGIFactory, (void **)&factory);
  980. IDXGIAdapter *adapter;
  981. factory->lpVtbl->EnumAdapters(factory, 0, &adapter);
  982. DXGI_ADAPTER_DESC desc;
  983. adapter->lpVtbl->GetDesc(adapter, &desc);
  984. WideCharToMultiByte(CP_UTF8, 0, desc.Description, -1, device_name, sizeof(device_name), NULL, NULL);
  985. adapter->lpVtbl->Release(adapter);
  986. factory->lpVtbl->Release(factory);
  987. return device_name;
  988. }
  989. typedef struct inst {
  990. iron_matrix4x4_t m;
  991. int i;
  992. } inst_t;
  993. static ID3D12Device5 *dxr_device = NULL;
  994. static ID3D12GraphicsCommandList4 *dxr_command_list = NULL;
  995. static ID3D12RootSignature *dxr_root_signature = NULL;
  996. static ID3D12DescriptorHeap *dxr_descriptor_heap = NULL;
  997. static gpu_raytrace_acceleration_structure_t *dxr_accel;
  998. static gpu_raytrace_pipeline_t *dxr_pipeline;
  999. static gpu_texture_t *dxr_output = NULL;
  1000. static D3D12_CPU_DESCRIPTOR_HANDLE dxr_output_cpu_descriptor;
  1001. static D3D12_GPU_DESCRIPTOR_HANDLE dxr_output_descriptor_handle;
  1002. static D3D12_GPU_DESCRIPTOR_HANDLE dxr_vbgpu_descriptor_handle;
  1003. static D3D12_GPU_DESCRIPTOR_HANDLE dxr_ibgpu_descriptor_handle;
  1004. static D3D12_GPU_DESCRIPTOR_HANDLE dxr_tex0gpu_descriptor_handle;
  1005. static D3D12_GPU_DESCRIPTOR_HANDLE dxr_tex1gpu_descriptor_handle;
  1006. static D3D12_GPU_DESCRIPTOR_HANDLE dxr_tex2gpu_descriptor_handle;
  1007. static D3D12_GPU_DESCRIPTOR_HANDLE dxr_texenvgpu_descriptor_handle;
  1008. static D3D12_GPU_DESCRIPTOR_HANDLE dxr_texsobolgpu_descriptor_handle;
  1009. static D3D12_GPU_DESCRIPTOR_HANDLE dxr_texscramblegpu_descriptor_handle;
  1010. static D3D12_GPU_DESCRIPTOR_HANDLE dxr_texrankgpu_descriptor_handle;
  1011. static int dxr_descriptors_allocated = 0;
  1012. static UINT dxr_descriptor_size;
  1013. static gpu_buffer_t *dxr_vb[16];
  1014. static gpu_buffer_t *dxr_vb_last[16];
  1015. static gpu_buffer_t *dxr_ib[16];
  1016. static int dxr_vb_count = 0;
  1017. static int dxr_vb_count_last = 0;
  1018. static inst_t dxr_instances[1024];
  1019. static int dxr_instances_count = 0;
  1020. void gpu_raytrace_pipeline_init(gpu_raytrace_pipeline_t *pipeline, void *ray_shader, int ray_shader_size, gpu_buffer_t *constant_buffer) {
  1021. dxr_output = NULL;
  1022. dxr_descriptors_allocated = 0;
  1023. pipeline->constant_buffer = constant_buffer;
  1024. D3D12_DESCRIPTOR_HEAP_DESC descriptor_heap_desc = {
  1025. .NumDescriptors = 12,
  1026. .Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
  1027. .Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE,
  1028. .NodeMask = 0,
  1029. };
  1030. if (dxr_descriptor_heap != NULL) {
  1031. dxr_descriptor_heap->lpVtbl->Release(dxr_descriptor_heap);
  1032. }
  1033. device->lpVtbl->CreateDescriptorHeap(device, &descriptor_heap_desc, &IID_ID3D12DescriptorHeap, &dxr_descriptor_heap);
  1034. dxr_descriptor_size = device->lpVtbl->GetDescriptorHandleIncrementSize(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
  1035. if (dxr_device != NULL) {
  1036. dxr_device->lpVtbl->Release(dxr_device);
  1037. }
  1038. if (dxr_command_list != NULL) {
  1039. dxr_command_list->lpVtbl->Release(dxr_command_list);
  1040. }
  1041. device->lpVtbl->QueryInterface(device, &IID_ID3D12Device5, &dxr_device);
  1042. command_list->lpVtbl->QueryInterface(command_list, &IID_ID3D12GraphicsCommandList4, &dxr_command_list);
  1043. // Root signatures
  1044. D3D12_DESCRIPTOR_RANGE ranges[] = {{D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 0, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND},
  1045. {D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 1, 0, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND},
  1046. {D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 2, 0, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND},
  1047. {D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 3, 0, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND},
  1048. {D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 4, 0, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND},
  1049. {D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 5, 0, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND},
  1050. {D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 6, 0, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND},
  1051. {D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 7, 0, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND},
  1052. {D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 8, 0, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND},
  1053. {D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 9, 0, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND}};
  1054. D3D12_ROOT_PARAMETER root_parameters[12] = {{D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, {{1, &ranges[0]}}, D3D12_SHADER_VISIBILITY_ALL},
  1055. {D3D12_ROOT_PARAMETER_TYPE_SRV, {{0}}, D3D12_SHADER_VISIBILITY_ALL},
  1056. {D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, {{1, &ranges[1]}}, D3D12_SHADER_VISIBILITY_ALL},
  1057. {D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, {{1, &ranges[2]}}, D3D12_SHADER_VISIBILITY_ALL},
  1058. {D3D12_ROOT_PARAMETER_TYPE_CBV, {{0}}, D3D12_SHADER_VISIBILITY_ALL},
  1059. {D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, {{1, &ranges[3]}}, D3D12_SHADER_VISIBILITY_ALL},
  1060. {D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, {{1, &ranges[4]}}, D3D12_SHADER_VISIBILITY_ALL},
  1061. {D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, {{1, &ranges[5]}}, D3D12_SHADER_VISIBILITY_ALL},
  1062. {D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, {{1, &ranges[6]}}, D3D12_SHADER_VISIBILITY_ALL},
  1063. {D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, {{1, &ranges[7]}}, D3D12_SHADER_VISIBILITY_ALL},
  1064. {D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, {{1, &ranges[8]}}, D3D12_SHADER_VISIBILITY_ALL},
  1065. {D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, {{1, &ranges[9]}}, D3D12_SHADER_VISIBILITY_ALL}};
  1066. D3D12_STATIC_SAMPLER_DESC linear_sampler = {
  1067. .Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR,
  1068. .AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP,
  1069. .AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP,
  1070. .AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP,
  1071. .MipLODBias = 0,
  1072. .MaxAnisotropy = 16,
  1073. .ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER,
  1074. .BorderColor = D3D12_STATIC_BORDER_COLOR_OPAQUE_BLACK,
  1075. .MinLOD = 0.0f,
  1076. .MaxLOD = D3D12_FLOAT32_MAX,
  1077. .ShaderRegister = 0,
  1078. .RegisterSpace = 0,
  1079. .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
  1080. };
  1081. D3D12_ROOT_SIGNATURE_DESC root_signature_desc = {
  1082. .NumParameters = ARRAYSIZE(root_parameters),
  1083. .pParameters = root_parameters,
  1084. .NumStaticSamplers = 1,
  1085. .pStaticSamplers = &linear_sampler,
  1086. };
  1087. ID3DBlob *blob = NULL;
  1088. ID3DBlob *error = NULL;
  1089. D3D12SerializeRootSignature(&root_signature_desc, D3D_ROOT_SIGNATURE_VERSION_1, &blob, &error);
  1090. if (dxr_root_signature != NULL) {
  1091. dxr_root_signature->lpVtbl->Release(dxr_root_signature);
  1092. }
  1093. device->lpVtbl->CreateRootSignature(device, 1, blob->lpVtbl->GetBufferPointer(blob), blob->lpVtbl->GetBufferSize(blob), &IID_ID3D12RootSignature,
  1094. &dxr_root_signature);
  1095. // Pipeline
  1096. D3D12_STATE_OBJECT_DESC raytracingPipeline = {
  1097. .Type = D3D12_STATE_OBJECT_TYPE_RAYTRACING_PIPELINE,
  1098. };
  1099. D3D12_SHADER_BYTECODE shaderBytecode = {
  1100. .pShaderBytecode = ray_shader,
  1101. .BytecodeLength = ray_shader_size,
  1102. };
  1103. D3D12_DXIL_LIBRARY_DESC dxilLibrary = {
  1104. .DXILLibrary = shaderBytecode,
  1105. };
  1106. D3D12_EXPORT_DESC exports[3] = {0};
  1107. exports[0].Name = L"raygeneration";
  1108. exports[1].Name = L"closesthit";
  1109. exports[2].Name = L"miss";
  1110. dxilLibrary.pExports = exports;
  1111. dxilLibrary.NumExports = 3;
  1112. D3D12_HIT_GROUP_DESC hitGroup = {
  1113. .ClosestHitShaderImport = L"closesthit",
  1114. .HitGroupExport = L"hitgroup",
  1115. .Type = D3D12_HIT_GROUP_TYPE_TRIANGLES,
  1116. };
  1117. D3D12_RAYTRACING_SHADER_CONFIG shaderConfig = {
  1118. .MaxPayloadSizeInBytes = 10 * sizeof(float), // float4 color, float3 ray_origin, float3 ray_dir
  1119. .MaxAttributeSizeInBytes = 2 * sizeof(float), // float2 barycentrics
  1120. };
  1121. D3D12_RAYTRACING_PIPELINE_CONFIG pipelineConfig = {
  1122. .MaxTraceRecursionDepth = 1, // ~ primary rays only
  1123. };
  1124. D3D12_STATE_SUBOBJECT subobjects[5] = {{D3D12_STATE_SUBOBJECT_TYPE_DXIL_LIBRARY, &dxilLibrary},
  1125. {D3D12_STATE_SUBOBJECT_TYPE_HIT_GROUP, &hitGroup},
  1126. {D3D12_STATE_SUBOBJECT_TYPE_RAYTRACING_SHADER_CONFIG, &shaderConfig},
  1127. {D3D12_STATE_SUBOBJECT_TYPE_GLOBAL_ROOT_SIGNATURE, &dxr_root_signature},
  1128. {D3D12_STATE_SUBOBJECT_TYPE_RAYTRACING_PIPELINE_CONFIG, &pipelineConfig}};
  1129. raytracingPipeline.NumSubobjects = 5;
  1130. raytracingPipeline.pSubobjects = subobjects;
  1131. dxr_device->lpVtbl->CreateStateObject(dxr_device, &raytracingPipeline, &IID_ID3D12StateObject, &pipeline->impl.state);
  1132. // Shader tables
  1133. ID3D12StateObjectProperties *stateObjectProps = NULL;
  1134. pipeline->impl.state->lpVtbl->QueryInterface(pipeline->impl.state, &IID_ID3D12StateObjectProperties, &stateObjectProps);
  1135. const void *rayGenShaderId = stateObjectProps->lpVtbl->GetShaderIdentifier(stateObjectProps, L"raygeneration");
  1136. const void *missShaderId = stateObjectProps->lpVtbl->GetShaderIdentifier(stateObjectProps, L"miss");
  1137. const void *hitGroupShaderId = stateObjectProps->lpVtbl->GetShaderIdentifier(stateObjectProps, L"hitgroup");
  1138. UINT shaderIdSize = D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES;
  1139. int align = D3D12_RAYTRACING_SHADER_RECORD_BYTE_ALIGNMENT;
  1140. // Ray gen shader table
  1141. {
  1142. UINT size = shaderIdSize + constant_buffer->count;
  1143. UINT shaderRecordSize = (size + (align - 1)) & ~(align - 1);
  1144. D3D12_RESOURCE_DESC bufferDesc = {
  1145. .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
  1146. .Width = shaderRecordSize,
  1147. .Height = 1,
  1148. .DepthOrArraySize = 1,
  1149. .MipLevels = 1,
  1150. .Format = DXGI_FORMAT_UNKNOWN,
  1151. .SampleDesc.Count = 1,
  1152. .SampleDesc.Quality = 0,
  1153. .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
  1154. };
  1155. D3D12_HEAP_PROPERTIES uploadHeapProperties = {
  1156. .Type = D3D12_HEAP_TYPE_UPLOAD,
  1157. .CreationNodeMask = 1,
  1158. .VisibleNodeMask = 1,
  1159. };
  1160. device->lpVtbl->CreateCommittedResource(device, &uploadHeapProperties, D3D12_HEAP_FLAG_NONE, &bufferDesc, D3D12_RESOURCE_STATE_GENERIC_READ, NULL,
  1161. &IID_ID3D12Resource, &pipeline->impl.raygen_shader_table);
  1162. D3D12_RANGE rstRange = {
  1163. .Begin = 0,
  1164. .End = 0,
  1165. };
  1166. uint8_t *byteDest;
  1167. pipeline->impl.raygen_shader_table->lpVtbl->Map(pipeline->impl.raygen_shader_table, 0, &rstRange, (void **)(&byteDest));
  1168. D3D12_RANGE cbRange = {
  1169. .Begin = 0,
  1170. .End = constant_buffer->count,
  1171. };
  1172. void *constantBufferData;
  1173. constant_buffer->impl.buffer->lpVtbl->Map(constant_buffer->impl.buffer, 0, &cbRange, (void **)&constantBufferData);
  1174. memcpy(byteDest, rayGenShaderId, size);
  1175. memcpy(byteDest + size, constantBufferData, constant_buffer->count);
  1176. pipeline->impl.raygen_shader_table->lpVtbl->Unmap(pipeline->impl.raygen_shader_table, 0, NULL);
  1177. }
  1178. // Miss shader table
  1179. {
  1180. UINT size = shaderIdSize;
  1181. UINT shaderRecordSize = (size + (align - 1)) & ~(align - 1);
  1182. D3D12_RESOURCE_DESC bufferDesc = {
  1183. .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
  1184. .Width = shaderRecordSize,
  1185. .Height = 1,
  1186. .DepthOrArraySize = 1,
  1187. .MipLevels = 1,
  1188. .Format = DXGI_FORMAT_UNKNOWN,
  1189. .SampleDesc.Count = 1,
  1190. .SampleDesc.Quality = 0,
  1191. .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
  1192. };
  1193. D3D12_HEAP_PROPERTIES uploadHeapProperties = {
  1194. .Type = D3D12_HEAP_TYPE_UPLOAD,
  1195. .CreationNodeMask = 1,
  1196. .VisibleNodeMask = 1,
  1197. };
  1198. device->lpVtbl->CreateCommittedResource(device, &uploadHeapProperties, D3D12_HEAP_FLAG_NONE, &bufferDesc, D3D12_RESOURCE_STATE_GENERIC_READ, NULL,
  1199. &IID_ID3D12Resource, &pipeline->impl.miss_shader_table);
  1200. D3D12_RANGE mstRange = {
  1201. .Begin = 0,
  1202. .End = 0,
  1203. };
  1204. uint8_t *byteDest;
  1205. pipeline->impl.miss_shader_table->lpVtbl->Map(pipeline->impl.miss_shader_table, 0, &mstRange, (void **)(&byteDest));
  1206. memcpy(byteDest, missShaderId, size);
  1207. pipeline->impl.miss_shader_table->lpVtbl->Unmap(pipeline->impl.miss_shader_table, 0, NULL);
  1208. }
  1209. // Hit group shader table
  1210. {
  1211. UINT size = shaderIdSize;
  1212. UINT shaderRecordSize = (size + (align - 1)) & ~(align - 1);
  1213. D3D12_RESOURCE_DESC bufferDesc = {
  1214. .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
  1215. .Width = shaderRecordSize,
  1216. .Height = 1,
  1217. .DepthOrArraySize = 1,
  1218. .MipLevels = 1,
  1219. .Format = DXGI_FORMAT_UNKNOWN,
  1220. .SampleDesc.Count = 1,
  1221. .SampleDesc.Quality = 0,
  1222. .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
  1223. };
  1224. D3D12_HEAP_PROPERTIES uploadHeapProperties = {
  1225. .Type = D3D12_HEAP_TYPE_UPLOAD,
  1226. .CreationNodeMask = 1,
  1227. .VisibleNodeMask = 1,
  1228. };
  1229. device->lpVtbl->CreateCommittedResource(device, &uploadHeapProperties, D3D12_HEAP_FLAG_NONE, &bufferDesc, D3D12_RESOURCE_STATE_GENERIC_READ, NULL,
  1230. &IID_ID3D12Resource, &pipeline->impl.hitgroup_shader_table);
  1231. D3D12_RANGE hstRange = {
  1232. .Begin = 0,
  1233. .End = 0,
  1234. };
  1235. uint8_t *byteDest;
  1236. pipeline->impl.hitgroup_shader_table->lpVtbl->Map(pipeline->impl.hitgroup_shader_table, 0, &hstRange, (void **)(&byteDest));
  1237. memcpy(byteDest, hitGroupShaderId, size);
  1238. pipeline->impl.hitgroup_shader_table->lpVtbl->Unmap(pipeline->impl.hitgroup_shader_table, 0, NULL);
  1239. }
  1240. // Output descriptor
  1241. D3D12_CPU_DESCRIPTOR_HANDLE handle;
  1242. dxr_descriptor_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(dxr_descriptor_heap, &handle);
  1243. dxr_output_cpu_descriptor.ptr = handle.ptr + (INT64)(dxr_descriptors_allocated) * (UINT64)(dxr_descriptor_size);
  1244. dxr_descriptor_heap->lpVtbl->GetGPUDescriptorHandleForHeapStart(dxr_descriptor_heap, &handle);
  1245. int descriptorHeapIndex = dxr_descriptors_allocated++;
  1246. dxr_output_descriptor_handle.ptr = handle.ptr + (INT64)(descriptorHeapIndex) * (UINT64)(dxr_descriptor_size);
  1247. }
  1248. void gpu_raytrace_pipeline_destroy(gpu_raytrace_pipeline_t *pipeline) {
  1249. pipeline->impl.state->lpVtbl->Release(pipeline->impl.state);
  1250. pipeline->impl.raygen_shader_table->lpVtbl->Release(pipeline->impl.raygen_shader_table);
  1251. pipeline->impl.miss_shader_table->lpVtbl->Release(pipeline->impl.miss_shader_table);
  1252. pipeline->impl.hitgroup_shader_table->lpVtbl->Release(pipeline->impl.hitgroup_shader_table);
  1253. }
  1254. UINT create_srv_vb(gpu_buffer_t *vb, UINT numElements, UINT elementSize) {
  1255. D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {
  1256. .ViewDimension = D3D12_SRV_DIMENSION_BUFFER,
  1257. .Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING,
  1258. .Buffer.NumElements = numElements,
  1259. };
  1260. if (elementSize == 0) {
  1261. srvDesc.Format = DXGI_FORMAT_R32_TYPELESS;
  1262. srvDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW;
  1263. srvDesc.Buffer.StructureByteStride = 0;
  1264. }
  1265. else {
  1266. srvDesc.Format = DXGI_FORMAT_UNKNOWN;
  1267. srvDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE;
  1268. srvDesc.Buffer.StructureByteStride = elementSize;
  1269. }
  1270. D3D12_CPU_DESCRIPTOR_HANDLE handle;
  1271. dxr_descriptor_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(dxr_descriptor_heap, &handle);
  1272. D3D12_CPU_DESCRIPTOR_HANDLE cpuDescriptor = {
  1273. .ptr = handle.ptr + (INT64)(dxr_descriptors_allocated) * (UINT64)(dxr_descriptor_size),
  1274. };
  1275. UINT descriptorIndex = dxr_descriptors_allocated++;
  1276. device->lpVtbl->CreateShaderResourceView(device, vb->impl.buffer, &srvDesc, cpuDescriptor);
  1277. dxr_descriptor_heap->lpVtbl->GetGPUDescriptorHandleForHeapStart(dxr_descriptor_heap, &handle);
  1278. dxr_vbgpu_descriptor_handle.ptr = handle.ptr + (INT64)(descriptorIndex) * (UINT64)(dxr_descriptor_size);
  1279. return descriptorIndex;
  1280. }
  1281. UINT create_srv_ib(gpu_buffer_t *ib, UINT numElements, UINT elementSize) {
  1282. D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {
  1283. .ViewDimension = D3D12_SRV_DIMENSION_BUFFER,
  1284. .Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING,
  1285. .Buffer.NumElements = numElements,
  1286. };
  1287. if (elementSize == 0) {
  1288. srvDesc.Format = DXGI_FORMAT_R32_TYPELESS;
  1289. srvDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW;
  1290. srvDesc.Buffer.StructureByteStride = 0;
  1291. }
  1292. else {
  1293. srvDesc.Format = DXGI_FORMAT_UNKNOWN;
  1294. srvDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE;
  1295. srvDesc.Buffer.StructureByteStride = elementSize;
  1296. }
  1297. D3D12_CPU_DESCRIPTOR_HANDLE handle;
  1298. dxr_descriptor_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(dxr_descriptor_heap, &handle);
  1299. D3D12_CPU_DESCRIPTOR_HANDLE cpuDescriptor = {
  1300. .ptr = handle.ptr + (INT64)(dxr_descriptors_allocated) * (UINT64)(dxr_descriptor_size),
  1301. };
  1302. UINT descriptorIndex = dxr_descriptors_allocated++;
  1303. dxr_descriptor_heap->lpVtbl->GetGPUDescriptorHandleForHeapStart(dxr_descriptor_heap, &handle);
  1304. device->lpVtbl->CreateShaderResourceView(device, ib->impl.buffer, &srvDesc, cpuDescriptor);
  1305. dxr_ibgpu_descriptor_handle.ptr = handle.ptr + (INT64)(descriptorIndex) * (UINT64)(dxr_descriptor_size);
  1306. return descriptorIndex;
  1307. }
  1308. void gpu_raytrace_acceleration_structure_init(gpu_raytrace_acceleration_structure_t *accel) {
  1309. dxr_vb_count = 0;
  1310. dxr_instances_count = 0;
  1311. }
  1312. void gpu_raytrace_acceleration_structure_add(gpu_raytrace_acceleration_structure_t *accel, gpu_buffer_t *vb, gpu_buffer_t *ib, iron_matrix4x4_t transform) {
  1313. int vb_i = -1;
  1314. for (int i = 0; i < dxr_vb_count; ++i) {
  1315. if (vb == dxr_vb[i]) {
  1316. vb_i = i;
  1317. break;
  1318. }
  1319. }
  1320. if (vb_i == -1) {
  1321. vb_i = dxr_vb_count;
  1322. dxr_vb[dxr_vb_count] = vb;
  1323. dxr_ib[dxr_vb_count] = ib;
  1324. dxr_vb_count++;
  1325. }
  1326. inst_t inst = {.i = vb_i, .m = transform};
  1327. dxr_instances[dxr_instances_count] = inst;
  1328. dxr_instances_count++;
  1329. }
  1330. void _gpu_raytrace_acceleration_structure_destroy_bottom(gpu_raytrace_acceleration_structure_t *accel) {
  1331. for (int i = 0; i < dxr_vb_count_last; ++i) {
  1332. accel->impl.bottom_level_accel[i]->lpVtbl->Release(accel->impl.bottom_level_accel[i]);
  1333. }
  1334. }
  1335. void _gpu_raytrace_acceleration_structure_destroy_top(gpu_raytrace_acceleration_structure_t *accel) {
  1336. accel->impl.top_level_accel->lpVtbl->Release(accel->impl.top_level_accel);
  1337. }
  1338. void gpu_raytrace_acceleration_structure_build(gpu_raytrace_acceleration_structure_t *accel, gpu_buffer_t *vb_full, gpu_buffer_t *ib_full) {
  1339. bool build_bottom = false;
  1340. for (int i = 0; i < 16; ++i) {
  1341. if (dxr_vb_last[i] != dxr_vb[i]) {
  1342. build_bottom = true;
  1343. }
  1344. dxr_vb_last[i] = dxr_vb[i];
  1345. }
  1346. if (dxr_vb_count_last > 0) {
  1347. if (build_bottom) {
  1348. _gpu_raytrace_acceleration_structure_destroy_bottom(accel);
  1349. }
  1350. _gpu_raytrace_acceleration_structure_destroy_top(accel);
  1351. }
  1352. dxr_vb_count_last = dxr_vb_count;
  1353. if (dxr_vb_count == 0) {
  1354. return;
  1355. }
  1356. dxr_descriptors_allocated = 1; // 1 descriptor already allocated in gpu_raytrace_pipeline_init
  1357. #ifdef is_forge
  1358. create_srv_ib(ib_full, ib_full->count, 0);
  1359. create_srv_vb(vb_full, vb_full->count, dxr_vb[0]->stride);
  1360. #else
  1361. create_srv_ib(dxr_ib[0], dxr_ib[0]->count, 0);
  1362. create_srv_vb(dxr_vb[0], dxr_vb[0]->count, dxr_vb[0]->stride);
  1363. #endif
  1364. command_list->lpVtbl->Reset(command_list, command_allocator, NULL);
  1365. D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS topLevelInputs = {
  1366. .DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY,
  1367. .Flags = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_TRACE,
  1368. .NumDescs = 1,
  1369. .Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL,
  1370. };
  1371. D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO topLevelPrebuildInfo = {0};
  1372. dxr_device->lpVtbl->GetRaytracingAccelerationStructurePrebuildInfo(dxr_device, &topLevelInputs, &topLevelPrebuildInfo);
  1373. UINT64 scratch_size = topLevelPrebuildInfo.ScratchDataSizeInBytes;
  1374. // Bottom AS
  1375. D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS bottomLevelInputs[16];
  1376. D3D12_RAYTRACING_GEOMETRY_DESC geometryDescs[16];
  1377. if (build_bottom) {
  1378. for (int i = 0; i < dxr_vb_count; ++i) {
  1379. D3D12_RAYTRACING_GEOMETRY_DESC geometryDesc = {
  1380. .Type = D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES,
  1381. .Triangles.IndexBuffer = dxr_ib[i]->impl.buffer->lpVtbl->GetGPUVirtualAddress(dxr_ib[i]->impl.buffer),
  1382. .Triangles.IndexCount = dxr_ib[i]->count,
  1383. .Triangles.IndexFormat = DXGI_FORMAT_R32_UINT,
  1384. .Triangles.Transform3x4 = 0,
  1385. .Triangles.VertexFormat = DXGI_FORMAT_R16G16B16A16_SNORM,
  1386. .Triangles.VertexCount = dxr_vb[i]->count,
  1387. };
  1388. D3D12_RESOURCE_DESC desc;
  1389. dxr_vb[i]->impl.buffer->lpVtbl->GetDesc(dxr_vb[i]->impl.buffer, &desc);
  1390. geometryDesc.Triangles.VertexBuffer.StartAddress = dxr_vb[i]->impl.buffer->lpVtbl->GetGPUVirtualAddress(dxr_vb[i]->impl.buffer);
  1391. geometryDesc.Triangles.VertexBuffer.StrideInBytes = desc.Width / dxr_vb[i]->count;
  1392. geometryDesc.Flags = D3D12_RAYTRACING_GEOMETRY_FLAG_OPAQUE;
  1393. geometryDescs[i] = geometryDesc;
  1394. D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO bottomLevelPrebuildInfo = {0};
  1395. D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS inputs = {
  1396. .DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY,
  1397. .NumDescs = 1,
  1398. .Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL,
  1399. .pGeometryDescs = &geometryDescs[i],
  1400. .Flags = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_TRACE,
  1401. };
  1402. dxr_device->lpVtbl->GetRaytracingAccelerationStructurePrebuildInfo(dxr_device, &inputs, &bottomLevelPrebuildInfo);
  1403. bottomLevelInputs[i] = inputs;
  1404. UINT64 blSize = bottomLevelPrebuildInfo.ScratchDataSizeInBytes;
  1405. if (scratch_size < blSize) {
  1406. scratch_size = blSize;
  1407. }
  1408. {
  1409. D3D12_RESOURCE_DESC bufferDesc = {
  1410. .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
  1411. .Width = bottomLevelPrebuildInfo.ResultDataMaxSizeInBytes,
  1412. .Height = 1,
  1413. .DepthOrArraySize = 1,
  1414. .MipLevels = 1,
  1415. .Format = DXGI_FORMAT_UNKNOWN,
  1416. .SampleDesc.Count = 1,
  1417. .SampleDesc.Quality = 0,
  1418. .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
  1419. .Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
  1420. };
  1421. D3D12_HEAP_PROPERTIES uploadHeapProperties = {
  1422. .Type = D3D12_HEAP_TYPE_DEFAULT,
  1423. .CreationNodeMask = 1,
  1424. .VisibleNodeMask = 1,
  1425. };
  1426. device->lpVtbl->CreateCommittedResource(dxr_device, &uploadHeapProperties, D3D12_HEAP_FLAG_NONE, &bufferDesc,
  1427. D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE, NULL, &IID_ID3D12Resource,
  1428. &accel->impl.bottom_level_accel[i]);
  1429. }
  1430. }
  1431. }
  1432. // Create scratch memory
  1433. ID3D12Resource *scratchResource;
  1434. {
  1435. D3D12_RESOURCE_DESC bufferDesc = {
  1436. .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
  1437. .Width = scratch_size,
  1438. .Height = 1,
  1439. .DepthOrArraySize = 1,
  1440. .MipLevels = 1,
  1441. .Format = DXGI_FORMAT_UNKNOWN,
  1442. .SampleDesc.Count = 1,
  1443. .SampleDesc.Quality = 0,
  1444. .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
  1445. .Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
  1446. };
  1447. D3D12_HEAP_PROPERTIES uploadHeapProperties = {
  1448. .Type = D3D12_HEAP_TYPE_DEFAULT,
  1449. .CreationNodeMask = 1,
  1450. .VisibleNodeMask = 1,
  1451. };
  1452. device->lpVtbl->CreateCommittedResource(dxr_device, &uploadHeapProperties, D3D12_HEAP_FLAG_NONE, &bufferDesc, D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
  1453. NULL, &IID_ID3D12Resource, &scratchResource);
  1454. }
  1455. // Bottom AS
  1456. if (build_bottom) {
  1457. for (int i = 0; i < dxr_vb_count; ++i) {
  1458. D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC bottomLevelBuildDesc = {
  1459. .Inputs = bottomLevelInputs[i],
  1460. .ScratchAccelerationStructureData = scratchResource->lpVtbl->GetGPUVirtualAddress(scratchResource),
  1461. .DestAccelerationStructureData = accel->impl.bottom_level_accel[i]->lpVtbl->GetGPUVirtualAddress(accel->impl.bottom_level_accel[i]),
  1462. };
  1463. dxr_command_list->lpVtbl->BuildRaytracingAccelerationStructure(dxr_command_list, &bottomLevelBuildDesc, 0, NULL);
  1464. }
  1465. }
  1466. // Top AS
  1467. {
  1468. D3D12_RESOURCE_DESC bufferDesc = {
  1469. .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
  1470. .Width = topLevelPrebuildInfo.ResultDataMaxSizeInBytes,
  1471. .Height = 1,
  1472. .DepthOrArraySize = 1,
  1473. .MipLevels = 1,
  1474. .Format = DXGI_FORMAT_UNKNOWN,
  1475. .SampleDesc.Count = 1,
  1476. .SampleDesc.Quality = 0,
  1477. .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
  1478. .Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
  1479. };
  1480. D3D12_HEAP_PROPERTIES uploadHeapProperties = {
  1481. .Type = D3D12_HEAP_TYPE_DEFAULT,
  1482. .CreationNodeMask = 1,
  1483. .VisibleNodeMask = 1,
  1484. };
  1485. device->lpVtbl->CreateCommittedResource(device, &uploadHeapProperties, D3D12_HEAP_FLAG_NONE, &bufferDesc,
  1486. D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE, NULL, &IID_ID3D12Resource,
  1487. &accel->impl.top_level_accel);
  1488. }
  1489. D3D12_RESOURCE_DESC bufferDesc = {
  1490. .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
  1491. .Width = sizeof(D3D12_RAYTRACING_INSTANCE_DESC) * dxr_instances_count,
  1492. .Height = 1,
  1493. .DepthOrArraySize = 1,
  1494. .MipLevels = 1,
  1495. .Format = DXGI_FORMAT_UNKNOWN,
  1496. .SampleDesc.Count = 1,
  1497. .SampleDesc.Quality = 0,
  1498. .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
  1499. };
  1500. D3D12_HEAP_PROPERTIES uploadHeapProperties = {
  1501. .Type = D3D12_HEAP_TYPE_UPLOAD,
  1502. .CreationNodeMask = 1,
  1503. .VisibleNodeMask = 1,
  1504. };
  1505. ID3D12Resource *instanceDescs;
  1506. device->lpVtbl->CreateCommittedResource(device, &uploadHeapProperties, D3D12_HEAP_FLAG_NONE, &bufferDesc, D3D12_RESOURCE_STATE_GENERIC_READ, NULL,
  1507. &IID_ID3D12Resource, &instanceDescs);
  1508. void *mappedData;
  1509. instanceDescs->lpVtbl->Map(instanceDescs, 0, NULL, &mappedData);
  1510. for (int i = 0; i < dxr_instances_count; ++i) {
  1511. D3D12_RAYTRACING_INSTANCE_DESC instanceDesc = {0};
  1512. instanceDesc.Transform[0][0] = dxr_instances[i].m.m[0];
  1513. instanceDesc.Transform[0][1] = dxr_instances[i].m.m[1];
  1514. instanceDesc.Transform[0][2] = dxr_instances[i].m.m[2];
  1515. instanceDesc.Transform[0][3] = dxr_instances[i].m.m[3];
  1516. instanceDesc.Transform[1][0] = dxr_instances[i].m.m[4];
  1517. instanceDesc.Transform[1][1] = dxr_instances[i].m.m[5];
  1518. instanceDesc.Transform[1][2] = dxr_instances[i].m.m[6];
  1519. instanceDesc.Transform[1][3] = dxr_instances[i].m.m[7];
  1520. instanceDesc.Transform[2][0] = dxr_instances[i].m.m[8];
  1521. instanceDesc.Transform[2][1] = dxr_instances[i].m.m[9];
  1522. instanceDesc.Transform[2][2] = dxr_instances[i].m.m[10];
  1523. instanceDesc.Transform[2][3] = dxr_instances[i].m.m[11];
  1524. int ib_off = 0;
  1525. for (int j = 0; j < dxr_instances[i].i; ++j) {
  1526. ib_off += dxr_ib[j]->count * 4;
  1527. }
  1528. instanceDesc.InstanceID = ib_off;
  1529. instanceDesc.InstanceMask = 1;
  1530. instanceDesc.AccelerationStructure =
  1531. accel->impl.bottom_level_accel[dxr_instances[i].i]->lpVtbl->GetGPUVirtualAddress(accel->impl.bottom_level_accel[dxr_instances[i].i]);
  1532. memcpy((uint8_t *)mappedData + i * sizeof(D3D12_RAYTRACING_INSTANCE_DESC), &instanceDesc, sizeof(D3D12_RAYTRACING_INSTANCE_DESC));
  1533. }
  1534. instanceDescs->lpVtbl->Unmap(instanceDescs, 0, NULL);
  1535. D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC topLevelBuildDesc = {0};
  1536. topLevelInputs.InstanceDescs = instanceDescs->lpVtbl->GetGPUVirtualAddress(instanceDescs);
  1537. topLevelBuildDesc.Inputs = topLevelInputs;
  1538. topLevelBuildDesc.DestAccelerationStructureData = accel->impl.top_level_accel->lpVtbl->GetGPUVirtualAddress(accel->impl.top_level_accel);
  1539. topLevelBuildDesc.ScratchAccelerationStructureData = scratchResource->lpVtbl->GetGPUVirtualAddress(scratchResource);
  1540. D3D12_RESOURCE_BARRIER barrier = {
  1541. .Type = D3D12_RESOURCE_BARRIER_TYPE_UAV,
  1542. .UAV.pResource = accel->impl.bottom_level_accel[0],
  1543. };
  1544. command_list->lpVtbl->ResourceBarrier(command_list, 1, &barrier);
  1545. dxr_command_list->lpVtbl->BuildRaytracingAccelerationStructure(dxr_command_list, &topLevelBuildDesc, 0, NULL);
  1546. gpu_execute_and_wait();
  1547. scratchResource->lpVtbl->Release(scratchResource);
  1548. instanceDescs->lpVtbl->Release(instanceDescs);
  1549. }
  1550. void gpu_raytrace_acceleration_structure_destroy(gpu_raytrace_acceleration_structure_t *accel) {
  1551. // accel->impl.bottom_level_accel->Release();
  1552. // accel->impl.top_level_accel->Release();
  1553. }
  1554. void gpu_raytrace_set_textures(gpu_texture_t *texpaint0, gpu_texture_t *texpaint1, gpu_texture_t *texpaint2, gpu_texture_t *texenv, gpu_texture_t *texsobol,
  1555. gpu_texture_t *texscramble, gpu_texture_t *texrank) {
  1556. D3D12_CPU_DESCRIPTOR_HANDLE handle, cpuDescriptor, sourceCpu;
  1557. D3D12_GPU_DESCRIPTOR_HANDLE ghandle;
  1558. gpu_texture_t *textures[] = {texpaint0, texpaint1, texpaint2, texenv, texsobol, texscramble, texrank};
  1559. D3D12_GPU_DESCRIPTOR_HANDLE *gpu_handles[] = {&dxr_tex0gpu_descriptor_handle, &dxr_tex1gpu_descriptor_handle, &dxr_tex2gpu_descriptor_handle,
  1560. &dxr_texenvgpu_descriptor_handle, &dxr_texsobolgpu_descriptor_handle, &dxr_texscramblegpu_descriptor_handle,
  1561. &dxr_texrankgpu_descriptor_handle};
  1562. dxr_descriptor_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(dxr_descriptor_heap, &handle);
  1563. for (int i = 0; i < 7; i++) {
  1564. cpuDescriptor.ptr = handle.ptr + (5 + i) * (UINT64)dxr_descriptor_size;
  1565. textures[i]->impl.srv_descriptor_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(textures[i]->impl.srv_descriptor_heap, &sourceCpu);
  1566. device->lpVtbl->CopyDescriptorsSimple(device, 1, cpuDescriptor, sourceCpu, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
  1567. dxr_descriptor_heap->lpVtbl->GetGPUDescriptorHandleForHeapStart(dxr_descriptor_heap, &ghandle);
  1568. gpu_handles[i]->ptr = ghandle.ptr + (5 + i) * (UINT64)dxr_descriptor_size;
  1569. }
  1570. }
  1571. void gpu_raytrace_set_acceleration_structure(gpu_raytrace_acceleration_structure_t *accel) {
  1572. dxr_accel = accel;
  1573. }
  1574. void gpu_raytrace_set_pipeline(gpu_raytrace_pipeline_t *pipeline) {
  1575. dxr_pipeline = pipeline;
  1576. }
  1577. void gpu_raytrace_set_target(gpu_texture_t *output) {
  1578. if (!output->impl.has_storage_bit) {
  1579. output->impl.has_storage_bit = true;
  1580. // gpu_texture_destroy(output);
  1581. resources_to_destroy[resources_to_destroy_count] = output->impl.image;
  1582. resources_to_destroy_count++;
  1583. D3D12_HEAP_PROPERTIES heap_properties = {
  1584. .Type = D3D12_HEAP_TYPE_DEFAULT,
  1585. .CreationNodeMask = 1,
  1586. .VisibleNodeMask = 1,
  1587. };
  1588. D3D12_RESOURCE_DESC desc = {
  1589. .Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D,
  1590. .Width = output->width,
  1591. .Height = output->height,
  1592. .DepthOrArraySize = 1,
  1593. .MipLevels = 1,
  1594. .Format = convert_format(output->format),
  1595. .SampleDesc.Count = 1,
  1596. .SampleDesc.Quality = 0,
  1597. .Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN,
  1598. .Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
  1599. };
  1600. D3D12_CLEAR_VALUE clear_value;
  1601. clear_value.Format = convert_format(output->format);
  1602. clear_value.Color[0] = 0.0f;
  1603. clear_value.Color[1] = 0.0f;
  1604. clear_value.Color[2] = 0.0f;
  1605. clear_value.Color[3] = 0.0f;
  1606. device->lpVtbl->CreateCommittedResource(device, &heap_properties, D3D12_HEAP_FLAG_NONE, &desc, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, &clear_value,
  1607. &IID_ID3D12Resource, &output->impl.image);
  1608. D3D12_RENDER_TARGET_VIEW_DESC view = {
  1609. .Format = convert_format(output->format),
  1610. .ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D,
  1611. .Texture2D.MipSlice = 0,
  1612. .Texture2D.PlaneSlice = 0,
  1613. };
  1614. D3D12_DESCRIPTOR_HEAP_DESC heap_desc = {
  1615. .NumDescriptors = 1,
  1616. .Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV,
  1617. .Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE,
  1618. };
  1619. device->lpVtbl->CreateDescriptorHeap(device, &heap_desc, &IID_ID3D12DescriptorHeap, &output->impl.rtv_descriptor_heap);
  1620. D3D12_CPU_DESCRIPTOR_HANDLE handle;
  1621. output->impl.rtv_descriptor_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(output->impl.rtv_descriptor_heap, &handle);
  1622. device->lpVtbl->CreateRenderTargetView(device, output->impl.image, &view, handle);
  1623. D3D12_DESCRIPTOR_HEAP_DESC descriptor_heap_desc = {
  1624. .NumDescriptors = 1,
  1625. .Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
  1626. .NodeMask = 0,
  1627. .Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE,
  1628. };
  1629. device->lpVtbl->CreateDescriptorHeap(device, &descriptor_heap_desc, &IID_ID3D12DescriptorHeap, &output->impl.srv_descriptor_heap);
  1630. D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {
  1631. .ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D,
  1632. .Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING,
  1633. .Format = convert_format(output->format),
  1634. .Texture2D.MipLevels = 1,
  1635. .Texture2D.MostDetailedMip = 0,
  1636. .Texture2D.ResourceMinLODClamp = 0.0f,
  1637. };
  1638. output->impl.srv_descriptor_heap->lpVtbl->GetCPUDescriptorHandleForHeapStart(output->impl.srv_descriptor_heap, &handle);
  1639. device->lpVtbl->CreateShaderResourceView(device, output->impl.image, &srv_desc, handle);
  1640. D3D12_UNORDERED_ACCESS_VIEW_DESC UAVDesc = {
  1641. .ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D,
  1642. };
  1643. device->lpVtbl->CreateUnorderedAccessView(device, output->impl.image, NULL, &UAVDesc, dxr_output_cpu_descriptor);
  1644. }
  1645. dxr_output = output;
  1646. }
  1647. void gpu_raytrace_dispatch_rays() {
  1648. _gpu_barrier(dxr_output->impl.image, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
  1649. command_list->lpVtbl->SetComputeRootSignature(command_list, dxr_root_signature);
  1650. command_list->lpVtbl->SetDescriptorHeaps(command_list, 1, &dxr_descriptor_heap);
  1651. command_list->lpVtbl->SetComputeRootDescriptorTable(command_list, 0, dxr_output_descriptor_handle);
  1652. command_list->lpVtbl->SetComputeRootShaderResourceView(command_list, 1,
  1653. dxr_accel->impl.top_level_accel->lpVtbl->GetGPUVirtualAddress(dxr_accel->impl.top_level_accel));
  1654. command_list->lpVtbl->SetComputeRootDescriptorTable(command_list, 2, dxr_ibgpu_descriptor_handle);
  1655. command_list->lpVtbl->SetComputeRootDescriptorTable(command_list, 3, dxr_vbgpu_descriptor_handle);
  1656. command_list->lpVtbl->SetComputeRootConstantBufferView(
  1657. command_list, 4, dxr_pipeline->constant_buffer->impl.buffer->lpVtbl->GetGPUVirtualAddress(dxr_pipeline->constant_buffer->impl.buffer));
  1658. command_list->lpVtbl->SetComputeRootDescriptorTable(command_list, 5, dxr_tex0gpu_descriptor_handle);
  1659. command_list->lpVtbl->SetComputeRootDescriptorTable(command_list, 6, dxr_tex1gpu_descriptor_handle);
  1660. command_list->lpVtbl->SetComputeRootDescriptorTable(command_list, 7, dxr_tex2gpu_descriptor_handle);
  1661. command_list->lpVtbl->SetComputeRootDescriptorTable(command_list, 8, dxr_texenvgpu_descriptor_handle);
  1662. command_list->lpVtbl->SetComputeRootDescriptorTable(command_list, 9, dxr_texsobolgpu_descriptor_handle);
  1663. command_list->lpVtbl->SetComputeRootDescriptorTable(command_list, 10, dxr_texscramblegpu_descriptor_handle);
  1664. command_list->lpVtbl->SetComputeRootDescriptorTable(command_list, 11, dxr_texrankgpu_descriptor_handle);
  1665. D3D12_DISPATCH_RAYS_DESC dispatchDesc = {0};
  1666. D3D12_RESOURCE_DESC desc;
  1667. dxr_pipeline->impl.hitgroup_shader_table->lpVtbl->GetDesc(dxr_pipeline->impl.hitgroup_shader_table, &desc);
  1668. dispatchDesc.HitGroupTable.StartAddress = dxr_pipeline->impl.hitgroup_shader_table->lpVtbl->GetGPUVirtualAddress(dxr_pipeline->impl.hitgroup_shader_table);
  1669. dispatchDesc.HitGroupTable.SizeInBytes = desc.Width;
  1670. dispatchDesc.HitGroupTable.StrideInBytes = dispatchDesc.HitGroupTable.SizeInBytes;
  1671. dispatchDesc.MissShaderTable.StartAddress = dxr_pipeline->impl.miss_shader_table->lpVtbl->GetGPUVirtualAddress(dxr_pipeline->impl.miss_shader_table);
  1672. dxr_pipeline->impl.miss_shader_table->lpVtbl->GetDesc(dxr_pipeline->impl.miss_shader_table, &desc);
  1673. dispatchDesc.MissShaderTable.SizeInBytes = desc.Width;
  1674. dispatchDesc.MissShaderTable.StrideInBytes = dispatchDesc.MissShaderTable.SizeInBytes;
  1675. dispatchDesc.RayGenerationShaderRecord.StartAddress =
  1676. dxr_pipeline->impl.raygen_shader_table->lpVtbl->GetGPUVirtualAddress(dxr_pipeline->impl.raygen_shader_table);
  1677. dxr_pipeline->impl.raygen_shader_table->lpVtbl->GetDesc(dxr_pipeline->impl.raygen_shader_table, &desc);
  1678. dispatchDesc.RayGenerationShaderRecord.SizeInBytes = desc.Width;
  1679. dispatchDesc.Width = dxr_output->width;
  1680. dispatchDesc.Height = dxr_output->height;
  1681. dispatchDesc.Depth = 1;
  1682. dxr_command_list->lpVtbl->SetPipelineState1(dxr_command_list, dxr_pipeline->impl.state);
  1683. dxr_command_list->lpVtbl->DispatchRays(dxr_command_list, &dispatchDesc);
  1684. _gpu_barrier(dxr_output->impl.image, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
  1685. }