// vulkan_gpu.c

#ifndef NDEBUG
#define VALIDATE
#endif

#include <malloc.h>
#include <memory.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <vulkan/vulkan.h>
#include <vulkan/vulkan_core.h>
#include <iron_gpu.h>
#include <iron_math.h>
#include <iron_system.h>
#include "vulkan_gpu.h"

bool gpu_transpose_mat = true;
extern int constant_buffer_index;
static gpu_texture_t *current_textures[GPU_MAX_TEXTURES] = {
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
};
static VkSemaphore framebuffer_available_semaphore;
static VkSemaphore rendering_finished_semaphores[GPU_FRAMEBUFFER_COUNT];
static VkFence fence;
static gpu_pipeline_t *current_pipeline = NULL;
static VkViewport current_viewport;
static VkRect2D current_scissor;
static gpu_buffer_t *current_vb;
static gpu_buffer_t *current_ib;
static VkDescriptorSetLayout descriptor_layout;
static VkDescriptorSet descriptor_sets[GPU_CONSTANT_BUFFER_MULTIPLE];
static VkRenderingInfo current_rendering_info;
static VkRenderingAttachmentInfo current_color_attachment_infos[8];
static VkRenderingAttachmentInfo current_depth_attachment_info;
static VkPhysicalDeviceMemoryProperties memory_properties;
static VkSampler linear_sampler;
static VkSampler point_sampler;
static bool linear_sampling = true;
static VkCommandBuffer command_buffer;
static VkBuffer buffers_to_destroy[256];
static VkDeviceMemory buffer_memories_to_destroy[256];
static int buffers_to_destroy_count = 0;
static char device_name[256];
static VkInstance instance;
static VkPhysicalDevice gpu;
static VkDevice device;
static VkCommandPool cmd_pool;
static VkQueue queue;
#ifdef VALIDATE
static bool validation_found;
static VkDebugUtilsMessengerEXT debug_messenger;
#endif
static bool surface_destroyed;
static int window_depth_bits;
static bool window_vsync;
static VkSurfaceKHR surface;
static VkSurfaceFormatKHR surface_format;
static VkSwapchainKHR swapchain;
static VkImage window_images[GPU_FRAMEBUFFER_COUNT];
static uint32_t framebuffer_count;
static bool framebuffer_acquired = false;
static VkBuffer readback_buffer;
static int readback_buffer_size = 0;
static VkDeviceMemory readback_mem;
static VkBuffer upload_buffer;
static int upload_buffer_size = 0;
static VkDeviceMemory upload_mem;
static bool is_amd = false;

void iron_vulkan_get_instance_extensions(const char **extensions, int *index);
VkBool32 iron_vulkan_get_physical_device_presentation_support(VkPhysicalDevice physical_device, uint32_t queue_family_index);
VkResult iron_vulkan_create_surface(VkInstance instance, VkSurfaceKHR *surface);

static VkFormat convert_image_format(gpu_texture_format_t format) {
    switch (format) {
    case GPU_TEXTURE_FORMAT_RGBA128:
        return VK_FORMAT_R32G32B32A32_SFLOAT;
    case GPU_TEXTURE_FORMAT_RGBA64:
        return VK_FORMAT_R16G16B16A16_SFLOAT;
    case GPU_TEXTURE_FORMAT_R8:
        return VK_FORMAT_R8_UNORM;
    case GPU_TEXTURE_FORMAT_R16:
        return VK_FORMAT_R16_SFLOAT;
    case GPU_TEXTURE_FORMAT_R32:
        return VK_FORMAT_R32_SFLOAT;
    case GPU_TEXTURE_FORMAT_D32:
        return VK_FORMAT_D32_SFLOAT;
    default:
#ifdef IRON_ANDROID
        return VK_FORMAT_R8G8B8A8_UNORM;
#else
        return VK_FORMAT_B8G8R8A8_UNORM;
#endif
    }
}

static VkCullModeFlagBits convert_cull_mode(gpu_cull_mode_t cull_mode) {
    switch (cull_mode) {
    case GPU_CULL_MODE_CLOCKWISE:
        return VK_CULL_MODE_BACK_BIT;
    case GPU_CULL_MODE_COUNTERCLOCKWISE:
        return VK_CULL_MODE_FRONT_BIT;
    default:
        return VK_CULL_MODE_NONE;
    }
}

static VkCompareOp convert_compare_mode(gpu_compare_mode_t compare) {
    switch (compare) {
    default:
    case GPU_COMPARE_MODE_ALWAYS:
        return VK_COMPARE_OP_ALWAYS;
    case GPU_COMPARE_MODE_NEVER:
        return VK_COMPARE_OP_NEVER;
    case GPU_COMPARE_MODE_EQUAL:
        return VK_COMPARE_OP_EQUAL;
    case GPU_COMPARE_MODE_LESS:
        return VK_COMPARE_OP_LESS;
    }
}

static VkBlendFactor convert_blend_factor(gpu_blending_factor_t factor) {
    switch (factor) {
    case GPU_BLEND_ONE:
        return VK_BLEND_FACTOR_ONE;
    case GPU_BLEND_ZERO:
        return VK_BLEND_FACTOR_ZERO;
    case GPU_BLEND_SOURCE_ALPHA:
        return VK_BLEND_FACTOR_SRC_ALPHA;
    case GPU_BLEND_DEST_ALPHA:
        return VK_BLEND_FACTOR_DST_ALPHA;
    case GPU_BLEND_INV_SOURCE_ALPHA:
        return VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA;
    case GPU_BLEND_INV_DEST_ALPHA:
        return VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA;
    default: // Fallback to keep the function total; unreachable for valid enum values
        return VK_BLEND_FACTOR_ONE;
    }
}

static VkImageLayout convert_texture_state(gpu_texture_state_t state) {
    switch (state) {
    case GPU_TEXTURE_STATE_SHADER_RESOURCE:
        return VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
    case GPU_TEXTURE_STATE_RENDER_TARGET:
        return VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
    case GPU_TEXTURE_STATE_RENDER_TARGET_DEPTH:
        return VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL;
    case GPU_TEXTURE_STATE_PRESENT:
        return VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
    default: // Fallback to keep the function total; unreachable for valid enum values
        return VK_IMAGE_LAYOUT_UNDEFINED;
    }
}

static VkBool32 vk_debug_utils_messenger_callback_ext(
        VkDebugUtilsMessageSeverityFlagBitsEXT message_severity,
        VkDebugUtilsMessageTypeFlagsEXT message_types,
        const VkDebugUtilsMessengerCallbackDataEXT *pcallback_data,
        void *puser_data) {
    if (message_severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) {
        iron_error("Vulkan ERROR: Code %d : %s\n", pcallback_data->messageIdNumber, pcallback_data->pMessage);
    }
    else if (message_severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT) {
        iron_log("Vulkan WARNING: Code %d : %s\n", pcallback_data->messageIdNumber, pcallback_data->pMessage);
    }
    return VK_FALSE;
}

static bool check_extensions(const char **wanted_extensions, int wanted_extension_count, VkExtensionProperties *extensions, int extension_count) {
    bool *found_extensions = calloc(wanted_extension_count, sizeof(bool));
    for (int i = 0; i < extension_count; i++) {
        for (int i2 = 0; i2 < wanted_extension_count; i2++) {
            if (strcmp(wanted_extensions[i2], extensions[i].extensionName) == 0) {
                found_extensions[i2] = true;
            }
        }
    }
    bool missing_extensions = false;
    for (int i = 0; i < wanted_extension_count; i++) {
        if (!found_extensions[i]) {
            iron_error("Failed to find extension %s", wanted_extensions[i]);
            missing_extensions = true;
        }
    }
    free(found_extensions);
    return missing_extensions;
}

static bool find_layer(VkLayerProperties *layers, int layer_count, const char *wanted_layer) {
    for (int i = 0; i < layer_count; i++) {
        if (strcmp(wanted_layer, layers[i].layerName) == 0) {
            return true;
        }
    }
    return false;
}

static uint32_t memory_type_from_properties(uint32_t type_bits, VkFlags requirements_mask) {
    // Among all memory types that satisfy the mask, prefer the one backed by the largest heap
    uint32_t best_index = 0;
    VkDeviceSize best_size = 0;
    for (uint32_t i = 0; i < VK_MAX_MEMORY_TYPES; i++) {
        if ((type_bits & 1) == 1) {
            // Skip device-coherent AMD memory types, which are typically slower
            if (is_amd && (memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD)) {
                continue;
            }
            if ((memory_properties.memoryTypes[i].propertyFlags & requirements_mask) == requirements_mask) {
                uint32_t heap_index = memory_properties.memoryTypes[i].heapIndex;
                VkDeviceSize heap_size = memory_properties.memoryHeaps[heap_index].size;
                if (heap_size > best_size) {
                    best_size = heap_size;
                    best_index = i;
                }
            }
        }
        type_bits >>= 1;
    }
    return best_index;
}

static VkAccessFlags access_mask(VkImageLayout layout) {
    if (layout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) {
        return VK_ACCESS_TRANSFER_READ_BIT;
    }
    if (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
        return VK_ACCESS_TRANSFER_WRITE_BIT;
    }
    if (layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) {
        return VK_ACCESS_MEMORY_READ_BIT;
    }
    return 0;
}

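// Transitions render_target to the layout implied by state_after, tracking the
// current layout in the texture itself. ALL_COMMANDS is used for both stage
// masks, trading barrier precision for simplicity: every transition is a full
// pipeline sync point.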
void gpu_barrier(gpu_texture_t *render_target, gpu_texture_state_t state_after) {
    if (render_target->state == state_after) {
        return;
    }
    VkImageMemoryBarrier barrier = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .srcAccessMask = access_mask(convert_texture_state(render_target->state)),
        .dstAccessMask = access_mask(convert_texture_state(state_after)),
        .oldLayout = convert_texture_state(render_target->state),
        .newLayout = convert_texture_state(state_after),
        .image = render_target->impl.image,
        .subresourceRange = {
            .aspectMask = render_target->format == GPU_TEXTURE_FORMAT_D32 ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT,
            .baseMipLevel = 0,
            .levelCount = 1,
            .baseArrayLayer = 0,
            .layerCount = 1,
        },
    };
    vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, NULL, 0, NULL, 1, &barrier);
    render_target->state = state_after;
}

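// Raw layout transition used while creating resources. Pipeline barriers are
// not allowed inside a dynamic rendering instance, so when a pass is active it
// is suspended with vkCmdEndRendering and resumed afterwards.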
static void set_image_layout(VkImage image, VkImageAspectFlags aspect_mask, VkImageLayout old_layout, VkImageLayout new_layout) {
    if (gpu_in_use) {
        vkCmdEndRendering(command_buffer);
    }
    VkImageMemoryBarrier barrier = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .srcAccessMask = 0,
        .dstAccessMask = 0,
        .oldLayout = old_layout,
        .newLayout = new_layout,
        .image = image,
        .subresourceRange.aspectMask = aspect_mask,
        .subresourceRange.baseMipLevel = 0,
        .subresourceRange.levelCount = 1,
        .subresourceRange.baseArrayLayer = 0,
        .subresourceRange.layerCount = 1,
    };
    if (new_layout == VK_IMAGE_LAYOUT_PRESENT_SRC_KHR) {
        barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
    }
    if (new_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
        barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
    }
    if (new_layout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL) {
        barrier.dstAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
    }
    vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, NULL, 0, NULL, 1, &barrier);
    if (gpu_in_use) {
        vkCmdBeginRendering(command_buffer, &current_rendering_info);
    }
}

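// One fixed descriptor set layout is used for everything: binding 0 is a
// dynamic uniform buffer, binding 1 a sampler, and bindings 2 through
// 2 + GPU_MAX_TEXTURES - 1 sampled images. One set per constant-buffer slice
// is allocated up front, presumably so sets can be rotated frame over frame
// without descriptor updates racing the GPU.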
static void create_descriptors(void) {
    VkDescriptorSetLayoutBinding bindings[2 + GPU_MAX_TEXTURES];
    memset(bindings, 0, sizeof(bindings));
    bindings[0].binding = 0;
    bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
    bindings[0].descriptorCount = 1;
    bindings[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
    bindings[1].binding = 1;
    bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER;
    bindings[1].descriptorCount = 1;
    bindings[1].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
    for (int i = 2; i < 2 + GPU_MAX_TEXTURES; ++i) {
        bindings[i].binding = i;
        bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
        bindings[i].descriptorCount = 1;
        bindings[i].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
    }
    VkDescriptorSetLayoutCreateInfo layout_create_info = {
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
        .bindingCount = 2 + GPU_MAX_TEXTURES,
        .pBindings = bindings,
    };
    vkCreateDescriptorSetLayout(device, &layout_create_info, NULL, &descriptor_layout);
    VkDescriptorPoolSize type_counts[3];
    memset(type_counts, 0, sizeof(type_counts));
    type_counts[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
    type_counts[0].descriptorCount = GPU_CONSTANT_BUFFER_MULTIPLE;
    type_counts[1].type = VK_DESCRIPTOR_TYPE_SAMPLER;
    type_counts[1].descriptorCount = GPU_CONSTANT_BUFFER_MULTIPLE;
    type_counts[2].type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
    type_counts[2].descriptorCount = GPU_CONSTANT_BUFFER_MULTIPLE * GPU_MAX_TEXTURES;
    VkDescriptorPoolCreateInfo pool_info = {
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
        .maxSets = GPU_CONSTANT_BUFFER_MULTIPLE,
        .poolSizeCount = 3,
        .pPoolSizes = type_counts,
    };
    VkDescriptorPool descriptor_pool;
    vkCreateDescriptorPool(device, &pool_info, NULL, &descriptor_pool);
    VkDescriptorSetLayout layouts[GPU_CONSTANT_BUFFER_MULTIPLE];
    for (int i = 0; i < GPU_CONSTANT_BUFFER_MULTIPLE; ++i) {
        layouts[i] = descriptor_layout;
    }
    VkDescriptorSetAllocateInfo alloc_info = {
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
        .descriptorPool = descriptor_pool,
        .descriptorSetCount = GPU_CONSTANT_BUFFER_MULTIPLE,
        .pSetLayouts = layouts,
    };
    vkAllocateDescriptorSets(device, &alloc_info, descriptor_sets);
    VkSamplerCreateInfo sampler_info = {
        .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
        .magFilter = VK_FILTER_LINEAR,
        .minFilter = VK_FILTER_LINEAR,
        .addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT,
        .addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT,
        .addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT,
        .maxAnisotropy = 1.0f,
        .borderColor = VK_BORDER_COLOR_INT_OPAQUE_BLACK,
        .compareOp = VK_COMPARE_OP_ALWAYS,
        .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
    };
    vkCreateSampler(device, &sampler_info, NULL, &linear_sampler);
    sampler_info.magFilter = VK_FILTER_NEAREST;
    sampler_info.minFilter = VK_FILTER_NEAREST;
    sampler_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
    vkCreateSampler(device, &sampler_info, NULL, &point_sampler);
}

VkSwapchainKHR cleanup_swapchain() {
    // for (int i = 0; i < GPU_FRAMEBUFFER_COUNT; ++i) {
    //     gpu_texture_destroy_internal(&framebuffers[i]);
    // }
    VkSwapchainKHR chain = swapchain;
    swapchain = VK_NULL_HANDLE;
    return chain;
}

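// Creates a render target image plus backing memory and a view. A
// framebuffer_index >= 0 marks a swapchain-owned target: only the metadata is
// filled in here and the VkImage is attached later by create_swapchain().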
void gpu_render_target_init2(gpu_texture_t *target, int width, int height, gpu_texture_format_t format, int framebuffer_index) {
    target->width = width;
    target->height = height;
    target->format = format;
    target->state = (framebuffer_index >= 0) ? GPU_TEXTURE_STATE_PRESENT : GPU_TEXTURE_STATE_SHADER_RESOURCE;
    target->buffer = NULL;
    target->impl.has_storage_bit = false;
    if (framebuffer_index >= 0) {
        return;
    }
    VkFormatProperties format_properties;
    vkGetPhysicalDeviceFormatProperties(gpu, convert_image_format(target->format), &format_properties);
    VkImageCreateInfo image = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
        .imageType = VK_IMAGE_TYPE_2D,
        .format = convert_image_format(target->format),
        .extent.width = width,
        .extent.height = height,
        .extent.depth = 1,
        .mipLevels = 1,
        .arrayLayers = 1,
        .samples = VK_SAMPLE_COUNT_1_BIT,
        .tiling = VK_IMAGE_TILING_OPTIMAL,
    };
    if (format == GPU_TEXTURE_FORMAT_D32) {
        image.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
    }
    else {
        image.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
    }
    VkImageViewCreateInfo color_image_view = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
        .viewType = VK_IMAGE_VIEW_TYPE_2D,
        .format = convert_image_format(target->format),
        .subresourceRange.aspectMask = format == GPU_TEXTURE_FORMAT_D32 ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT,
        .subresourceRange.baseMipLevel = 0,
        .subresourceRange.levelCount = 1,
        .subresourceRange.baseArrayLayer = 0,
        .subresourceRange.layerCount = 1,
    };
    vkCreateImage(device, &image, NULL, &target->impl.image);
    VkMemoryRequirements memory_reqs;
    vkGetImageMemoryRequirements(device, target->impl.image, &memory_reqs);
    VkMemoryAllocateInfo allocation_info = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .allocationSize = memory_reqs.size,
    };
    allocation_info.memoryTypeIndex = memory_type_from_properties(memory_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
    vkAllocateMemory(device, &allocation_info, NULL, &target->impl.mem);
    vkBindImageMemory(device, target->impl.image, target->impl.mem, 0);
    set_image_layout(target->impl.image, format == GPU_TEXTURE_FORMAT_D32 ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
    color_image_view.image = target->impl.image;
    vkCreateImageView(device, &color_image_view, NULL, &target->impl.view);
}

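// (Re)builds the swapchain: the image count is clamped to the surface caps,
// the first supported composite-alpha mode wins in a fixed preference order,
// and vsync selects FIFO while non-vsync selects MAILBOX. Note that MAILBOX
// support is not guaranteed by the spec, and the queried present_modes array
// is not consulted for a fallback here.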
static void create_swapchain() {
    VkSwapchainKHR old_swapchain = cleanup_swapchain();
    if (surface_destroyed) {
        vkDestroySwapchainKHR(device, old_swapchain, NULL);
        old_swapchain = VK_NULL_HANDLE;
        vkDestroySurfaceKHR(instance, surface, NULL);
        iron_vulkan_create_surface(instance, &surface);
        surface_destroyed = false;
    }
    VkSurfaceCapabilitiesKHR caps = {0};
    vkGetPhysicalDeviceSurfaceCapabilitiesKHR(gpu, surface, &caps);
    VkPresentModeKHR present_modes[256];
    uint32_t present_mode_count;
    vkGetPhysicalDeviceSurfacePresentModesKHR(gpu, surface, &present_mode_count, NULL);
    present_mode_count = present_mode_count > 256 ? 256 : present_mode_count;
    vkGetPhysicalDeviceSurfacePresentModesKHR(gpu, surface, &present_mode_count, present_modes);
    uint32_t image_count = GPU_FRAMEBUFFER_COUNT;
    if (image_count < caps.minImageCount) {
        image_count = caps.minImageCount;
    }
    else if (image_count > caps.maxImageCount && caps.maxImageCount > 0) {
        image_count = caps.maxImageCount;
    }
    VkSurfaceTransformFlagBitsKHR pre_transform;
    if (caps.supportedTransforms & VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR) {
        pre_transform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR;
    }
    else {
        pre_transform = caps.currentTransform;
    }
    // Fetch the newest window size
    iron_internal_handle_messages();
    VkSwapchainCreateInfoKHR swapchain_info = {
        .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
        .surface = surface,
        .minImageCount = image_count,
        .imageFormat = surface_format.format,
        .imageColorSpace = surface_format.colorSpace,
        .imageExtent.width = iron_window_width(),
        .imageExtent.height = iron_window_height(),
        .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
        .preTransform = pre_transform,
    };
    if (caps.supportedCompositeAlpha & VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR) {
        swapchain_info.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
    }
    else if (caps.supportedCompositeAlpha & VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR) {
        swapchain_info.compositeAlpha = VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR;
    }
    else if (caps.supportedCompositeAlpha & VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR) {
        swapchain_info.compositeAlpha = VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR;
    }
    else if (caps.supportedCompositeAlpha & VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR) {
        swapchain_info.compositeAlpha = VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR;
    }
    swapchain_info.imageArrayLayers = 1;
    swapchain_info.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
    swapchain_info.queueFamilyIndexCount = 0;
    swapchain_info.pQueueFamilyIndices = NULL;
    swapchain_info.presentMode = window_vsync ? VK_PRESENT_MODE_FIFO_KHR : VK_PRESENT_MODE_MAILBOX_KHR;
    swapchain_info.oldSwapchain = old_swapchain;
    swapchain_info.clipped = true;
    vkCreateSwapchainKHR(device, &swapchain_info, NULL, &swapchain);
    if (old_swapchain != VK_NULL_HANDLE) {
        gpu_execute_and_wait();
        vkDestroySwapchainKHR(device, old_swapchain, NULL);
    }
    framebuffer_count = GPU_FRAMEBUFFER_COUNT;
    vkGetSwapchainImagesKHR(device, swapchain, &framebuffer_count, window_images);
    for (uint32_t i = 0; i < framebuffer_count; i++) {
        framebuffers[i].impl.image = window_images[i];
        set_image_layout(window_images[i], VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR);
        VkImageViewCreateInfo color_attachment_view = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
            .format = surface_format.format,
            .components.r = VK_COMPONENT_SWIZZLE_R,
            .components.g = VK_COMPONENT_SWIZZLE_G,
            .components.b = VK_COMPONENT_SWIZZLE_B,
            .components.a = VK_COMPONENT_SWIZZLE_A,
            .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
            .subresourceRange.baseMipLevel = 0,
            .subresourceRange.levelCount = 1,
            .subresourceRange.baseArrayLayer = 0,
            .subresourceRange.layerCount = 1,
            .viewType = VK_IMAGE_VIEW_TYPE_2D,
            .flags = 0,
            .image = window_images[i],
        };
        vkCreateImageView(device, &color_attachment_view, NULL, &framebuffers[i].impl.view);
    }
    framebuffer_index = 0;
    if (window_depth_bits > 0) {
        VkImageCreateInfo image = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
            .imageType = VK_IMAGE_TYPE_2D,
            .format = VK_FORMAT_D32_SFLOAT,
            .extent.width = iron_window_width(),
            .extent.height = iron_window_height(),
            .extent.depth = 1,
            .mipLevels = 1,
            .arrayLayers = 1,
            .samples = VK_SAMPLE_COUNT_1_BIT,
            .tiling = VK_IMAGE_TILING_OPTIMAL,
            .usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
            .flags = 0,
        };
        VkMemoryAllocateInfo mem_alloc = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        };
        VkMemoryRequirements mem_reqs = {0};
        vkCreateImage(device, &image, NULL, &framebuffer_depth.impl.image);
        vkGetImageMemoryRequirements(device, framebuffer_depth.impl.image, &mem_reqs);
        mem_alloc.allocationSize = mem_reqs.size;
        mem_alloc.memoryTypeIndex = memory_type_from_properties(mem_reqs.memoryTypeBits, 0);
        vkAllocateMemory(device, &mem_alloc, NULL, &framebuffer_depth.impl.mem);
        vkBindImageMemory(device, framebuffer_depth.impl.image, framebuffer_depth.impl.mem, 0);
        set_image_layout(framebuffer_depth.impl.image, VK_IMAGE_ASPECT_DEPTH_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
        VkImageViewCreateInfo view = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
            .image = framebuffer_depth.impl.image,
            .format = VK_FORMAT_D32_SFLOAT,
            .subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
            .subresourceRange.baseMipLevel = 0,
            .subresourceRange.levelCount = 1,
            .subresourceRange.baseArrayLayer = 0,
            .subresourceRange.layerCount = 1,
            .viewType = VK_IMAGE_VIEW_TYPE_2D,
        };
        vkCreateImageView(device, &view, NULL, &framebuffer_depth.impl.view);
    }
}

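// Acquires the next swapchain image. On VK_ERROR_OUT_OF_DATE_KHR or
// VK_ERROR_SURFACE_LOST_KHR the swapchain is rebuilt and the acquire retried,
// after which the cached framebuffer dimensions are refreshed.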
static void acquire_next_image() {
    VkResult err = vkAcquireNextImageKHR(device, swapchain, UINT64_MAX, framebuffer_available_semaphore, VK_NULL_HANDLE, &framebuffer_index);
    if (err == VK_ERROR_SURFACE_LOST_KHR || err == VK_ERROR_OUT_OF_DATE_KHR || surface_destroyed) {
        surface_destroyed = surface_destroyed || (err == VK_ERROR_SURFACE_LOST_KHR);
        gpu_in_use = false;
        create_swapchain();
        gpu_in_use = true;
        acquire_next_image();
        for (int i = 0; i < GPU_FRAMEBUFFER_COUNT; ++i) {
            // gpu_texture_destroy_internal(&framebuffers[i]);
            // gpu_render_target_init2(&framebuffers[i], iron_window_width(), iron_window_height(), GPU_TEXTURE_FORMAT_RGBA32, i);
            framebuffers[i].width = iron_window_width();
            framebuffers[i].height = iron_window_height();
        }
    }
}

void gpu_resize_internal(int width, int height) {
    // The newest window size is fetched in create_swapchain
}

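// Initialization order: instance (with validation layers in debug builds),
// physical device selection (discrete scores over integrated), graphics /
// present queue selection, logical device (with an optional raytracing
// feature chain), command pool and descriptors, semaphores, surface and
// surface format, command buffer, framebuffers and swapchain, submit fence.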
void gpu_init_internal(int depth_buffer_bits, bool vsync) {
    uint32_t instance_layer_count = 0;
    static const char *wanted_instance_layers[64];
    int wanted_instance_layer_count = 0;
    vkEnumerateInstanceLayerProperties(&instance_layer_count, NULL);
    if (instance_layer_count > 0) {
        VkLayerProperties *instance_layers = (VkLayerProperties *)malloc(sizeof(VkLayerProperties) * instance_layer_count);
        vkEnumerateInstanceLayerProperties(&instance_layer_count, instance_layers);
#ifdef VALIDATE
        validation_found = find_layer(instance_layers, instance_layer_count, "VK_LAYER_KHRONOS_validation");
        if (validation_found) {
            iron_log("Running with Vulkan validation layers enabled.");
            wanted_instance_layers[wanted_instance_layer_count++] = "VK_LAYER_KHRONOS_validation";
        }
#endif
        free(instance_layers);
    }
    static const char *wanted_instance_extensions[64];
    int wanted_instance_extension_count = 0;
    uint32_t instance_extension_count = 0;
    wanted_instance_extensions[wanted_instance_extension_count++] = VK_KHR_SURFACE_EXTENSION_NAME;
    wanted_instance_extensions[wanted_instance_extension_count++] = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME;
    iron_vulkan_get_instance_extensions(wanted_instance_extensions, &wanted_instance_extension_count);
    vkEnumerateInstanceExtensionProperties(NULL, &instance_extension_count, NULL);
    VkExtensionProperties *instance_extensions = (VkExtensionProperties *)malloc(sizeof(VkExtensionProperties) * instance_extension_count);
    vkEnumerateInstanceExtensionProperties(NULL, &instance_extension_count, instance_extensions);
    bool missing_instance_extensions = check_extensions(wanted_instance_extensions, wanted_instance_extension_count, instance_extensions, instance_extension_count);
    free(instance_extensions);
    if (missing_instance_extensions) {
        iron_error("Missing required Vulkan instance extensions");
    }
#ifdef VALIDATE
    // This extension should be provided by the validation layers
    if (validation_found) {
        wanted_instance_extensions[wanted_instance_extension_count++] = VK_EXT_DEBUG_UTILS_EXTENSION_NAME;
    }
#endif
    VkApplicationInfo app = {
        .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
        .pApplicationName = iron_application_name(),
        .applicationVersion = 0,
        .pEngineName = "Iron",
        .engineVersion = 0,
        .apiVersion = VK_API_VERSION_1_3,
    };
    VkInstanceCreateInfo info = {0};
    info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
    info.pApplicationInfo = &app;
#ifdef VALIDATE
    if (validation_found) {
        info.enabledLayerCount = wanted_instance_layer_count;
        info.ppEnabledLayerNames = (const char *const *)wanted_instance_layers;
    }
    else
#endif
    {
        info.enabledLayerCount = 0;
        info.ppEnabledLayerNames = NULL;
    }
    info.enabledExtensionCount = wanted_instance_extension_count;
    info.ppEnabledExtensionNames = (const char *const *)wanted_instance_extensions;
    VkResult err = vkCreateInstance(&info, NULL, &instance);
    if (err == VK_ERROR_INCOMPATIBLE_DRIVER) {
        iron_error("Vulkan driver is incompatible");
    }
    else if (err == VK_ERROR_EXTENSION_NOT_PRESENT) {
        iron_error("Vulkan extension not found");
    }
    else if (err) {
        iron_error("Cannot create Vulkan instance");
    }
    uint32_t gpu_count;
    vkEnumeratePhysicalDevices(instance, &gpu_count, NULL);
    if (gpu_count > 0) {
        VkPhysicalDevice *physical_devices = (VkPhysicalDevice *)malloc(sizeof(VkPhysicalDevice) * gpu_count);
        vkEnumeratePhysicalDevices(instance, &gpu_count, physical_devices);
        float best_score = 0.0f;
        for (uint32_t gpu_idx = 0; gpu_idx < gpu_count; gpu_idx++) {
            VkPhysicalDevice current_gpu = physical_devices[gpu_idx];
            uint32_t queue_count = 0;
            vkGetPhysicalDeviceQueueFamilyProperties(current_gpu, &queue_count, NULL);
            VkQueueFamilyProperties *queue_props = (VkQueueFamilyProperties *)malloc(queue_count * sizeof(VkQueueFamilyProperties));
            vkGetPhysicalDeviceQueueFamilyProperties(current_gpu, &queue_count, queue_props);
            bool can_present = false;
            bool can_render = false;
            for (uint32_t i = 0; i < queue_count; i++) {
                if (iron_vulkan_get_physical_device_presentation_support(current_gpu, i)) {
                    can_present = true;
                }
                if ((queue_props[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0) {
                    can_render = true;
                }
            }
            free(queue_props);
            if (!can_present || !can_render) {
                continue;
            }
            float score = 0.0f;
            VkPhysicalDeviceProperties properties;
            vkGetPhysicalDeviceProperties(current_gpu, &properties);
            switch (properties.deviceType) {
            case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU:
                score = 2;
                break;
            case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU:
                score = 1;
                break;
            default:
                break;
            }
            if (gpu == VK_NULL_HANDLE || score > best_score) {
                gpu = current_gpu;
                best_score = score;
            }
        }
        if (gpu == VK_NULL_HANDLE) {
            iron_error("No Vulkan device that supports presentation found");
        }
        VkPhysicalDeviceProperties properties;
        vkGetPhysicalDeviceProperties(gpu, &properties);
        iron_log("Chosen Vulkan device: %s", properties.deviceName);
        strcpy(device_name, properties.deviceName);
        is_amd = properties.vendorID == 0x1002; // AMD's PCI vendor ID
        free(physical_devices);
    }
    else {
        iron_error("No Vulkan device found");
    }
    static const char *wanted_device_layers[64];
    int wanted_device_layer_count = 0;
    uint32_t device_layer_count = 0;
    vkEnumerateDeviceLayerProperties(gpu, &device_layer_count, NULL);
    if (device_layer_count > 0) {
        VkLayerProperties *device_layers = (VkLayerProperties *)malloc(sizeof(VkLayerProperties) * device_layer_count);
        vkEnumerateDeviceLayerProperties(gpu, &device_layer_count, device_layers);
#ifdef VALIDATE
        validation_found = find_layer(device_layers, device_layer_count, "VK_LAYER_KHRONOS_validation");
        if (validation_found) {
            wanted_device_layers[wanted_device_layer_count++] = "VK_LAYER_KHRONOS_validation";
        }
#endif
        free(device_layers);
    }
    const char *wanted_device_extensions[64];
    int wanted_device_extension_count = 0;
    wanted_device_extensions[wanted_device_extension_count++] = VK_KHR_SWAPCHAIN_EXTENSION_NAME;
    if (gpu_raytrace_supported()) {
        wanted_device_extensions[wanted_device_extension_count++] = VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME;
        wanted_device_extensions[wanted_device_extension_count++] = VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME;
        wanted_device_extensions[wanted_device_extension_count++] = VK_KHR_DEFERRED_HOST_OPERATIONS_EXTENSION_NAME;
        wanted_device_extensions[wanted_device_extension_count++] = VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME;
        wanted_device_extensions[wanted_device_extension_count++] = VK_KHR_RAY_QUERY_EXTENSION_NAME;
    }
    uint32_t device_extension_count = 0;
    vkEnumerateDeviceExtensionProperties(gpu, NULL, &device_extension_count, NULL);
    VkExtensionProperties *device_extensions = (VkExtensionProperties *)malloc(sizeof(VkExtensionProperties) * device_extension_count);
    vkEnumerateDeviceExtensionProperties(gpu, NULL, &device_extension_count, device_extensions);
    bool missing_device_extensions = check_extensions(wanted_device_extensions, wanted_device_extension_count, device_extensions, device_extension_count);
    free(device_extensions);
    if (missing_device_extensions) {
        exit(1);
    }
#ifdef VALIDATE
    if (validation_found) {
        VkDebugUtilsMessengerCreateInfoEXT create_info = {
            .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
            .pfnUserCallback = vk_debug_utils_messenger_callback_ext,
            .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT,
            .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT,
        };
        PFN_vkCreateDebugUtilsMessengerEXT vkCreateDebugUtilsMessengerEXT =
            (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr(instance, "vkCreateDebugUtilsMessengerEXT");
        vkCreateDebugUtilsMessengerEXT(instance, &create_info, NULL, &debug_messenger);
    }
#endif
    uint32_t queue_count;
    vkGetPhysicalDeviceQueueFamilyProperties(gpu, &queue_count, NULL);
    VkQueueFamilyProperties *queue_props = (VkQueueFamilyProperties *)malloc(queue_count * sizeof(VkQueueFamilyProperties));
    vkGetPhysicalDeviceQueueFamilyProperties(gpu, &queue_count, queue_props);
    VkBool32 *supports_present = (VkBool32 *)malloc(queue_count * sizeof(VkBool32));
    for (uint32_t i = 0; i < queue_count; i++) {
        supports_present[i] = iron_vulkan_get_physical_device_presentation_support(gpu, i);
    }
    // Prefer a queue family that can do both graphics and present
    uint32_t graphics_queue_node_index = UINT32_MAX;
    uint32_t present_queue_node_index = UINT32_MAX;
    for (uint32_t i = 0; i < queue_count; i++) {
        if ((queue_props[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0) {
            if (graphics_queue_node_index == UINT32_MAX) {
                graphics_queue_node_index = i;
            }
            if (supports_present[i] == VK_TRUE) {
                graphics_queue_node_index = i;
                present_queue_node_index = i;
                break;
            }
        }
    }
    if (present_queue_node_index == UINT32_MAX) {
        for (uint32_t i = 0; i < queue_count; ++i) {
            if (supports_present[i] == VK_TRUE) {
                present_queue_node_index = i;
                break;
            }
        }
    }
    free(supports_present);
    free(queue_props);
    if (graphics_queue_node_index == UINT32_MAX || present_queue_node_index == UINT32_MAX) {
        iron_error("Graphics or present queue not found");
    }
    if (graphics_queue_node_index != present_queue_node_index) {
        iron_error("Graphics and present queue do not match");
    }
    {
        float queue_priorities[1] = {0.0f};
        VkDeviceQueueCreateInfo queue_info = {
            .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
            .queueFamilyIndex = graphics_queue_node_index,
            .queueCount = 1,
            .pQueuePriorities = queue_priorities,
        };
        VkPhysicalDeviceDynamicRenderingFeatures dynamic_rendering_features = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES,
            .dynamicRendering = VK_TRUE,
        };
        VkPhysicalDeviceFeatures enabled_features = {0};
        enabled_features.independentBlend = VK_TRUE;
        VkDeviceCreateInfo deviceinfo = {
            .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
            .pNext = &dynamic_rendering_features,
            .queueCreateInfoCount = 1,
            .pQueueCreateInfos = &queue_info,
            .enabledLayerCount = wanted_device_layer_count,
            .ppEnabledLayerNames = (const char *const *)wanted_device_layers,
            .enabledExtensionCount = wanted_device_extension_count,
            .ppEnabledExtensionNames = (const char *const *)wanted_device_extensions,
            .pEnabledFeatures = &enabled_features,
        };
        VkPhysicalDeviceRayTracingPipelineFeaturesKHR raytracing_pipeline_ext = {0};
        VkPhysicalDeviceAccelerationStructureFeaturesKHR raytracing_acceleration_structure_ext = {0};
        VkPhysicalDeviceBufferDeviceAddressFeatures buffer_device_address_ext = {0};
        VkPhysicalDeviceRayQueryFeaturesKHR ray_query_ext = {0};
        if (gpu_raytrace_supported()) {
            // Chain the raytracing feature structs in front of the existing pNext chain
            raytracing_pipeline_ext.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR;
            raytracing_pipeline_ext.pNext = deviceinfo.pNext;
            raytracing_pipeline_ext.rayTracingPipeline = VK_TRUE;
            raytracing_acceleration_structure_ext.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR;
            raytracing_acceleration_structure_ext.pNext = &raytracing_pipeline_ext;
            raytracing_acceleration_structure_ext.accelerationStructure = VK_TRUE;
            buffer_device_address_ext.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES;
            buffer_device_address_ext.pNext = &raytracing_acceleration_structure_ext;
            buffer_device_address_ext.bufferDeviceAddress = VK_TRUE;
            ray_query_ext.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR;
            ray_query_ext.pNext = &buffer_device_address_ext;
            ray_query_ext.rayQuery = VK_TRUE;
            deviceinfo.pNext = &ray_query_ext;
        }
        vkCreateDevice(gpu, &deviceinfo, NULL, &device);
    }
    vkGetDeviceQueue(device, graphics_queue_node_index, 0, &queue);
    vkGetPhysicalDeviceMemoryProperties(gpu, &memory_properties);
    VkCommandPoolCreateInfo cmd_pool_info = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        .queueFamilyIndex = graphics_queue_node_index,
        .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
    };
    vkCreateCommandPool(device, &cmd_pool_info, NULL, &cmd_pool);
    create_descriptors();
    VkSemaphoreCreateInfo sem_info = {
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
        .flags = 0,
    };
    vkCreateSemaphore(device, &sem_info, NULL, &framebuffer_available_semaphore);
    for (uint32_t i = 0; i < GPU_FRAMEBUFFER_COUNT; i++) {
        vkCreateSemaphore(device, &sem_info, NULL, &rendering_finished_semaphores[i]);
    }
    window_depth_bits = depth_buffer_bits;
    window_vsync = vsync;
    iron_vulkan_create_surface(instance, &surface);
    VkBool32 surface_supported;
    vkGetPhysicalDeviceSurfaceSupportKHR(gpu, graphics_queue_node_index, surface, &surface_supported);
    VkSurfaceFormatKHR surf_formats[256];
    uint32_t format_count = sizeof(surf_formats) / sizeof(surf_formats[0]);
    vkGetPhysicalDeviceSurfaceFormatsKHR(gpu, surface, &format_count, surf_formats);
    if (format_count == 1 && surf_formats[0].format == VK_FORMAT_UNDEFINED) {
        surface_format = surf_formats[0];
    }
    else {
        // Prefer the first non-sRGB format
        bool found = false;
        for (uint32_t i = 0; i < format_count; ++i) {
            if (surf_formats[i].format != VK_FORMAT_B8G8R8A8_SRGB) {
                surface_format = surf_formats[i];
                found = true;
                break;
            }
        }
        if (!found) {
            surface_format = surf_formats[0];
        }
    }
    VkCommandBufferAllocateInfo cmd = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .commandPool = cmd_pool,
        .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = 1,
    };
    vkAllocateCommandBuffers(device, &cmd, &command_buffer);
    VkCommandBufferBeginInfo begin_info = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = 0,
    };
    vkBeginCommandBuffer(command_buffer, &begin_info);
    gpu_create_framebuffers(depth_buffer_bits);
    create_swapchain();
    VkFenceCreateInfo fence_info = {
        .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
        .flags = VK_FENCE_CREATE_SIGNALED_BIT,
    };
    vkCreateFence(device, &fence_info, NULL, &fence);
}

void gpu_destroy() {
    if (readback_buffer_size > 0) {
        vkFreeMemory(device, readback_mem, NULL);
        vkDestroyBuffer(device, readback_buffer, NULL);
    }
    vkFreeCommandBuffers(device, cmd_pool, 1, &command_buffer);
    vkDestroyFence(device, fence, NULL);
    VkSwapchainKHR chain = cleanup_swapchain();
    vkDestroySwapchainKHR(device, chain, NULL);
    vkDestroySurfaceKHR(instance, surface, NULL);
}

void iron_vulkan_surface_destroyed() {
    surface_destroyed = true;
}

bool iron_vulkan_get_size(int *width, int *height) {
    if (surface) {
        VkSurfaceCapabilitiesKHR capabilities;
        vkGetPhysicalDeviceSurfaceCapabilitiesKHR(gpu, surface, &capabilities);
        *width = capabilities.currentExtent.width;
        *height = capabilities.currentExtent.height;
        return true;
    }
    return false;
}

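// Begins a dynamic rendering pass over the currently bound render targets.
// Clears go through vkCmdClearAttachments rather than
// VK_ATTACHMENT_LOAD_OP_CLEAR, which is why loadOp becomes DONT_CARE when a
// clear is requested. The packed color is 0xAARRGGBB; for example 0xFF0000FF
// decodes to opaque blue: r = (c >> 16) & 0xff, g = (c >> 8) & 0xff,
// b = c & 0xff, a = (c >> 24) & 0xff.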
void gpu_begin_internal(unsigned flags, unsigned color, float depth) {
    if (!framebuffer_acquired) {
        acquire_next_image();
        framebuffer_acquired = true;
    }
    gpu_texture_t *target = current_render_targets[0];
    VkRect2D render_area = {
        .offset = {0, 0},
    };
    render_area.extent.width = target->width;
    render_area.extent.height = target->height;
    VkClearValue clear_value;
    memset(&clear_value, 0, sizeof(VkClearValue));
    clear_value.color.float32[0] = ((color & 0x00ff0000) >> 16) / 255.0f;
    clear_value.color.float32[1] = ((color & 0x0000ff00) >> 8) / 255.0f;
    clear_value.color.float32[2] = (color & 0x000000ff) / 255.0f;
    clear_value.color.float32[3] = ((color & 0xff000000) >> 24) / 255.0f;
    for (size_t i = 0; i < current_render_targets_count; ++i) {
        current_color_attachment_infos[i] = (VkRenderingAttachmentInfo){
            .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
            .imageView = current_render_targets[i]->impl.view,
            .imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
            .resolveMode = VK_RESOLVE_MODE_NONE,
            .resolveImageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
            .loadOp = (flags & GPU_CLEAR_COLOR) ? VK_ATTACHMENT_LOAD_OP_DONT_CARE : VK_ATTACHMENT_LOAD_OP_LOAD,
            .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
            .clearValue = clear_value,
        };
    }
    if (current_depth_buffer != NULL) {
        current_depth_attachment_info = (VkRenderingAttachmentInfo){
            .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
            .imageView = current_depth_buffer->impl.view,
            .imageLayout = VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL,
            .resolveMode = VK_RESOLVE_MODE_NONE,
            .resolveImageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
            .loadOp = (flags & GPU_CLEAR_DEPTH) ? VK_ATTACHMENT_LOAD_OP_DONT_CARE : VK_ATTACHMENT_LOAD_OP_LOAD,
            .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
            .clearValue.depthStencil.depth = 1.0f,
        };
    }
    current_rendering_info = (VkRenderingInfo){
        .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
        .renderArea = render_area,
        .layerCount = 1,
        .viewMask = 0,
        .colorAttachmentCount = (uint32_t)current_render_targets_count,
        .pColorAttachments = current_color_attachment_infos,
        .pDepthAttachment = current_depth_buffer == NULL ? NULL : &current_depth_attachment_info,
    };
    vkCmdBeginRendering(command_buffer, &current_rendering_info);
    gpu_viewport(0, 0, current_render_targets[0]->width, current_render_targets[0]->height);
    gpu_scissor(0, 0, current_render_targets[0]->width, current_render_targets[0]->height);
    if (flags != GPU_CLEAR_NONE) {
        int count = 0;
        VkClearAttachment attachments[2];
        if (flags & GPU_CLEAR_COLOR) {
            attachments[count].aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
            attachments[count].colorAttachment = 0;
            attachments[count].clearValue.color = clear_value.color;
            count++;
        }
        if (flags & GPU_CLEAR_DEPTH) {
            attachments[count].aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
            attachments[count].clearValue.depthStencil.depth = depth;
            attachments[count].clearValue.depthStencil.stencil = 0;
            count++;
        }
        VkClearRect clear_rect = {
            .rect.offset.x = 0,
            .rect.offset.y = 0,
            .rect.extent.width = current_render_targets[0]->width,
            .rect.extent.height = current_render_targets[0]->height,
            .baseArrayLayer = 0,
            .layerCount = 1,
        };
        vkCmdClearAttachments(command_buffer, count, attachments, 1, &clear_rect);
    }
}

  975. void gpu_end_internal() {
  976. vkCmdEndRendering(command_buffer);
  977. for (int i = 0; i < current_render_targets_count; ++i) {
  978. gpu_barrier(current_render_targets[i],
  979. current_render_targets[i] == &framebuffers[framebuffer_index] ? GPU_TEXTURE_STATE_PRESENT : GPU_TEXTURE_STATE_SHADER_RESOURCE);
  980. }
  981. current_render_targets_count = 0;
  982. if (is_amd) {
  983. gpu_execute_and_wait(); ////
  984. }
  985. }
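
// Submits the current command buffer and blocks until the GPU has finished,
// then restarts recording. If a render pass was active, it is re-entered and
// the cached pipeline, buffers, viewport and scissor are rebound.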
void gpu_execute_and_wait() {
	if (gpu_in_use) {
		vkCmdEndRendering(command_buffer);
	}
	vkEndCommandBuffer(command_buffer);
	vkResetFences(device, 1, &fence);
	VkSubmitInfo submit_info = {
		.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
		.commandBufferCount = 1,
		.pCommandBuffers = &command_buffer,
	};
	vkQueueSubmit(queue, 1, &submit_info, fence);
	vkWaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX);
	vkResetCommandBuffer(command_buffer, 0);
	VkCommandBufferBeginInfo begin_info = {
		.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
	};
	vkBeginCommandBuffer(command_buffer, &begin_info);
	if (gpu_in_use) {
		vkCmdBeginRendering(command_buffer, &current_rendering_info);
		vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, current_pipeline->impl.pipeline);
		VkBuffer buffers[1];
		VkDeviceSize offsets[1];
		buffers[0] = current_vb->impl.buf;
		offsets[0] = (VkDeviceSize)(0);
		vkCmdBindVertexBuffers(command_buffer, 0, 1, buffers, offsets);
		vkCmdBindIndexBuffer(command_buffer, current_ib->impl.buf, 0, VK_INDEX_TYPE_UINT32);
		vkCmdSetViewport(command_buffer, 0, 1, &current_viewport);
		vkCmdSetScissor(command_buffer, 0, 1, &current_scissor);
	}
}
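
// Submits the frame (waiting on image acquisition, signaling the per-image
// semaphore), presents, advances to the next framebuffer index and destroys
// any buffers queued for deletion during the frame. The fence wait right
// after submission makes presentation fully synchronous.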
void gpu_present_internal() {
	vkEndCommandBuffer(command_buffer);
	vkResetFences(device, 1, &fence);
	VkSubmitInfo submit_info = {
		.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
		.commandBufferCount = 1,
		.pCommandBuffers = &command_buffer,
		.signalSemaphoreCount = 1,
		.pSignalSemaphores = &rendering_finished_semaphores[framebuffer_index],
		.waitSemaphoreCount = 1,
		.pWaitSemaphores = &framebuffer_available_semaphore,
		.pWaitDstStageMask = (VkPipelineStageFlags[]){VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT},
	};
	vkQueueSubmit(queue, 1, &submit_info, fence);
	vkWaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX);
	VkPresentInfoKHR present = {
		.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
		.swapchainCount = 1,
		.pSwapchains = &swapchain,
		.pImageIndices = &framebuffer_index,
		.pWaitSemaphores = &rendering_finished_semaphores[framebuffer_index],
		.waitSemaphoreCount = 1,
	};
	vkQueuePresentKHR(queue, &present);
	vkResetCommandBuffer(command_buffer, 0);
	VkCommandBufferBeginInfo begin_info = {
		.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
	};
	vkBeginCommandBuffer(command_buffer, &begin_info);
	// acquire_next_image(); // Breaks window resize
	framebuffer_acquired = false;
	framebuffer_index = (framebuffer_index + 1) % GPU_FRAMEBUFFER_COUNT;
	while (buffers_to_destroy_count > 0) {
		buffers_to_destroy_count--;
		vkFreeMemory(device, buffer_memories_to_destroy[buffers_to_destroy_count], NULL);
		vkDestroyBuffer(device, buffers_to_destroy[buffers_to_destroy_count], NULL);
	}
}

void gpu_draw_internal() {
	vkCmdDrawIndexed(command_buffer, current_ib->count, 1, 0, 0, 0);
}
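
// A negative viewport height (core since Vulkan 1.1) flips the Y axis so clip
// space matches the engine's other backends; the origin is shifted to the
// bottom edge to compensate.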
void gpu_viewport(int x, int y, int width, int height) {
	current_viewport = (VkViewport){
		.x = (float)x,
		.y = y + (float)height,
		.width = (float)width,
		.height = (float)-height,
		.minDepth = 0.0f,
		.maxDepth = 1.0f,
	};
	vkCmdSetViewport(command_buffer, 0, 1, &current_viewport);
}

void gpu_scissor(int x, int y, int width, int height) {
	current_scissor = (VkRect2D){
		.offset.x = x,
		.offset.y = y,
		.extent.width = width,
		.extent.height = height,
	};
	vkCmdSetScissor(command_buffer, 0, 1, &current_scissor);
}

void gpu_disable_scissor() {
	current_scissor = (VkRect2D){
		.extent.width = current_render_targets[0]->width,
		.extent.height = current_render_targets[0]->height,
	};
	vkCmdSetScissor(command_buffer, 0, 1, &current_scissor);
}

void gpu_set_pipeline(gpu_pipeline_t *pipeline) {
	current_pipeline = pipeline;
	vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, current_pipeline->impl.pipeline);
	for (int i = 0; i < GPU_MAX_TEXTURES; ++i) {
		current_textures[i] = NULL;
	}
}

void gpu_set_vertex_buffer(gpu_buffer_t *buffer) {
	current_vb = buffer;
	VkBuffer buffers[1];
	VkDeviceSize offsets[1];
	buffers[0] = buffer->impl.buf;
	offsets[0] = (VkDeviceSize)(0);
	vkCmdBindVertexBuffers(command_buffer, 0, 1, buffers, offsets);
}

void gpu_set_index_buffer(gpu_buffer_t *buffer) {
	current_ib = buffer;
	vkCmdBindIndexBuffer(command_buffer, buffer->impl.buf, 0, VK_INDEX_TYPE_UINT32);
}
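
// Copies a render target into a lazily grown host-visible readback buffer
// (kept at a minimum of 16 MB, presumably to avoid frequent reallocation),
// flushes the queue and memcpys the pixels out to `data`.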
void gpu_get_render_target_pixels(gpu_texture_t *render_target, uint8_t *data) {
	int buffer_size = render_target->width * render_target->height * gpu_texture_format_size(render_target->format);
	int new_readback_buffer_size = buffer_size;
	if (new_readback_buffer_size < (2048 * 2048 * 4)) {
		new_readback_buffer_size = (2048 * 2048 * 4);
	}
	if (readback_buffer_size < new_readback_buffer_size) {
		if (readback_buffer_size > 0) {
			vkFreeMemory(device, readback_mem, NULL);
			vkDestroyBuffer(device, readback_buffer, NULL);
		}
		readback_buffer_size = new_readback_buffer_size;
		VkBufferCreateInfo buf_info = {
			.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
			.size = readback_buffer_size,
			.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT,
		};
		vkCreateBuffer(device, &buf_info, NULL, &readback_buffer);
		VkMemoryRequirements mem_reqs = {0};
		vkGetBufferMemoryRequirements(device, readback_buffer, &mem_reqs);
		VkMemoryAllocateInfo mem_alloc = {0};
		mem_alloc.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
		mem_alloc.allocationSize = mem_reqs.size;
		mem_alloc.memoryTypeIndex = memory_type_from_properties(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
		vkAllocateMemory(device, &mem_alloc, NULL, &readback_mem);
		vkBindBufferMemory(device, readback_buffer, readback_mem, 0);
	}
	set_image_layout(render_target->impl.image, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
	VkBufferImageCopy region;
	region.bufferOffset = 0;
	region.bufferRowLength = render_target->width;
	region.bufferImageHeight = render_target->height;
	region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
	region.imageSubresource.baseArrayLayer = 0;
	region.imageSubresource.layerCount = 1;
	region.imageSubresource.mipLevel = 0;
	region.imageOffset.x = 0;
	region.imageOffset.y = 0;
	region.imageOffset.z = 0;
	region.imageExtent.width = (uint32_t)render_target->width;
	region.imageExtent.height = (uint32_t)render_target->height;
	region.imageExtent.depth = 1;
	if (gpu_in_use) {
		vkCmdEndRendering(command_buffer);
	}
	vkCmdCopyImageToBuffer(command_buffer, render_target->impl.image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, readback_buffer, 1, &region);
	if (gpu_in_use) {
		vkCmdBeginRendering(command_buffer, &current_rendering_info);
	}
	set_image_layout(render_target->impl.image, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
	gpu_execute_and_wait();
	// Read buffer
	void *p;
	vkMapMemory(device, readback_mem, 0, VK_WHOLE_SIZE, 0, &p);
	memcpy(data, p, buffer_size);
	vkUnmapMemory(device, readback_mem);
}
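
// Writes the dynamic uniform buffer, the active sampler and all currently
// bound textures into this frame's descriptor set. The buffer is bound as
// UNIFORM_BUFFER_DYNAMIC so gpu_set_constant_buffer can pass a dynamic offset.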
static VkDescriptorSet get_descriptor_set(VkBuffer buffer) {
	VkDescriptorSet descriptor_set = descriptor_sets[constant_buffer_index];
	VkDescriptorBufferInfo buffer_descs[1];
	memset(&buffer_descs, 0, sizeof(buffer_descs));
	buffer_descs[0].buffer = buffer;
	buffer_descs[0].offset = 0;
	buffer_descs[0].range = GPU_CONSTANT_BUFFER_SIZE;
	VkDescriptorImageInfo tex_desc[GPU_MAX_TEXTURES];
	memset(&tex_desc, 0, sizeof(tex_desc));
	for (int i = 0; i < GPU_MAX_TEXTURES; ++i) {
		if (current_textures[i] != NULL) {
			tex_desc[i].imageView = current_textures[i]->impl.view;
			tex_desc[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
		}
	}
	VkWriteDescriptorSet writes[18];
	memset(&writes, 0, sizeof(writes));
	int write_count = 0;
	writes[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
	writes[0].dstSet = descriptor_set;
	writes[0].dstBinding = 0;
	writes[0].descriptorCount = 1;
	writes[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
	writes[0].pBufferInfo = &buffer_descs[0];
	write_count++;
	VkDescriptorImageInfo sampler_info = {
		.sampler = linear_sampling ? linear_sampler : point_sampler,
	};
	writes[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
	writes[1].dstSet = descriptor_set;
	writes[1].dstBinding = 1;
	writes[1].descriptorCount = 1;
	writes[1].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER;
	writes[1].pImageInfo = &sampler_info;
	write_count++;
	for (int i = 0; i < GPU_MAX_TEXTURES; ++i) {
		if (current_textures[i] != NULL) {
			// Pack writes contiguously so vkUpdateDescriptorSets never reads a
			// zeroed entry when texture slots are sparsely bound.
			writes[write_count].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
			writes[write_count].dstSet = descriptor_set;
			writes[write_count].dstBinding = i + 2;
			writes[write_count].descriptorCount = 1;
			writes[write_count].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
			writes[write_count].pImageInfo = &tex_desc[i];
			write_count++;
		}
	}
	vkUpdateDescriptorSets(device, write_count, writes, 0, NULL);
	return descriptor_set;
}

void gpu_set_constant_buffer(gpu_buffer_t *buffer, int offset, size_t size) {
	VkDescriptorSet descriptor_set = get_descriptor_set(buffer->impl.buf);
	uint32_t offsets[1] = {(uint32_t)offset};
	vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, current_pipeline->impl.pipeline_layout, 0, 1, &descriptor_set, 1, offsets);
}

void gpu_set_texture(int unit, gpu_texture_t *texture) {
	current_textures[unit] = texture;
}

void gpu_use_linear_sampling(bool b) {
	linear_sampling = b;
}

void gpu_pipeline_destroy_internal(gpu_pipeline_t *pipeline) {
	vkDestroyPipeline(device, pipeline->impl.pipeline, NULL);
	vkDestroyPipelineLayout(device, pipeline->impl.pipeline_layout, NULL);
}

static VkShaderModule create_shader_module(const void *code, size_t size) {
	VkShaderModuleCreateInfo module_create_info = {0};
	module_create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
	module_create_info.codeSize = size;
	module_create_info.pCode = (const uint32_t *)code;
	VkShaderModule module;
	vkCreateShaderModule(device, &module_create_info, NULL, &module);
	return module;
}
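
// Builds a graphics pipeline for dynamic rendering: no VkRenderPass, the
// attachment formats are supplied through VkPipelineRenderingCreateInfo in
// the pNext chain. Viewport and scissor are dynamic states.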
void gpu_pipeline_compile(gpu_pipeline_t *pipeline) {
	VkPipelineLayoutCreateInfo pipeline_layout_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &descriptor_layout,
	};
	vkCreatePipelineLayout(device, &pipeline_layout_create_info, NULL, &pipeline->impl.pipeline_layout);
	VkGraphicsPipelineCreateInfo pipeline_info = {0};
	VkPipelineInputAssemblyStateCreateInfo ia = {0};
	VkPipelineRasterizationStateCreateInfo rs = {0};
	VkPipelineColorBlendStateCreateInfo cb = {0};
	VkPipelineDepthStencilStateCreateInfo ds = {0};
	VkPipelineViewportStateCreateInfo vp = {0};
	VkPipelineMultisampleStateCreateInfo ms = {0};
	VkDynamicState dynamic_state[2];
	VkPipelineDynamicStateCreateInfo dynamic_state_create_info = {0};
	memset(dynamic_state, 0, sizeof(dynamic_state));
	dynamic_state_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
	dynamic_state_create_info.pDynamicStates = dynamic_state;
	pipeline_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
	pipeline_info.layout = pipeline->impl.pipeline_layout;
	VkVertexInputBindingDescription vi_bindings[1];
	int vertex_attribute_count = pipeline->input_layout->size;
	VkVertexInputAttributeDescription vi_attrs[vertex_attribute_count];
	VkPipelineVertexInputStateCreateInfo vi = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
		.vertexBindingDescriptionCount = 1,
		.pVertexBindingDescriptions = vi_bindings,
		.vertexAttributeDescriptionCount = vertex_attribute_count,
		.pVertexAttributeDescriptions = vi_attrs,
	};
	uint32_t attr = 0;
	uint32_t offset = 0;
	for (int i = 0; i < pipeline->input_layout->size; ++i) {
		gpu_vertex_element_t element = pipeline->input_layout->elements[i];
		vi_attrs[attr].binding = 0;
		vi_attrs[attr].location = i;
		vi_attrs[attr].offset = offset;
		offset += gpu_vertex_data_size(element.data);
		switch (element.data) {
		case GPU_VERTEX_DATA_F32_1X:
			vi_attrs[attr].format = VK_FORMAT_R32_SFLOAT;
			break;
		case GPU_VERTEX_DATA_F32_2X:
			vi_attrs[attr].format = VK_FORMAT_R32G32_SFLOAT;
			break;
		case GPU_VERTEX_DATA_F32_3X:
			vi_attrs[attr].format = VK_FORMAT_R32G32B32_SFLOAT;
			break;
		case GPU_VERTEX_DATA_F32_4X:
			vi_attrs[attr].format = VK_FORMAT_R32G32B32A32_SFLOAT;
			break;
		case GPU_VERTEX_DATA_I16_2X_NORM:
			vi_attrs[attr].format = VK_FORMAT_R16G16_SNORM;
			break;
		case GPU_VERTEX_DATA_I16_4X_NORM:
			vi_attrs[attr].format = VK_FORMAT_R16G16B16A16_SNORM;
			break;
		}
		attr++;
	}
	vi_bindings[0].binding = 0;
	vi_bindings[0].stride = offset;
	vi_bindings[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
	ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
	ia.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
	rs.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
	rs.polygonMode = VK_POLYGON_MODE_FILL;
	rs.cullMode = convert_cull_mode(pipeline->cull_mode);
	rs.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE;
	rs.depthClampEnable = VK_FALSE;
	rs.rasterizerDiscardEnable = VK_FALSE;
	rs.depthBiasEnable = VK_FALSE;
	rs.lineWidth = 1.0f;
	cb.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
	VkPipelineColorBlendAttachmentState att_state[8];
	memset(att_state, 0, sizeof(att_state));
	for (int i = 0; i < pipeline->color_attachment_count; ++i) {
		att_state[i].colorWriteMask =
		    (pipeline->color_write_mask_red[i] ? VK_COLOR_COMPONENT_R_BIT : 0) |
		    (pipeline->color_write_mask_green[i] ? VK_COLOR_COMPONENT_G_BIT : 0) |
		    (pipeline->color_write_mask_blue[i] ? VK_COLOR_COMPONENT_B_BIT : 0) |
		    (pipeline->color_write_mask_alpha[i] ? VK_COLOR_COMPONENT_A_BIT : 0);
		att_state[i].blendEnable = pipeline->blend_source != GPU_BLEND_ONE ||
		                           pipeline->blend_destination != GPU_BLEND_ZERO ||
		                           pipeline->alpha_blend_source != GPU_BLEND_ONE ||
		                           pipeline->alpha_blend_destination != GPU_BLEND_ZERO;
		att_state[i].srcColorBlendFactor = convert_blend_factor(pipeline->blend_source);
		att_state[i].dstColorBlendFactor = convert_blend_factor(pipeline->blend_destination);
		att_state[i].colorBlendOp = VK_BLEND_OP_ADD;
		att_state[i].srcAlphaBlendFactor = convert_blend_factor(pipeline->alpha_blend_source);
		att_state[i].dstAlphaBlendFactor = convert_blend_factor(pipeline->alpha_blend_destination);
		att_state[i].alphaBlendOp = VK_BLEND_OP_ADD;
	}
	cb.attachmentCount = pipeline->color_attachment_count;
	cb.pAttachments = att_state;
	vp.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
	vp.viewportCount = 1;
	dynamic_state[dynamic_state_create_info.dynamicStateCount++] = VK_DYNAMIC_STATE_VIEWPORT;
	vp.scissorCount = 1;
	dynamic_state[dynamic_state_create_info.dynamicStateCount++] = VK_DYNAMIC_STATE_SCISSOR;
	ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
	ds.depthTestEnable = pipeline->depth_mode != GPU_COMPARE_MODE_ALWAYS;
	ds.depthWriteEnable = pipeline->depth_write;
	ds.depthCompareOp = convert_compare_mode(pipeline->depth_mode);
	ds.depthBoundsTestEnable = VK_FALSE;
	ds.back.failOp = VK_STENCIL_OP_KEEP;
	ds.back.passOp = VK_STENCIL_OP_KEEP;
	ds.back.compareOp = VK_COMPARE_OP_ALWAYS;
	ds.stencilTestEnable = VK_FALSE;
	ds.front = ds.back;
	ms.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
	ms.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
	pipeline_info.stageCount = 2;
	VkPipelineShaderStageCreateInfo shader_stages[2];
	memset(&shader_stages, 0, sizeof(shader_stages));
	shader_stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
	shader_stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT;
	VkShaderModule vert_shader_module = create_shader_module(pipeline->vertex_shader->impl.source, pipeline->vertex_shader->impl.length);
	shader_stages[0].module = vert_shader_module;
	shader_stages[0].pName = "main";
	shader_stages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
	shader_stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT;
	VkShaderModule frag_shader_module = create_shader_module(pipeline->fragment_shader->impl.source, pipeline->fragment_shader->impl.length);
	shader_stages[1].module = frag_shader_module;
	shader_stages[1].pName = "main";
	pipeline_info.pVertexInputState = &vi;
	pipeline_info.pInputAssemblyState = &ia;
	pipeline_info.pRasterizationState = &rs;
	pipeline_info.pColorBlendState = &cb;
	pipeline_info.pMultisampleState = &ms;
	pipeline_info.pViewportState = &vp;
	pipeline_info.pDepthStencilState = &ds;
	pipeline_info.pStages = shader_stages;
	pipeline_info.pDynamicState = &dynamic_state_create_info;
	VkFormat color_attachment_formats[8];
	for (int i = 0; i < pipeline->color_attachment_count; ++i) {
		color_attachment_formats[i] = convert_image_format(pipeline->color_attachment[i]);
	}
	VkPipelineRenderingCreateInfo rendering_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
		.colorAttachmentCount = pipeline->color_attachment_count,
		.pColorAttachmentFormats = color_attachment_formats,
		.depthAttachmentFormat = pipeline->depth_attachment_bits > 0 ? VK_FORMAT_D32_SFLOAT : VK_FORMAT_UNDEFINED,
	};
	pipeline_info.pNext = &rendering_info;
	vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &pipeline_info, NULL, &pipeline->impl.pipeline);
	vkDestroyShaderModule(device, frag_shader_module, NULL);
	vkDestroyShaderModule(device, vert_shader_module, NULL);
}

void gpu_shader_init(gpu_shader_t *shader, const void *source, size_t length, gpu_shader_type_t type) {
	shader->impl.length = (int)length;
	shader->impl.source = (char *)malloc(length);
	memcpy(shader->impl.source, source, length);
}

void gpu_shader_destroy(gpu_shader_t *shader) {
	free(shader->impl.source);
	shader->impl.source = NULL;
}
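
// Uploads raw pixels through a shared host-visible staging buffer (grown on
// demand, minimum 4 MB), then copies them into an optimal-tiling image with
// the appropriate layout transitions. BGRA formats are remapped to RGBA here,
// on the assumption that the incoming byte order is always RGBA.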
void gpu_texture_init_from_bytes(gpu_texture_t *texture, void *data, int width, int height, gpu_texture_format_t format) {
	texture->width = width;
	texture->height = height;
	texture->format = format;
	texture->state = GPU_TEXTURE_STATE_SHADER_RESOURCE;
	texture->buffer = NULL;
	VkFormat vk_format = convert_image_format(format);
	if (vk_format == VK_FORMAT_B8G8R8A8_UNORM) {
		vk_format = VK_FORMAT_R8G8B8A8_UNORM;
	}
	VkDeviceSize _upload_size = width * height * gpu_texture_format_size(format);
	int new_upload_buffer_size = (int)_upload_size;
	if (new_upload_buffer_size < (1024 * 1024 * 4)) {
		new_upload_buffer_size = (1024 * 1024 * 4);
	}
	if (upload_buffer_size < new_upload_buffer_size) {
		if (upload_buffer_size > 0) {
			vkFreeMemory(device, upload_mem, NULL);
			vkDestroyBuffer(device, upload_buffer, NULL);
		}
		upload_buffer_size = new_upload_buffer_size;
		VkBufferCreateInfo buffer_info = {
			.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
			.size = upload_buffer_size,
			.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
			.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
		};
		vkCreateBuffer(device, &buffer_info, NULL, &upload_buffer);
		VkMemoryRequirements mem_reqs;
		vkGetBufferMemoryRequirements(device, upload_buffer, &mem_reqs);
		VkMemoryAllocateInfo mem_alloc = {
			.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
			.allocationSize = mem_reqs.size,
		};
		mem_alloc.memoryTypeIndex = memory_type_from_properties(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
		vkAllocateMemory(device, &mem_alloc, NULL, &upload_mem);
		vkBindBufferMemory(device, upload_buffer, upload_mem, 0);
	}
	void *mapped_data;
	vkMapMemory(device, upload_mem, 0, _upload_size, 0, &mapped_data);
	memcpy(mapped_data, data, _upload_size);
	vkUnmapMemory(device, upload_mem);
	VkImageCreateInfo image_info = {
		.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
		.imageType = VK_IMAGE_TYPE_2D,
		.format = vk_format,
		.extent.width = (uint32_t)width,
		.extent.height = (uint32_t)height,
		.extent.depth = 1,
		.mipLevels = 1,
		.arrayLayers = 1,
		.samples = VK_SAMPLE_COUNT_1_BIT,
		.tiling = VK_IMAGE_TILING_OPTIMAL,
		.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
		.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
		.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
	};
	vkCreateImage(device, &image_info, NULL, &texture->impl.image);
	VkMemoryRequirements mem_reqs;
	vkGetImageMemoryRequirements(device, texture->impl.image, &mem_reqs);
	VkMemoryAllocateInfo mem_alloc = {
		.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
		.allocationSize = mem_reqs.size,
	};
	mem_alloc.memoryTypeIndex = memory_type_from_properties(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
	vkAllocateMemory(device, &mem_alloc, NULL, &texture->impl.mem);
	vkBindImageMemory(device, texture->impl.image, texture->impl.mem, 0);
	if (gpu_in_use) {
		vkCmdEndRendering(command_buffer);
	}
	VkImageMemoryBarrier barrier = {
		.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
		.srcAccessMask = 0,
		.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
		.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
		.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
		.image = texture->impl.image,
		.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
		.subresourceRange.baseMipLevel = 0,
		.subresourceRange.levelCount = 1,
		.subresourceRange.baseArrayLayer = 0,
		.subresourceRange.layerCount = 1,
	};
	vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 0, NULL, 1, &barrier);
	VkBufferImageCopy copy_region = {
		.bufferOffset = 0,
		.bufferRowLength = 0,
		.bufferImageHeight = 0,
		.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
		.imageSubresource.mipLevel = 0,
		.imageSubresource.baseArrayLayer = 0,
		.imageSubresource.layerCount = 1,
		.imageOffset = {0, 0, 0},
		.imageExtent = {(uint32_t)width, (uint32_t)height, 1},
	};
	vkCmdCopyBufferToImage(command_buffer, upload_buffer, texture->impl.image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy_region);
	barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
	barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
	barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
	barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
	vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &barrier);
	VkImageViewCreateInfo view_info = {
		.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
		.image = texture->impl.image,
		.viewType = VK_IMAGE_VIEW_TYPE_2D,
		.format = vk_format,
		.components = {
			.r = VK_COMPONENT_SWIZZLE_R,
			.g = VK_COMPONENT_SWIZZLE_G,
			.b = VK_COMPONENT_SWIZZLE_B,
			.a = VK_COMPONENT_SWIZZLE_A,
		},
		.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
		.subresourceRange.baseMipLevel = 0,
		.subresourceRange.levelCount = 1,
		.subresourceRange.baseArrayLayer = 0,
		.subresourceRange.layerCount = 1,
	};
	vkCreateImageView(device, &view_info, NULL, &texture->impl.view);
	if (gpu_in_use) {
		vkCmdBeginRendering(command_buffer, &current_rendering_info);
	}
	gpu_execute_and_wait(); ////
}

void gpu_texture_destroy_internal(gpu_texture_t *target) {
	if (target->impl.image != VK_NULL_HANDLE) {
		vkDestroyImage(device, target->impl.image, NULL);
		vkFreeMemory(device, target->impl.mem, NULL);
	}
	if (target->impl.view != VK_NULL_HANDLE) {
		vkDestroyImageView(device, target->impl.view, NULL);
	}
}

void gpu_render_target_init(gpu_texture_t *target, int width, int height, gpu_texture_format_t format) {
	gpu_render_target_init2(target, width, height, format, -1);
}
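
// (Re)creates a buffer and its backing memory. A previous buffer is queued
// for destruction at the end of the frame rather than destroyed immediately,
// since the GPU may still be reading from it.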
void _gpu_buffer_init(gpu_buffer_impl_t *buffer, int size, int usage, int memory_requirements) {
	if (buffer->buf != VK_NULL_HANDLE) {
		assert(buffers_to_destroy_count < 256);
		buffers_to_destroy[buffers_to_destroy_count] = buffer->buf;
		buffer_memories_to_destroy[buffers_to_destroy_count] = buffer->mem;
		buffers_to_destroy_count++;
	}
	VkBufferCreateInfo buf_info = {
		.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
		.size = size,
		.usage = usage,
	};
	bool raytrace = gpu_raytrace_supported() && ((usage & VK_BUFFER_USAGE_VERTEX_BUFFER_BIT) || (usage & VK_BUFFER_USAGE_INDEX_BUFFER_BIT));
	if (raytrace) {
		buf_info.usage |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
		buf_info.usage |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
		buf_info.usage |= VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR;
	}
	vkCreateBuffer(device, &buf_info, NULL, &buffer->buf);
	VkMemoryRequirements mem_reqs = {0};
	vkGetBufferMemoryRequirements(device, buffer->buf, &mem_reqs);
	VkMemoryAllocateInfo mem_alloc = {
		.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
		.allocationSize = mem_reqs.size,
	};
	mem_alloc.memoryTypeIndex = memory_type_from_properties(mem_reqs.memoryTypeBits, memory_requirements);
	VkMemoryAllocateFlagsInfo memory_allocate_flags_info = {0};
	if (raytrace) {
		memory_allocate_flags_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO;
		memory_allocate_flags_info.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR;
		mem_alloc.pNext = &memory_allocate_flags_info;
	}
	vkAllocateMemory(device, &mem_alloc, NULL, &buffer->mem);
	vkBindBufferMemory(device, buffer->buf, buffer->mem, 0);
}

void _gpu_buffer_copy(VkBuffer dest, VkBuffer source, uint32_t size) {
	if (gpu_in_use) {
		vkCmdEndRendering(command_buffer);
	}
	VkBufferCopy copy_region = {
		.size = size,
	};
	vkCmdCopyBuffer(command_buffer, source, dest, 1, &copy_region);
	if (gpu_in_use) {
		vkCmdBeginRendering(command_buffer, &current_rendering_info);
	}
}

void gpu_vertex_buffer_init(gpu_buffer_t *buffer, int count, gpu_vertex_structure_t *structure) {
	buffer->count = count;
	buffer->stride = 0;
	for (int i = 0; i < structure->size; ++i) {
		gpu_vertex_element_t element = structure->elements[i];
		buffer->stride += gpu_vertex_data_size(element.data);
	}
	buffer->impl.buf = VK_NULL_HANDLE;
}
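
// lock() hands out a mapped host-visible staging buffer; unlock() re-creates
// the buffer as device-local (the staging buffer is queued for end-of-frame
// destruction by _gpu_buffer_init), copies the staged data across and flushes
// the queue.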
void *gpu_vertex_buffer_lock(gpu_buffer_t *buffer) {
	_gpu_buffer_init(&buffer->impl, buffer->count * buffer->stride, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
	void *p;
	vkMapMemory(device, buffer->impl.mem, 0, buffer->count * buffer->stride, 0, &p);
	return p;
}

void gpu_vertex_buffer_unlock(gpu_buffer_t *buffer) {
	vkUnmapMemory(device, buffer->impl.mem);
	VkBuffer staging_buffer = buffer->impl.buf;
	_gpu_buffer_init(&buffer->impl, buffer->count * buffer->stride, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
	_gpu_buffer_copy(buffer->impl.buf, staging_buffer, buffer->count * buffer->stride);
	gpu_execute_and_wait(); ////
}

void gpu_index_buffer_init(gpu_buffer_t *buffer, int count) {
	buffer->count = count;
	buffer->stride = sizeof(uint32_t);
	buffer->impl.buf = VK_NULL_HANDLE;
}

void *gpu_index_buffer_lock(gpu_buffer_t *buffer) {
	_gpu_buffer_init(&buffer->impl, buffer->count * buffer->stride, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
	void *p;
	vkMapMemory(device, buffer->impl.mem, 0, buffer->count * buffer->stride, 0, &p);
	return p;
}

void gpu_index_buffer_unlock(gpu_buffer_t *buffer) {
	vkUnmapMemory(device, buffer->impl.mem);
	VkBuffer staging_buffer = buffer->impl.buf;
	_gpu_buffer_init(&buffer->impl, buffer->count * buffer->stride, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
	_gpu_buffer_copy(buffer->impl.buf, staging_buffer, buffer->count * buffer->stride);
	gpu_execute_and_wait(); ////
}

void gpu_constant_buffer_init(gpu_buffer_t *buffer, int size) {
	buffer->count = size;
	buffer->data = NULL;
	buffer->impl.buf = VK_NULL_HANDLE;
	_gpu_buffer_init(&buffer->impl, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
}

void gpu_constant_buffer_lock(gpu_buffer_t *buffer, int start, int count) {
	vkMapMemory(device, buffer->impl.mem, start, count, 0, (void **)&buffer->data);
}

void gpu_constant_buffer_unlock(gpu_buffer_t *buffer) {
	vkUnmapMemory(device, buffer->impl.mem);
	buffer->data = NULL;
}

void gpu_buffer_destroy_internal(gpu_buffer_t *buffer) {
	vkFreeMemory(device, buffer->impl.mem, NULL);
	vkDestroyBuffer(device, buffer->impl.buf, NULL);
}

char *gpu_device_name() {
	return device_name;
}
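
// Ray tracing via VK_KHR_ray_tracing_pipeline. Shared state for the helpers
// below; extension entry points are resolved lazily with vkGetDeviceProcAddr.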
typedef struct inst {
	iron_matrix4x4_t m;
	int i;
} inst_t;

static VkDescriptorPool raytrace_descriptor_pool;
static gpu_raytrace_acceleration_structure_t *accel;
static gpu_raytrace_pipeline_t *pipeline;
static gpu_texture_t *output = NULL;
static gpu_texture_t *texpaint0;
static gpu_texture_t *texpaint1;
static gpu_texture_t *texpaint2;
static gpu_texture_t *texenv;
static gpu_texture_t *texsobol;
static gpu_texture_t *texscramble;
static gpu_texture_t *texrank;
static gpu_buffer_t *vb[16];
static gpu_buffer_t *vb_last[16];
static gpu_buffer_t *ib[16];
static int vb_count = 0;
static int vb_count_last = 0;
static inst_t instances[1024];
static int instances_count = 0;
static VkBuffer vb_full = VK_NULL_HANDLE;
static VkBuffer ib_full = VK_NULL_HANDLE;
static VkDeviceMemory vb_full_mem = VK_NULL_HANDLE;
static VkDeviceMemory ib_full_mem = VK_NULL_HANDLE;
static PFN_vkCreateRayTracingPipelinesKHR _vkCreateRayTracingPipelinesKHR = NULL;
static PFN_vkGetRayTracingShaderGroupHandlesKHR _vkGetRayTracingShaderGroupHandlesKHR = NULL;
static PFN_vkGetBufferDeviceAddressKHR _vkGetBufferDeviceAddressKHR = NULL;
static PFN_vkCreateAccelerationStructureKHR _vkCreateAccelerationStructureKHR = NULL;
static PFN_vkGetAccelerationStructureDeviceAddressKHR _vkGetAccelerationStructureDeviceAddressKHR = NULL;
static PFN_vkGetAccelerationStructureBuildSizesKHR _vkGetAccelerationStructureBuildSizesKHR = NULL;
static PFN_vkCmdBuildAccelerationStructuresKHR _vkCmdBuildAccelerationStructuresKHR = NULL;
static PFN_vkDestroyAccelerationStructureKHR _vkDestroyAccelerationStructureKHR = NULL;
static PFN_vkCmdTraceRaysKHR _vkCmdTraceRaysKHR = NULL;
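
// Checks once whether the device exposes all extensions required for ray
// tracing and caches the result.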
bool gpu_raytrace_supported() {
#ifdef IRON_ANDROID
	return false; // Use VK_KHR_ray_query
#else
	static bool extensions_checked = false;
	static bool raytrace_supported = true;
	if (extensions_checked) {
		return raytrace_supported;
	}
	const char *required_extensions[] = {
		VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME,
		VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME,
		VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME,
		VK_KHR_DEFERRED_HOST_OPERATIONS_EXTENSION_NAME,
		VK_KHR_RAY_QUERY_EXTENSION_NAME,
	};
	uint32_t required_extensions_count = sizeof(required_extensions) / sizeof(required_extensions[0]);
	uint32_t extensions_count = 0;
	vkEnumerateDeviceExtensionProperties(gpu, NULL, &extensions_count, NULL);
	VkExtensionProperties *extensions = (VkExtensionProperties *)malloc(sizeof(VkExtensionProperties) * extensions_count);
	vkEnumerateDeviceExtensionProperties(gpu, NULL, &extensions_count, extensions);
	for (uint32_t i = 0; i < required_extensions_count; i++) {
		bool found = false;
		for (uint32_t j = 0; j < extensions_count; j++) {
			if (strcmp(required_extensions[i], extensions[j].extensionName) == 0) {
				found = true;
				break;
			}
		}
		if (!found) {
			raytrace_supported = false;
			break;
		}
	}
	free(extensions);
	extensions_checked = true;
	return raytrace_supported;
#endif
}
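
// Creates the ray-tracing pipeline: a 12-binding descriptor set layout, one
// shader module providing the raygen/miss/closest-hit entry points, the
// pipeline itself, per-group shader binding tables and the descriptor pool.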
void gpu_raytrace_pipeline_init(gpu_raytrace_pipeline_t *pipeline, void *ray_shader, int ray_shader_size, gpu_buffer_t *constant_buffer) {
	output = NULL;
	pipeline->constant_buffer = constant_buffer;
	{
		VkDescriptorSetLayoutBinding bindings[] = {
			{0, VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, 1, VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_MISS_BIT_KHR},
			{1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_MISS_BIT_KHR},
			{2, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_MISS_BIT_KHR},
			{3, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1, VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_MISS_BIT_KHR},
			{4, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1, VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_MISS_BIT_KHR},
			{5, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1, VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_MISS_BIT_KHR},
			{6, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1, VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_MISS_BIT_KHR},
			{7, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1, VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_MISS_BIT_KHR},
			{8, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1, VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_MISS_BIT_KHR},
			{9, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1, VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_MISS_BIT_KHR},
			{10, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_MISS_BIT_KHR},
			{11, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1, VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_MISS_BIT_KHR},
		};
		VkDescriptorSetLayoutCreateInfo layout_info = {
			.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
			.bindingCount = 12,
			.pBindings = &bindings[0],
		};
		vkCreateDescriptorSetLayout(device, &layout_info, NULL, &pipeline->impl.descriptor_set_layout);
		VkPipelineLayoutCreateInfo pipeline_layout_create_info = {
			.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
			.setLayoutCount = 1,
			.pSetLayouts = &pipeline->impl.descriptor_set_layout,
		};
		vkCreatePipelineLayout(device, &pipeline_layout_create_info, NULL, &pipeline->impl.pipeline_layout);
		VkShaderModuleCreateInfo module_create_info = {
			.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
			.codeSize = ray_shader_size,
			.pCode = (const uint32_t *)ray_shader,
		};
		VkShaderModule shader_module;
		vkCreateShaderModule(device, &module_create_info, NULL, &shader_module);
		VkPipelineShaderStageCreateInfo shader_stages[3] = {
			{.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_RAYGEN_BIT_KHR, .module = shader_module, .pName = "raygeneration"},
			{.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_MISS_BIT_KHR, .module = shader_module, .pName = "miss"},
			{.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, .module = shader_module, .pName = "closesthit"},
		};
		VkRayTracingShaderGroupCreateInfoKHR groups[3] = {
			{.sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, .generalShader = 0, .closestHitShader = VK_SHADER_UNUSED_KHR, .anyHitShader = VK_SHADER_UNUSED_KHR, .intersectionShader = VK_SHADER_UNUSED_KHR},
			{.sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, .generalShader = 1, .closestHitShader = VK_SHADER_UNUSED_KHR, .anyHitShader = VK_SHADER_UNUSED_KHR, .intersectionShader = VK_SHADER_UNUSED_KHR},
			{.sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR, .generalShader = VK_SHADER_UNUSED_KHR, .closestHitShader = 2, .anyHitShader = VK_SHADER_UNUSED_KHR, .intersectionShader = VK_SHADER_UNUSED_KHR},
		};
		VkRayTracingPipelineCreateInfoKHR raytracing_pipeline_create_info = {
			.sType = VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR,
			.stageCount = 3,
			.pStages = &shader_stages[0],
			.groupCount = 3,
			.pGroups = &groups[0],
			.maxPipelineRayRecursionDepth = 1,
			.layout = pipeline->impl.pipeline_layout,
		};
		_vkCreateRayTracingPipelinesKHR = (void *)vkGetDeviceProcAddr(device, "vkCreateRayTracingPipelinesKHR");
		_vkCreateRayTracingPipelinesKHR(device, VK_NULL_HANDLE, VK_NULL_HANDLE, 1, &raytracing_pipeline_create_info, NULL, &pipeline->impl.pipeline);
	}
	{
		VkPhysicalDeviceRayTracingPipelinePropertiesKHR ray_tracing_pipeline_properties = {0};
		ray_tracing_pipeline_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_PROPERTIES_KHR;
		VkPhysicalDeviceProperties2 device_properties = {
			.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
			.pNext = &ray_tracing_pipeline_properties,
		};
		vkGetPhysicalDeviceProperties2(gpu, &device_properties);
		_vkGetRayTracingShaderGroupHandlesKHR = (void *)vkGetDeviceProcAddr(device, "vkGetRayTracingShaderGroupHandlesKHR");
		uint32_t handle_size = ray_tracing_pipeline_properties.shaderGroupHandleSize;
		uint32_t handle_size_aligned =
		    (ray_tracing_pipeline_properties.shaderGroupHandleSize + ray_tracing_pipeline_properties.shaderGroupHandleAlignment - 1) &
		    ~(ray_tracing_pipeline_properties.shaderGroupHandleAlignment - 1);
		VkBufferCreateInfo buf_info = {
			.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
			.size = handle_size,
			.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
			.flags = 0,
		};
		vkCreateBuffer(device, &buf_info, NULL, &pipeline->impl.raygen_shader_binding_table);
		vkCreateBuffer(device, &buf_info, NULL, &pipeline->impl.hit_shader_binding_table);
		vkCreateBuffer(device, &buf_info, NULL, &pipeline->impl.miss_shader_binding_table);
		uint8_t shader_handle_storage[1024];
		_vkGetRayTracingShaderGroupHandlesKHR(device, pipeline->impl.pipeline, 0, 3, handle_size_aligned * 3, shader_handle_storage);
		VkMemoryAllocateFlagsInfo memory_allocate_flags_info = {
			.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
			.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR,
		};
		VkMemoryAllocateInfo memory_allocate_info = {
			.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
			.pNext = &memory_allocate_flags_info,
		};
		VkMemoryRequirements mem_reqs = {0};
		vkGetBufferMemoryRequirements(device, pipeline->impl.raygen_shader_binding_table, &mem_reqs);
		memory_allocate_info.allocationSize = mem_reqs.size;
		memory_allocate_info.memoryTypeIndex = memory_type_from_properties(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
		VkDeviceMemory mem;
		void *data;
		vkAllocateMemory(device, &memory_allocate_info, NULL, &mem);
		vkBindBufferMemory(device, pipeline->impl.raygen_shader_binding_table, mem, 0);
		vkMapMemory(device, mem, 0, handle_size, 0, &data);
		memcpy(data, shader_handle_storage, handle_size);
		vkUnmapMemory(device, mem);
		vkGetBufferMemoryRequirements(device, pipeline->impl.miss_shader_binding_table, &mem_reqs);
		memory_allocate_info.allocationSize = mem_reqs.size;
		memory_allocate_info.memoryTypeIndex = memory_type_from_properties(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
		vkAllocateMemory(device, &memory_allocate_info, NULL, &mem);
		vkBindBufferMemory(device, pipeline->impl.miss_shader_binding_table, mem, 0);
		vkMapMemory(device, mem, 0, handle_size, 0, &data);
		memcpy(data, shader_handle_storage + handle_size_aligned, handle_size);
		vkUnmapMemory(device, mem);
		vkGetBufferMemoryRequirements(device, pipeline->impl.hit_shader_binding_table, &mem_reqs);
		memory_allocate_info.allocationSize = mem_reqs.size;
		memory_allocate_info.memoryTypeIndex = memory_type_from_properties(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
		vkAllocateMemory(device, &memory_allocate_info, NULL, &mem);
		vkBindBufferMemory(device, pipeline->impl.hit_shader_binding_table, mem, 0);
		vkMapMemory(device, mem, 0, handle_size, 0, &data);
		memcpy(data, shader_handle_storage + handle_size_aligned * 2, handle_size);
		vkUnmapMemory(device, mem);
	}
	{
		VkDescriptorPoolSize type_counts[] = {
			{VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, 1},
			{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1},
			{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1},
			{VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1},
			{VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1},
			{VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1},
			{VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1},
			{VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1},
			{VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1},
			{VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1},
			{VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1},
			{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1},
		};
		VkDescriptorPoolCreateInfo descriptor_pool_create_info = {
			.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
			.maxSets = 1024,
			.poolSizeCount = 12,
			.pPoolSizes = type_counts,
		};
		vkCreateDescriptorPool(device, &descriptor_pool_create_info, NULL, &raytrace_descriptor_pool);
		VkDescriptorSetAllocateInfo alloc_info = {
			.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
			.pNext = NULL,
			.descriptorPool = raytrace_descriptor_pool,
			.descriptorSetCount = 1,
			.pSetLayouts = &pipeline->impl.descriptor_set_layout,
		};
		vkAllocateDescriptorSets(device, &alloc_info, &pipeline->impl.descriptor_set);
	}
}

void gpu_raytrace_pipeline_destroy(gpu_raytrace_pipeline_t *pipeline) {
	vkDestroyPipeline(device, pipeline->impl.pipeline, NULL);
	vkDestroyPipelineLayout(device, pipeline->impl.pipeline_layout, NULL);
	vkDestroyDescriptorSetLayout(device, pipeline->impl.descriptor_set_layout, NULL);
}

uint64_t get_buffer_device_address(VkBuffer buffer) {
	VkBufferDeviceAddressInfoKHR buffer_device_address_info = {
		.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
		.buffer = buffer,
	};
	_vkGetBufferDeviceAddressKHR = (void *)vkGetDeviceProcAddr(device, "vkGetBufferDeviceAddressKHR");
	return _vkGetBufferDeviceAddressKHR(device, &buffer_device_address_info);
}

void gpu_raytrace_acceleration_structure_init(gpu_raytrace_acceleration_structure_t *accel) {
	_vkGetBufferDeviceAddressKHR = (void *)vkGetDeviceProcAddr(device, "vkGetBufferDeviceAddressKHR");
	_vkCreateAccelerationStructureKHR = (void *)vkGetDeviceProcAddr(device, "vkCreateAccelerationStructureKHR");
	_vkGetAccelerationStructureDeviceAddressKHR = (void *)vkGetDeviceProcAddr(device, "vkGetAccelerationStructureDeviceAddressKHR");
	_vkGetAccelerationStructureBuildSizesKHR = (void *)vkGetDeviceProcAddr(device, "vkGetAccelerationStructureBuildSizesKHR");
	vb_count = 0;
	instances_count = 0;
}

void gpu_raytrace_acceleration_structure_add(gpu_raytrace_acceleration_structure_t *accel, gpu_buffer_t *_vb, gpu_buffer_t *_ib, iron_matrix4x4_t _transform) {
	int vb_i = -1;
	for (int i = 0; i < vb_count; ++i) {
		if (_vb == vb[i]) {
			vb_i = i;
			break;
		}
	}
	if (vb_i == -1) {
		vb_i = vb_count;
		vb[vb_count] = _vb;
		ib[vb_count] = _ib;
		vb_count++;
	}
	inst_t inst = {.i = vb_i, .m = _transform};
	instances[instances_count] = inst;
	instances_count++;
}

void _gpu_raytrace_acceleration_structure_destroy_bottom(gpu_raytrace_acceleration_structure_t *accel) {
	_vkDestroyAccelerationStructureKHR = (void *)vkGetDeviceProcAddr(device, "vkDestroyAccelerationStructureKHR");
	for (int i = 0; i < vb_count_last; ++i) {
		_vkDestroyAccelerationStructureKHR(device, accel->impl.bottom_level_acceleration_structure[i], NULL);
		vkFreeMemory(device, accel->impl.bottom_level_mem[i], NULL);
		vkDestroyBuffer(device, accel->impl.bottom_level_buffer[i], NULL);
	}
}

void _gpu_raytrace_acceleration_structure_destroy_top(gpu_raytrace_acceleration_structure_t *accel) {
	_vkDestroyAccelerationStructureKHR = (void *)vkGetDeviceProcAddr(device, "vkDestroyAccelerationStructureKHR");
	_vkDestroyAccelerationStructureKHR(device, accel->impl.top_level_acceleration_structure, NULL);
	vkFreeMemory(device, accel->impl.top_level_mem, NULL);
	vkDestroyBuffer(device, accel->impl.top_level_buffer, NULL);
	vkFreeMemory(device, accel->impl.instances_mem, NULL);
	vkDestroyBuffer(device, accel->impl.instances_buffer, NULL);
}
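
// Rebuilds the acceleration structures: bottom-level structures only when the
// set of vertex buffers changed since the last build, the top-level structure
// (and its instance buffer) every time.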
  1924. void gpu_raytrace_acceleration_structure_build(gpu_raytrace_acceleration_structure_t *accel, gpu_buffer_t *_vb_full, gpu_buffer_t *_ib_full) {
  1925. bool build_bottom = false;
  1926. for (int i = 0; i < 16; ++i) {
  1927. if (vb_last[i] != vb[i]) {
  1928. build_bottom = true;
  1929. }
  1930. vb_last[i] = vb[i];
  1931. }
  1932. if (vb_count_last > 0) {
  1933. if (build_bottom) {
  1934. _gpu_raytrace_acceleration_structure_destroy_bottom(accel);
  1935. }
  1936. _gpu_raytrace_acceleration_structure_destroy_top(accel);
  1937. }
  1938. vb_count_last = vb_count;
  1939. if (vb_count == 0) {
  1940. return;
  1941. }
  1942. // Bottom level
  1943. if (build_bottom) {
  1944. for (int i = 0; i < vb_count; ++i) {
  1945. uint32_t prim_count = ib[i]->count / 3;
  1946. uint32_t vert_count = vb[i]->count;
  1947. VkDeviceOrHostAddressConstKHR vertex_data_device_address = {0};
  1948. VkDeviceOrHostAddressConstKHR index_data_device_address = {0};
  1949. vertex_data_device_address.deviceAddress = get_buffer_device_address(vb[i]->impl.buf);
  1950. index_data_device_address.deviceAddress = get_buffer_device_address(ib[i]->impl.buf);
  1951. VkAccelerationStructureGeometryKHR acceleration_geometry = {
  1952. .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR,
  1953. .flags = VK_GEOMETRY_OPAQUE_BIT_KHR,
  1954. .geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR,
  1955. .geometry.triangles.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR,
  1956. .geometry.triangles.vertexFormat = VK_FORMAT_R16G16B16A16_SNORM,
  1957. .geometry.triangles.vertexData.deviceAddress = vertex_data_device_address.deviceAddress,
  1958. .geometry.triangles.vertexStride = vb[i]->stride,
  1959. .geometry.triangles.maxVertex = vb[i]->count,
  1960. .geometry.triangles.indexType = VK_INDEX_TYPE_UINT32,
  1961. .geometry.triangles.indexData.deviceAddress = index_data_device_address.deviceAddress,
  1962. };
  1963. VkAccelerationStructureBuildGeometryInfoKHR acceleration_structure_build_geometry_info = {
  1964. .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,
  1965. .type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR,
  1966. .flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR,
  1967. .geometryCount = 1,
  1968. .pGeometries = &acceleration_geometry,
  1969. };
  1970. VkAccelerationStructureBuildSizesInfoKHR acceleration_build_sizes_info = {
  1971. .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,
  1972. };
  1973. _vkGetAccelerationStructureBuildSizesKHR(device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &acceleration_structure_build_geometry_info,
  1974. &prim_count, &acceleration_build_sizes_info);
  1975. VkBufferCreateInfo buffer_create_info = {
  1976. .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
  1977. .size = acceleration_build_sizes_info.accelerationStructureSize,
  1978. .usage = VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
  1979. .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
  1980. };
  1981. VkBuffer bottom_level_buffer = VK_NULL_HANDLE;
  1982. vkCreateBuffer(device, &buffer_create_info, NULL, &bottom_level_buffer);
  1983. VkMemoryRequirements memory_requirements2;
  1984. vkGetBufferMemoryRequirements(device, bottom_level_buffer, &memory_requirements2);
  1985. VkMemoryAllocateFlagsInfo memory_allocate_flags_info2 = {
  1986. .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
  1987. .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR,
  1988. };
  1989. VkMemoryAllocateInfo memory_allocate_info = {
  1990. .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
  1991. .pNext = &memory_allocate_flags_info2,
  1992. .allocationSize = memory_requirements2.size,
  1993. };
  1994. memory_allocate_info.memoryTypeIndex = memory_type_from_properties(memory_requirements2.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
  1995. VkDeviceMemory bottom_level_mem;
  1996. vkAllocateMemory(device, &memory_allocate_info, NULL, &bottom_level_mem);
  1997. vkBindBufferMemory(device, bottom_level_buffer, bottom_level_mem, 0);
  1998. VkAccelerationStructureCreateInfoKHR acceleration_create_info = {
  1999. .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,
  2000. .type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR,
  2001. .buffer = bottom_level_buffer,
  2002. .size = acceleration_build_sizes_info.accelerationStructureSize,
  2003. };
  2004. _vkCreateAccelerationStructureKHR(device, &acceleration_create_info, NULL, &accel->impl.bottom_level_acceleration_structure[i]);
			VkBuffer scratch_buffer = VK_NULL_HANDLE;
			VkDeviceMemory scratch_memory = VK_NULL_HANDLE;
			buffer_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
			buffer_create_info.size = acceleration_build_sizes_info.buildScratchSize;
			buffer_create_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
			buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
			vkCreateBuffer(device, &buffer_create_info, NULL, &scratch_buffer);
			VkMemoryRequirements memory_requirements;
			vkGetBufferMemoryRequirements(device, scratch_buffer, &memory_requirements);
			VkMemoryAllocateFlagsInfo memory_allocate_flags_info = {
				.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
				.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR,
			};
			memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
			memory_allocate_info.pNext = &memory_allocate_flags_info;
			memory_allocate_info.allocationSize = memory_requirements.size;
			memory_allocate_info.memoryTypeIndex = memory_type_from_properties(memory_requirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
			vkAllocateMemory(device, &memory_allocate_info, NULL, &scratch_memory);
			vkBindBufferMemory(device, scratch_buffer, scratch_memory, 0);
			VkBufferDeviceAddressInfoKHR buffer_device_address_info = {
				.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
				.buffer = scratch_buffer,
			};
			uint64_t scratch_buffer_device_address = _vkGetBufferDeviceAddressKHR(device, &buffer_device_address_info);
			VkAccelerationStructureBuildGeometryInfoKHR acceleration_build_geometry_info = {
				.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,
				.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR,
				.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR,
				.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,
				.dstAccelerationStructure = accel->impl.bottom_level_acceleration_structure[i],
				.geometryCount = 1,
				.pGeometries = &acceleration_geometry,
				.scratchData.deviceAddress = scratch_buffer_device_address,
			};
			VkAccelerationStructureBuildRangeInfoKHR acceleration_build_range_info = {
				.primitiveCount = prim_count,
			};
			const VkAccelerationStructureBuildRangeInfoKHR *acceleration_build_infos[1] = {&acceleration_build_range_info};
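			// Acceleration structures are built on the GPU timeline, so record the build into a
			// one-shot command buffer, submit it and block on a fence until the device has finished.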
			{
				VkCommandBufferAllocateInfo cmd_buf_allocate_info = {
					.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
					.commandPool = cmd_pool,
					.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
					.commandBufferCount = 1,
				};
				VkCommandBuffer command_buffer;
				vkAllocateCommandBuffers(device, &cmd_buf_allocate_info, &command_buffer);
				VkCommandBufferBeginInfo command_buffer_info = {
					.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
				};
				vkBeginCommandBuffer(command_buffer, &command_buffer_info);
				_vkCmdBuildAccelerationStructuresKHR = (void *)vkGetDeviceProcAddr(device, "vkCmdBuildAccelerationStructuresKHR");
				_vkCmdBuildAccelerationStructuresKHR(command_buffer, 1, &acceleration_build_geometry_info, &acceleration_build_infos[0]);
				vkEndCommandBuffer(command_buffer);
				VkSubmitInfo submit_info = {
					.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
					.commandBufferCount = 1,
					.pCommandBuffers = &command_buffer,
				};
				VkFenceCreateInfo fence_info = {
					.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
				};
				VkFence fence;
				vkCreateFence(device, &fence_info, NULL, &fence);
				vkQueueSubmit(queue, 1, &submit_info, fence);
				vkWaitForFences(device, 1, &fence, VK_TRUE, 100000000000); // timeout in nanoseconds (~100 s)
				vkDestroyFence(device, fence, NULL);
				vkFreeCommandBuffers(device, cmd_pool, 1, &command_buffer);
			}
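			// TLAS instances reference a BLAS by its device address, so query and store it now.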
			VkAccelerationStructureDeviceAddressInfoKHR acceleration_device_address_info = {
				.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR,
				.accelerationStructure = accel->impl.bottom_level_acceleration_structure[i],
			};
			accel->impl.bottom_level_acceleration_structure_handle[i] = _vkGetAccelerationStructureDeviceAddressKHR(device, &acceleration_device_address_info);
			vkFreeMemory(device, scratch_memory, NULL);
			vkDestroyBuffer(device, scratch_buffer, NULL);
			accel->impl.bottom_level_buffer[i] = bottom_level_buffer;
			accel->impl.bottom_level_mem[i] = bottom_level_mem;
		}
	}
	// Top level
	{
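		// The TLAS build input is an array of VkAccelerationStructureInstanceKHR records, one per mesh
		// instance, uploaded through a host-visible buffer that the build reads via its device address.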
		VkBufferCreateInfo buf_info = {
			.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
			.size = instances_count * sizeof(VkAccelerationStructureInstanceKHR),
			.usage = VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR,
			.flags = 0,
		};
		VkMemoryAllocateInfo mem_alloc = {
			.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
		};
		VkBuffer instances_buffer;
		vkCreateBuffer(device, &buf_info, NULL, &instances_buffer);
		VkMemoryRequirements mem_reqs = {0};
		vkGetBufferMemoryRequirements(device, instances_buffer, &mem_reqs);
		mem_alloc.allocationSize = mem_reqs.size;
		mem_alloc.memoryTypeIndex = memory_type_from_properties(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
		VkMemoryAllocateFlagsInfo memory_allocate_flags_info = {
			.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
			.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR,
		};
		mem_alloc.pNext = &memory_allocate_flags_info;
		VkDeviceMemory instances_mem;
		vkAllocateMemory(device, &mem_alloc, NULL, &instances_mem);
		vkBindBufferMemory(device, instances_buffer, instances_mem, 0);
		uint8_t *data;
		vkMapMemory(device, instances_mem, 0, instances_count * sizeof(VkAccelerationStructureInstanceKHR), 0, (void **)&data);
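		// Each instance record holds a row-major 3x4 transform, a 24-bit instanceCustomIndex (used here
		// to pass this instance's offset into the combined index buffer to the shaders), an 8-bit
		// visibility mask and the device address of the BLAS it instantiates.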
		for (int i = 0; i < instances_count; ++i) {
			// VkTransformMatrixKHR is row-major, so transpose the column-major 4x4 source matrix
			VkTransformMatrixKHR transform_matrix = {
				instances[i].m.m[0], instances[i].m.m[4], instances[i].m.m[8], instances[i].m.m[12],
				instances[i].m.m[1], instances[i].m.m[5], instances[i].m.m[9], instances[i].m.m[13],
				instances[i].m.m[2], instances[i].m.m[6], instances[i].m.m[10], instances[i].m.m[14],
			};
			VkAccelerationStructureInstanceKHR instance = {
				.transform = transform_matrix,
			};
			int ib_off = 0;
			for (int j = 0; j < instances[i].i; ++j) {
				ib_off += ib[j]->count * 4;
			}
			instance.instanceCustomIndex = ib_off;
			instance.mask = 0xFF;
			instance.instanceShaderBindingTableRecordOffset = 0;
			instance.flags = VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR;
			instance.accelerationStructureReference = accel->impl.bottom_level_acceleration_structure_handle[instances[i].i];
			memcpy(data + i * sizeof(VkAccelerationStructureInstanceKHR), &instance, sizeof(VkAccelerationStructureInstanceKHR));
		}
		vkUnmapMemory(device, instances_mem);
		VkDeviceOrHostAddressConstKHR instance_data_device_address = {
			.deviceAddress = get_buffer_device_address(instances_buffer),
		};
		VkAccelerationStructureGeometryKHR acceleration_geometry = {
			.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR,
			.flags = VK_GEOMETRY_OPAQUE_BIT_KHR,
			.geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR,
			.geometry.instances.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR,
			.geometry.instances.arrayOfPointers = VK_FALSE,
			.geometry.instances.data.deviceAddress = instance_data_device_address.deviceAddress,
		};
		VkAccelerationStructureBuildGeometryInfoKHR acceleration_structure_build_geometry_info = {
			.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,
			.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,
			.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR,
			.geometryCount = 1,
			.pGeometries = &acceleration_geometry,
		};
		VkAccelerationStructureBuildSizesInfoKHR acceleration_build_sizes_info = {
			.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR,
		};
		uint32_t instance_count = instances_count;
		_vkGetAccelerationStructureBuildSizesKHR(device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &acceleration_structure_build_geometry_info,
		                                         &instance_count, &acceleration_build_sizes_info);
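		// For an instances geometry the "primitive count" passed to the build-sizes query is the
		// number of instances, not the number of triangles.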
		VkBufferCreateInfo buffer_create_info = {
			.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
			.size = acceleration_build_sizes_info.accelerationStructureSize,
			.usage = VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
			.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
		};
		VkBuffer top_level_buffer = VK_NULL_HANDLE;
		vkCreateBuffer(device, &buffer_create_info, NULL, &top_level_buffer);
		VkMemoryRequirements memory_requirements2;
		vkGetBufferMemoryRequirements(device, top_level_buffer, &memory_requirements2);
		memory_allocate_flags_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO;
		memory_allocate_flags_info.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR;
		VkMemoryAllocateInfo memory_allocate_info = {
			.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
			.pNext = &memory_allocate_flags_info,
			.allocationSize = memory_requirements2.size,
		};
		memory_allocate_info.memoryTypeIndex = memory_type_from_properties(memory_requirements2.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
		VkDeviceMemory top_level_mem;
		vkAllocateMemory(device, &memory_allocate_info, NULL, &top_level_mem);
		vkBindBufferMemory(device, top_level_buffer, top_level_mem, 0);
		VkAccelerationStructureCreateInfoKHR acceleration_create_info = {
			.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,
			.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,
			.buffer = top_level_buffer,
			.size = acceleration_build_sizes_info.accelerationStructureSize,
		};
		_vkCreateAccelerationStructureKHR(device, &acceleration_create_info, NULL, &accel->impl.top_level_acceleration_structure);
		VkBuffer scratch_buffer = VK_NULL_HANDLE;
		VkDeviceMemory scratch_memory = VK_NULL_HANDLE;
		buffer_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
		buffer_create_info.size = acceleration_build_sizes_info.buildScratchSize;
		buffer_create_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
		buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
		vkCreateBuffer(device, &buffer_create_info, NULL, &scratch_buffer);
		VkMemoryRequirements memory_requirements;
		vkGetBufferMemoryRequirements(device, scratch_buffer, &memory_requirements);
		memory_allocate_flags_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO;
		memory_allocate_flags_info.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR;
		memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
		memory_allocate_info.pNext = &memory_allocate_flags_info;
		memory_allocate_info.allocationSize = memory_requirements.size;
		memory_allocate_info.memoryTypeIndex = memory_type_from_properties(memory_requirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
		vkAllocateMemory(device, &memory_allocate_info, NULL, &scratch_memory);
		vkBindBufferMemory(device, scratch_buffer, scratch_memory, 0);
		VkBufferDeviceAddressInfoKHR buffer_device_address_info = {
			.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
			.buffer = scratch_buffer,
		};
		uint64_t scratch_buffer_device_address = _vkGetBufferDeviceAddressKHR(device, &buffer_device_address_info);
		VkAccelerationStructureBuildGeometryInfoKHR acceleration_build_geometry_info = {
			.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,
			.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,
			.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR,
			.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,
			.srcAccelerationStructure = VK_NULL_HANDLE,
			.dstAccelerationStructure = accel->impl.top_level_acceleration_structure,
			.geometryCount = 1,
			.pGeometries = &acceleration_geometry,
			.scratchData.deviceAddress = scratch_buffer_device_address,
		};
		VkAccelerationStructureBuildRangeInfoKHR acceleration_build_range_info = {
			.primitiveCount = instances_count,
		};
		const VkAccelerationStructureBuildRangeInfoKHR *acceleration_build_infos[1] = {&acceleration_build_range_info};
		{
			VkCommandBufferAllocateInfo cmd_buf_allocate_info = {
				.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
				.commandPool = cmd_pool,
				.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
				.commandBufferCount = 1,
			};
			VkCommandBuffer command_buffer;
			vkAllocateCommandBuffers(device, &cmd_buf_allocate_info, &command_buffer);
			VkCommandBufferBeginInfo command_buffer_info = {
				.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
			};
			vkBeginCommandBuffer(command_buffer, &command_buffer_info);
			_vkCmdBuildAccelerationStructuresKHR = (void *)vkGetDeviceProcAddr(device, "vkCmdBuildAccelerationStructuresKHR");
			_vkCmdBuildAccelerationStructuresKHR(command_buffer, 1, &acceleration_build_geometry_info, &acceleration_build_infos[0]);
			vkEndCommandBuffer(command_buffer);
			VkSubmitInfo submit_info = {
				.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
				.commandBufferCount = 1,
				.pCommandBuffers = &command_buffer,
			};
			VkFenceCreateInfo fence_info = {
				.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
			};
			VkFence fence;
			vkCreateFence(device, &fence_info, NULL, &fence);
			vkQueueSubmit(queue, 1, &submit_info, fence);
			vkWaitForFences(device, 1, &fence, VK_TRUE, 100000000000); // timeout in nanoseconds (~100 s)
			vkDestroyFence(device, fence, NULL);
			vkFreeCommandBuffers(device, cmd_pool, 1, &command_buffer);
		}
		VkAccelerationStructureDeviceAddressInfoKHR acceleration_device_address_info = {
			.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR,
			.accelerationStructure = accel->impl.top_level_acceleration_structure,
		};
		accel->impl.top_level_acceleration_structure_handle = _vkGetAccelerationStructureDeviceAddressKHR(device, &acceleration_device_address_info);
		vkFreeMemory(device, scratch_memory, NULL);
		vkDestroyBuffer(device, scratch_buffer, NULL);
		accel->impl.top_level_buffer = top_level_buffer;
		accel->impl.top_level_mem = top_level_mem;
		accel->impl.instances_buffer = instances_buffer;
		accel->impl.instances_mem = instances_mem;
	}
	{
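		// The combined vertex buffer is currently aliased to an existing GPU buffer; the commented
		// code below is an older path that rebuilt it on the host and is kept for reference.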
		// if (vb_full != NULL) {
		// vkFreeMemory(device, vb_full_mem, NULL);
		// vkDestroyBuffer(device, vb_full, NULL);
		// }
		// VkBufferCreateInfo buf_info = {
		// .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
		// .pNext = NULL,
		// .size = vert_count * vb[0]->stride,
		// .usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
		//          VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR,
		// .flags = 0,
		// };
		// VkMemoryAllocateInfo mem_alloc = {
		// .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
		// .pNext = NULL,
		// .allocationSize = 0,
		// .memoryTypeIndex = 0,
		// };
		// vkCreateBuffer(device, &buf_info, NULL, &vb_full);
		// VkMemoryRequirements mem_reqs = {0};
		// vkGetBufferMemoryRequirements(device, vb_full, &mem_reqs);
		// mem_alloc.allocationSize = mem_reqs.size;
		// mem_alloc.memoryTypeIndex = memory_type_from_properties(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
		// VkMemoryAllocateFlagsInfo memory_allocate_flags_info = {
		// .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
		// .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR,
		// };
		// mem_alloc.pNext = &memory_allocate_flags_info;
		// vkAllocateMemory(device, &mem_alloc, NULL, &vb_full_mem);
		// vkBindBufferMemory(device, vb_full, vb_full_mem, 0);
		// float *data;
		// vkMapMemory(device, vb_full_mem, 0, vert_count * vb[0]->stride, 0, (void **)&data);
		// vkUnmapMemory(device, vb_full_mem);
		////
#ifdef is_forge
		vb_full = _vb_full->impl.buf;
		vb_full_mem = _vb_full->impl.mem;
#else
		vb_full = vb[0]->impl.buf;
		vb_full_mem = vb[0]->impl.mem;
#endif
	}
	{
		// The combined index buffer is aliased the same way; the commented host-side rebuild is kept for reference.
		// if (ib_full != NULL) {
		// vkFreeMemory(device, ib_full_mem, NULL);
		// vkDestroyBuffer(device, ib_full, NULL);
		// }
		// VkBufferCreateInfo buf_info = {
		// .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
		// .pNext = NULL,
		// .size = prim_count * 3 * sizeof(uint32_t),
		// .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
		//          VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR,
		// .flags = 0,
		// };
		// VkMemoryAllocateInfo mem_alloc = {
		// .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
		// .pNext = NULL,
		// .allocationSize = 0,
		// .memoryTypeIndex = 0,
		// };
		// vkCreateBuffer(device, &buf_info, NULL, &ib_full);
		// VkMemoryRequirements mem_reqs = {0};
		// vkGetBufferMemoryRequirements(device, ib_full, &mem_reqs);
		// mem_alloc.allocationSize = mem_reqs.size;
		// mem_alloc.memoryTypeIndex = memory_type_from_properties(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
		// VkMemoryAllocateFlagsInfo memory_allocate_flags_info = {
		// .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
		// .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR,
		// };
		// mem_alloc.pNext = &memory_allocate_flags_info;
		// vkAllocateMemory(device, &mem_alloc, NULL, &ib_full_mem);
		// vkBindBufferMemory(device, ib_full, ib_full_mem, 0);
		// uint8_t *data;
		// vkMapMemory(device, ib_full_mem, 0, mem_alloc.allocationSize, 0, (void **)&data);
		// for (int i = 0; i < instances_count; ++i) {
		// memcpy(data, ib[i]->impl., sizeof(VkAccelerationStructureInstanceKHR));
		// }
		// vkUnmapMemory(device, ib_full_mem);
		////
#ifdef is_forge
		ib_full = _ib_full->impl.buf;
		ib_full_mem = _ib_full->impl.mem;
#else
		ib_full = ib[0]->impl.buf;
		ib_full_mem = ib[0]->impl.mem;
#endif
	}
}

void gpu_raytrace_acceleration_structure_destroy(gpu_raytrace_acceleration_structure_t *accel) {
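	// Destruction is currently a no-op; the commented code below documents the intended cleanup
	// order (per-mesh BLAS handles and buffers first, then the TLAS and the instance buffer).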
	// _vkDestroyAccelerationStructureKHR = (void *)vkGetDeviceProcAddr(device, "vkDestroyAccelerationStructureKHR");
	// for (int i = 0; i < vb_count; ++i) {
	// _vkDestroyAccelerationStructureKHR(device, accel->impl.bottom_level_acceleration_structure[i], NULL);
	// vkFreeMemory(device, accel->impl.bottom_level_mem[i], NULL);
	// vkDestroyBuffer(device, accel->impl.bottom_level_buffer[i], NULL);
	// }
	// _vkDestroyAccelerationStructureKHR(device, accel->impl.top_level_acceleration_structure, NULL);
	// vkFreeMemory(device, accel->impl.top_level_mem, NULL);
	// vkDestroyBuffer(device, accel->impl.top_level_buffer, NULL);
	// vkFreeMemory(device, accel->impl.instances_mem, NULL);
	// vkDestroyBuffer(device, accel->impl.instances_buffer, NULL);
}

void gpu_raytrace_set_textures(gpu_texture_t *_texpaint0, gpu_texture_t *_texpaint1, gpu_texture_t *_texpaint2, gpu_texture_t *_texenv,
                               gpu_texture_t *_texsobol, gpu_texture_t *_texscramble, gpu_texture_t *_texrank) {
	texpaint0 = _texpaint0;
	texpaint1 = _texpaint1;
	texpaint2 = _texpaint2;
	texenv = _texenv;
	texsobol = _texsobol;
	texscramble = _texscramble;
	texrank = _texrank;
}

void gpu_raytrace_set_acceleration_structure(gpu_raytrace_acceleration_structure_t *_accel) {
	accel = _accel;
}

void gpu_raytrace_set_pipeline(gpu_raytrace_pipeline_t *_pipeline) {
	pipeline = _pipeline;
}

void gpu_raytrace_set_target(gpu_texture_t *_output) {
	if (!_output->impl.has_storage_bit) {
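		// Ray generation shaders write the result through a storage image, so the target needs
		// VK_IMAGE_USAGE_STORAGE_BIT; if it was created without it, recreate the image with that usage.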
		_output->impl.has_storage_bit = true;
		gpu_texture_destroy(_output);
		VkImageCreateInfo image_info = {
			.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
			.imageType = VK_IMAGE_TYPE_2D,
			.format = convert_image_format(_output->format),
			.extent.width = _output->width,
			.extent.height = _output->height,
			.extent.depth = 1,
			.mipLevels = 1,
			.arrayLayers = 1,
			.samples = VK_SAMPLE_COUNT_1_BIT,
			.tiling = VK_IMAGE_TILING_OPTIMAL,
			.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT,
		};
		vkCreateImage(device, &image_info, NULL, &_output->impl.image);
		VkMemoryRequirements memory_reqs;
		vkGetImageMemoryRequirements(device, _output->impl.image, &memory_reqs);
		VkMemoryAllocateInfo allocation_info = {
			.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
			.allocationSize = memory_reqs.size,
		};
		allocation_info.memoryTypeIndex = memory_type_from_properties(memory_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
		vkAllocateMemory(device, &allocation_info, NULL, &_output->impl.mem);
		vkBindImageMemory(device, _output->impl.image, _output->impl.mem, 0);
		VkImageViewCreateInfo image_view_info = {
			.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
			.viewType = VK_IMAGE_VIEW_TYPE_2D,
			.format = convert_image_format(_output->format),
			.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
			.subresourceRange.baseMipLevel = 0,
			.subresourceRange.levelCount = 1,
			.subresourceRange.baseArrayLayer = 0,
			.subresourceRange.layerCount = 1,
			.image = _output->impl.image,
		};
		vkCreateImageView(device, &image_view_info, NULL, &_output->impl.view);
		set_image_layout(_output->impl.image, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
	}
	output = _output;
}

void gpu_raytrace_dispatch_rays() {
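	// Refresh every binding of the pipeline's descriptor set; the binding numbers must match the
	// descriptor set layout: 0 = TLAS, 1 = index buffer, 2 = vertex buffer, 3-9 = sampled textures,
	// 10 = output storage image, 11 = constant buffer.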
	VkWriteDescriptorSetAccelerationStructureKHR descriptor_acceleration_structure_info = {
		.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,
		.accelerationStructureCount = 1,
		.pAccelerationStructures = &accel->impl.top_level_acceleration_structure,
	};
	VkWriteDescriptorSet acceleration_structure_write = {
		.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
		.pNext = &descriptor_acceleration_structure_info,
		.dstSet = pipeline->impl.descriptor_set,
		.dstBinding = 0,
		.descriptorCount = 1,
		.descriptorType = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR,
	};
	VkDescriptorImageInfo image_descriptor = {
		.imageView = output->impl.view,
		.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
	};
	VkDescriptorBufferInfo buffer_descriptor = {
		.buffer = pipeline->constant_buffer->impl.buf,
		.range = VK_WHOLE_SIZE,
	};
	VkWriteDescriptorSet result_image_write = {
		.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
		.dstSet = pipeline->impl.descriptor_set,
		.dstBinding = 10,
		.descriptorCount = 1,
		.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
		.pImageInfo = &image_descriptor,
	};
	VkWriteDescriptorSet uniform_buffer_write = {
		.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
		.dstSet = pipeline->impl.descriptor_set,
		.dstBinding = 11,
		.descriptorCount = 1,
		.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
		.pBufferInfo = &buffer_descriptor,
	};
	VkDescriptorBufferInfo ib_descriptor = {
		.buffer = ib_full,
		.range = VK_WHOLE_SIZE,
	};
	VkWriteDescriptorSet ib_write = {
		.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
		.dstSet = pipeline->impl.descriptor_set,
		.dstBinding = 1,
		.descriptorCount = 1,
		.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
		.pBufferInfo = &ib_descriptor,
	};
	VkDescriptorBufferInfo vb_descriptor = {
		.buffer = vb_full,
		.range = VK_WHOLE_SIZE,
	};
	VkWriteDescriptorSet vb_write = {
		.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
		.dstSet = pipeline->impl.descriptor_set,
		.dstBinding = 2,
		.descriptorCount = 1,
		.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
		.pBufferInfo = &vb_descriptor,
	};
	VkDescriptorImageInfo tex0image_descriptor = {
		.imageView = texpaint0->impl.view,
		.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
	};
	VkWriteDescriptorSet tex0_image_write = {
		.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
		.dstSet = pipeline->impl.descriptor_set,
		.dstBinding = 3,
		.descriptorCount = 1,
		.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
		.pImageInfo = &tex0image_descriptor,
	};
	VkDescriptorImageInfo tex1image_descriptor = {
		.imageView = texpaint1->impl.view,
		.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
	};
	VkWriteDescriptorSet tex1_image_write = {
		.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
		.dstSet = pipeline->impl.descriptor_set,
		.dstBinding = 4,
		.descriptorCount = 1,
		.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
		.pImageInfo = &tex1image_descriptor,
	};
	VkDescriptorImageInfo tex2image_descriptor = {
		.imageView = texpaint2->impl.view,
		.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
	};
	VkWriteDescriptorSet tex2_image_write = {
		.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
		.dstSet = pipeline->impl.descriptor_set,
		.dstBinding = 5,
		.descriptorCount = 1,
		.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
		.pImageInfo = &tex2image_descriptor,
	};
	VkDescriptorImageInfo texenvimage_descriptor = {
		.imageView = texenv->impl.view,
		.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
	};
	VkWriteDescriptorSet texenv_image_write = {
		.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
		.dstSet = pipeline->impl.descriptor_set,
		.dstBinding = 6,
		.descriptorCount = 1,
		.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
		.pImageInfo = &texenvimage_descriptor,
	};
	VkDescriptorImageInfo texsobolimage_descriptor = {
		.imageView = texsobol->impl.view,
		.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
	};
	VkWriteDescriptorSet texsobol_image_write = {
		.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
		.dstSet = pipeline->impl.descriptor_set,
		.dstBinding = 7,
		.descriptorCount = 1,
		.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
		.pImageInfo = &texsobolimage_descriptor,
	};
	VkDescriptorImageInfo texscrambleimage_descriptor = {
		.imageView = texscramble->impl.view,
		.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
	};
	VkWriteDescriptorSet texscramble_image_write = {
		.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
		.dstSet = pipeline->impl.descriptor_set,
		.dstBinding = 8,
		.descriptorCount = 1,
		.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
		.pImageInfo = &texscrambleimage_descriptor,
	};
	VkDescriptorImageInfo texrankimage_descriptor = {
		.imageView = texrank->impl.view,
		.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
	};
	VkWriteDescriptorSet texrank_image_write = {
		.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
		.dstSet = pipeline->impl.descriptor_set,
		.dstBinding = 9,
		.descriptorCount = 1,
		.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
		.pImageInfo = &texrankimage_descriptor,
	};
	VkWriteDescriptorSet write_descriptor_sets[12] = {
		acceleration_structure_write,
		result_image_write,
		uniform_buffer_write,
		vb_write,
		ib_write,
		tex0_image_write,
		tex1_image_write,
		tex2_image_write,
		texenv_image_write,
		texsobol_image_write,
		texscramble_image_write,
		texrank_image_write,
	};
	vkUpdateDescriptorSets(device, 12, write_descriptor_sets, 0, NULL);
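	// Storage image writes require VK_IMAGE_LAYOUT_GENERAL, so move the target out of its sampling
	// layout for the duration of the trace and restore it at the end of this function.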
	set_image_layout(output->impl.image, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL);
	VkPhysicalDeviceRayTracingPipelinePropertiesKHR ray_tracing_pipeline_properties = {
		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_PROPERTIES_KHR,
		.pNext = NULL,
	};
	VkPhysicalDeviceProperties2 device_properties = {
		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
		.pNext = &ray_tracing_pipeline_properties,
	};
	vkGetPhysicalDeviceProperties2(gpu, &device_properties);
	// Set up the strided buffer regions pointing to the shaders in the shader binding table
	const uint32_t handle_size_aligned =
		(ray_tracing_pipeline_properties.shaderGroupHandleSize + ray_tracing_pipeline_properties.shaderGroupHandleAlignment - 1) &
		~(ray_tracing_pipeline_properties.shaderGroupHandleAlignment - 1);
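	// Round the handle size up to the required alignment: e.g. a 32-byte handle with a 64-byte
	// alignment yields (32 + 63) & ~63 = 64. Each region holds one group handle, so stride == size.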
	VkStridedDeviceAddressRegionKHR raygen_shader_sbt_entry = {
		.deviceAddress = get_buffer_device_address(pipeline->impl.raygen_shader_binding_table),
		.stride = handle_size_aligned,
		.size = handle_size_aligned,
	};
	VkStridedDeviceAddressRegionKHR miss_shader_sbt_entry = {
		.deviceAddress = get_buffer_device_address(pipeline->impl.miss_shader_binding_table),
		.stride = handle_size_aligned,
		.size = handle_size_aligned,
	};
	VkStridedDeviceAddressRegionKHR hit_shader_sbt_entry = {
		.deviceAddress = get_buffer_device_address(pipeline->impl.hit_shader_binding_table),
		.stride = handle_size_aligned,
		.size = handle_size_aligned,
	};
	VkStridedDeviceAddressRegionKHR callable_shader_sbt_entry = {0}; // no callable shaders are used
	// Dispatch the ray tracing commands
	vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, pipeline->impl.pipeline);
	vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, pipeline->impl.pipeline_layout, 0, 1,
	                        &pipeline->impl.descriptor_set, 0, 0);
	_vkCmdTraceRaysKHR = (void *)vkGetDeviceProcAddr(device, "vkCmdTraceRaysKHR");
	_vkCmdTraceRaysKHR(command_buffer, &raygen_shader_sbt_entry, &miss_shader_sbt_entry, &hit_shader_sbt_entry, &callable_shader_sbt_entry,
	                   output->width, output->height, 1);
	set_image_layout(output->impl.image, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
}