// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics) // SPDX-FileCopyrightText: 2025 Jorrit Rouwe // SPDX-License-Identifier: MIT #include #ifdef JPH_USE_VK #include #include #include JPH_NAMESPACE_BEGIN ComputeQueueVK::~ComputeQueueVK() { Wait(); VkDevice device = mComputeSystem->GetDevice(); if (mCommandBuffer != VK_NULL_HANDLE) vkFreeCommandBuffers(device, mCommandPool, 1, &mCommandBuffer); if (mCommandPool != VK_NULL_HANDLE) vkDestroyCommandPool(device, mCommandPool, nullptr); if (mDescriptorPool != VK_NULL_HANDLE) vkDestroyDescriptorPool(device, mDescriptorPool, nullptr); if (mFence != VK_NULL_HANDLE) vkDestroyFence(device, mFence, nullptr); } bool ComputeQueueVK::Initialize(uint32 inComputeQueueIndex, ComputeQueueResult &outResult) { // Get the queue VkDevice device = mComputeSystem->GetDevice(); vkGetDeviceQueue(device, inComputeQueueIndex, 0, &mQueue); // Create a command pool VkCommandPoolCreateInfo pool_info = {}; pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; pool_info.queueFamilyIndex = inComputeQueueIndex; if (VKFailed(vkCreateCommandPool(device, &pool_info, nullptr, &mCommandPool), outResult)) return false; // Create descriptor pool VkDescriptorPoolSize descriptor_pool_sizes[] = { { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1024 }, { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 16 * 1024 }, }; VkDescriptorPoolCreateInfo descriptor_info = {}; descriptor_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; descriptor_info.poolSizeCount = (uint32)std::size(descriptor_pool_sizes); descriptor_info.pPoolSizes = descriptor_pool_sizes; descriptor_info.maxSets = 256; if (VKFailed(vkCreateDescriptorPool(device, &descriptor_info, nullptr, &mDescriptorPool), outResult)) return false; // Create a command buffer VkCommandBufferAllocateInfo alloc_info = {}; alloc_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; alloc_info.commandPool = mCommandPool; alloc_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; alloc_info.commandBufferCount = 1; if (VKFailed(vkAllocateCommandBuffers(device, &alloc_info, &mCommandBuffer), outResult)) return false; // Create a fence VkFenceCreateInfo fence_info = {}; fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; if (VKFailed(vkCreateFence(device, &fence_info, nullptr, &mFence), outResult)) return false; return true; } bool ComputeQueueVK::BeginCommandBuffer() { if (!mCommandBufferRecording) { VkCommandBufferBeginInfo begin_info = {}; begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; if (VKFailed(vkBeginCommandBuffer(mCommandBuffer, &begin_info))) return false; mCommandBufferRecording = true; } return true; } void ComputeQueueVK::SetShader(const ComputeShader *inShader) { mShader = static_cast(inShader); mBufferInfos = mShader->GetBufferInfos(); } void ComputeQueueVK::SetConstantBuffer(const char *inName, const ComputeBuffer *inBuffer) { if (inBuffer == nullptr) return; JPH_ASSERT(inBuffer->GetType() == ComputeBuffer::EType::ConstantBuffer); if (!BeginCommandBuffer()) return; const ComputeBufferVK *buffer = static_cast(inBuffer); buffer->Barrier(mCommandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_UNIFORM_READ_BIT, false); uint index = mShader->NameToBufferInfoIndex(inName); JPH_ASSERT(mShader->GetLayoutBindings()[index].descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER); mBufferInfos[index].buffer = buffer->GetBufferCPU(); mUsedBuffers.insert(buffer); } void ComputeQueueVK::SyncCPUToGPU(const ComputeBufferVK *inBuffer) { // Ensure that any CPU writes are visible to the GPU if (inBuffer->SyncCPUToGPU(mCommandBuffer) && (inBuffer->GetType() == ComputeBuffer::EType::Buffer || inBuffer->GetType() == ComputeBuffer::EType::RWBuffer)) { // After the first upload, the CPU buffer is no longer needed for Buffer and RWBuffer types mDelayedFreedBuffers.push_back(inBuffer->ReleaseBufferCPU()); } } void ComputeQueueVK::SetBuffer(const char *inName, const ComputeBuffer *inBuffer) { if (inBuffer == nullptr) return; JPH_ASSERT(inBuffer->GetType() == ComputeBuffer::EType::UploadBuffer || inBuffer->GetType() == ComputeBuffer::EType::Buffer || inBuffer->GetType() == ComputeBuffer::EType::RWBuffer); if (!BeginCommandBuffer()) return; const ComputeBufferVK *buffer = static_cast(inBuffer); SyncCPUToGPU(buffer); buffer->Barrier(mCommandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_SHADER_READ_BIT, false); uint index = mShader->NameToBufferInfoIndex(inName); JPH_ASSERT(mShader->GetLayoutBindings()[index].descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); mBufferInfos[index].buffer = buffer->GetBufferGPU(); mUsedBuffers.insert(buffer); } void ComputeQueueVK::SetRWBuffer(const char *inName, ComputeBuffer *inBuffer, EBarrier inBarrier) { if (inBuffer == nullptr) return; JPH_ASSERT(inBuffer->GetType() == ComputeBuffer::EType::RWBuffer); if (!BeginCommandBuffer()) return; const ComputeBufferVK *buffer = static_cast(inBuffer); SyncCPUToGPU(buffer); buffer->Barrier(mCommandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VkAccessFlagBits(VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT), inBarrier == EBarrier::Yes); uint index = mShader->NameToBufferInfoIndex(inName); JPH_ASSERT(mShader->GetLayoutBindings()[index].descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); mBufferInfos[index].buffer = buffer->GetBufferGPU(); mUsedBuffers.insert(buffer); } void ComputeQueueVK::ScheduleReadback(ComputeBuffer *inDst, const ComputeBuffer *inSrc) { if (inDst == nullptr || inSrc == nullptr) return; JPH_ASSERT(inDst->GetType() == ComputeBuffer::EType::ReadbackBuffer); if (!BeginCommandBuffer()) return; const ComputeBufferVK *src_vk = static_cast(inSrc); const ComputeBufferVK *dst_vk = static_cast(inDst); // Barrier to start reading from GPU buffer and writing to CPU buffer src_vk->Barrier(mCommandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, false); dst_vk->Barrier(mCommandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, false); // Copy VkBufferCopy copy = {}; copy.srcOffset = 0; copy.dstOffset = 0; copy.size = src_vk->GetSize() * src_vk->GetStride(); vkCmdCopyBuffer(mCommandBuffer, src_vk->GetBufferGPU(), dst_vk->GetBufferCPU(), 1, ©); // Barrier to indicate that CPU can read from the buffer dst_vk->Barrier(mCommandBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_ACCESS_HOST_READ_BIT, false); mUsedBuffers.insert(src_vk); mUsedBuffers.insert(dst_vk); } void ComputeQueueVK::Dispatch(uint inThreadGroupsX, uint inThreadGroupsY, uint inThreadGroupsZ) { if (!BeginCommandBuffer()) return; vkCmdBindPipeline(mCommandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, mShader->GetPipeline()); VkDevice device = mComputeSystem->GetDevice(); const Array &ds_bindings = mShader->GetLayoutBindings(); if (!ds_bindings.empty()) { // Create a descriptor set VkDescriptorSetAllocateInfo alloc_info = {}; alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; alloc_info.descriptorPool = mDescriptorPool; alloc_info.descriptorSetCount = 1; VkDescriptorSetLayout ds_layout = mShader->GetDescriptorSetLayout(); alloc_info.pSetLayouts = &ds_layout; VkDescriptorSet descriptor_set; if (VKFailed(vkAllocateDescriptorSets(device, &alloc_info, &descriptor_set))) return; // Write the values to the descriptor set Array writes; writes.reserve(ds_bindings.size()); for (uint32 i = 0; i < (uint32)ds_bindings.size(); ++i) { VkWriteDescriptorSet w = {}; w.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; w.dstSet = descriptor_set; w.dstBinding = ds_bindings[i].binding; w.dstArrayElement = 0; w.descriptorCount = ds_bindings[i].descriptorCount; w.descriptorType = ds_bindings[i].descriptorType; w.pBufferInfo = &mBufferInfos[i]; writes.push_back(w); } vkUpdateDescriptorSets(device, (uint32)writes.size(), writes.data(), 0, nullptr); // Bind the descriptor set vkCmdBindDescriptorSets(mCommandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, mShader->GetPipelineLayout(), 0, 1, &descriptor_set, 0, nullptr); } vkCmdDispatch(mCommandBuffer, inThreadGroupsX, inThreadGroupsY, inThreadGroupsZ); } void ComputeQueueVK::Execute() { // End command buffer if (!mCommandBufferRecording) return; if (VKFailed(vkEndCommandBuffer(mCommandBuffer))) return; mCommandBufferRecording = false; // Reset fence VkDevice device = mComputeSystem->GetDevice(); if (VKFailed(vkResetFences(device, 1, &mFence))) return; // Submit VkSubmitInfo submit = {}; submit.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; submit.commandBufferCount = 1; submit.pCommandBuffers = &mCommandBuffer; if (VKFailed(vkQueueSubmit(mQueue, 1, &submit, mFence))) return; // Clear the current shader mShader = nullptr; // Mark that we're executing mIsExecuting = true; } void ComputeQueueVK::Wait() { if (!mIsExecuting) return; // Wait for the work to complete VkDevice device = mComputeSystem->GetDevice(); if (VKFailed(vkWaitForFences(device, 1, &mFence, VK_TRUE, UINT64_MAX))) return; // Reset command buffer so it can be reused if (mCommandBuffer != VK_NULL_HANDLE) vkResetCommandBuffer(mCommandBuffer, 0); // Allow reusing the descriptors for next run vkResetDescriptorPool(device, mDescriptorPool, 0); // Buffers can be freed now mUsedBuffers.clear(); // Free delayed buffers for (BufferVK &buffer : mDelayedFreedBuffers) mComputeSystem->FreeBuffer(buffer); mDelayedFreedBuffers.clear(); mIsExecuting = false; } JPH_NAMESPACE_END #endif // JPH_USE_VK