GpuMemoryPools.cpp

// Copyright (C) 2009-2023, Panagiotis Christopoulos Charitos and contributors.
// All rights reserved.
// Code licensed under the BSD License.
// http://www.anki3d.org/LICENSE

#include <AnKi/Core/GpuMemoryPools.h>
#include <AnKi/Core/ConfigSet.h>
#include <AnKi/Gr/GrManager.h>
#include <AnKi/Gr/CommandBuffer.h>
#include <AnKi/Util/Tracer.h>
#include <AnKi/Resource/ResourceManager.h>

namespace anki {

void UnifiedGeometryMemoryPool::init()
{
	const PtrSize poolSize = ConfigSet::getSingleton().getCoreGlobalVertexMemorySize();
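
	// Size classes for the segregated-lists sub-allocator; the last class covers the whole pool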
	const Array classes = {1_KB, 8_KB, 32_KB, 128_KB, 512_KB, 4_MB, 8_MB, 16_MB, poolSize};

	BufferUsageBit buffUsage = BufferUsageBit::kVertex | BufferUsageBit::kIndex | BufferUsageBit::kTransferDestination
							   | (BufferUsageBit::kAllTexture & BufferUsageBit::kAllRead);

	if(GrManager::getSingleton().getDeviceCapabilities().m_rayTracingEnabled)
	{
		buffUsage |= BufferUsageBit::kAccelerationStructureBuild;
	}

	m_pool.init(buffUsage, classes, poolSize, "UnifiedGeometry", false);

	// Make a dummy allocation to force the creation of the GPU buffer
	SegregatedListsGpuMemoryPoolToken token;
	allocate(16, 4, token);
	deferredFree(token);
}

void GpuSceneMemoryPool::init()
{
	const PtrSize poolSize = ConfigSet::getSingleton().getCoreGpuSceneInitialSize();
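
	// The size classes are much smaller than the geometry pool's since GPU scene allocations are expected to be tiny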
	const Array classes = {32_B, 64_B, 128_B, 256_B, poolSize};

	BufferUsageBit buffUsage = BufferUsageBit::kAllStorage | BufferUsageBit::kTransferDestination;

	m_pool.init(buffUsage, classes, poolSize, "GpuScene", true);

	// Make a dummy allocation to force the creation of the GPU buffer
	SegregatedListsGpuMemoryPoolToken token;
	allocate(16, 4, token);
	deferredFree(token);
}

RebarStagingGpuMemoryPool::~RebarStagingGpuMemoryPool()
{
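	// Wait for the GPU to go idle so the buffer is guaranteed to be unused before unmapping and destroying it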
	GrManager::getSingleton().finish();

	m_buffer->unmap();
	m_buffer.reset(nullptr);
}

void RebarStagingGpuMemoryPool::init()
{
	BufferInitInfo buffInit("ReBar");
	buffInit.m_mapAccess = BufferMapAccessBit::kWrite;
	buffInit.m_size = ConfigSet::getSingleton().getCoreRebarGpuMemorySize();
	buffInit.m_usage = BufferUsageBit::kAllUniform | BufferUsageBit::kAllStorage | BufferUsageBit::kVertex
					   | BufferUsageBit::kIndex | BufferUsageBit::kShaderBindingTable;
	m_buffer = GrManager::getSingleton().newBuffer(buffInit);

	m_bufferSize = buffInit.m_size;
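
	// Sub-allocations may be bound as uniform buffers, storage buffers or SBT records, so use the strictest of the
	// three alignments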
	m_alignment = GrManager::getSingleton().getDeviceCapabilities().m_uniformBufferBindOffsetAlignment;
	m_alignment =
		max(m_alignment, GrManager::getSingleton().getDeviceCapabilities().m_storageBufferBindOffsetAlignment);
	m_alignment = max(m_alignment, GrManager::getSingleton().getDeviceCapabilities().m_sbtRecordAlignment);
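
	// Persistently map the whole buffer; it stays mapped until the destructor runs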
	m_mappedMem = static_cast<U8*>(m_buffer->map(0, kMaxPtrSize, BufferMapAccessBit::kWrite));
}

void* RebarStagingGpuMemoryPool::allocateFrame(PtrSize size, RebarGpuMemoryToken& token)
{
	void* address = tryAllocateFrame(size, token);
	if(address == nullptr) [[unlikely]]
	{
		ANKI_CORE_LOGF("Out of ReBAR GPU memory");
	}

	return address;
}

void* RebarStagingGpuMemoryPool::tryAllocateFrame(PtrSize origSize, RebarGpuMemoryToken& token)
{
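	// Pad the requested size so that the next allocation's offset stays aligned; the returned token still covers only
	// origSize bytes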
	const PtrSize size = getAlignedRoundUp(m_alignment, origSize);

	// Try in a loop because we may end up with an allocation whose range crosses the buffer's end
	PtrSize offset;
	Bool done = false;
	do
	{
		offset = m_offset.fetchAdd(size) % m_bufferSize;
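		// Computing the end modulo (m_bufferSize + 1) means an allocation that ends exactly at the buffer's end still
		// satisfies offset < end, while one that wraps past the end yields end <= offset and is retried with a fresh
		// offset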
		const PtrSize end = (offset + origSize) % (m_bufferSize + 1);

		done = offset < end;
	} while(!done);

	void* address = m_mappedMem + offset;
	token.m_offset = offset;
	token.m_range = origSize;

	return address;
}

PtrSize RebarStagingGpuMemoryPool::endFrame()
{
	const PtrSize crntOffset = m_offset.getNonAtomically();

	const PtrSize usedMemory = crntOffset - m_previousFrameEndOffset;
	m_previousFrameEndOffset = crntOffset;
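
	// Each in-flight frame can safely use about 1/kMaxFramesInFlight of the ring buffer before it risks overwriting
	// allocations the GPU may still be reading; warn when a frame approaches that budget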
	if(usedMemory >= PtrSize(0.8 * F64(m_bufferSize / kMaxFramesInFlight)))
	{
		ANKI_CORE_LOGW("Frame used more than 80%% of its safe limit of ReBAR memory");
	}

	ANKI_TRACE_INC_COUNTER(ReBarUsedMemory, usedMemory);

	return usedMemory;
}

/// It packs the source and destination offsets as well as the size of the patch itself.
class GpuSceneMicroPatcher::PatchHeader
{
public:
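	/// Top 6 bits: the patch size in dwords minus one. Low 26 bits: the source offset in dwords. See newCopy().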
	U32 m_dwordCountAndSrcDwordOffsetPack;
	U32 m_dstDwordOffset;
};

GpuSceneMicroPatcher::GpuSceneMicroPatcher()
{
}

GpuSceneMicroPatcher::~GpuSceneMicroPatcher()
{
	static_assert(sizeof(PatchHeader) == 8);
}

Error GpuSceneMicroPatcher::init()
{
	ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/GpuSceneMicroPatching.ankiprogbin",
															m_copyProgram));

	const ShaderProgramResourceVariant* variant;
	m_copyProgram->getOrCreateVariant(variant);
	m_grProgram = variant->getProgram();

	return Error::kNone;
}

void GpuSceneMicroPatcher::newCopy(StackMemoryPool& frameCpuPool, PtrSize gpuSceneDestOffset, PtrSize dataSize,
								   const void* data)
{
	ANKI_ASSERT(dataSize > 0 && (dataSize % 4) == 0);
	ANKI_ASSERT((ptrToNumber(data) % 4) == 0);
	ANKI_ASSERT((gpuSceneDestOffset % 4) == 0 && gpuSceneDestOffset / 4 < kMaxU32);

	const U32 dataDwords = U32(dataSize / 4);
	U32 gpuSceneDestDwordOffset = U32(gpuSceneDestOffset / 4);
	const U32* patchIt = static_cast<const U32*>(data);
	const U32* const patchEnd = patchIt + dataDwords;

	// Break the data into multiple patches of at most kDwordsPerPatch dwords each
	LockGuard lock(m_mtx);
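	// First copy of this frame: (re)create the arrays using the per-frame stack pool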
	if(m_crntFramePatchHeaders.getSize() == 0)
	{
		m_crntFramePatchHeaders = DynamicArray<PatchHeader, MemoryPoolPtrWrapper<StackMemoryPool>>(&frameCpuPool);
		m_crntFramePatchData = DynamicArray<U32, MemoryPoolPtrWrapper<StackMemoryPool>>(&frameCpuPool);
	}

	while(patchIt < patchEnd)
	{
		const U32 patchDwords = min(kDwordsPerPatch, U32(patchEnd - patchIt));

		PatchHeader& header = *m_crntFramePatchHeaders.emplaceBack();
		ANKI_ASSERT(((patchDwords - 1) & 0b111111) == (patchDwords - 1));
		header.m_dwordCountAndSrcDwordOffsetPack = patchDwords - 1;
		header.m_dwordCountAndSrcDwordOffsetPack <<= 26;
		ANKI_ASSERT((m_crntFramePatchData.getSize() & 0x3FFFFFF) == m_crntFramePatchData.getSize());
		header.m_dwordCountAndSrcDwordOffsetPack |= m_crntFramePatchData.getSize();
		header.m_dstDwordOffset = gpuSceneDestDwordOffset;

		const U32 srcOffset = m_crntFramePatchData.getSize();
		m_crntFramePatchData.resize(srcOffset + patchDwords);
		memcpy(&m_crntFramePatchData[srcOffset], patchIt, patchDwords * 4);

		patchIt += patchDwords;
		gpuSceneDestDwordOffset += patchDwords;
	}
}

void GpuSceneMicroPatcher::patchGpuScene(CommandBuffer& cmdb)
{
	if(m_crntFramePatchHeaders.getSize() == 0)
	{
		return;
	}

	ANKI_ASSERT(m_crntFramePatchData.getSize() > 0);
	ANKI_TRACE_INC_COUNTER(GpuSceneMicroPatches, m_crntFramePatchHeaders.getSize());
	ANKI_TRACE_INC_COUNTER(GpuSceneMicroPatchUploadData, m_crntFramePatchData.getSizeInBytes());

	RebarGpuMemoryToken headersToken;
	void* mapped =
		RebarStagingGpuMemoryPool::getSingleton().allocateFrame(m_crntFramePatchHeaders.getSizeInBytes(), headersToken);
	memcpy(mapped, &m_crntFramePatchHeaders[0], m_crntFramePatchHeaders.getSizeInBytes());

	RebarGpuMemoryToken dataToken;
	mapped = RebarStagingGpuMemoryPool::getSingleton().allocateFrame(m_crntFramePatchData.getSizeInBytes(), dataToken);
	memcpy(mapped, &m_crntFramePatchData[0], m_crntFramePatchData.getSizeInBytes());

	cmdb.bindStorageBuffer(0, 0, RebarStagingGpuMemoryPool::getSingleton().getBuffer(), headersToken.m_offset,
						   headersToken.m_range);
	cmdb.bindStorageBuffer(0, 1, RebarStagingGpuMemoryPool::getSingleton().getBuffer(), dataToken.m_offset,
						   dataToken.m_range);
	cmdb.bindStorageBuffer(0, 2, GpuSceneMemoryPool::getSingleton().getBuffer(), 0, kMaxPtrSize);
	cmdb.bindShaderProgram(m_grProgram);
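
	// Dispatch one workgroup per patch header; the compute shader applies each patch to the GPU scene buffer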
	const U32 workgroupCountX = m_crntFramePatchHeaders.getSize();
	cmdb.dispatchCompute(workgroupCountX, 1, 1);

	// Cleanup to prepare for the new frame
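	// moveAndReset() just detaches the arrays from their storage; the memory belongs to the frame's stack pool and is
	// reclaimed when that pool is reset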
	U32* data;
	U32 size, storage;
	m_crntFramePatchData.moveAndReset(data, size, storage);

	PatchHeader* datah;
	m_crntFramePatchHeaders.moveAndReset(datah, size, storage);
}

} // end namespace anki