// GpuMemoryPools.cpp
  1. // Copyright (C) 2009-2023, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. #include <AnKi/Core/GpuMemoryPools.h>
  6. #include <AnKi/Core/ConfigSet.h>
  7. #include <AnKi/Gr/GrManager.h>
  8. #include <AnKi/Gr/CommandBuffer.h>
  9. #include <AnKi/Util/Tracer.h>
  10. #include <AnKi/Resource/ResourceManager.h>
  11. namespace anki {
  12. void UnifiedGeometryMemoryPool::init(GrManager* gr)
  13. {
  14. ANKI_ASSERT(gr);
  15. const PtrSize poolSize = ConfigSet::getSingleton().getCoreGlobalVertexMemorySize();
  16. const Array classes = {1_KB, 8_KB, 32_KB, 128_KB, 512_KB, 4_MB, 8_MB, 16_MB, poolSize};
  17. BufferUsageBit buffUsage = BufferUsageBit::kVertex | BufferUsageBit::kIndex | BufferUsageBit::kTransferDestination
  18. | (BufferUsageBit::kAllTexture & BufferUsageBit::kAllRead);
  19. if(gr->getDeviceCapabilities().m_rayTracingEnabled)
  20. {
  21. buffUsage |= BufferUsageBit::kAccelerationStructureBuild;
  22. }
  23. m_pool.init(gr, &CoreMemoryPool::getSingleton(), buffUsage, classes, poolSize, "UnifiedGeometry", false);
  24. // Allocate something dummy to force creating the GPU buffer
  25. SegregatedListsGpuMemoryPoolToken token;
  26. allocate(16, 4, token);
  27. deferredFree(token);
  28. }
  29. void GpuSceneMemoryPool::init(GrManager* gr)
  30. {
  31. ANKI_ASSERT(gr);
  32. const PtrSize poolSize = ConfigSet::getSingleton().getCoreGpuSceneInitialSize();
  33. const Array classes = {32_B, 64_B, 128_B, 256_B, poolSize};
  34. BufferUsageBit buffUsage = BufferUsageBit::kAllStorage | BufferUsageBit::kTransferDestination;
  35. m_pool.init(gr, &CoreMemoryPool::getSingleton(), buffUsage, classes, poolSize, "GpuScene", true);
  36. }
  37. RebarStagingGpuMemoryPool::~RebarStagingGpuMemoryPool()
  38. {
  39. GrManager& gr = m_buffer->getManager();
  40. gr.finish();
  41. m_buffer->unmap();
  42. m_buffer.reset(nullptr);
  43. }
  44. void RebarStagingGpuMemoryPool::init(GrManager* gr)
  45. {
  46. BufferInitInfo buffInit("ReBar");
  47. buffInit.m_mapAccess = BufferMapAccessBit::kWrite;
  48. buffInit.m_size = ConfigSet::getSingleton().getCoreRebarGpuMemorySize();
  49. buffInit.m_usage = BufferUsageBit::kAllUniform | BufferUsageBit::kAllStorage | BufferUsageBit::kVertex
  50. | BufferUsageBit::kIndex | BufferUsageBit::kShaderBindingTable;
  51. m_buffer = gr->newBuffer(buffInit);
  52. m_bufferSize = buffInit.m_size;
  53. m_alignment = gr->getDeviceCapabilities().m_uniformBufferBindOffsetAlignment;
  54. m_alignment = max(m_alignment, gr->getDeviceCapabilities().m_storageBufferBindOffsetAlignment);
  55. m_alignment = max(m_alignment, gr->getDeviceCapabilities().m_sbtRecordAlignment);
  56. m_mappedMem = static_cast<U8*>(m_buffer->map(0, kMaxPtrSize, BufferMapAccessBit::kWrite));
  57. }
  58. void* RebarStagingGpuMemoryPool::allocateFrame(PtrSize size, RebarGpuMemoryToken& token)
  59. {
  60. void* address = tryAllocateFrame(size, token);
  61. if(address == nullptr) [[unlikely]]
  62. {
  63. ANKI_CORE_LOGF("Out of ReBAR GPU memory");
  64. }
  65. return address;
  66. }
/// Lock-free ring-buffer allocation out of the persistently mapped ReBAR buffer.
/// The caller gets back a CPU-visible pointer; token records the offset/range for GPU binding.
void* RebarStagingGpuMemoryPool::tryAllocateFrame(PtrSize origSize, RebarGpuMemoryToken& token)
{
	// Advance the cursor by the aligned size so the NEXT allocation starts aligned; the caller
	// still only gets origSize bytes recorded in the token.
	const PtrSize size = getAlignedRoundUp(m_alignment, origSize);

	// Try in a loop because we may end up with an allocation its offset crosses the buffer's end
	PtrSize offset;
	Bool done = false;
	do
	{
		// Atomically reserve a slot and wrap the monotonically growing cursor into the buffer.
		offset = m_offset.fetchAdd(size) % m_bufferSize;
		// Modulo (m_bufferSize + 1) so that an allocation ending exactly at m_bufferSize keeps
		// end == m_bufferSize and still passes offset < end. If the allocation would cross the
		// buffer's end, `end` wraps below `offset`, the test fails and we retry with a fresh
		// cursor (the skipped tail bytes are simply wasted).
		const PtrSize end = (offset + origSize) % (m_bufferSize + 1);
		done = offset < end;
	} while(!done);

	// NOTE(review): assumes origSize > 0 and origSize <= m_bufferSize; a zero size would loop
	// forever (offset == end). Callers visible in this file always pass non-zero sizes.
	void* address = m_mappedMem + offset;
	token.m_offset = offset;
	token.m_range = origSize;
	return address;
}
  84. PtrSize RebarStagingGpuMemoryPool::endFrame()
  85. {
  86. const PtrSize crntOffset = m_offset.getNonAtomically();
  87. const PtrSize usedMemory = crntOffset - m_previousFrameEndOffset;
  88. m_previousFrameEndOffset = crntOffset;
  89. if(usedMemory >= PtrSize(0.8 * F64(m_bufferSize / kMaxFramesInFlight)))
  90. {
  91. ANKI_CORE_LOGW("Frame used more that 80%% of its safe limit of ReBAR memory");
  92. }
  93. ANKI_TRACE_INC_COUNTER(ReBarUsedMemory, usedMemory);
  94. return usedMemory;
  95. }
/// It packs the source and destination offsets as well as the size of the patch itself.
class GpuSceneMicroPatcher::PatchHeader
{
public:
	/// Bits [31:26] store (patch dword count - 1), bits [25:0] store the source offset in dwords
	/// into the frame's patch-data array (see the packing code in newCopy()).
	U32 m_dwordCountAndSrcDwordOffsetPack;

	/// Destination offset into the GPU scene buffer, in dwords.
	U32 m_dstDwordOffset;
};
  103. GpuSceneMicroPatcher::~GpuSceneMicroPatcher()
  104. {
  105. static_assert(sizeof(PatchHeader) == 8);
  106. }
  107. Error GpuSceneMicroPatcher::init(ResourceManager* rsrc)
  108. {
  109. ANKI_CHECK(rsrc->loadResource("ShaderBinaries/GpuSceneMicroPatching.ankiprogbin", m_copyProgram));
  110. const ShaderProgramResourceVariant* variant;
  111. m_copyProgram->getOrCreateVariant(variant);
  112. m_grProgram = variant->getProgram();
  113. return Error::kNone;
  114. }
/// Queue a CPU-side copy into the GPU scene. The data is chopped into patches of at most
/// kDwordsPerPatch dwords; each patch gets a PatchHeader and its payload is appended to the
/// frame's patch-data array. patchGpuScene() later replays everything with a compute dispatch.
/// @param frameCpuPool Per-frame pool backing the header/data arrays.
/// @param gpuSceneDestOffset Byte offset in the GPU scene buffer; must be 4-byte aligned.
/// @param dataSize Byte size of the copy; must be a non-zero multiple of 4.
/// @param data Source memory; must be 4-byte aligned.
void GpuSceneMicroPatcher::newCopy(StackMemoryPool& frameCpuPool, PtrSize gpuSceneDestOffset, PtrSize dataSize,
								   const void* data)
{
	// Everything is processed in whole dwords.
	ANKI_ASSERT(dataSize > 0 && (dataSize % 4) == 0);
	ANKI_ASSERT((ptrToNumber(data) % 4) == 0);
	ANKI_ASSERT((gpuSceneDestOffset % 4) == 0 && gpuSceneDestOffset / 4 < kMaxU32);

	const U32 dataDwords = U32(dataSize / 4);
	U32 gpuSceneDestDwordOffset = U32(gpuSceneDestOffset / 4);
	const U32* patchIt = static_cast<const U32*>(data);
	const U32* const patchEnd = patchIt + dataDwords;

	// Break the data into multiple copies
	LockGuard lock(m_mtx); // Headers/data arrays are shared; callers may race.
	while(patchIt < patchEnd)
	{
		const U32 patchDwords = min(kDwordsPerPatch, U32(patchEnd - patchIt));

		PatchHeader& header = *m_crntFramePatchHeaders.emplaceBack(frameCpuPool);

		// Pack (dword count - 1) into the top 6 bits...
		ANKI_ASSERT(((patchDwords - 1) & 0b111111) == (patchDwords - 1));
		header.m_dwordCountAndSrcDwordOffsetPack = patchDwords - 1;
		header.m_dwordCountAndSrcDwordOffsetPack <<= 26;
		// ...and the source dword offset into the low 26 bits.
		ANKI_ASSERT((m_crntFramePatchData.getSize() & 0x3FFFFFF) == m_crntFramePatchData.getSize());
		header.m_dwordCountAndSrcDwordOffsetPack |= m_crntFramePatchData.getSize();
		header.m_dstDwordOffset = gpuSceneDestDwordOffset;

		// Append the payload after the dwords already queued this frame.
		const U32 srcOffset = m_crntFramePatchData.getSize();
		m_crntFramePatchData.resize(frameCpuPool, srcOffset + patchDwords);
		memcpy(&m_crntFramePatchData[srcOffset], patchIt, patchDwords * 4);

		patchIt += patchDwords;
		gpuSceneDestDwordOffset += patchDwords;
	}
}
/// Flush all copies queued via newCopy() this frame: upload headers + payload to ReBAR staging
/// memory, bind them next to the GPU scene buffer and dispatch one workgroup per patch. Resets
/// the per-frame arrays afterwards. No-op if nothing was queued.
void GpuSceneMicroPatcher::patchGpuScene(CommandBuffer& cmdb)
{
	if(m_crntFramePatchHeaders.getSize() == 0)
	{
		return;
	}

	// A header is only ever appended together with its payload, so data can't be empty here.
	ANKI_ASSERT(m_crntFramePatchData.getSize() > 0);

	ANKI_TRACE_INC_COUNTER(GpuSceneMicroPatches, m_crntFramePatchHeaders.getSize());
	ANKI_TRACE_INC_COUNTER(GpuSceneMicroPatchUploadData, m_crntFramePatchData.getSizeInBytes());

	// Stage the headers in ReBAR memory so the compute shader can read them.
	RebarGpuMemoryToken headersToken;
	void* mapped =
		RebarStagingGpuMemoryPool::getSingleton().allocateFrame(m_crntFramePatchHeaders.getSizeInBytes(), headersToken);
	memcpy(mapped, &m_crntFramePatchHeaders[0], m_crntFramePatchHeaders.getSizeInBytes());

	// Stage the payload dwords.
	RebarGpuMemoryToken dataToken;
	mapped = RebarStagingGpuMemoryPool::getSingleton().allocateFrame(m_crntFramePatchData.getSizeInBytes(), dataToken);
	memcpy(mapped, &m_crntFramePatchData[0], m_crntFramePatchData.getSizeInBytes());

	// Binding layout: 0 = headers, 1 = payload, 2 = whole GPU scene buffer as the destination.
	cmdb.bindStorageBuffer(0, 0, RebarStagingGpuMemoryPool::getSingleton().getBuffer(), headersToken.m_offset,
						   headersToken.m_range);
	cmdb.bindStorageBuffer(0, 1, RebarStagingGpuMemoryPool::getSingleton().getBuffer(), dataToken.m_offset,
						   dataToken.m_range);
	cmdb.bindStorageBuffer(0, 2, GpuSceneMemoryPool::getSingleton().getBuffer(), 0, kMaxPtrSize);

	cmdb.bindShaderProgram(m_grProgram);

	// One workgroup per queued patch.
	const U32 workgroupCountX = m_crntFramePatchHeaders.getSize();
	cmdb.dispatchCompute(workgroupCountX, 1, 1);

	// Cleanup to prepare for the new frame
	// moveAndReset detaches the storage (outputs are discarded); the frame pool reclaims it.
	U32* data;
	U32 size, storage;
	m_crntFramePatchData.moveAndReset(data, size, storage);
	PatchHeader* datah;
	m_crntFramePatchHeaders.moveAndReset(datah, size, storage);
}
  175. } // end namespace anki