// GpuMemoryPools.cpp
  1. // Copyright (C) 2009-2022, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. #include <AnKi/Core/GpuMemoryPools.h>
  6. #include <AnKi/Core/ConfigSet.h>
  7. #include <AnKi/Gr/GrManager.h>
  8. #include <AnKi/Gr/CommandBuffer.h>
  9. #include <AnKi/Util/Tracer.h>
  10. #include <AnKi/Resource/ResourceManager.h>
  11. namespace anki {
  12. void UnifiedGeometryMemoryPool::init(HeapMemoryPool* pool, GrManager* gr, const ConfigSet& cfg)
  13. {
  14. ANKI_ASSERT(pool && gr);
  15. const PtrSize poolSize = cfg.getCoreGlobalVertexMemorySize();
  16. const Array classes = {1_KB, 8_KB, 32_KB, 128_KB, 512_KB, 4_MB, 8_MB, 16_MB, poolSize};
  17. BufferUsageBit buffUsage = BufferUsageBit::kVertex | BufferUsageBit::kIndex | BufferUsageBit::kTransferDestination
  18. | (BufferUsageBit::kAllTexture & BufferUsageBit::kAllRead);
  19. if(gr->getDeviceCapabilities().m_rayTracingEnabled)
  20. {
  21. buffUsage |= BufferUsageBit::kAccelerationStructureBuild;
  22. }
  23. m_pool.init(gr, pool, buffUsage, classes, poolSize, "UnifiedGeometry", false);
  24. // Allocate something dummy to force creating the GPU buffer
  25. SegregatedListsGpuMemoryPoolToken token;
  26. allocate(16, 4, token);
  27. free(token);
  28. }
  29. void GpuSceneMemoryPool::init(HeapMemoryPool* pool, GrManager* gr, const ConfigSet& cfg)
  30. {
  31. ANKI_ASSERT(pool && gr);
  32. const PtrSize poolSize = cfg.getCoreGpuSceneInitialSize();
  33. const Array classes = {32_B, 64_B, 128_B, 256_B, poolSize};
  34. BufferUsageBit buffUsage = BufferUsageBit::kAllStorage | BufferUsageBit::kTransferDestination;
  35. m_pool.init(gr, pool, buffUsage, classes, poolSize, "GpuScene", true);
  36. }
  37. RebarStagingGpuMemoryPool::~RebarStagingGpuMemoryPool()
  38. {
  39. GrManager& gr = m_buffer->getManager();
  40. gr.finish();
  41. m_buffer->unmap();
  42. m_buffer.reset(nullptr);
  43. }
  44. Error RebarStagingGpuMemoryPool::init(GrManager* gr, const ConfigSet& cfg)
  45. {
  46. BufferInitInfo buffInit("ReBar");
  47. buffInit.m_mapAccess = BufferMapAccessBit::kWrite;
  48. buffInit.m_size = cfg.getCoreRebarGpuMemorySize();
  49. buffInit.m_usage =
  50. BufferUsageBit::kAllUniform | BufferUsageBit::kAllStorage | BufferUsageBit::kVertex | BufferUsageBit::kIndex;
  51. m_buffer = gr->newBuffer(buffInit);
  52. m_bufferSize = buffInit.m_size;
  53. m_alignment = gr->getDeviceCapabilities().m_uniformBufferBindOffsetAlignment;
  54. m_alignment = max(m_alignment, gr->getDeviceCapabilities().m_storageBufferBindOffsetAlignment);
  55. m_alignment = max(m_alignment, gr->getDeviceCapabilities().m_sbtRecordAlignment);
  56. m_mappedMem = static_cast<U8*>(m_buffer->map(0, kMaxPtrSize, BufferMapAccessBit::kWrite));
  57. return Error::kNone;
  58. }
  59. void* RebarStagingGpuMemoryPool::allocateFrame(PtrSize size, RebarGpuMemoryToken& token)
  60. {
  61. void* address = tryAllocateFrame(size, token);
  62. if(ANKI_UNLIKELY(address == nullptr))
  63. {
  64. ANKI_CORE_LOGF("Out of ReBAR GPU memory");
  65. }
  66. return address;
  67. }
/// Lock-free ring-buffer allocation out of the persistently-mapped staging buffer.
/// The atomic offset grows forever; the actual buffer offset is the counter modulo the buffer size.
/// @param origSize Requested size in bytes (unpadded).
/// @param token    Receives the resulting offset/range.
/// @return CPU-visible pointer to the allocation (never nullptr in the current implementation).
void* RebarStagingGpuMemoryPool::tryAllocateFrame(PtrSize origSize, RebarGpuMemoryToken& token)
{
	// Pad the reservation so the *next* allocation's offset stays aligned (m_alignment from init()).
	const PtrSize size = getAlignedRoundUp(m_alignment, origSize);

	// Try in a loop because we may end up with an allocation its offset crosses the buffer's end
	PtrSize offset;
	Bool done = false;
	do
	{
		offset = m_offset.fetchAdd(size) % m_bufferSize;
		// When the unpadded range fits, end == offset + origSize (the "+ 1" lets the range end exactly
		// at m_bufferSize without wrapping). When it would cross the buffer's end, end wraps to a small
		// value and the check below fails, so we retry — the tail bytes are simply burned.
		const PtrSize end = (offset + origSize) % (m_bufferSize + 1);
		done = offset < end;
	} while(!done);

	void* address = m_mappedMem + offset;
	// The token records the unpadded size; the padding only exists to keep subsequent offsets aligned.
	token.m_offset = offset;
	token.m_range = origSize;
	return address;
}
  85. PtrSize RebarStagingGpuMemoryPool::endFrame()
  86. {
  87. const PtrSize crntOffset = m_offset.getNonAtomically();
  88. const PtrSize usedMemory = crntOffset - m_previousFrameEndOffset;
  89. m_previousFrameEndOffset = crntOffset;
  90. if(usedMemory >= PtrSize(0.8 * F64(m_bufferSize / kMaxFramesInFlight)))
  91. {
  92. ANKI_CORE_LOGW("Frame used more that 80%% of its safe limit of ReBAR memory");
  93. }
  94. ANKI_TRACE_INC_COUNTER(ReBarUsedMemory, usedMemory);
  95. return usedMemory;
  96. }
/// It packs the source and destination offsets as well as the size of the patch itself.
class GpuSceneMicroPatcher::PatchHeader
{
public:
	/// Bits [31:26] store (dwordCount - 1), bits [25:0] the source offset in dwords (see the packing
	/// performed in newCopy()).
	U32 m_dwordCountAndSrcDwordOffsetPack;

	/// Destination offset inside the GPU scene buffer, in dwords.
	U32 m_dstDwordOffset;
};
GpuSceneMicroPatcher::~GpuSceneMicroPatcher()
{
	// PatchHeader is memcpy'd to GPU memory in patchGpuScene(), so its size must stay 2 dwords —
	// presumably mirroring the layout GpuSceneMicroPatching.ankiprogbin expects (verify in the shader).
	static_assert(sizeof(PatchHeader) == 8);
}
  108. Error GpuSceneMicroPatcher::init(ResourceManager* rsrc)
  109. {
  110. ANKI_CHECK(rsrc->loadResource("ShaderBinaries/GpuSceneMicroPatching.ankiprogbin", m_copyProgram));
  111. const ShaderProgramResourceVariant* variant;
  112. m_copyProgram->getOrCreateVariant(variant);
  113. m_grProgram = variant->getProgram();
  114. return Error::kNone;
  115. }
  116. void GpuSceneMicroPatcher::newCopy(StackMemoryPool& frameCpuPool, PtrSize gpuSceneDestOffset, PtrSize dataSize,
  117. const void* data)
  118. {
  119. ANKI_ASSERT(dataSize > 0 && (dataSize % 4) == 0);
  120. ANKI_ASSERT((ptrToNumber(data) % 4) == 0);
  121. ANKI_ASSERT((gpuSceneDestOffset % 4) == 0 && gpuSceneDestOffset / 4 < kMaxU32);
  122. const U32 dataDwords = U32(dataSize / 4);
  123. U32 gpuSceneDestDwordOffset = U32(gpuSceneDestOffset / 4);
  124. const U32* patchIt = static_cast<const U32*>(data);
  125. const U32* const patchEnd = patchIt + dataDwords;
  126. // Break the data into multiple copies
  127. LockGuard lock(m_mtx);
  128. while(patchIt < patchEnd)
  129. {
  130. const U32 patchDwords = U32(patchEnd - patchIt);
  131. PatchHeader& header = *m_crntFramePatchHeaders.emplaceBack(frameCpuPool);
  132. ANKI_ASSERT(((patchDwords - 1) & 0b111111) == (patchDwords - 1));
  133. header.m_dwordCountAndSrcDwordOffsetPack = patchDwords - 1;
  134. header.m_dwordCountAndSrcDwordOffsetPack <<= 26;
  135. ANKI_ASSERT((m_crntFramePatchData.getSize() & 0x3FFFFFF) == m_crntFramePatchData.getSize());
  136. header.m_dwordCountAndSrcDwordOffsetPack |= m_crntFramePatchData.getSize();
  137. header.m_dstDwordOffset = gpuSceneDestDwordOffset;
  138. const U32 srcOffset = m_crntFramePatchData.getSize();
  139. m_crntFramePatchData.resize(frameCpuPool, srcOffset + patchDwords);
  140. memcpy(&m_crntFramePatchData[srcOffset], patchIt, patchDwords * 4);
  141. patchIt += kDwordsPerPatch;
  142. gpuSceneDestDwordOffset += kDwordsPerPatch;
  143. }
  144. }
  145. void GpuSceneMicroPatcher::patchGpuScene(RebarStagingGpuMemoryPool& rebarPool, CommandBuffer& cmdb,
  146. const BufferPtr& gpuSceneBuffer)
  147. {
  148. if(m_crntFramePatchHeaders.getSize() == 0)
  149. {
  150. return;
  151. }
  152. ANKI_ASSERT(m_crntFramePatchData.getSize() > 0);
  153. ANKI_TRACE_INC_COUNTER(GpuSceneMicroPatches, m_crntFramePatchHeaders.getSize());
  154. ANKI_TRACE_INC_COUNTER(GpuSceneMicroPatchUploadData, m_crntFramePatchData.getSizeInBytes());
  155. RebarGpuMemoryToken headersToken;
  156. void* mapped = rebarPool.allocateFrame(m_crntFramePatchHeaders.getSizeInBytes(), headersToken);
  157. memcpy(mapped, &m_crntFramePatchHeaders[0], m_crntFramePatchHeaders.getSizeInBytes());
  158. RebarGpuMemoryToken dataToken;
  159. mapped = rebarPool.allocateFrame(m_crntFramePatchData.getSizeInBytes(), dataToken);
  160. memcpy(mapped, &m_crntFramePatchData[0], m_crntFramePatchData.getSizeInBytes());
  161. cmdb.bindStorageBuffer(0, 0, rebarPool.getBuffer(), headersToken.m_offset, headersToken.m_range);
  162. cmdb.bindStorageBuffer(0, 1, rebarPool.getBuffer(), dataToken.m_offset, dataToken.m_range);
  163. cmdb.bindStorageBuffer(0, 2, gpuSceneBuffer, 0, kMaxPtrSize);
  164. cmdb.bindShaderProgram(m_grProgram);
  165. const U32 workgroupCountX = m_crntFramePatchHeaders.getSize();
  166. cmdb.dispatchCompute(workgroupCountX, 1, 1);
  167. // Cleanup to prepare for the new frame
  168. U32* data;
  169. U32 size, storage;
  170. m_crntFramePatchData.moveAndReset(data, size, storage);
  171. PatchHeader* datah;
  172. m_crntFramePatchHeaders.moveAndReset(datah, size, storage);
  173. }
  174. } // end namespace anki