GpuSceneBuffer.cpp 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. // Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. #include <AnKi/GpuMemory/GpuSceneBuffer.h>
  6. #include <AnKi/GpuMemory/RebarTransientMemoryPool.h>
  7. #include <AnKi/Util/Tracer.h>
  8. #include <AnKi/Resource/ResourceManager.h>
  9. #include <AnKi/Gr/CommandBuffer.h>
  10. namespace anki {
  11. ANKI_SVAR(GpuSceneBufferAllocatedSize, StatCategory::kGpuMem, "GPU scene allocated", StatFlag::kBytes | StatFlag::kMainThreadUpdates)
  12. ANKI_SVAR(GpuSceneBufferTotal, StatCategory::kGpuMem, "GPU scene total", StatFlag::kBytes | StatFlag::kMainThreadUpdates)
  13. ANKI_SVAR(GpuSceneBufferFragmentation, StatCategory::kGpuMem, "GPU scene fragmentation", StatFlag::kFloat | StatFlag::kMainThreadUpdates);
  14. void GpuSceneBuffer::init()
  15. {
  16. const PtrSize poolSize = g_cvarCoreGpuSceneInitialSize;
  17. const Array classes = {32_B, 64_B, 128_B, 256_B, poolSize};
  18. BufferUsageBit buffUsage = BufferUsageBit::kAllUav | BufferUsageBit::kAllSrv | BufferUsageBit::kCopyDestination;
  19. m_pool.init(buffUsage, classes, poolSize, "GpuScene", true);
  20. // Allocate something dummy to force creating the GPU buffer
  21. GpuSceneBufferAllocation alloc = allocate(16, 4);
  22. deferredFree(alloc);
  23. }
  24. void GpuSceneBuffer::updateStats() const
  25. {
  26. F32 externalFragmentation;
  27. PtrSize userAllocatedSize, totalSize;
  28. m_pool.getStats(externalFragmentation, userAllocatedSize, totalSize);
  29. g_svarGpuSceneBufferAllocatedSize.set(userAllocatedSize);
  30. g_svarGpuSceneBufferTotal.set(totalSize);
  31. g_svarGpuSceneBufferFragmentation.set(externalFragmentation);
  32. }
  33. // It packs the source and destination offsets as well as the size of the patch itself. Needs to match the HLSL structure
  34. class GpuSceneMicroPatcher::PatchHeader
  35. {
  36. public:
  37. U32 m_dwordSizeMinusOne : kDwordsPerPatchBitCount;
  38. U32 m_srcDwordOffset : 32 - kDwordsPerPatchBitCount;
  39. U32 m_dstDwordOffset;
  40. };
  41. GpuSceneMicroPatcher::GpuSceneMicroPatcher()
  42. {
  43. }
  44. GpuSceneMicroPatcher::~GpuSceneMicroPatcher()
  45. {
  46. static_assert(sizeof(PatchHeader) == 8);
  47. }
  48. Error GpuSceneMicroPatcher::init()
  49. {
  50. ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/GpuSceneMicroPatching.ankiprogbin", m_copyProgram));
  51. ShaderProgramResourceVariantInitInfo varInit(m_copyProgram);
  52. const ShaderProgramResourceVariant* variant;
  53. m_copyProgram->getOrCreateVariant(varInit, variant);
  54. m_grProgram.reset(&variant->getProgram());
  55. m_stackMemPool.init(CoreMemoryPool::getSingleton().getAllocationCallback(), CoreMemoryPool::getSingleton().getAllocationCallbackUserData(),
  56. 512_KB);
  57. return Error::kNone;
  58. }
  59. void GpuSceneMicroPatcher::beginPatching()
  60. {
  61. ANKI_ASSERT(m_bPatchingMode.fetchAdd(1) == 0);
  62. m_stackMemPool.reset();
  63. m_crntFramePatchHeaders = DynamicArray<PatchHeader, MemoryPoolPtrWrapper<StackMemoryPool>>(&m_stackMemPool);
  64. m_crntFramePatchData = DynamicArray<U32, MemoryPoolPtrWrapper<StackMemoryPool>>(&m_stackMemPool);
  65. }
  66. void GpuSceneMicroPatcher::newCopy(PtrSize gpuSceneDestOffset, PtrSize dataSize, const void* data)
  67. {
  68. ANKI_ASSERT(m_bPatchingMode.load() == 1);
  69. ANKI_ASSERT(dataSize > 0 && (dataSize % 4) == 0);
  70. ANKI_ASSERT((ptrToNumber(data) % 4) == 0);
  71. ANKI_ASSERT((gpuSceneDestOffset % 4) == 0 && gpuSceneDestOffset / 4 < kMaxU32);
  72. ANKI_ASSERT(gpuSceneDestOffset + dataSize <= GpuSceneBuffer::getSingleton().getBufferView().getRange());
  73. const U32 dataDwords = U32(dataSize / 4);
  74. U32 gpuSceneDestDwordOffset = U32(gpuSceneDestOffset / 4);
  75. const U32* patchIt = static_cast<const U32*>(data);
  76. const U32* const patchEnd = patchIt + dataDwords;
  77. // Break the data into multiple copies
  78. LockGuard lock(m_mtx);
  79. while(patchIt < patchEnd)
  80. {
  81. const U32 patchDwords = min(kDwordsPerPatch, U32(patchEnd - patchIt));
  82. PatchHeader& header = *m_crntFramePatchHeaders.emplaceBack();
  83. ANKI_ASSERT(((patchDwords - 1) & 0b111111) == (patchDwords - 1));
  84. header.m_dwordSizeMinusOne = patchDwords - 1;
  85. ANKI_ASSERT((m_crntFramePatchData.getSize() & 0x3FFFFFF) == m_crntFramePatchData.getSize());
  86. header.m_srcDwordOffset = m_crntFramePatchData.getSize();
  87. header.m_dstDwordOffset = gpuSceneDestDwordOffset;
  88. const U32 srcOffset = m_crntFramePatchData.getSize();
  89. m_crntFramePatchData.resize(srcOffset + patchDwords);
  90. memcpy(&m_crntFramePatchData[srcOffset], patchIt, patchDwords * 4);
  91. patchIt += patchDwords;
  92. gpuSceneDestDwordOffset += patchDwords;
  93. }
  94. }
  95. void GpuSceneMicroPatcher::patchGpuScene(CommandBuffer& cmdb)
  96. {
  97. ANKI_ASSERT(m_bPatchingMode.load() == 0);
  98. if(m_crntFramePatchHeaders.getSize() == 0)
  99. {
  100. return;
  101. }
  102. ANKI_ASSERT(m_crntFramePatchData.getSize() > 0);
  103. ANKI_TRACE_INC_COUNTER(GpuSceneMicroPatches, m_crntFramePatchHeaders.getSize());
  104. ANKI_TRACE_INC_COUNTER(GpuSceneMicroPatchUploadData, m_crntFramePatchData.getSizeInBytes());
  105. WeakArray<PatchHeader> mapped;
  106. const BufferView headersBuff = RebarTransientMemoryPool::getSingleton().allocateStructuredBuffer(m_crntFramePatchHeaders.getSize(), mapped);
  107. memcpy(mapped.getBegin(), m_crntFramePatchHeaders.getBegin(), m_crntFramePatchHeaders.getSizeInBytes());
  108. WeakArray<U32> mapped2;
  109. const BufferView dataBuff = RebarTransientMemoryPool::getSingleton().allocateStructuredBuffer(m_crntFramePatchData.getSize(), mapped2);
  110. memcpy(mapped2.getBegin(), m_crntFramePatchData.getBegin(), m_crntFramePatchData.getSizeInBytes());
  111. cmdb.bindSrv(0, 0, headersBuff);
  112. cmdb.bindSrv(1, 0, dataBuff);
  113. cmdb.bindUav(0, 0, BufferView(&GpuSceneBuffer::getSingleton().getBuffer()));
  114. cmdb.bindShaderProgram(m_grProgram.get());
  115. const U32 workgroupCountX = m_crntFramePatchHeaders.getSize();
  116. cmdb.dispatchCompute(workgroupCountX, 1, 1);
  117. // Cleanup to prepare for the new frame
  118. U32* data;
  119. U32 size, storage;
  120. m_crntFramePatchData.moveAndReset(data, size, storage);
  121. PatchHeader* datah;
  122. m_crntFramePatchHeaders.moveAndReset(datah, size, storage);
  123. }
  124. } // end namespace anki