// Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors. // All rights reserved. // Code licensed under the BSD License. // http://www.anki3d.org/LICENSE #include #include #include #include #include namespace anki { ANKI_SVAR(GpuSceneBufferAllocatedSize, StatCategory::kGpuMem, "GPU scene allocated", StatFlag::kBytes | StatFlag::kMainThreadUpdates) ANKI_SVAR(GpuSceneBufferTotal, StatCategory::kGpuMem, "GPU scene total", StatFlag::kBytes | StatFlag::kMainThreadUpdates) ANKI_SVAR(GpuSceneBufferFragmentation, StatCategory::kGpuMem, "GPU scene fragmentation", StatFlag::kFloat | StatFlag::kMainThreadUpdates); void GpuSceneBuffer::init() { const PtrSize poolSize = g_cvarCoreGpuSceneInitialSize; const Array classes = {32_B, 64_B, 128_B, 256_B, poolSize}; BufferUsageBit buffUsage = BufferUsageBit::kAllUav | BufferUsageBit::kAllSrv | BufferUsageBit::kCopyDestination; m_pool.init(buffUsage, classes, poolSize, "GpuScene", true); // Allocate something dummy to force creating the GPU buffer GpuSceneBufferAllocation alloc = allocate(16, 4); deferredFree(alloc); } void GpuSceneBuffer::updateStats() const { F32 externalFragmentation; PtrSize userAllocatedSize, totalSize; m_pool.getStats(externalFragmentation, userAllocatedSize, totalSize); g_svarGpuSceneBufferAllocatedSize.set(userAllocatedSize); g_svarGpuSceneBufferTotal.set(totalSize); g_svarGpuSceneBufferFragmentation.set(externalFragmentation); } // It packs the source and destination offsets as well as the size of the patch itself. Needs to match the HLSL structure class GpuSceneMicroPatcher::PatchHeader { public: U32 m_dwordSizeMinusOne : kDwordsPerPatchBitCount; U32 m_srcDwordOffset : 32 - kDwordsPerPatchBitCount; U32 m_dstDwordOffset; }; GpuSceneMicroPatcher::GpuSceneMicroPatcher() { } GpuSceneMicroPatcher::~GpuSceneMicroPatcher() { static_assert(sizeof(PatchHeader) == 8); } Error GpuSceneMicroPatcher::init() { ANKI_CHECK(ResourceManager::getSingleton().loadResource("ShaderBinaries/GpuSceneMicroPatching.ankiprogbin", m_copyProgram)); ShaderProgramResourceVariantInitInfo varInit(m_copyProgram); const ShaderProgramResourceVariant* variant; m_copyProgram->getOrCreateVariant(varInit, variant); m_grProgram.reset(&variant->getProgram()); m_stackMemPool.init(CoreMemoryPool::getSingleton().getAllocationCallback(), CoreMemoryPool::getSingleton().getAllocationCallbackUserData(), 512_KB); return Error::kNone; } void GpuSceneMicroPatcher::beginPatching() { ANKI_ASSERT(m_bPatchingMode.fetchAdd(1) == 0); m_stackMemPool.reset(); m_crntFramePatchHeaders = DynamicArray>(&m_stackMemPool); m_crntFramePatchData = DynamicArray>(&m_stackMemPool); } void GpuSceneMicroPatcher::newCopy(PtrSize gpuSceneDestOffset, PtrSize dataSize, const void* data) { ANKI_ASSERT(m_bPatchingMode.load() == 1); ANKI_ASSERT(dataSize > 0 && (dataSize % 4) == 0); ANKI_ASSERT((ptrToNumber(data) % 4) == 0); ANKI_ASSERT((gpuSceneDestOffset % 4) == 0 && gpuSceneDestOffset / 4 < kMaxU32); ANKI_ASSERT(gpuSceneDestOffset + dataSize <= GpuSceneBuffer::getSingleton().getBufferView().getRange()); const U32 dataDwords = U32(dataSize / 4); U32 gpuSceneDestDwordOffset = U32(gpuSceneDestOffset / 4); const U32* patchIt = static_cast(data); const U32* const patchEnd = patchIt + dataDwords; // Break the data into multiple copies LockGuard lock(m_mtx); while(patchIt < patchEnd) { const U32 patchDwords = min(kDwordsPerPatch, U32(patchEnd - patchIt)); PatchHeader& header = *m_crntFramePatchHeaders.emplaceBack(); ANKI_ASSERT(((patchDwords - 1) & 0b111111) == (patchDwords - 1)); header.m_dwordSizeMinusOne = patchDwords - 1; ANKI_ASSERT((m_crntFramePatchData.getSize() & 0x3FFFFFF) == m_crntFramePatchData.getSize()); header.m_srcDwordOffset = m_crntFramePatchData.getSize(); header.m_dstDwordOffset = gpuSceneDestDwordOffset; const U32 srcOffset = m_crntFramePatchData.getSize(); m_crntFramePatchData.resize(srcOffset + patchDwords); memcpy(&m_crntFramePatchData[srcOffset], patchIt, patchDwords * 4); patchIt += patchDwords; gpuSceneDestDwordOffset += patchDwords; } } void GpuSceneMicroPatcher::patchGpuScene(CommandBuffer& cmdb) { ANKI_ASSERT(m_bPatchingMode.load() == 0); if(m_crntFramePatchHeaders.getSize() == 0) { return; } ANKI_ASSERT(m_crntFramePatchData.getSize() > 0); ANKI_TRACE_INC_COUNTER(GpuSceneMicroPatches, m_crntFramePatchHeaders.getSize()); ANKI_TRACE_INC_COUNTER(GpuSceneMicroPatchUploadData, m_crntFramePatchData.getSizeInBytes()); WeakArray mapped; const BufferView headersBuff = RebarTransientMemoryPool::getSingleton().allocateStructuredBuffer(m_crntFramePatchHeaders.getSize(), mapped); memcpy(mapped.getBegin(), m_crntFramePatchHeaders.getBegin(), m_crntFramePatchHeaders.getSizeInBytes()); WeakArray mapped2; const BufferView dataBuff = RebarTransientMemoryPool::getSingleton().allocateStructuredBuffer(m_crntFramePatchData.getSize(), mapped2); memcpy(mapped2.getBegin(), m_crntFramePatchData.getBegin(), m_crntFramePatchData.getSizeInBytes()); cmdb.bindSrv(0, 0, headersBuff); cmdb.bindSrv(1, 0, dataBuff); cmdb.bindUav(0, 0, BufferView(&GpuSceneBuffer::getSingleton().getBuffer())); cmdb.bindShaderProgram(m_grProgram.get()); const U32 workgroupCountX = m_crntFramePatchHeaders.getSize(); cmdb.dispatchCompute(workgroupCountX, 1, 1); // Cleanup to prepare for the new frame U32* data; U32 size, storage; m_crntFramePatchData.moveAndReset(data, size, storage); PatchHeader* datah; m_crntFramePatchHeaders.moveAndReset(datah, size, storage); } } // end namespace anki