GrAsyncCompute.cpp 12 KB


// Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
// All rights reserved.
// Code licensed under the BSD License.
// http://www.anki3d.org/LICENSE
  5. #include <Tests/Framework/Framework.h>
  6. #include <Tests/Gr/GrCommon.h>
  7. #include <AnKi/Gr.h>
  8. #include <AnKi/Util/MemoryPool.h>
  9. #include <AnKi/Util/HighRezTimer.h>
  10. using namespace anki;
  11. static void generateSphere(DynamicArray<Vec3>& positions, DynamicArray<UVec3>& indices, U32 sliceCount, U32 stackCount)
  12. {
  13. positions.emplaceBack(0.0f, 1.0f, 0.0f);
  14. const U32 v0 = 0;
  15. // generate vertices per stack / slice
  16. for(U32 i = 0u; i < stackCount - 1; i++)
  17. {
  18. const F32 phi = kPi * (i + 1) / stackCount;
  19. for(F32 j = 0u; j < sliceCount; j++)
  20. {
  21. const F32 theta = 2.0f * kPi * F32(j) / sliceCount;
  22. const F32 x = sin(phi) * cos(theta);
  23. const F32 y = cos(phi);
  24. const F32 z = sin(phi) * sin(theta);
  25. positions.emplaceBack(x, y, z);
  26. }
  27. }
  28. // add bottom vertex
  29. positions.emplaceBack(0.0f, -1.0f, 0.0f);
  30. const U32 v1 = U32(positions.getSize() - 1);
  31. // add top / bottom triangles
  32. for(auto i = 0u; i < sliceCount; ++i)
  33. {
  34. auto i0 = i + 1;
  35. auto i1 = (i + 1) % sliceCount + 1;
  36. indices.emplaceBack(v0, i1, i0);
  37. i0 = i + sliceCount * (stackCount - 2) + 1;
  38. i1 = (i + 1) % sliceCount + sliceCount * (stackCount - 2) + 1;
  39. indices.emplaceBack(v1, i0, i1);
  40. }
  41. // add quads per stack / slice
  42. for(U32 j = 0u; j < stackCount - 2; j++)
  43. {
  44. const U32 j0 = j * sliceCount + 1;
  45. const U32 j1 = (j + 1) * sliceCount + 1;
  46. for(U32 i = 0u; i < sliceCount; i++)
  47. {
  48. const U32 i0 = j0 + i;
  49. const U32 i1 = j0 + (i + 1) % sliceCount;
  50. const U32 i2 = j1 + (i + 1) % sliceCount;
  51. const U32 i3 = j1 + i;
  52. indices.emplaceBack(i0, i1, i2);
  53. indices.emplaceBack(i0, i2, i3);
  54. }
  55. }
  56. }
  57. ANKI_TEST(Gr, AsyncComputeBench)
  58. {
  59. const Bool useAsyncQueue = true;
  60. const Bool runConcurently = true;
  61. const U32 spheresToDrawPerDimension = 100;
  62. const U32 windowSize = 512;
  63. g_validationCVar = false; // TODO
  64. g_debugMarkersCVar = false;
  65. g_windowWidthCVar = windowSize;
  66. g_windowHeightCVar = windowSize;
  67. g_asyncComputeCVar = 0;
  68. DefaultMemoryPool::allocateSingleton(allocAligned, nullptr);
  69. ShaderCompilerMemoryPool::allocateSingleton(allocAligned, nullptr);
  70. initWindow();
  71. initGrManager();
  72. Input::allocateSingleton();
  73. {
  74. const CString computeShaderSrc = R"(
  75. RWTexture2D<float4> g_inTex : register(u0);
  76. RWTexture2D<float4> g_outTex : register(u1);
  77. [NumThreads(8, 8, 1)] void main(uint2 svDispatchThreadId : SV_DispatchThreadID)
  78. {
  79. uint2 texSize;
  80. g_inTex.GetDimensions(texSize.x, texSize.y);
  81. float4 val = 0.0;
  82. for(int x = -9; x <= 9; ++x)
  83. {
  84. for(int y = -9; y <= 9; ++y)
  85. {
  86. int2 coord = int2(svDispatchThreadId) + int2(x, y);
  87. if(coord.x < 0 || coord.y < 0 || coord.x >= texSize.x || coord.y >= texSize.y)
  88. {
  89. continue;
  90. }
  91. val += g_inTex[coord];
  92. }
  93. }
  94. g_outTex[svDispatchThreadId] = val;
  95. })";
  96. const CString vertShaderSrc = R"(
  97. struct Consts
  98. {
  99. float3 m_worldPosition;
  100. float m_scale;
  101. float4x4 m_viewProjMat;
  102. };
  103. #if defined(__spirv__)
  104. [[vk::push_constant]] ConstantBuffer<Consts> g_consts;
  105. #else
  106. ConstantBuffer<Consts> g_consts : register(b0, space3000);
  107. #endif
  108. float4 main(float3 svPosition : POSITION) : SV_Position
  109. {
  110. return mul(g_consts.m_viewProjMat, float4(svPosition * g_consts.m_scale + g_consts.m_worldPosition, 1.0));
  111. })";
  112. const CString pixelShaderSrc = R"(
  113. float4 main() : SV_Target0
  114. {
  115. return float4(1.0, 0.0, 0.5, 0.0);
  116. })";
  117. const CString blitVertShader = R"(
  118. struct VertOut
  119. {
  120. float4 m_svPosition : SV_POSITION;
  121. float2 m_uv : TEXCOORD;
  122. };
  123. VertOut main(uint vertId : SV_VERTEXID)
  124. {
  125. const float2 coord = float2(vertId >> 1, vertId & 1);
  126. VertOut output;
  127. output.m_svPosition = float4(coord * float2(4.0, -4.0) + float2(-1.0, 1.0), 0.0, 1.0);
  128. output.m_uv = coord * 2.0f;
  129. return output;
  130. })";
  131. const CString blitPixelShader = R"(
  132. struct VertOut
  133. {
  134. float4 m_svPosition : SV_POSITION;
  135. float2 m_uv : TEXCOORD;
  136. };
  137. Texture2D g_inTex : register(t0);
  138. SamplerState g_sampler : register(s0);
  139. float4 main(VertOut input) : SV_Target0
  140. {
  141. return g_inTex.Sample(g_sampler, input.m_uv);
  142. })";
  143. ShaderProgramPtr compProg = createComputeProg(computeShaderSrc);
  144. ShaderProgramPtr graphicsProg = createVertFragProg(vertShaderSrc, pixelShaderSrc);
  145. ShaderProgramPtr blitProg = createVertFragProg(blitVertShader, blitPixelShader);
  146. DynamicArray<Vec3> positions;
  147. DynamicArray<UVec3> indices;
  148. generateSphere(positions, indices, 50, 50);
  149. BufferPtr posBuff = createBuffer(BufferUsageBit::kVertexOrIndex, ConstWeakArray(positions), "PosBuffer");
  150. BufferPtr indexBuff = createBuffer(BufferUsageBit::kVertexOrIndex, ConstWeakArray(indices), "IdxBuffer");
  151. TextureInitInfo texInit("Tex");
  152. texInit.m_width = texInit.m_height = 2048;
  153. texInit.m_format = Format::kR32G32B32A32_Sfloat;
  154. texInit.m_usage = TextureUsageBit::kUavCompute;
  155. TexturePtr inTex = createTexture2d(texInit, Vec4(0.5f));
  156. TexturePtr outTex = createTexture2d(texInit, Vec4(0.1f));
  157. {
  158. CommandBufferInitInfo cinit;
  159. cinit.m_flags = CommandBufferFlag::kGeneralWork | CommandBufferFlag::kSmallBatch;
  160. CommandBufferPtr cmdb = GrManager::getSingleton().newCommandBuffer(cinit);
  161. const TextureBarrierInfo barrier2 = {TextureView(inTex.get(), TextureSubresourceDesc::all()), TextureUsageBit::kCopyDestination,
  162. TextureUsageBit::kUavCompute};
  163. cmdb->setPipelineBarrier({&barrier2, 1}, {}, {});
  164. cmdb->endRecording();
  165. FencePtr fence;
  166. GrManager::getSingleton().submit(cmdb.get(), {}, &fence);
  167. fence->clientWait(kMaxSecond);
  168. }
  169. TextureInitInfo texInit2("RT");
  170. texInit2.m_width = texInit2.m_height = windowSize;
  171. texInit2.m_format = Format::kR32G32B32A32_Sfloat;
  172. texInit2.m_usage = TextureUsageBit::kRtvDsvWrite | TextureUsageBit::kSrvPixel;
  173. TexturePtr rtTex = createTexture2d(texInit2, Vec4(0.5f));
  174. SamplerInitInfo samplerInit("sampler");
  175. SamplerPtr sampler = GrManager::getSingleton().newSampler(samplerInit);
  176. Array<TimestampQueryPtr, 2> startTimestamps = {GrManager::getSingleton().newTimestampQuery(), GrManager::getSingleton().newTimestampQuery()};
  177. TimestampQueryPtr endTimestamp = GrManager::getSingleton().newTimestampQuery();
  178. FencePtr finalFence;
  179. const U32 iterationCount = 1000;
  180. for(U32 i = 0; i < iterationCount; ++i)
  181. {
  182. ANKI_TEST_EXPECT_NO_ERR(Input::getSingleton().handleEvents());
  183. TexturePtr presentTex = GrManager::getSingleton().acquireNextPresentableTexture();
  184. // Init command buffers
  185. CommandBufferInitInfo cinit;
  186. cinit.m_flags = CommandBufferFlag::kGeneralWork | CommandBufferFlag::kSmallBatch;
  187. CommandBufferPtr gfxCmdb = GrManager::getSingleton().newCommandBuffer(cinit);
  188. CommandBufferPtr compCmdb;
  189. if(useAsyncQueue)
  190. {
  191. CommandBufferInitInfo cinit;
  192. cinit.m_flags = CommandBufferFlag::kComputeWork | CommandBufferFlag::kSmallBatch;
  193. compCmdb = GrManager::getSingleton().newCommandBuffer(cinit);
  194. }
  195. else
  196. {
  197. compCmdb = gfxCmdb;
  198. }
  199. CommandBufferPtr blitCmdb = GrManager::getSingleton().newCommandBuffer(cinit);
  200. // Barriers
  201. {
  202. const TextureBarrierInfo rtBarrier = {TextureView(rtTex.get(), TextureSubresourceDesc::all()), TextureUsageBit::kNone,
  203. TextureUsageBit::kRtvDsvWrite};
  204. gfxCmdb->setPipelineBarrier({&rtBarrier, 1}, {}, {});
  205. const TextureBarrierInfo uavBarrier = {TextureView(outTex.get(), TextureSubresourceDesc::all()), TextureUsageBit::kNone,
  206. TextureUsageBit::kUavCompute};
  207. compCmdb->setPipelineBarrier({&uavBarrier, 1}, {}, {});
  208. const TextureBarrierInfo blitBarrier = {TextureView(presentTex.get(), TextureSubresourceDesc::all()), TextureUsageBit::kNone,
  209. TextureUsageBit::kRtvDsvWrite};
  210. blitCmdb->setPipelineBarrier({&blitBarrier, 1}, {}, {});
  211. }
  212. // Compute dispatch
  213. {
  214. if(i == 0)
  215. {
  216. compCmdb->writeTimestamp(startTimestamps[0].get());
  217. }
  218. compCmdb->bindShaderProgram(compProg.get());
  219. compCmdb->bindUav(0, 0, TextureView(inTex.get(), TextureSubresourceDesc::all()));
  220. compCmdb->bindUav(1, 0, TextureView(outTex.get(), TextureSubresourceDesc::all()));
  221. compCmdb->dispatchCompute(inTex->getWidth() / 8, inTex->getHeight() / 8, 1);
  222. }
  223. // Draw spheres
  224. {
  225. if(i == 0)
  226. {
  227. compCmdb->writeTimestamp(startTimestamps[1].get());
  228. }
  229. RenderTarget rt;
  230. rt.m_textureView = TextureView(rtTex.get(), TextureSubresourceDesc::all());
  231. rt.m_loadOperation = RenderTargetLoadOperation::kClear;
  232. rt.m_clearValue.m_colorf = {getRandomRange(0.0f, 1.0f), getRandomRange(0.0f, 1.0f), getRandomRange(0.0f, 1.0f), 1.0f};
  233. gfxCmdb->beginRenderPass({rt});
  234. gfxCmdb->bindVertexBuffer(0, BufferView(posBuff.get()), sizeof(Vec3));
  235. gfxCmdb->setVertexAttribute(VertexAttributeSemantic::kPosition, 0, Format::kR32G32B32_Sfloat, 0);
  236. gfxCmdb->bindIndexBuffer(BufferView(indexBuff.get()), IndexType::kU32);
  237. gfxCmdb->bindShaderProgram(graphicsProg.get());
  238. gfxCmdb->setViewport(0, 0, windowSize, windowSize);
  239. struct Consts
  240. {
  241. Vec3 m_worldPosition;
  242. F32 m_scale;
  243. Mat4 m_viewProjMat;
  244. } consts;
  245. constexpr F32 orthoHalfSize = 10.0f;
  246. constexpr F32 orthoSize = orthoHalfSize * 2.0f;
  247. const Mat4 viewMat = Mat4::getIdentity().invert();
  248. const Mat4 projMat =
  249. Mat4::calculateOrthographicProjectionMatrix(orthoHalfSize, -orthoHalfSize, orthoHalfSize, -orthoHalfSize, 0.1f, 200.0f);
  250. consts.m_viewProjMat = projMat * viewMat;
  251. consts.m_scale = 0.07f;
  252. for(U32 x = 0; x < spheresToDrawPerDimension; ++x)
  253. {
  254. for(U32 y = 0; y < spheresToDrawPerDimension; ++y)
  255. {
  256. consts.m_worldPosition = Vec3(F32(x) / (spheresToDrawPerDimension - 1) * orthoSize - orthoHalfSize,
  257. F32(y) / (spheresToDrawPerDimension - 1) * orthoSize - orthoHalfSize, -1.0f);
  258. gfxCmdb->setFastConstants(&consts, sizeof(consts));
  259. gfxCmdb->drawIndexed(PrimitiveTopology::kTriangles, U32(indexBuff->getSize() / sizeof(U32)));
  260. }
  261. }
  262. gfxCmdb->endRenderPass();
  263. }
  264. // Blit
  265. {
  266. const TextureBarrierInfo blitBarrier = {TextureView(rtTex.get(), TextureSubresourceDesc::all()), TextureUsageBit::kRtvDsvWrite,
  267. TextureUsageBit::kSrvPixel};
  268. blitCmdb->setPipelineBarrier({&blitBarrier, 1}, {}, {});
  269. RenderTarget rt;
  270. rt.m_textureView = TextureView(presentTex.get(), TextureSubresourceDesc::all());
  271. rt.m_loadOperation = RenderTargetLoadOperation::kDontCare;
  272. rt.m_clearValue.m_colorf = {getRandomRange(0.0f, 1.0f), getRandomRange(0.0f, 1.0f), getRandomRange(0.0f, 1.0f), 1.0f};
  273. blitCmdb->beginRenderPass({rt});
  274. blitCmdb->bindShaderProgram(blitProg.get());
  275. blitCmdb->bindSrv(0, 0, TextureView(rtTex.get(), TextureSubresourceDesc::all()));
  276. blitCmdb->bindSampler(0, 0, sampler.get());
  277. blitCmdb->setViewport(0, 0, windowSize, windowSize);
  278. blitCmdb->draw(PrimitiveTopology::kTriangles, 3);
  279. blitCmdb->endRenderPass();
  280. const TextureBarrierInfo presentBarrier = {TextureView(presentTex.get(), TextureSubresourceDesc::all()),
  281. TextureUsageBit::kRtvDsvWrite, TextureUsageBit::kPresent};
  282. blitCmdb->setPipelineBarrier({&presentBarrier, 1}, {}, {});
  283. if(i == iterationCount - 1)
  284. {
  285. compCmdb->writeTimestamp(endTimestamp.get());
  286. }
  287. }
  288. gfxCmdb->endRecording();
  289. blitCmdb->endRecording();
  290. if(useAsyncQueue)
  291. {
  292. compCmdb->endRecording();
  293. }
  294. if(useAsyncQueue)
  295. {
  296. WeakArray<Fence*> firstWaveWaitFences;
  297. Array<Fence*, 1> arr;
  298. if(finalFence.isCreated())
  299. {
  300. arr = {finalFence.get()};
  301. firstWaveWaitFences = {arr};
  302. }
  303. FencePtr fence2;
  304. GrManager::getSingleton().submit(compCmdb.get(), firstWaveWaitFences, &fence2);
  305. FencePtr fence1;
  306. GrManager::getSingleton().submit(gfxCmdb.get(), firstWaveWaitFences, &fence1);
  307. Array<Fence*, 2> waitFences = {{fence1.get(), fence2.get()}};
  308. GrManager::getSingleton().submit(blitCmdb.get(), {waitFences}, &finalFence);
  309. }
  310. else
  311. {
  312. GrManager::getSingleton().submit(gfxCmdb.get());
  313. GrManager::getSingleton().submit(blitCmdb.get(), {}, &finalFence);
  314. }
  315. GrManager::getSingleton().swapBuffers();
  316. }
  317. finalFence->clientWait(kMaxSecond);
  318. Array<Second, 2> startTime;
  319. ANKI_TEST_EXPECT_EQ(startTimestamps[0]->getResult(startTime[0]), TimestampQueryResult::kAvailable);
  320. ANKI_TEST_EXPECT_EQ(startTimestamps[1]->getResult(startTime[1]), TimestampQueryResult::kAvailable);
  321. Second endTime;
  322. ANKI_TEST_EXPECT_EQ(endTimestamp->getResult(endTime), TimestampQueryResult::kAvailable);
  323. ANKI_TEST_LOGI("GPU time %f\n", endTime - min(startTime[0], startTime[1]));
  324. }
  325. Input::freeSingleton();
  326. GrManager::freeSingleton();
  327. NativeWindow::freeSingleton();
  328. ShaderCompilerMemoryPool::freeSingleton();
  329. DefaultMemoryPool::freeSingleton();
  330. }