// GrAsyncCompute.cpp (13 KB)
  1. // Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. #include <Tests/Framework/Framework.h>
  6. #include <Tests/Gr/GrCommon.h>
  7. #include <AnKi/Gr.h>
  8. #include <AnKi/Util/MemoryPool.h>
  9. #include <AnKi/Util/HighRezTimer.h>
  10. using namespace anki;
  11. static void generateSphere(DynamicArray<Vec3>& positions, DynamicArray<UVec3>& indices, U32 sliceCount, U32 stackCount)
  12. {
  13. const F32 stackCountf = F32(stackCount);
  14. const F32 sliceCountf = F32(sliceCount);
  15. positions.emplaceBack(0.0f, 1.0f, 0.0f);
  16. const U32 v0 = 0;
  17. // generate vertices per stack / slice
  18. for(F32 i = 0.0f; i < stackCountf - 1.0f; i += 1.0f)
  19. {
  20. const F32 phi = kPi * (i + 1.0f) / stackCountf;
  21. for(F32 j = 0.0f; j < sliceCountf; j += 1.0f)
  22. {
  23. const F32 theta = 2.0f * kPi * F32(j) / sliceCountf;
  24. const F32 x = sin(phi) * cos(theta);
  25. const F32 y = cos(phi);
  26. const F32 z = sin(phi) * sin(theta);
  27. positions.emplaceBack(x, y, z);
  28. }
  29. }
  30. // add bottom vertex
  31. positions.emplaceBack(0.0f, -1.0f, 0.0f);
  32. const U32 v1 = U32(positions.getSize() - 1);
  33. // add top / bottom triangles
  34. for(auto i = 0u; i < sliceCount; ++i)
  35. {
  36. auto i0 = i + 1;
  37. auto i1 = (i + 1) % sliceCount + 1;
  38. indices.emplaceBack(v0, i1, i0);
  39. i0 = i + sliceCount * (stackCount - 2) + 1;
  40. i1 = (i + 1) % sliceCount + sliceCount * (stackCount - 2) + 1;
  41. indices.emplaceBack(v1, i0, i1);
  42. }
  43. // add quads per stack / slice
  44. for(U32 j = 0u; j < stackCount - 2; j++)
  45. {
  46. const U32 j0 = j * sliceCount + 1;
  47. const U32 j1 = (j + 1) * sliceCount + 1;
  48. for(U32 i = 0u; i < sliceCount; i++)
  49. {
  50. const U32 i0 = j0 + i;
  51. const U32 i1 = j0 + (i + 1) % sliceCount;
  52. const U32 i2 = j1 + (i + 1) % sliceCount;
  53. const U32 i3 = j1 + i;
  54. indices.emplaceBack(i0, i1, i2);
  55. indices.emplaceBack(i0, i2, i3);
  56. }
  57. }
  58. }
  59. ANKI_TEST(Gr, AsyncComputeBench)
  60. {
  61. const Bool useAsyncQueue = true;
  62. const U32 spheresToDrawPerDimension = 100;
  63. const U32 windowSize = 512;
  64. g_cvarGrValidation = false; // TODO
  65. g_cvarGrDebugMarkers = false;
  66. g_cvarWindowWidth = windowSize;
  67. g_cvarWindowHeight = windowSize;
  68. g_cvarGrAsyncCompute = 0;
  69. DefaultMemoryPool::allocateSingleton(allocAligned, nullptr);
  70. ShaderCompilerMemoryPool::allocateSingleton(allocAligned, nullptr);
  71. initWindow();
  72. initGrManager();
  73. Input::allocateSingleton();
  74. {
  75. const CString computeShaderSrc = R"(
  76. RWTexture2D<float4> g_inTex : register(u0);
  77. RWTexture2D<float4> g_outTex : register(u1);
  78. [numthreads(8, 8, 1)] void main(uint2 svDispatchThreadId : SV_DISPATCHTHREADID)
  79. {
  80. uint2 texSize;
  81. g_inTex.GetDimensions(texSize.x, texSize.y);
  82. float4 val = 0.0;
  83. for(int x = -9; x <= 9; ++x)
  84. {
  85. for(int y = -9; y <= 9; ++y)
  86. {
  87. int2 coord = int2(svDispatchThreadId) + int2(x, y);
  88. if(coord.x < 0 || coord.y < 0 || coord.x >= texSize.x || coord.y >= texSize.y)
  89. {
  90. continue;
  91. }
  92. val += g_inTex[coord];
  93. }
  94. }
  95. g_outTex[svDispatchThreadId] = val;
  96. })";
  97. const CString vertShaderSrc = R"(
  98. struct Consts
  99. {
  100. float3 m_worldPosition;
  101. float m_scale;
  102. float4x4 m_viewProjMat;
  103. };
  104. #if defined(__spirv__)
  105. [[vk::push_constant]] ConstantBuffer<Consts> g_consts;
  106. #else
  107. ConstantBuffer<Consts> g_consts : register(b0, space3000);
  108. #endif
  109. float4 main(float3 svPosition : POSITION) : SV_POSITION
  110. {
  111. return mul(g_consts.m_viewProjMat, float4(svPosition * g_consts.m_scale + g_consts.m_worldPosition, 1.0));
  112. })";
  113. const CString pixelShaderSrc = R"(
  114. float4 main() : SV_TARGET0
  115. {
  116. return float4(1.0, 0.0, 0.5, 0.0);
  117. })";
  118. const CString blitVertShader = R"(
  119. struct VertOut
  120. {
  121. float4 m_svPosition : SV_POSITION;
  122. float2 m_uv : TEXCOORD;
  123. };
  124. VertOut main(uint vertId : SV_VERTEXID)
  125. {
  126. const float2 coord = float2(vertId >> 1, vertId & 1);
  127. VertOut output;
  128. output.m_svPosition = float4(coord * float2(4.0, -4.0) + float2(-1.0, 1.0), 0.0, 1.0);
  129. output.m_uv = coord * 2.0f;
  130. return output;
  131. })";
  132. const CString blitPixelShader = R"(
  133. struct VertOut
  134. {
  135. float4 m_svPosition : SV_POSITION;
  136. float2 m_uv : TEXCOORD;
  137. };
  138. Texture2D g_inTex : register(t0);
  139. SamplerState g_sampler : register(s0);
  140. float4 main(VertOut input) : SV_TARGET0
  141. {
  142. return g_inTex.Sample(g_sampler, input.m_uv);
  143. })";
  144. ShaderProgramPtr compProg = createComputeProg(computeShaderSrc);
  145. ShaderProgramPtr graphicsProg = createVertFragProg(vertShaderSrc, pixelShaderSrc);
  146. ShaderProgramPtr blitProg = createVertFragProg(blitVertShader, blitPixelShader);
  147. DynamicArray<Vec3> positions;
  148. DynamicArray<UVec3> indices;
  149. generateSphere(positions, indices, 50, 50);
  150. BufferPtr posBuff = createBuffer(BufferUsageBit::kVertexOrIndex, ConstWeakArray(positions), "PosBuffer");
  151. BufferPtr indexBuff = createBuffer(BufferUsageBit::kVertexOrIndex, ConstWeakArray(indices), "IdxBuffer");
  152. TextureInitInfo texInit("Tex");
  153. texInit.m_width = texInit.m_height = 2048;
  154. texInit.m_format = Format::kR32G32B32A32_Sfloat;
  155. texInit.m_usage = TextureUsageBit::kUavCompute;
  156. TexturePtr inTex = createTexture2d(texInit, Vec4(0.5f));
  157. TexturePtr outTex = createTexture2d(texInit, Vec4(0.1f));
  158. {
  159. CommandBufferInitInfo cinit;
  160. cinit.m_flags = CommandBufferFlag::kGeneralWork | CommandBufferFlag::kSmallBatch;
  161. CommandBufferPtr cmdb = GrManager::getSingleton().newCommandBuffer(cinit);
  162. const TextureBarrierInfo barrier2 = {TextureView(inTex.get(), TextureSubresourceDesc::all()), TextureUsageBit::kCopyDestination,
  163. TextureUsageBit::kUavCompute};
  164. cmdb->setPipelineBarrier({&barrier2, 1}, {}, {});
  165. cmdb->endRecording();
  166. FencePtr fence;
  167. GrManager::getSingleton().submit(cmdb.get(), {}, &fence);
  168. fence->clientWait(kMaxSecond);
  169. }
  170. TextureInitInfo texInit2("RT");
  171. texInit2.m_width = texInit2.m_height = windowSize;
  172. texInit2.m_format = Format::kR32G32B32A32_Sfloat;
  173. texInit2.m_usage = TextureUsageBit::kRtvDsvWrite | TextureUsageBit::kSrvPixel;
  174. TexturePtr rtTex = createTexture2d(texInit2, Vec4(0.5f));
  175. SamplerInitInfo samplerInit("sampler");
  176. SamplerPtr sampler = GrManager::getSingleton().newSampler(samplerInit);
  177. Array<TimestampQueryPtr, 2> startTimestamps = {GrManager::getSingleton().newTimestampQuery(), GrManager::getSingleton().newTimestampQuery()};
  178. TimestampQueryPtr endTimestamp = GrManager::getSingleton().newTimestampQuery();
  179. FencePtr finalFence;
  180. const U32 iterationCount = 1000;
  181. for(U32 i = 0; i < iterationCount; ++i)
  182. {
  183. ANKI_TEST_EXPECT_NO_ERR(Input::getSingleton().handleEvents());
  184. GrManager::getSingleton().beginFrame();
  185. TexturePtr presentTex = GrManager::getSingleton().acquireNextPresentableTexture();
  186. // Init command buffers
  187. CommandBufferInitInfo cinit;
  188. cinit.m_flags = CommandBufferFlag::kGeneralWork | CommandBufferFlag::kSmallBatch;
  189. CommandBufferPtr gfxCmdb = GrManager::getSingleton().newCommandBuffer(cinit);
  190. CommandBufferPtr compCmdb;
  191. if(useAsyncQueue)
  192. {
  193. CommandBufferInitInfo cinit;
  194. cinit.m_flags = CommandBufferFlag::kComputeWork | CommandBufferFlag::kSmallBatch;
  195. compCmdb = GrManager::getSingleton().newCommandBuffer(cinit);
  196. }
  197. else
  198. {
  199. compCmdb = gfxCmdb;
  200. }
  201. CommandBufferPtr blitCmdb = GrManager::getSingleton().newCommandBuffer(cinit);
  202. // Barriers
  203. {
  204. const TextureBarrierInfo rtBarrier = {TextureView(rtTex.get(), TextureSubresourceDesc::all()), TextureUsageBit::kNone,
  205. TextureUsageBit::kRtvDsvWrite};
  206. gfxCmdb->setPipelineBarrier({&rtBarrier, 1}, {}, {});
  207. const TextureBarrierInfo uavBarrier = {TextureView(outTex.get(), TextureSubresourceDesc::all()), TextureUsageBit::kNone,
  208. TextureUsageBit::kUavCompute};
  209. compCmdb->setPipelineBarrier({&uavBarrier, 1}, {}, {});
  210. const TextureBarrierInfo blitBarrier = {TextureView(presentTex.get(), TextureSubresourceDesc::all()), TextureUsageBit::kNone,
  211. TextureUsageBit::kRtvDsvWrite};
  212. blitCmdb->setPipelineBarrier({&blitBarrier, 1}, {}, {});
  213. }
  214. // Compute dispatch
  215. {
  216. if(i == 0)
  217. {
  218. compCmdb->writeTimestamp(startTimestamps[0].get());
  219. }
  220. compCmdb->bindShaderProgram(compProg.get());
  221. compCmdb->bindUav(0, 0, TextureView(inTex.get(), TextureSubresourceDesc::all()));
  222. compCmdb->bindUav(1, 0, TextureView(outTex.get(), TextureSubresourceDesc::all()));
  223. compCmdb->dispatchCompute(inTex->getWidth() / 8, inTex->getHeight() / 8, 1);
  224. }
  225. // Draw spheres
  226. {
  227. if(i == 0)
  228. {
  229. compCmdb->writeTimestamp(startTimestamps[1].get());
  230. }
  231. RenderTarget rt;
  232. rt.m_textureView = TextureView(rtTex.get(), TextureSubresourceDesc::all());
  233. rt.m_loadOperation = RenderTargetLoadOperation::kClear;
  234. rt.m_clearValue.m_colorf = {getRandomRange(0.0f, 1.0f), getRandomRange(0.0f, 1.0f), getRandomRange(0.0f, 1.0f), 1.0f};
  235. gfxCmdb->beginRenderPass({rt});
  236. gfxCmdb->bindVertexBuffer(0, BufferView(posBuff.get()), sizeof(Vec3));
  237. gfxCmdb->setVertexAttribute(VertexAttributeSemantic::kPosition, 0, Format::kR32G32B32_Sfloat, 0);
  238. gfxCmdb->bindIndexBuffer(BufferView(indexBuff.get()), IndexType::kU32);
  239. gfxCmdb->bindShaderProgram(graphicsProg.get());
  240. gfxCmdb->setViewport(0, 0, windowSize, windowSize);
  241. struct Consts
  242. {
  243. Vec3 m_worldPosition;
  244. F32 m_scale;
  245. Mat4 m_viewProjMat;
  246. } consts;
  247. constexpr F32 orthoHalfSize = 10.0f;
  248. constexpr F32 orthoSize = orthoHalfSize * 2.0f;
  249. const Mat4 viewMat = Mat4::getIdentity().invert();
  250. const Mat4 projMat =
  251. Mat4::calculateOrthographicProjectionMatrix(orthoHalfSize, -orthoHalfSize, orthoHalfSize, -orthoHalfSize, 0.1f, 200.0f);
  252. consts.m_viewProjMat = projMat * viewMat;
  253. consts.m_scale = 0.07f;
  254. for(U32 x = 0; x < spheresToDrawPerDimension; ++x)
  255. {
  256. for(U32 y = 0; y < spheresToDrawPerDimension; ++y)
  257. {
  258. consts.m_worldPosition = Vec3(F32(x) / (spheresToDrawPerDimension - 1) * orthoSize - orthoHalfSize,
  259. F32(y) / (spheresToDrawPerDimension - 1) * orthoSize - orthoHalfSize, -1.0f);
  260. gfxCmdb->setFastConstants(&consts, sizeof(consts));
  261. gfxCmdb->drawIndexed(PrimitiveTopology::kTriangles, U32(indexBuff->getSize() / sizeof(U32)));
  262. }
  263. }
  264. gfxCmdb->endRenderPass();
  265. }
  266. // Blit
  267. {
  268. const TextureBarrierInfo blitBarrier = {TextureView(rtTex.get(), TextureSubresourceDesc::all()), TextureUsageBit::kRtvDsvWrite,
  269. TextureUsageBit::kSrvPixel};
  270. blitCmdb->setPipelineBarrier({&blitBarrier, 1}, {}, {});
  271. RenderTarget rt;
  272. rt.m_textureView = TextureView(presentTex.get(), TextureSubresourceDesc::all());
  273. rt.m_loadOperation = RenderTargetLoadOperation::kDontCare;
  274. rt.m_clearValue.m_colorf = {getRandomRange(0.0f, 1.0f), getRandomRange(0.0f, 1.0f), getRandomRange(0.0f, 1.0f), 1.0f};
  275. blitCmdb->beginRenderPass({rt});
  276. blitCmdb->bindShaderProgram(blitProg.get());
  277. blitCmdb->bindSrv(0, 0, TextureView(rtTex.get(), TextureSubresourceDesc::all()));
  278. blitCmdb->bindSampler(0, 0, sampler.get());
  279. blitCmdb->setViewport(0, 0, windowSize, windowSize);
  280. blitCmdb->draw(PrimitiveTopology::kTriangles, 3);
  281. blitCmdb->endRenderPass();
  282. const TextureBarrierInfo presentBarrier = {TextureView(presentTex.get(), TextureSubresourceDesc::all()),
  283. TextureUsageBit::kRtvDsvWrite, TextureUsageBit::kPresent};
  284. blitCmdb->setPipelineBarrier({&presentBarrier, 1}, {}, {});
  285. if(i == iterationCount - 1)
  286. {
  287. compCmdb->writeTimestamp(endTimestamp.get());
  288. }
  289. }
  290. gfxCmdb->endRecording();
  291. blitCmdb->endRecording();
  292. if(useAsyncQueue)
  293. {
  294. compCmdb->endRecording();
  295. }
  296. if(useAsyncQueue)
  297. {
  298. WeakArray<Fence*> firstWaveWaitFences;
  299. Array<Fence*, 1> arr;
  300. if(finalFence.isCreated())
  301. {
  302. arr = {finalFence.get()};
  303. firstWaveWaitFences = {arr};
  304. }
  305. FencePtr fence2;
  306. GrManager::getSingleton().submit(compCmdb.get(), firstWaveWaitFences, &fence2);
  307. FencePtr fence1;
  308. GrManager::getSingleton().submit(gfxCmdb.get(), firstWaveWaitFences, &fence1);
  309. Array<Fence*, 2> waitFences = {{fence1.get(), fence2.get()}};
  310. GrManager::getSingleton().submit(blitCmdb.get(), {waitFences}, &finalFence);
  311. }
  312. else
  313. {
  314. GrManager::getSingleton().submit(gfxCmdb.get());
  315. GrManager::getSingleton().submit(blitCmdb.get(), {}, &finalFence);
  316. }
  317. GrManager::getSingleton().endFrame();
  318. }
  319. finalFence->clientWait(kMaxSecond);
  320. Array<Second, 2> startTime;
  321. ANKI_TEST_EXPECT_EQ(startTimestamps[0]->getResult(startTime[0]), TimestampQueryResult::kAvailable);
  322. ANKI_TEST_EXPECT_EQ(startTimestamps[1]->getResult(startTime[1]), TimestampQueryResult::kAvailable);
  323. Second endTime;
  324. ANKI_TEST_EXPECT_EQ(endTimestamp->getResult(endTime), TimestampQueryResult::kAvailable);
  325. ANKI_TEST_LOGI("GPU time %f\n", endTime - min(startTime[0], startTime[1]));
  326. }
  327. Input::freeSingleton();
  328. GrManager::freeSingleton();
  329. NativeWindow::freeSingleton();
  330. ShaderCompilerMemoryPool::freeSingleton();
  331. DefaultMemoryPool::freeSingleton();
  332. }