LightBin.cpp 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852
  1. // Copyright (C) 2009-2018, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. #include <anki/renderer/LightBin.h>
  6. #include <anki/renderer/RenderQueue.h>
  7. #include <anki/core/Trace.h>
  8. #include <anki/util/ThreadPool.h>
  9. #include <anki/collision/Sphere.h>
  10. #include <anki/collision/Frustum.h>
  11. namespace anki
  12. {
  13. /// This should be the number of light types. For now it's spots & points & probes & decals.
  14. const U SIZE_IDX_COUNT = 4;
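// Shader structs and block representations. For documentation see the shaders.
// NOTE(review): this comment used to say that all positions and directions are in view space, but the writers in this
// file copy the queue elements' m_worldPosition / m_worldTransform values unchanged. Confirm which space the shaders
// expect.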
  17. class ShaderCluster
  18. {
  19. public:
  20. U32 m_firstIdx;
  21. };
  22. class ShaderPointLight
  23. {
  24. public:
  25. Vec4 m_posRadius;
  26. Vec4 m_diffuseColorTileSize;
  27. Vec4 m_specularColorRadius;
  28. UVec4 m_atlasTilesPad2;
  29. };
  30. class ShaderSpotLight
  31. {
  32. public:
  33. Vec4 m_posRadius;
  34. Vec4 m_diffuseColorShadowmapId;
  35. Vec4 m_specularColorRadius;
  36. Vec4 m_lightDir;
  37. Vec4 m_outerCosInnerCos;
  38. Mat4 m_texProjectionMat; ///< Texture projection matrix
  39. };
  40. class ShaderProbe
  41. {
  42. public:
  43. Vec3 m_pos;
  44. F32 m_radiusSq;
  45. F32 m_cubemapIndex;
  46. U32 _m_pading[3];
  47. ShaderProbe()
  48. {
  49. // To avoid warnings
  50. _m_pading[0] = _m_pading[1] = _m_pading[2] = 0;
  51. }
  52. };
  53. class ShaderDecal
  54. {
  55. public:
  56. Vec4 m_diffUv;
  57. Vec4 m_normRoughnessUv;
  58. Mat4 m_texProjectionMat;
  59. Vec4 m_blendFactors;
  60. };
  61. static const U MAX_TYPED_LIGHTS_PER_CLUSTER = 16;
  62. static const U MAX_PROBES_PER_CLUSTER = 12;
  63. static const U MAX_DECALS_PER_CLUSTER = 8;
  64. static const F32 INVALID_TEXTURE_INDEX = -1.0;
  65. class ClusterLightIndex
  66. {
  67. public:
  68. ClusterLightIndex()
  69. {
  70. // Do nothing. No need to initialize
  71. }
  72. U getIndex() const
  73. {
  74. return m_index;
  75. }
  76. void setIndex(U i)
  77. {
  78. ANKI_ASSERT(i <= MAX_U16);
  79. m_index = i;
  80. }
  81. private:
  82. U16 m_index;
  83. };
  84. static Bool operator<(const ClusterLightIndex& a, const ClusterLightIndex& b)
  85. {
  86. return a.getIndex() < b.getIndex();
  87. }
  88. /// Store the probe radius for sorting the indices.
  89. /// WARNING: Keep it as small as possible, that's why the members are U16
  90. class ClusterProbeIndex
  91. {
  92. public:
  93. ClusterProbeIndex()
  94. {
  95. // Do nothing. No need to initialize
  96. }
  97. U getIndex() const
  98. {
  99. return m_index;
  100. }
  101. void setIndex(U i)
  102. {
  103. ANKI_ASSERT(i <= MAX_U16);
  104. m_index = i;
  105. }
  106. F32 getProbeRadius() const
  107. {
  108. return F32(m_probeRadius) / F32(MAX_U16) * F32(MAX_PROBE_RADIUS);
  109. }
  110. void setProbeRadius(F32 r)
  111. {
  112. ANKI_ASSERT(r < MAX_PROBE_RADIUS);
  113. m_probeRadius = r / F32(MAX_PROBE_RADIUS) * F32(MAX_U16);
  114. }
  115. Bool operator<(const ClusterProbeIndex& b) const
  116. {
  117. ANKI_ASSERT(m_probeRadius > 0 && b.m_probeRadius > 0);
  118. return (m_probeRadius != b.m_probeRadius) ? (m_probeRadius > b.m_probeRadius) : (m_index < b.m_index);
  119. }
  120. private:
  121. static const U MAX_PROBE_RADIUS = 1000;
  122. U16 m_index;
  123. U16 m_probeRadius;
  124. };
  125. static_assert(sizeof(ClusterProbeIndex) == sizeof(U16) * 2, "Because we memcmp");
  126. /// WARNING: Keep it as small as possible. The number of clusters is huge
  127. class alignas(U32) ClusterData
  128. {
  129. public:
  130. Atomic<U8> m_pointCount;
  131. Atomic<U8> m_spotCount;
  132. Atomic<U8> m_probeCount;
  133. Atomic<U8> m_decalCount;
  134. Array<ClusterLightIndex, MAX_TYPED_LIGHTS_PER_CLUSTER> m_pointIds;
  135. Array<ClusterLightIndex, MAX_TYPED_LIGHTS_PER_CLUSTER> m_spotIds;
  136. Array<ClusterProbeIndex, MAX_PROBES_PER_CLUSTER> m_probeIds;
  137. Array<ClusterLightIndex, MAX_DECALS_PER_CLUSTER> m_decalIds;
  138. ClusterData()
  139. {
  140. // Do nothing. No need to initialize
  141. }
  142. void reset()
  143. {
  144. // Set the counts to zero and try to be faster
  145. *reinterpret_cast<U32*>(&m_pointCount) = 0;
  146. }
  147. void normalizeCounts()
  148. {
  149. normalize(m_pointCount, MAX_TYPED_LIGHTS_PER_CLUSTER, "point lights");
  150. normalize(m_spotCount, MAX_TYPED_LIGHTS_PER_CLUSTER, "spot lights");
  151. normalize(m_probeCount, MAX_PROBES_PER_CLUSTER, "probes");
  152. normalize(m_decalCount, MAX_DECALS_PER_CLUSTER, "decals");
  153. }
  154. void sortLightIds()
  155. {
  156. const U pointCount = m_pointCount.get();
  157. if(pointCount > 1)
  158. {
  159. std::sort(&m_pointIds[0], &m_pointIds[0] + pointCount);
  160. }
  161. const U spotCount = m_spotCount.get();
  162. if(spotCount > 1)
  163. {
  164. std::sort(&m_spotIds[0], &m_spotIds[0] + spotCount);
  165. }
  166. const U probeCount = m_probeCount.get();
  167. if(probeCount > 1)
  168. {
  169. std::sort(m_probeIds.getBegin(), m_probeIds.getBegin() + probeCount);
  170. }
  171. const U decalCount = m_decalCount.get();
  172. if(decalCount > 1)
  173. {
  174. std::sort(&m_decalIds[0], &m_decalIds[0] + decalCount);
  175. }
  176. }
  177. Bool operator==(const ClusterData& b) const
  178. {
  179. const U pointCount = m_pointCount.get();
  180. const U spotCount = m_spotCount.get();
  181. const U probeCount = m_probeCount.get();
  182. const U decalCount = m_decalCount.get();
  183. const U pointCount2 = b.m_pointCount.get();
  184. const U spotCount2 = b.m_spotCount.get();
  185. const U probeCount2 = b.m_probeCount.get();
  186. const U decalCount2 = b.m_decalCount.get();
  187. if(pointCount != pointCount2 || spotCount != spotCount2 || probeCount != probeCount2
  188. || decalCount != decalCount2)
  189. {
  190. return false;
  191. }
  192. if(pointCount > 0)
  193. {
  194. if(memcmp(&m_pointIds[0], &b.m_pointIds[0], sizeof(m_pointIds[0]) * pointCount) != 0)
  195. {
  196. return false;
  197. }
  198. }
  199. if(spotCount > 0)
  200. {
  201. if(memcmp(&m_spotIds[0], &b.m_spotIds[0], sizeof(m_spotIds[0]) * spotCount) != 0)
  202. {
  203. return false;
  204. }
  205. }
  206. if(probeCount > 0)
  207. {
  208. if(memcmp(&m_probeIds[0], &b.m_probeIds[0], sizeof(b.m_probeIds[0]) * probeCount) != 0)
  209. {
  210. return false;
  211. }
  212. }
  213. if(decalCount > 0)
  214. {
  215. if(memcmp(&m_decalIds[0], &b.m_decalIds[0], sizeof(b.m_decalIds[0]) * decalCount) != 0)
  216. {
  217. return false;
  218. }
  219. }
  220. return true;
  221. }
  222. private:
  223. static void normalize(Atomic<U8>& count, const U maxCount, CString what)
  224. {
  225. U8 a = count.get();
  226. count.set(a % maxCount);
  227. if(ANKI_UNLIKELY(a >= maxCount))
  228. {
  229. ANKI_R_LOGW("Increase cluster limit: %s", &what[0]);
  230. }
  231. }
  232. };
  233. /// Common data for all tasks.
  234. class LightBinContext
  235. {
  236. public:
  237. LightBinContext(StackAllocator<U8> alloc)
  238. : m_alloc(alloc)
  239. , m_tempClusters(alloc)
  240. {
  241. }
  242. Mat4 m_viewMat;
  243. Mat4 m_viewProjMat;
  244. Mat4 m_camTrf;
  245. U32 m_maxLightIndices = 0;
  246. Bool m_shadowsEnabled = false;
  247. StackAllocator<U8> m_alloc;
  248. // To fill the light buffers
  249. WeakArray<ShaderPointLight> m_pointLights;
  250. WeakArray<ShaderSpotLight> m_spotLights;
  251. WeakArray<ShaderProbe> m_probes;
  252. WeakArray<ShaderDecal> m_decals;
  253. WeakArray<U32> m_lightIds;
  254. WeakArray<ShaderCluster> m_clusters;
  255. Atomic<U32> m_pointLightsCount = {0};
  256. Atomic<U32> m_spotLightsCount = {0};
  257. Atomic<U32> m_probeCount = {0};
  258. Atomic<U32> m_decalCount = {0};
  259. // To fill the tile buffers
  260. DynamicArrayAuto<ClusterData> m_tempClusters;
  261. // To fill the light index buffer
  262. Atomic<U32> m_lightIdsCount = {0};
  263. // Misc
  264. WeakArray<const PointLightQueueElement> m_vPointLights;
  265. WeakArray<const SpotLightQueueElement> m_vSpotLights;
  266. WeakArray<const ReflectionProbeQueueElement> m_vProbes;
  267. WeakArray<const DecalQueueElement> m_vDecals;
  268. Atomic<U32> m_count = {0};
  269. Atomic<U32> m_count2 = {0};
  270. TextureViewPtr m_diffDecalTexAtlas;
  271. SpinLock m_diffDecalTexAtlasMtx;
  272. TextureViewPtr m_specularRoughnessDecalTexAtlas;
  273. SpinLock m_specularRoughnessDecalTexAtlasMtx;
  274. LightBin* m_bin = nullptr;
  275. };
  276. /// Write the lights to the GPU buffers.
  277. class WriteLightsTask : public ThreadPoolTask
  278. {
  279. public:
  280. LightBinContext* m_ctx = nullptr;
  281. Error operator()(U32 threadId, PtrSize threadsCount)
  282. {
  283. m_ctx->m_bin->binLights(threadId, threadsCount, *m_ctx);
  284. return Error::NONE;
  285. }
  286. };
  287. LightBin::LightBin(const GenericMemoryPoolAllocator<U8>& alloc,
  288. U clusterCountX,
  289. U clusterCountY,
  290. U clusterCountZ,
  291. ThreadPool* threadPool,
  292. StagingGpuMemoryManager* stagingMem)
  293. : m_alloc(alloc)
  294. , m_clusterCount(clusterCountX * clusterCountY * clusterCountZ)
  295. , m_threadPool(threadPool)
  296. , m_stagingMem(stagingMem)
  297. , m_barrier(threadPool->getThreadCount())
  298. {
  299. m_clusterer.init(alloc, clusterCountX, clusterCountY, clusterCountZ);
  300. }
  301. LightBin::~LightBin()
  302. {
  303. }
  304. Error LightBin::bin(const Mat4& viewMat,
  305. const Mat4& projMat,
  306. const Mat4& viewProjMat,
  307. const Mat4& camTrf,
  308. const RenderQueue& rqueue,
  309. StackAllocator<U8> frameAlloc,
  310. U maxLightIndices,
  311. Bool shadowsEnabled,
  312. LightBinOut& out)
  313. {
  314. ANKI_TRACE_SCOPED_EVENT(RENDERER_LIGHT_BINNING);
  315. // Prepare the clusterer
  316. ClustererPrepareInfo pinf;
  317. pinf.m_viewMat = viewMat;
  318. pinf.m_projMat = projMat;
  319. pinf.m_viewProjMat = viewProjMat;
  320. pinf.m_camTrf = Transform(camTrf);
  321. pinf.m_near = rqueue.m_cameraNear;
  322. pinf.m_far = rqueue.m_cameraFar;
  323. m_clusterer.prepare(*m_threadPool, pinf);
  324. //
  325. // Quickly get the lights
  326. //
  327. const U visiblePointLightsCount = rqueue.m_pointLights.getSize();
  328. const U visibleSpotLightsCount = rqueue.m_spotLights.getSize();
  329. const U visibleProbeCount = rqueue.m_reflectionProbes.getSize();
  330. const U visibleDecalCount = rqueue.m_decals.getSize();
  331. ANKI_TRACE_INC_COUNTER(RENDERER_LIGHTS, visiblePointLightsCount + visibleSpotLightsCount);
  332. //
  333. // Write the lights and tiles UBOs
  334. //
  335. Array<WriteLightsTask, ThreadPool::MAX_THREADS> tasks;
  336. LightBinContext ctx(frameAlloc);
  337. ctx.m_viewMat = viewMat;
  338. ctx.m_viewProjMat = viewProjMat;
  339. ctx.m_camTrf = camTrf;
  340. ctx.m_maxLightIndices = maxLightIndices;
  341. ctx.m_shadowsEnabled = shadowsEnabled;
  342. ctx.m_tempClusters.create(m_clusterCount);
  343. if(visiblePointLightsCount)
  344. {
  345. ShaderPointLight* data = static_cast<ShaderPointLight*>(m_stagingMem->allocateFrame(
  346. sizeof(ShaderPointLight) * visiblePointLightsCount, StagingGpuMemoryType::UNIFORM, out.m_pointLightsToken));
  347. ctx.m_pointLights = WeakArray<ShaderPointLight>(data, visiblePointLightsCount);
  348. ctx.m_vPointLights =
  349. WeakArray<const PointLightQueueElement>(rqueue.m_pointLights.getBegin(), visiblePointLightsCount);
  350. }
  351. else
  352. {
  353. out.m_pointLightsToken.markUnused();
  354. }
  355. if(visibleSpotLightsCount)
  356. {
  357. ShaderSpotLight* data = static_cast<ShaderSpotLight*>(m_stagingMem->allocateFrame(
  358. sizeof(ShaderSpotLight) * visibleSpotLightsCount, StagingGpuMemoryType::UNIFORM, out.m_spotLightsToken));
  359. ctx.m_spotLights = WeakArray<ShaderSpotLight>(data, visibleSpotLightsCount);
  360. ctx.m_vSpotLights =
  361. WeakArray<const SpotLightQueueElement>(rqueue.m_spotLights.getBegin(), visibleSpotLightsCount);
  362. }
  363. else
  364. {
  365. out.m_spotLightsToken.markUnused();
  366. }
  367. if(visibleProbeCount)
  368. {
  369. ShaderProbe* data = static_cast<ShaderProbe*>(m_stagingMem->allocateFrame(
  370. sizeof(ShaderProbe) * visibleProbeCount, StagingGpuMemoryType::UNIFORM, out.m_probesToken));
  371. ctx.m_probes = WeakArray<ShaderProbe>(data, visibleProbeCount);
  372. ctx.m_vProbes =
  373. WeakArray<const ReflectionProbeQueueElement>(rqueue.m_reflectionProbes.getBegin(), visibleProbeCount);
  374. }
  375. else
  376. {
  377. out.m_probesToken.markUnused();
  378. }
  379. if(visibleDecalCount)
  380. {
  381. ShaderDecal* data = static_cast<ShaderDecal*>(m_stagingMem->allocateFrame(
  382. sizeof(ShaderDecal) * visibleDecalCount, StagingGpuMemoryType::UNIFORM, out.m_decalsToken));
  383. ctx.m_decals = WeakArray<ShaderDecal>(data, visibleDecalCount);
  384. ctx.m_vDecals = WeakArray<const DecalQueueElement>(rqueue.m_decals.getBegin(), visibleDecalCount);
  385. }
  386. else
  387. {
  388. out.m_decalsToken.markUnused();
  389. }
  390. ctx.m_bin = this;
  391. // Get mem for clusters
  392. ShaderCluster* data = static_cast<ShaderCluster*>(m_stagingMem->allocateFrame(
  393. sizeof(ShaderCluster) * m_clusterCount, StagingGpuMemoryType::STORAGE, out.m_clustersToken));
  394. ctx.m_clusters = WeakArray<ShaderCluster>(data, m_clusterCount);
  395. // Allocate light IDs
  396. U32* data2 = static_cast<U32*>(m_stagingMem->allocateFrame(
  397. maxLightIndices * sizeof(U32), StagingGpuMemoryType::STORAGE, out.m_lightIndicesToken));
  398. ctx.m_lightIds = WeakArray<U32>(data2, maxLightIndices);
  399. // Fill the first part of light ids with invalid indices. Will be used for empty clusters
  400. for(U i = 0; i < SIZE_IDX_COUNT; ++i)
  401. {
  402. ctx.m_lightIds[i] = 0;
  403. }
  404. ctx.m_lightIdsCount.set(SIZE_IDX_COUNT);
  405. // Fire the async job
  406. for(U i = 0; i < m_threadPool->getThreadCount(); i++)
  407. {
  408. tasks[i].m_ctx = &ctx;
  409. m_threadPool->assignNewTask(i, &tasks[i]);
  410. }
  411. // Sync
  412. ANKI_CHECK(m_threadPool->waitForAllThreadsToFinish());
  413. out.m_diffDecalTexView = ctx.m_diffDecalTexAtlas;
  414. out.m_specularRoughnessDecalTexView = ctx.m_specularRoughnessDecalTexAtlas;
  415. return Error::NONE;
  416. }
  417. void LightBin::binLights(U32 threadId, PtrSize threadsCount, LightBinContext& ctx)
  418. {
  419. ANKI_TRACE_START_EVENT(RENDERER_LIGHT_BINNING);
  420. U clusterCount = m_clusterCount;
  421. PtrSize start, end;
  422. //
  423. // Initialize the temp clusters
  424. //
  425. ThreadPoolTask::choseStartEnd(threadId, threadsCount, clusterCount, start, end);
  426. for(U i = start; i < end; ++i)
  427. {
  428. ctx.m_tempClusters[i].reset();
  429. }
  430. ANKI_TRACE_STOP_EVENT(RENDERER_LIGHT_BINNING);
  431. m_barrier.wait();
  432. ANKI_TRACE_START_EVENT(RENDERER_LIGHT_BINNING);
  433. //
  434. // Iterate lights and probes and bin them
  435. //
  436. ClustererTestResult testResult;
  437. m_clusterer.initTestResults(ctx.m_alloc, testResult);
  438. U lightCount = ctx.m_vPointLights.getSize() + ctx.m_vSpotLights.getSize();
  439. U totalCount = lightCount + ctx.m_vProbes.getSize() + ctx.m_vDecals.getSize();
  440. const U TO_BIN_COUNT = 1;
  441. while((start = ctx.m_count2.fetchAdd(TO_BIN_COUNT)) < totalCount)
  442. {
  443. end = min<U>(start + TO_BIN_COUNT, totalCount);
  444. for(U j = start; j < end; ++j)
  445. {
  446. if(j >= lightCount + ctx.m_vDecals.getSize())
  447. {
  448. U i = j - (lightCount + ctx.m_vDecals.getSize());
  449. writeAndBinProbe(ctx.m_vProbes[i], ctx, testResult);
  450. }
  451. else if(j >= ctx.m_vPointLights.getSize() + ctx.m_vDecals.getSize())
  452. {
  453. U i = j - (ctx.m_vPointLights.getSize() + ctx.m_vDecals.getSize());
  454. writeAndBinSpotLight(ctx.m_vSpotLights[i], ctx, testResult);
  455. }
  456. else if(j >= ctx.m_vDecals.getSize())
  457. {
  458. U i = j - ctx.m_vDecals.getSize();
  459. writeAndBinPointLight(ctx.m_vPointLights[i], ctx, testResult);
  460. }
  461. else
  462. {
  463. U i = j;
  464. writeAndBinDecal(ctx.m_vDecals[i], ctx, testResult);
  465. }
  466. }
  467. }
  468. //
  469. // Last thing, update the real clusters
  470. //
  471. ANKI_TRACE_STOP_EVENT(RENDERER_LIGHT_BINNING);
  472. m_barrier.wait();
  473. ANKI_TRACE_START_EVENT(RENDERER_LIGHT_BINNING);
  474. // Run per cluster
  475. const U CLUSTER_GROUP = 16;
  476. while((start = ctx.m_count.fetchAdd(CLUSTER_GROUP)) < clusterCount)
  477. {
  478. end = min<U>(start + CLUSTER_GROUP, clusterCount);
  479. for(U i = start; i < end; ++i)
  480. {
  481. auto& cluster = ctx.m_tempClusters[i];
  482. cluster.normalizeCounts();
  483. const U countP = cluster.m_pointCount.get();
  484. const U countS = cluster.m_spotCount.get();
  485. const U countProbe = cluster.m_probeCount.get();
  486. const U countDecal = cluster.m_decalCount.get();
  487. const U count = countP + countS + countProbe + countDecal;
  488. auto& c = ctx.m_clusters[i];
  489. c.m_firstIdx = 0; // Point to the first empty indices
  490. // Early exit
  491. if(ANKI_UNLIKELY(count == 0))
  492. {
  493. continue;
  494. }
  495. // Check if the previous cluster contains the same lights as this one and if yes then merge them. This will
  496. // avoid allocating new IDs (and thrashing GPU caches).
  497. cluster.sortLightIds();
  498. if(i != start)
  499. {
  500. const auto& clusterB = ctx.m_tempClusters[i - 1];
  501. if(cluster == clusterB)
  502. {
  503. c.m_firstIdx = ctx.m_clusters[i - 1].m_firstIdx;
  504. continue;
  505. }
  506. }
  507. U offset = ctx.m_lightIdsCount.fetchAdd(count + SIZE_IDX_COUNT);
  508. U initialOffset = offset;
  509. (void)initialOffset;
  510. if(offset + count + SIZE_IDX_COUNT <= ctx.m_maxLightIndices)
  511. {
  512. c.m_firstIdx = offset;
  513. ctx.m_lightIds[offset++] = countDecal;
  514. for(U i = 0; i < countDecal; ++i)
  515. {
  516. ctx.m_lightIds[offset++] = cluster.m_decalIds[i].getIndex();
  517. }
  518. ctx.m_lightIds[offset++] = countP;
  519. for(U i = 0; i < countP; ++i)
  520. {
  521. ctx.m_lightIds[offset++] = cluster.m_pointIds[i].getIndex();
  522. }
  523. ctx.m_lightIds[offset++] = countS;
  524. for(U i = 0; i < countS; ++i)
  525. {
  526. ctx.m_lightIds[offset++] = cluster.m_spotIds[i].getIndex();
  527. }
  528. ctx.m_lightIds[offset++] = countProbe;
  529. for(U i = 0; i < countProbe; ++i)
  530. {
  531. ctx.m_lightIds[offset++] = cluster.m_probeIds[i].getIndex();
  532. }
  533. ANKI_ASSERT(offset - initialOffset == count + SIZE_IDX_COUNT);
  534. }
  535. else
  536. {
  537. ANKI_R_LOGW("Light IDs buffer too small");
  538. }
  539. } // end for
  540. } // end while
  541. ANKI_TRACE_STOP_EVENT(RENDERER_LIGHT_BINNING);
  542. }
  543. void LightBin::writeAndBinPointLight(
  544. const PointLightQueueElement& lightEl, LightBinContext& ctx, ClustererTestResult& testResult)
  545. {
  546. // Get GPU light
  547. I idx = ctx.m_pointLightsCount.fetchAdd(1);
  548. ShaderPointLight& slight = ctx.m_pointLights[idx];
  549. slight.m_posRadius = Vec4(lightEl.m_worldPosition.xyz(), 1.0f / (lightEl.m_radius * lightEl.m_radius));
  550. slight.m_diffuseColorTileSize = lightEl.m_diffuseColor.xyz0();
  551. if(lightEl.m_shadowRenderQueues[0] == nullptr || !ctx.m_shadowsEnabled)
  552. {
  553. slight.m_diffuseColorTileSize.w() = INVALID_TEXTURE_INDEX;
  554. }
  555. else
  556. {
  557. slight.m_diffuseColorTileSize.w() = lightEl.m_atlasTileSize;
  558. slight.m_atlasTilesPad2 = UVec4(lightEl.m_atlasTiles.x(), lightEl.m_atlasTiles.y(), 0, 0);
  559. }
  560. slight.m_specularColorRadius = Vec4(lightEl.m_specularColor, lightEl.m_radius);
  561. // Now bin it
  562. Sphere sphere(lightEl.m_worldPosition.xyz0(), lightEl.m_radius);
  563. Aabb box;
  564. sphere.computeAabb(box);
  565. m_clusterer.bin(sphere, box, testResult);
  566. auto it = testResult.getClustersBegin();
  567. auto end = testResult.getClustersEnd();
  568. for(; it != end; ++it)
  569. {
  570. U x = (*it).x();
  571. U y = (*it).y();
  572. U z = (*it).z();
  573. U i = m_clusterer.getClusterCountX() * (z * m_clusterer.getClusterCountY() + y) + x;
  574. auto& cluster = ctx.m_tempClusters[i];
  575. i = cluster.m_pointCount.fetchAdd(1) % MAX_TYPED_LIGHTS_PER_CLUSTER;
  576. cluster.m_pointIds[i].setIndex(idx);
  577. }
  578. }
  579. void LightBin::writeAndBinSpotLight(
  580. const SpotLightQueueElement& lightEl, LightBinContext& ctx, ClustererTestResult& testResult)
  581. {
  582. I idx = ctx.m_spotLightsCount.fetchAdd(1);
  583. ShaderSpotLight& light = ctx.m_spotLights[idx];
  584. F32 shadowmapIndex = INVALID_TEXTURE_INDEX;
  585. if(lightEl.hasShadow() && ctx.m_shadowsEnabled)
  586. {
  587. // bias * proj_l * view_l
  588. light.m_texProjectionMat = lightEl.m_textureMatrix;
  589. shadowmapIndex = 1.0f; // Just set a value
  590. }
  591. // Pos & dist
  592. light.m_posRadius =
  593. Vec4(lightEl.m_worldTransform.getTranslationPart().xyz(), 1.0f / (lightEl.m_distance * lightEl.m_distance));
  594. // Diff color and shadowmap ID now
  595. light.m_diffuseColorShadowmapId = Vec4(lightEl.m_diffuseColor, shadowmapIndex);
  596. // Spec color
  597. light.m_specularColorRadius = Vec4(lightEl.m_specularColor, lightEl.m_distance);
  598. // Light dir
  599. Vec3 lightDir = -lightEl.m_worldTransform.getRotationPart().getZAxis();
  600. light.m_lightDir = Vec4(lightDir, 0.0f);
  601. // Angles
  602. light.m_outerCosInnerCos = Vec4(cos(lightEl.m_outerAngle / 2.0f), cos(lightEl.m_innerAngle / 2.0f), 1.0f, 1.0f);
  603. // Bin lights
  604. PerspectiveFrustum shape(lightEl.m_outerAngle, lightEl.m_outerAngle, 0.01f, lightEl.m_distance);
  605. shape.transform(Transform(lightEl.m_worldTransform));
  606. Aabb box;
  607. shape.computeAabb(box);
  608. m_clusterer.binPerspectiveFrustum(shape, box, testResult);
  609. auto it = testResult.getClustersBegin();
  610. auto end = testResult.getClustersEnd();
  611. for(; it != end; ++it)
  612. {
  613. U x = (*it).x();
  614. U y = (*it).y();
  615. U z = (*it).z();
  616. U i = m_clusterer.getClusterCountX() * (z * m_clusterer.getClusterCountY() + y) + x;
  617. auto& cluster = ctx.m_tempClusters[i];
  618. i = cluster.m_spotCount.fetchAdd(1) % MAX_TYPED_LIGHTS_PER_CLUSTER;
  619. cluster.m_spotIds[i].setIndex(idx);
  620. }
  621. }
  622. void LightBin::writeAndBinProbe(
  623. const ReflectionProbeQueueElement& probeEl, LightBinContext& ctx, ClustererTestResult& testResult)
  624. {
  625. // Write it
  626. ShaderProbe probe;
  627. probe.m_pos = probeEl.m_worldPosition;
  628. probe.m_radiusSq = probeEl.m_radius * probeEl.m_radius;
  629. probe.m_cubemapIndex = probeEl.m_textureArrayIndex;
  630. U idx = ctx.m_probeCount.fetchAdd(1);
  631. ctx.m_probes[idx] = probe;
  632. // Bin it
  633. Sphere sphere(probeEl.m_worldPosition.xyz0(), probeEl.m_radius);
  634. Aabb box;
  635. sphere.computeAabb(box);
  636. m_clusterer.bin(sphere, box, testResult);
  637. auto it = testResult.getClustersBegin();
  638. auto end = testResult.getClustersEnd();
  639. for(; it != end; ++it)
  640. {
  641. U x = (*it).x();
  642. U y = (*it).y();
  643. U z = (*it).z();
  644. U i = m_clusterer.getClusterCountX() * (z * m_clusterer.getClusterCountY() + y) + x;
  645. auto& cluster = ctx.m_tempClusters[i];
  646. i = cluster.m_probeCount.fetchAdd(1) % MAX_PROBES_PER_CLUSTER;
  647. cluster.m_probeIds[i].setIndex(idx);
  648. cluster.m_probeIds[i].setProbeRadius(probeEl.m_radius);
  649. }
  650. }
  651. void LightBin::writeAndBinDecal(const DecalQueueElement& decalEl, LightBinContext& ctx, ClustererTestResult& testResult)
  652. {
  653. I idx = ctx.m_decalCount.fetchAdd(1);
  654. ShaderDecal& decal = ctx.m_decals[idx];
  655. TextureViewPtr atlas(const_cast<TextureView*>(decalEl.m_diffuseAtlas));
  656. Vec4 uv = decalEl.m_diffuseAtlasUv;
  657. decal.m_diffUv = Vec4(uv.x(), uv.y(), uv.z() - uv.x(), uv.w() - uv.y());
  658. decal.m_blendFactors[0] = decalEl.m_diffuseAtlasBlendFactor;
  659. {
  660. LockGuard<SpinLock> lock(ctx.m_diffDecalTexAtlasMtx);
  661. if(ctx.m_diffDecalTexAtlas && ctx.m_diffDecalTexAtlas != atlas)
  662. {
  663. ANKI_R_LOGF("All decals should have the same tex atlas");
  664. }
  665. ctx.m_diffDecalTexAtlas = atlas;
  666. }
  667. atlas.reset(const_cast<TextureView*>(decalEl.m_specularRoughnessAtlas));
  668. uv = decalEl.m_specularRoughnessAtlasUv;
  669. decal.m_normRoughnessUv = Vec4(uv.x(), uv.y(), uv.z() - uv.x(), uv.w() - uv.y());
  670. decal.m_blendFactors[1] = decalEl.m_specularRoughnessAtlasBlendFactor;
  671. if(atlas)
  672. {
  673. LockGuard<SpinLock> lock(ctx.m_specularRoughnessDecalTexAtlasMtx);
  674. if(ctx.m_specularRoughnessDecalTexAtlas && ctx.m_specularRoughnessDecalTexAtlas != atlas)
  675. {
  676. ANKI_R_LOGF("All decals should have the same tex atlas");
  677. }
  678. ctx.m_specularRoughnessDecalTexAtlas = atlas;
  679. }
  680. // bias * proj_l * view_
  681. decal.m_texProjectionMat = decalEl.m_textureMatrix;
  682. // Bin it
  683. Obb obb(decalEl.m_obbCenter.xyz0(), Mat3x4(decalEl.m_obbRotation), decalEl.m_obbExtend.xyz0());
  684. Aabb box;
  685. obb.computeAabb(box);
  686. m_clusterer.bin(obb, box, testResult);
  687. auto it = testResult.getClustersBegin();
  688. auto end = testResult.getClustersEnd();
  689. for(; it != end; ++it)
  690. {
  691. U x = (*it).x();
  692. U y = (*it).y();
  693. U z = (*it).z();
  694. U i = m_clusterer.getClusterCountX() * (z * m_clusterer.getClusterCountY() + y) + x;
  695. auto& cluster = ctx.m_tempClusters[i];
  696. i = cluster.m_decalCount.fetchAdd(1) % MAX_DECALS_PER_CLUSTER;
  697. cluster.m_decalIds[i].setIndex(idx);
  698. }
  699. }
  700. } // end namespace anki