// ClusterBin.cpp
  1. // Copyright (C) 2009-2020, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. #include <anki/renderer/ClusterBin.h>
  6. #include <anki/renderer/RenderQueue.h>
  7. #include <anki/Collision.h>
  8. #include <anki/util/ThreadHive.h>
  9. #include <anki/util/Tracer.h>
  10. #include <anki/core/ConfigSet.h>
  11. namespace anki
  12. {
  13. /// Get a view space point.
  14. static Vec4 unproject(const F32 zVspace, const Vec2& ndc, const Vec4& unprojParams)
  15. {
  16. Vec4 view;
  17. view.x() = ndc.x() * unprojParams.x();
  18. view.y() = ndc.y() * unprojParams.y();
  19. view.z() = 1.0f;
  20. view.w() = 0.0f;
  21. return view * zVspace;
  22. }
  23. template<typename TShape>
  24. static Bool insideClusterFrustum(const Array<Plane, 4>& planeArr, const TShape& shape)
  25. {
  26. for(const Plane& plane : planeArr)
  27. {
  28. if(testPlane(plane, shape) < 0.0f)
  29. {
  30. return false;
  31. }
  32. }
  33. return true;
  34. }
/// Bin context. Shared state for one bin() invocation; read/written by all worker tasks.
class ClusterBin::BinCtx
{
public:
	ClusterBin* m_bin ANKI_DEBUG_CODE(= nullptr); ///< The owning ClusterBin.
	ClusterBinIn* m_in ANKI_DEBUG_CODE(= nullptr); ///< Input of the current bin() call.
	ClusterBinOut* m_out ANKI_DEBUG_CODE(= nullptr); ///< Output of the current bin() call.

	// GPU staging views filled by writeTypedObjectsToGpuBuffers().
	WeakArray<PointLight> m_pointLights;
	WeakArray<SpotLight> m_spotLights;
	WeakArray<ReflectionProbe> m_probes;
	WeakArray<Decal> m_decals;

	WeakArray<U32> m_lightIds; ///< The whole index buffer (m_indexCount entries) in staging memory.
	WeakArray<U32> m_clusters; ///< One entry per cluster, pointing into m_lightIds.

	Atomic<U32> m_tileIdxToProcess = {0}; ///< Work-stealing cursor: next tile to bin.
	/// Bump allocator over m_lightIds. Starts past the TYPED_OBJECT_COUNT indices reserved for empty clusters.
	Atomic<U32> m_allocatedIndexCount = {TYPED_OBJECT_COUNT};

	Vec4 m_unprojParams; ///< Perspective unprojection params, set by prepare().
	// NOTE(review): intentionally left uninitialized here; bin() always assigns it before any use.
	Bool m_clusterEdgesDirty;
};
/// Per-thread scratch state used while binning a single tile (a column of clusters along Z).
class ClusterBin::TileCtx
{
public:
	/// Book-keeping for one cluster while objects are being binned into it.
	struct ClusterMetaInfo
	{
		Array<U16, TYPED_OBJECT_COUNT> m_counts; ///< Number of binned objects per object type.
		U16 m_offset; ///< Total indices written so far for this cluster (sum of m_counts).
	};

	DynamicArrayAuto<Vec4> m_clusterEdgesWSpace; ///< (clusterCountZ + 1) * 4 corner points in world space.
	DynamicArrayAuto<Aabb> m_clusterBoxes; ///< One AABB per cluster of the tile.
	DynamicArrayAuto<Sphere> m_clusterSpheres; ///< One bounding sphere per cluster of the tile.
	DynamicArrayAuto<ClusterMetaInfo> m_clusterInfos; ///< One meta info per cluster of the tile.
	DynamicArrayAuto<U32> m_indices; ///< Flat scratch index storage, sliced per cluster by getClusterIndices().
	U32 m_clusterCountZ = MAX_U32; ///< Number of clusters along Z. MAX_U32 until initialized.

	TileCtx(StackAllocator<U8>& alloc)
		: m_clusterEdgesWSpace(alloc)
		, m_clusterBoxes(alloc)
		, m_clusterSpheres(alloc)
		, m_clusterInfos(alloc)
		, m_indices(alloc)
	{
	}

	/// Get the slice of m_indices that belongs to the given cluster along Z.
	WeakArray<U32> getClusterIndices(const U32 clusterZ)
	{
		ANKI_ASSERT(clusterZ < m_clusterCountZ);
		const U32 perClusterCount = m_indices.getSize() / m_clusterCountZ;
		return WeakArray<U32>(&m_indices[perClusterCount * clusterZ], perClusterCount);
	}
};
ClusterBin::~ClusterBin()
{
	// Free the cached per-tile cluster edges (allocated in init()).
	m_clusterEdges.destroy(m_alloc);
}
  86. void ClusterBin::init(HeapAllocator<U8> alloc, U32 clusterCountX, U32 clusterCountY, U32 clusterCountZ,
  87. const ConfigSet& cfg)
  88. {
  89. m_alloc = alloc;
  90. m_clusterCounts[0] = clusterCountX;
  91. m_clusterCounts[1] = clusterCountY;
  92. m_clusterCounts[2] = clusterCountZ;
  93. m_totalClusterCount = clusterCountX * clusterCountY * clusterCountZ;
  94. m_avgObjectsPerCluster = cfg.getNumberU32("r_avgObjectsPerCluster");
  95. // The actual indices per cluster are
  96. // - the object indices per cluster
  97. // - plus TYPED_OBJECT_COUNT-1 that is the offset per object type minus the first object type
  98. // - plus TYPED_OBJECT_COUNT the stopper dummy indices
  99. m_indexCount = m_totalClusterCount * (m_avgObjectsPerCluster + TYPED_OBJECT_COUNT - 1 + TYPED_OBJECT_COUNT);
  100. m_clusterEdges.create(m_alloc, m_clusterCounts[0] * m_clusterCounts[1] * (m_clusterCounts[2] + 1) * 4);
  101. }
/// Entry point: bin all visible objects of the render queue into the cluster grid,
/// writing the index and cluster buffers plus the typed object GPU buffers.
void ClusterBin::bin(ClusterBinIn& in, ClusterBinOut& out)
{
	ANKI_TRACE_SCOPED_EVENT(R_BIN_TO_CLUSTERS);

	// Setup the context shared by all worker tasks.
	BinCtx ctx;
	ctx.m_bin = this;
	ctx.m_in = &in;
	ctx.m_out = &out;

	prepare(ctx);

	// Cluster edges in view space only depend on the projection; recompute them
	// only when the unprojection params changed since the previous frame.
	if(ctx.m_unprojParams != m_prevUnprojParams)
	{
		ctx.m_clusterEdgesDirty = true;
		m_prevUnprojParams = ctx.m_unprojParams;
	}
	else
	{
		ctx.m_clusterEdgesDirty = false;
	}

	// Allocate indices
	U32* indices = static_cast<U32*>(ctx.m_in->m_stagingMem->allocateFrame(
		m_indexCount * sizeof(U32), StagingGpuMemoryType::STORAGE, ctx.m_out->m_indicesToken));
	ctx.m_lightIds = WeakArray<U32>(indices, m_indexCount);

	// Reserve some indices for empty clusters. These are zeroed so an empty
	// cluster can safely point at the start of the buffer.
	for(U i = 0; i < TYPED_OBJECT_COUNT; ++i)
	{
		indices[i] = 0;
	}

	// Allocate clusters
	U32* clusters = static_cast<U32*>(ctx.m_in->m_stagingMem->allocateFrame(
		sizeof(U32) * m_totalClusterCount, StagingGpuMemoryType::STORAGE, ctx.m_out->m_clustersToken));
	ctx.m_clusters = WeakArray<U32>(clusters, m_totalClusterCount);

	// Create task for writing GPU buffers. Runs once, concurrently with the binning tasks.
	Array<ThreadHiveTask, ThreadHive::MAX_THREADS + 1> tasks;
	tasks[0] = ANKI_THREAD_HIVE_TASK(
		{
			ANKI_TRACE_SCOPED_EVENT(R_WRITE_LIGHT_BUFFERS);
			self->m_bin->writeTypedObjectsToGpuBuffers(*self);
		},
		&ctx, nullptr, nullptr);

	// Create tasks for binning. Each worker grabs tiles via the atomic cursor
	// until every tile of the grid has been processed (work stealing).
	tasks[1] = ANKI_THREAD_HIVE_TASK(
		{
			ANKI_TRACE_SCOPED_EVENT(R_BIN_TO_CLUSTERS);
			BinCtx& ctx = *self;

			TileCtx tileCtx(ctx.m_in->m_tempAlloc);
			const U32 clusterCountZ = ctx.m_bin->m_clusterCounts[2];
			tileCtx.m_clusterEdgesWSpace.create((clusterCountZ + 1) * 4);
			tileCtx.m_clusterBoxes.create(clusterCountZ);
			tileCtx.m_clusterSpheres.create(clusterCountZ);
			tileCtx.m_indices.create(clusterCountZ * ctx.m_bin->m_avgObjectsPerCluster);
			tileCtx.m_clusterInfos.create(clusterCountZ);
			tileCtx.m_clusterCountZ = clusterCountZ;

			const U32 tileCount = ctx.m_bin->m_clusterCounts[0] * ctx.m_bin->m_clusterCounts[1];
			U32 tileIdx;
			while((tileIdx = ctx.m_tileIdxToProcess.fetchAdd(1)) < tileCount)
			{
				ctx.m_bin->binTile(tileIdx, ctx, tileCtx);
			}
		},
		&ctx, nullptr, nullptr);

	// Every remaining hive thread runs a copy of the binning task.
	for(U threadIdx = 1; threadIdx < in.m_threadHive->getThreadCount(); ++threadIdx)
	{
		tasks[threadIdx + 1] = tasks[1];
	}

	// Submit and wait
	in.m_threadHive->submitTasks(&tasks[0], in.m_threadHive->getThreadCount() + 1);
	in.m_threadHive->waitAllTasks();
}
  169. void ClusterBin::prepare(BinCtx& ctx)
  170. {
  171. const F32 near = ctx.m_in->m_renderQueue->m_cameraNear;
  172. const F32 far = ctx.m_in->m_renderQueue->m_cameraFar;
  173. const F32 calcNearOpt = (far - near) / F32(m_clusterCounts[2] * m_clusterCounts[2]);
  174. // Compute magic val 0
  175. // It's been used to calculate the 'k' of a cluster given the world position
  176. {
  177. // Given a distance 'd' from the camera's near plane in world space the 'k' split is calculated like:
  178. // k = sqrt(d / (f - n) * Cz2) (1)
  179. // where 'n' and 'f' are the near and far vals of the projection and Cz2 is the m_counts[2]^2
  180. // If the 'd' is not known and the world position instead is known then 'd' is the distance from that position
  181. // to the camera's near plane.
  182. // d = dot(Pn, W) - Po (2)
  183. // where 'Pn' is the plane's normal, 'Po' is the plane's offset and 'W' is the world position.
  184. // Substituting d from (2) in (1) we have:
  185. // k = sqrt((dot(Pn, W) - Po) / (f - n) * Cz2) =>
  186. // k = sqrt((dot(Pn, W) * Cz2 - Po * Cz2) / (f - n))
  187. // k = sqrt(dot(Pn, W) * Cz2 / (f - n) - Po * Cz2 / (f - n))
  188. // k = sqrt(dot(Pn * Cz2 / (f - n), W) - Po * Cz2 / (f - n))
  189. // If we assume that:
  190. // A = Pn * Cz2 / (f - n) and B = Po * Cz2 / (f - n)
  191. // Then:
  192. // k = sqrt(dot(A, W) - B)
  193. const Mat4& vp = ctx.m_in->m_renderQueue->m_viewProjectionMatrix;
  194. Plane nearPlane;
  195. extractClipPlane(vp, FrustumPlaneType::NEAR, nearPlane);
  196. Vec3 A = nearPlane.getNormal().xyz() * F32(m_clusterCounts[2] * m_clusterCounts[2]) / (far - near);
  197. F32 B = nearPlane.getOffset() * F32(m_clusterCounts[2] * m_clusterCounts[2]) / (far - near);
  198. ctx.m_out->m_shaderMagicValues.m_val0 = Vec4(A, B);
  199. }
  200. // Compute magic val 1
  201. {
  202. ctx.m_out->m_shaderMagicValues.m_val1.x() = calcNearOpt;
  203. ctx.m_out->m_shaderMagicValues.m_val1.y() = near;
  204. }
  205. // Unproj params
  206. ctx.m_unprojParams = ctx.m_in->m_renderQueue->m_projectionMatrix.extractPerspectiveUnprojectionParams();
  207. }
/// Bin all visible objects into the clusters of a single tile (one XY cell, all Z slices),
/// then write the per-cluster index lists and cluster headers into the staging buffers.
void ClusterBin::binTile(U32 tileIdx, BinCtx& ctx, TileCtx& tileCtx)
{
	ANKI_ASSERT(tileIdx < m_clusterCounts[0] * m_clusterCounts[1]);
	const U32 tileX = tileIdx % m_clusterCounts[0];
	const U32 tileY = tileIdx / m_clusterCounts[0];

	// Compute the tile's cluster edges in view space. These are cached in m_clusterEdges and
	// only recomputed when the projection changed (m_clusterEdgesDirty).
	WeakArray<Vec4> clusterEdgesVSpace(&m_clusterEdges[tileIdx * (m_clusterCounts[2] + 1) * 4],
		(m_clusterCounts[2] + 1) * 4);
	if(ctx.m_clusterEdgesDirty)
	{
		const Vec2 tileSize = 2.0f / Vec2(F32(m_clusterCounts[0]), F32(m_clusterCounts[1]));
		const Vec2 startNdc =
			Vec2(F32(tileX) / F32(m_clusterCounts[0]), F32(tileY) / F32(m_clusterCounts[1])) * 2.0f - 1.0f;

		const Vec4& unprojParams = ctx.m_unprojParams;
		// One quartet of corner points per Z plane (clusterCountZ + 1 planes).
		for(U32 clusterZ = 0; clusterZ < m_clusterCounts[2] + 1; ++clusterZ)
		{
			// Negated because view space looks down -Z.
			const F32 zNear = -computeClusterNear(ctx.m_out->m_shaderMagicValues, clusterZ);

			const U32 idx = clusterZ * 4;
			clusterEdgesVSpace[idx + 0] = unproject(zNear, startNdc, unprojParams).xyz1();
			clusterEdgesVSpace[idx + 1] = unproject(zNear, startNdc + Vec2(tileSize.x(), 0.0f), unprojParams).xyz1();
			clusterEdgesVSpace[idx + 2] = unproject(zNear, startNdc + tileSize, unprojParams).xyz1();
			clusterEdgesVSpace[idx + 3] = unproject(zNear, startNdc + Vec2(0.0f, tileSize.y()), unprojParams).xyz1();
		}
	}

	// Transform the tile's cluster edges to world space
	DynamicArrayAuto<Vec4>& clusterEdgesWSpace = tileCtx.m_clusterEdgesWSpace;
	for(U32 clusterZ = 0; clusterZ < m_clusterCounts[2] + 1; ++clusterZ)
	{
		const U32 idx = clusterZ * 4;
		clusterEdgesWSpace[idx + 0] = (ctx.m_in->m_renderQueue->m_cameraTransform * clusterEdgesVSpace[idx + 0]).xyz0();
		clusterEdgesWSpace[idx + 1] = (ctx.m_in->m_renderQueue->m_cameraTransform * clusterEdgesVSpace[idx + 1]).xyz0();
		clusterEdgesWSpace[idx + 2] = (ctx.m_in->m_renderQueue->m_cameraTransform * clusterEdgesVSpace[idx + 2]).xyz0();
		clusterEdgesWSpace[idx + 3] = (ctx.m_in->m_renderQueue->m_cameraTransform * clusterEdgesVSpace[idx + 3]).xyz0();
	}

	// Compute the tile frustum: 4 side planes built from the last two corner quartets
	// (the far-most planes), pointing inwards.
	Array<Plane, 4> frustumPlanes;
	const U32 lastQuartet = clusterEdgesWSpace.getSize() - 1 - 4;
	const U32 beforeLastQuartet = lastQuartet - 4;
	frustumPlanes[0].setFrom3Points(clusterEdgesWSpace[beforeLastQuartet + 0],
		clusterEdgesWSpace[beforeLastQuartet + 1], clusterEdgesWSpace[lastQuartet + 0]);
	frustumPlanes[1].setFrom3Points(clusterEdgesWSpace[beforeLastQuartet + 1],
		clusterEdgesWSpace[beforeLastQuartet + 2], clusterEdgesWSpace[lastQuartet + 2]);
	frustumPlanes[2].setFrom3Points(clusterEdgesWSpace[beforeLastQuartet + 2],
		clusterEdgesWSpace[beforeLastQuartet + 3], clusterEdgesWSpace[lastQuartet + 2]);
	frustumPlanes[3].setFrom3Points(clusterEdgesWSpace[beforeLastQuartet + 3],
		clusterEdgesWSpace[beforeLastQuartet + 0], clusterEdgesWSpace[lastQuartet + 0]);

	// Compute the cluster AABBs and spheres
	DynamicArrayAuto<Aabb>& clusterBoxes = tileCtx.m_clusterBoxes;
	DynamicArrayAuto<Sphere>& clusterSpheres = tileCtx.m_clusterSpheres;
	for(U32 clusterZ = 0; clusterZ < m_clusterCounts[2]; ++clusterZ)
	{
		// Compute an AABB and a sphere that contains the cluster. The 8 corners are the
		// 4 points of plane clusterZ plus the 4 points of plane clusterZ+1 (i goes up to 7).
		Vec4 aabbMin(MAX_F32, MAX_F32, MAX_F32, 0.0f);
		Vec4 aabbMax(MIN_F32, MIN_F32, MIN_F32, 0.0f);
		for(U32 i = 0; i < 8; ++i)
		{
			aabbMin = aabbMin.min(clusterEdgesWSpace[clusterZ * 4 + i]);
			aabbMax = aabbMax.max(clusterEdgesWSpace[clusterZ * 4 + i]);
		}

		clusterBoxes[clusterZ] = Aabb(aabbMin, aabbMax);

		const Vec4 sphereCenter = (aabbMin + aabbMax) / 2.0f;
		clusterSpheres[clusterZ] = Sphere(sphereCenter, (aabbMin - sphereCenter).getLength());
	}

	// Zero the infos
	memset(&tileCtx.m_clusterInfos[0], 0, tileCtx.m_clusterInfos.getSizeInBytes());

// Append object index 'i' to cluster 'clusterZ' and bump the per-type count. Expects 'i'
// and 'clusterZ' loop variables in scope; 'continue's out of the cluster loop when the
// per-cluster index budget is exhausted. No comments inside: backslash continuations.
#define ANKI_SET_IDX(typeIdx) \
	ClusterBin::TileCtx::ClusterMetaInfo& inf = tileCtx.m_clusterInfos[clusterZ]; \
	if(ANKI_UNLIKELY(U32(inf.m_offset) + 1 >= m_avgObjectsPerCluster)) \
	{ \
		ANKI_R_LOGW("Out of cluster indices. Increase r_avgObjectsPerCluster"); \
		continue; \
	} \
	tileCtx.getClusterIndices(clusterZ)[inf.m_offset++] = i; \
	++inf.m_counts[typeIdx]; \
	ANKI_ASSERT(inf.m_counts[typeIdx] <= m_avgObjectsPerCluster)

	// Point lights (type 0): coarse tile-frustum reject, then sphere vs cluster AABB.
	{
		Sphere lightSphere;
		for(U32 i = 0; i < ctx.m_in->m_renderQueue->m_pointLights.getSize(); ++i)
		{
			const PointLightQueueElement& plight = ctx.m_in->m_renderQueue->m_pointLights[i];
			lightSphere.setCenter(plight.m_worldPosition.xyz0());
			lightSphere.setRadius(plight.m_radius);

			if(!insideClusterFrustum(frustumPlanes, lightSphere))
			{
				continue;
			}

			for(U32 clusterZ = 0; clusterZ < m_clusterCounts[2]; ++clusterZ)
			{
				if(!testCollision(lightSphere, clusterBoxes[clusterZ]))
				{
					continue;
				}

				ANKI_SET_IDX(0);
			}
		}
	}

	// Spot lights (type 1): convex hull vs tile frustum, then cluster sphere vs light cone.
	{
		// 5 points: the eye plus the 4 far edges of the light's frustum.
		Array<Vec4, 5> lightEdges;
		lightEdges[0] = Vec4(0.0f); // Eye
		ConvexHullShape spotLightShape(&lightEdges[0], lightEdges.getSize());

		for(U32 i = 0; i < ctx.m_in->m_renderQueue->m_spotLights.getSize(); ++i)
		{
			const SpotLightQueueElement& slight = ctx.m_in->m_renderQueue->m_spotLights[i];

			computeEdgesOfFrustum(slight.m_distance, slight.m_outerAngle, slight.m_outerAngle, &lightEdges[1]);
			spotLightShape.setTransform(Transform(slight.m_worldTransform));

			if(!insideClusterFrustum(frustumPlanes, spotLightShape))
			{
				continue;
			}

			for(U32 clusterZ = 0; clusterZ < m_clusterCounts[2]; ++clusterZ)
			{
				// Cone pointing down the light's -Z axis.
				if(!testCollision(clusterSpheres[clusterZ],
					   Cone(slight.m_worldTransform.getTranslationPart().xyz0(),
						   -slight.m_worldTransform.getZAxis(), slight.m_distance, slight.m_outerAngle)))
				{
					continue;
				}

				ANKI_SET_IDX(1);
			}
		}
	}

	// Probes (type 2): AABB vs tile frustum, then AABB vs cluster AABB.
	{
		Aabb probeBox;
		for(U32 i = 0; i < ctx.m_in->m_renderQueue->m_reflectionProbes.getSize(); ++i)
		{
			const ReflectionProbeQueueElement& probe = ctx.m_in->m_renderQueue->m_reflectionProbes[i];
			probeBox.setMin(probe.m_aabbMin);
			probeBox.setMax(probe.m_aabbMax);

			if(!insideClusterFrustum(frustumPlanes, probeBox))
			{
				continue;
			}

			for(U32 clusterZ = 0; clusterZ < m_clusterCounts[2]; ++clusterZ)
			{
				if(!testCollision(probeBox, clusterBoxes[clusterZ]))
				{
					continue;
				}

				ANKI_SET_IDX(2);
			}
		}
	}

	// GI probes (type 3): same scheme as reflection probes.
	{
		Aabb probeBox;
		for(U32 i = 0; i < ctx.m_in->m_renderQueue->m_giProbes.getSize(); ++i)
		{
			const GlobalIlluminationProbeQueueElement& probe = ctx.m_in->m_renderQueue->m_giProbes[i];
			probeBox.setMin(probe.m_aabbMin);
			probeBox.setMax(probe.m_aabbMax);

			if(!insideClusterFrustum(frustumPlanes, probeBox))
			{
				continue;
			}

			for(U32 clusterZ = 0; clusterZ < m_clusterCounts[2]; ++clusterZ)
			{
				if(!testCollision(probeBox, clusterBoxes[clusterZ]))
				{
					continue;
				}

				ANKI_SET_IDX(3);
			}
		}
	}

	// Decals (type 4): OBB vs tile frustum, then OBB vs cluster AABB.
	{
		Obb decalBox;
		for(U32 i = 0; i < ctx.m_in->m_renderQueue->m_decals.getSize(); ++i)
		{
			const DecalQueueElement& decal = ctx.m_in->m_renderQueue->m_decals[i];
			decalBox.setCenter(decal.m_obbCenter.xyz0());
			decalBox.setRotation(Mat3x4(Vec3(0.0f), decal.m_obbRotation));
			decalBox.setExtend(decal.m_obbExtend.xyz0());

			if(!insideClusterFrustum(frustumPlanes, decalBox))
			{
				continue;
			}

			for(U32 clusterZ = 0; clusterZ < m_clusterCounts[2]; ++clusterZ)
			{
				if(!testCollision(decalBox, clusterBoxes[clusterZ]))
				{
					continue;
				}

				ANKI_SET_IDX(4);
			}
		}
	}

	// Fog volumes (type 5): either an AABB or a sphere shape, chosen per element.
	{
		for(U32 i = 0; i < ctx.m_in->m_renderQueue->m_fogDensityVolumes.getSize(); ++i)
		{
			const FogDensityQueueElement& fogVol = ctx.m_in->m_renderQueue->m_fogDensityVolumes[i];

			if(fogVol.m_isBox)
			{
				Aabb box;
				box.setMin(fogVol.m_aabbMin);
				box.setMax(fogVol.m_aabbMax);

				if(!insideClusterFrustum(frustumPlanes, box))
				{
					continue;
				}

				for(U32 clusterZ = 0; clusterZ < m_clusterCounts[2]; ++clusterZ)
				{
					if(!testCollision(box, clusterBoxes[clusterZ]))
					{
						continue;
					}

					ANKI_SET_IDX(5);
				}
			}
			else
			{
				Sphere sphere;
				sphere.setCenter(fogVol.m_sphereCenter.xyz0());
				sphere.setRadius(fogVol.m_sphereRadius);

				if(!insideClusterFrustum(frustumPlanes, sphere))
				{
					continue;
				}

				for(U32 clusterZ = 0; clusterZ < m_clusterCounts[2]; ++clusterZ)
				{
					if(!testCollision(sphere, clusterBoxes[clusterZ]))
					{
						continue;
					}

					ANKI_SET_IDX(5);
				}
			}
		}
	}

	// Upload the indices for all clusters of the tile. Per-cluster layout in the index
	// buffer: [TYPED_OBJECT_COUNT-1 offsets][type-0 indices][stop][type-1 indices][stop]...
	for(U32 clusterZ = 0; clusterZ < m_clusterCounts[2]; ++clusterZ)
	{
		WeakArray<U32> inIndices = tileCtx.getClusterIndices(clusterZ);
		const ClusterBin::TileCtx::ClusterMetaInfo& inf = tileCtx.m_clusterInfos[clusterZ];

		// Extra slots beyond the raw object indices: the offsets plus the stop markers.
		const U32 other = (TYPED_OBJECT_COUNT - 1) + TYPED_OBJECT_COUNT;
		const U32 indexCountPlusOther = inf.m_offset + other;
		ANKI_ASSERT(indexCountPlusOther <= m_avgObjectsPerCluster + other);
		ANKI_ASSERT(indexCountPlusOther >= other);

		// Write indices: reserve a contiguous range with the shared atomic bump allocator.
		const U32 firstIndex = ctx.m_allocatedIndexCount.fetchAdd(indexCountPlusOther);
		ANKI_ASSERT(firstIndex + indexCountPlusOther <= ctx.m_lightIds.getSize());
		WeakArray<U32> outIndices(&ctx.m_lightIds[firstIndex], indexCountPlusOther);

		// Write the offsets: absolute positions where each object type (after the first) starts.
		U32 offset = firstIndex + TYPED_OBJECT_COUNT - 1;
		for(U32 i = 1; i < TYPED_OBJECT_COUNT; ++i)
		{
			offset += inf.m_counts[i - 1] + 1; // Count plus the stop
			outIndices[i - 1] = offset;
		}

		// Write indices
		U32 outIndicesOffset = TYPED_OBJECT_COUNT - 1;
		U32 inIndicesOffset = 0;
		for(U32 i = 0; i < TYPED_OBJECT_COUNT; ++i)
		{
			for(U32 c = 0; c < inf.m_counts[i]; ++c)
			{
				outIndices[outIndicesOffset++] = inIndices[inIndicesOffset++];
			}

			// Stop
			outIndices[outIndicesOffset++] = MAX_U32;
		}

		ANKI_ASSERT(inIndicesOffset == inf.m_offset);
		ANKI_ASSERT(outIndicesOffset == indexCountPlusOther);

		// Write the cluster
		const U32 clusterIndex =
			clusterZ * (m_clusterCounts[0] * m_clusterCounts[1]) + tileY * m_clusterCounts[0] + tileX;
		ctx.m_clusters[clusterIndex] = firstIndex + TYPED_OBJECT_COUNT - 1; // Points to the first object
	}
}
/// Copy every visible typed object of the render queue into per-type GPU staging
/// uniform buffers. Sections with zero visible objects mark their token unused.
void ClusterBin::writeTypedObjectsToGpuBuffers(BinCtx& ctx) const
{
	const RenderQueue& rqueue = *ctx.m_in->m_renderQueue;

	// Write the point lights
	const U32 visiblePointLightCount = rqueue.m_pointLights.getSize();
	if(visiblePointLightCount)
	{
		PointLight* data = static_cast<PointLight*>(ctx.m_in->m_stagingMem->allocateFrame(
			sizeof(PointLight) * visiblePointLightCount, StagingGpuMemoryType::UNIFORM, ctx.m_out->m_pointLightsToken));
		WeakArray<PointLight> gpuLights(data, visiblePointLightCount);

		for(U32 i = 0; i < visiblePointLightCount; ++i)
		{
			const PointLightQueueElement& in = rqueue.m_pointLights[i];
			PointLight& out = gpuLights[i];

			out.m_position = in.m_worldPosition;
			out.m_squareRadiusOverOne = 1.0f / (in.m_radius * in.m_radius);
			out.m_diffuseColor = in.m_diffuseColor;

			if(in.m_shadowRenderQueues[0] == nullptr || !ctx.m_in->m_shadowsEnabled)
			{
				// NOTE(review): INVALID_TEXTURE_INDEX stored in the scale field signals "no shadow"
				// to the shader — confirm against the shader side.
				out.m_shadowAtlasTileScale = INVALID_TEXTURE_INDEX;
			}
			else
			{
				out.m_shadowAtlasTileScale = in.m_shadowAtlasTileSize;
				ANKI_ASSERT(sizeof(out.m_shadowAtlasTileOffsets) == sizeof(in.m_shadowAtlasTileOffsets));
				memcpy(&out.m_shadowAtlasTileOffsets[0], &in.m_shadowAtlasTileOffsets[0],
					sizeof(in.m_shadowAtlasTileOffsets));
			}

			out.m_radius = in.m_radius;
		}
	}
	else
	{
		ctx.m_out->m_pointLightsToken.markUnused();
	}

	// Write the spot lights
	const U32 visibleSpotLightCount = rqueue.m_spotLights.getSize();
	if(visibleSpotLightCount)
	{
		SpotLight* data = static_cast<SpotLight*>(ctx.m_in->m_stagingMem->allocateFrame(
			sizeof(SpotLight) * visibleSpotLightCount, StagingGpuMemoryType::UNIFORM, ctx.m_out->m_spotLightsToken));
		WeakArray<SpotLight> gpuLights(data, visibleSpotLightCount);

		for(U32 i = 0; i < visibleSpotLightCount; ++i)
		{
			const SpotLightQueueElement& in = rqueue.m_spotLights[i];
			SpotLight& out = gpuLights[i];

			F32 shadowmapIndex = INVALID_TEXTURE_INDEX;

			if(in.hasShadow() && ctx.m_in->m_shadowsEnabled)
			{
				// bias * proj_l * view_l
				out.m_texProjectionMat = in.m_textureMatrix;

				// Any valid value flags the light as shadow-casting to the shader.
				shadowmapIndex = 1.0f; // Just set a value
			}

			// Pos & dist
			out.m_position = in.m_worldTransform.getTranslationPart().xyz();
			out.m_squareRadiusOverOne = 1.0f / (in.m_distance * in.m_distance);

			// Diff color and shadowmap ID now
			out.m_diffuseColor = in.m_diffuseColor;
			out.m_shadowmapId = shadowmapIndex;

			// Light dir & radius
			Vec3 lightDir = -in.m_worldTransform.getRotationPart().getZAxis();
			out.m_dir = lightDir;
			out.m_radius = in.m_distance;

			// Angles (half-angle cosines for the cone falloff)
			out.m_outerCos = cos(in.m_outerAngle / 2.0f);
			out.m_innerCos = cos(in.m_innerAngle / 2.0f);
		}
	}
	else
	{
		ctx.m_out->m_spotLightsToken.markUnused();
	}

	// Write the decals
	const U32 visibleDecalCount = rqueue.m_decals.getSize();
	if(visibleDecalCount)
	{
		Decal* data = static_cast<Decal*>(ctx.m_in->m_stagingMem->allocateFrame(
			sizeof(Decal) * visibleDecalCount, StagingGpuMemoryType::UNIFORM, ctx.m_out->m_decalsToken));
		WeakArray<Decal> gpuDecals(data, visibleDecalCount);

		// All decals must share one atlas pair; fatal error if they don't.
		TextureView* diffuseAtlas = nullptr;
		TextureView* specularRoughnessAtlas = nullptr;

		for(U32 i = 0; i < visibleDecalCount; ++i)
		{
			const DecalQueueElement& in = rqueue.m_decals[i];
			Decal& out = gpuDecals[i];

			if((diffuseAtlas != nullptr && diffuseAtlas != in.m_diffuseAtlas)
				|| (specularRoughnessAtlas != nullptr && specularRoughnessAtlas != in.m_specularRoughnessAtlas))
			{
				ANKI_R_LOGF("All decals should have the same tex atlas");
			}

			diffuseAtlas = in.m_diffuseAtlas;
			specularRoughnessAtlas = in.m_specularRoughnessAtlas;

			// Diff. UV rect is converted from (min, max) to (origin, size).
			Vec4 uv = in.m_diffuseAtlasUv;
			out.m_diffUv = Vec4(uv.x(), uv.y(), uv.z() - uv.x(), uv.w() - uv.y());
			out.m_blendFactors[0] = in.m_diffuseAtlasBlendFactor;

			// Other
			uv = in.m_specularRoughnessAtlasUv;
			out.m_normRoughnessUv = Vec4(uv.x(), uv.y(), uv.z() - uv.x(), uv.w() - uv.y());
			out.m_blendFactors[1] = in.m_specularRoughnessAtlasBlendFactor;

			// bias * proj_l * view
			out.m_texProjectionMat = in.m_textureMatrix;
		}

		ANKI_ASSERT(diffuseAtlas || specularRoughnessAtlas);
		ctx.m_out->m_diffDecalTexView.reset(diffuseAtlas);
		ctx.m_out->m_specularRoughnessDecalTexView.reset(specularRoughnessAtlas);
	}
	else
	{
		ctx.m_out->m_decalsToken.markUnused();
	}

	// Write the probes
	const U32 visibleProbeCount = rqueue.m_reflectionProbes.getSize();
	if(visibleProbeCount)
	{
		ReflectionProbe* data = static_cast<ReflectionProbe*>(
			ctx.m_in->m_stagingMem->allocateFrame(sizeof(ReflectionProbe) * visibleProbeCount,
				StagingGpuMemoryType::UNIFORM, ctx.m_out->m_reflectionProbesToken));
		WeakArray<ReflectionProbe> gpuProbes(data, visibleProbeCount);

		for(U32 i = 0; i < visibleProbeCount; ++i)
		{
			const ReflectionProbeQueueElement& in = rqueue.m_reflectionProbes[i];
			ReflectionProbe& out = gpuProbes[i];

			out.m_position = in.m_worldPosition;
			out.m_cubemapIndex = F32(in.m_textureArrayIndex);
			out.m_aabbMin = in.m_aabbMin;
			out.m_aabbMax = in.m_aabbMax;
		}
	}
	else
	{
		ctx.m_out->m_reflectionProbesToken.markUnused();
	}

	// Fog volumes
	const U32 visibleFogVolumeCount = rqueue.m_fogDensityVolumes.getSize();
	if(visibleFogVolumeCount)
	{
		FogDensityVolume* data = static_cast<FogDensityVolume*>(
			ctx.m_in->m_stagingMem->allocateFrame(sizeof(FogDensityVolume) * visibleFogVolumeCount,
				StagingGpuMemoryType::UNIFORM, ctx.m_out->m_fogDensityVolumesToken));
		WeakArray<FogDensityVolume> gpuFogVolumes(data, visibleFogVolumeCount);

		for(U32 i = 0; i < visibleFogVolumeCount; ++i)
		{
			const FogDensityQueueElement& in = rqueue.m_fogDensityVolumes[i];
			FogDensityVolume& out = gpuFogVolumes[i];

			out.m_density = in.m_density;
			// The min/max fields are a union: AABB extents for boxes, center + squared radius for spheres.
			if(in.m_isBox)
			{
				out.m_isBox = 1;
				out.m_aabbMinOrSphereCenter = in.m_aabbMin;
				out.m_aabbMaxOrSphereRadiusSquared = in.m_aabbMax;
			}
			else
			{
				out.m_isBox = 0;
				out.m_aabbMinOrSphereCenter = in.m_sphereCenter;
				out.m_aabbMaxOrSphereRadiusSquared = Vec3(in.m_sphereRadius * in.m_sphereRadius);
			}
		}
	}
	else
	{
		ctx.m_out->m_fogDensityVolumesToken.markUnused();
	}

	// Write the probes
	const U32 visibleGiProbeCount = rqueue.m_giProbes.getSize();
	if(visibleGiProbeCount)
	{
		GlobalIlluminationProbe* data = static_cast<GlobalIlluminationProbe*>(ctx.m_in->m_stagingMem->allocateFrame(
			sizeof(GlobalIlluminationProbe) * visibleGiProbeCount, StagingGpuMemoryType::UNIFORM,
			ctx.m_out->m_globalIlluminationProbesToken));
		WeakArray<GlobalIlluminationProbe> gpuProbes(data, visibleGiProbeCount);

		for(U32 i = 0; i < visibleGiProbeCount; ++i)
		{
			const GlobalIlluminationProbeQueueElement& in = rqueue.m_giProbes[i];
			GlobalIlluminationProbe& out = gpuProbes[i];

			out.m_aabbMin = in.m_aabbMin;
			out.m_aabbMax = in.m_aabbMax;
			// Index of the element inside the queue array, used as the texture index.
			out.m_textureIndex = U32(&in - &rqueue.m_giProbes.getFront());
			// Half a texel in U for a texture that packs 6 faces side by side.
			out.m_halfTexelSizeU = 1.0f / F32(F32(in.m_cellCounts.x()) * 6.0f) / 2.0f;
			out.m_fadeDistance = in.m_fadeDistance;
		}
	}
	else
	{
		ctx.m_out->m_globalIlluminationProbesToken.markUnused();
	}
}
  669. } // end namespace anki