|
|
@@ -3,6 +3,9 @@
|
|
|
// Code licensed under the BSD License.
|
|
|
// http://www.anki3d.org/LICENSE
|
|
|
|
|
|
+// On platforms that don't support 64bit atomics, emulate each 64bit atomic with a pair of 32bit atomics
|
|
|
+#pragma anki mutator SUPPORTS_64BIT_ATOMICS 0 1
|
|
|
+
|
|
|
ANKI_SPECIALIZATION_CONSTANT_U32(TILE_SIZE, 0u);
|
|
|
ANKI_SPECIALIZATION_CONSTANT_U32(TILE_COUNT_X, 1u);
|
|
|
ANKI_SPECIALIZATION_CONSTANT_U32(TILE_COUNT_Y, 2u);
|
|
|
@@ -23,11 +26,22 @@ layout(set = 0, binding = 0, scalar) uniform b_unis
|
|
|
ClusteredShadingUniforms u_unis;
|
|
|
};
|
|
|
|
|
|
-layout(set = 0, binding = 1, scalar) writeonly buffer b_clusters
|
|
|
+layout(set = 0, binding = 1, scalar) writeonly buffer b_clusters64
|
|
|
+{
|
|
|
+ Cluster u_clusters64[];
|
|
|
+};
|
|
|
+
|
|
|
+layout(set = 0, binding = 1, scalar) writeonly buffer b_clusters32
|
|
|
{
|
|
|
- Cluster u_clusters[];
|
|
|
+ Cluster32 u_clusters32[];
|
|
|
};
|
|
|
|
|
|
+#if SUPPORTS_64BIT_ATOMICS
|
|
|
+# define u_clusters u_clusters64
|
|
|
+#else
|
|
|
+# define u_clusters u_clusters32
|
|
|
+#endif
|
|
|
+
|
|
|
layout(set = 0, binding = 2, scalar) uniform b_pointLights
|
|
|
{
|
|
|
PointLight u_pointLights[MAX_VISIBLE_POINT_LIGHTS];
|
|
|
@@ -68,10 +82,60 @@ UVec2 SAMPLE_LOCATIONS[SAMPLE_COUNT] = UVec2[](LOCATION(-2, -6), LOCATION(6, -2)
|
|
|
|
|
|
// A mask per tile of this workgroup for the clusterer object being processed by this workgroup
|
|
|
const U32 TILES_PER_WORKGROUP = WORKGROUP_SIZE / SAMPLE_COUNT;
|
|
|
+
|
|
|
+#if SUPPORTS_64BIT_ATOMICS
|
|
|
shared U64 s_tileMasks[TILES_PER_WORKGROUP];
|
|
|
+#else
|
|
|
+shared U32 s_tileMasks[TILES_PER_WORKGROUP][2u];
|
|
|
+#endif
|
|
|
|
|
|
// A mask for each Z split for a specific clusterer object
|
|
|
+#if SUPPORTS_64BIT_ATOMICS
|
|
|
shared U64 s_zSplitMasks[Z_SPLIT_COUNT];
|
|
|
+#else
|
|
|
+shared U32 s_zSplitMasks[Z_SPLIT_COUNT][2u];
|
|
|
+#endif
|
|
|
+
|
|
|
+#if SUPPORTS_64BIT_ATOMICS
|
|
|
+# define atomicOr2x32_64(dest, src) atomicOr(dest, src)
|
|
|
+
|
|
|
+# define atomicOr2x32_2x32(dest, src) atomicOr(dest, src)
|
|
|
+
|
|
|
+# define atomicOr32_2X32(dest, src) atomicOr(dest, U32(src))
|
|
|
+
|
|
|
+# define zero2x32(dest) \
|
|
|
+ do \
|
|
|
+ { \
|
|
|
+ dest = 0ul; \
|
|
|
+ } while(false)
|
|
|
+
|
|
|
+# define isZero2x32(src) (src == 0ul)
|
|
|
+#else // !SUPPORTS_64BIT_ATOMICS
|
|
|
+# define atomicOr2x32_64(dest, src) \
|
|
|
+ do \
|
|
|
+ { \
|
|
|
+ atomicOr(dest[0u], U32(src)); \
|
|
|
+ atomicOr(dest[1u], U32(src >> 32ul)); \
|
|
|
+ } while(false)
|
|
|
+
|
|
|
+# define atomicOr2x32_2x32(dest, src) \
|
|
|
+ do \
|
|
|
+ { \
|
|
|
+ atomicOr(dest[0u], src[0u]); \
|
|
|
+ atomicOr(dest[1u], src[1u]); \
|
|
|
+ } while(false)
|
|
|
+
|
|
|
+// ORs the low 32 bits of a 2x32 mask into a 32bit destination. The low word is element 0 (atomicOr2x32_64 writes
+// U32(src) to dest[0u]), which matches the U32(src) truncation done by the 64bit variant of this macro
+# define atomicOr32_2X32(dest, src) atomicOr(dest, src[0u])
|
|
|
+
|
|
|
+# define zero2x32(dest) \
|
|
|
+ do \
|
|
|
+ { \
|
|
|
+ dest[0u] = 0u; \
|
|
|
+ dest[1u] = 0u; \
|
|
|
+ } while(false)
|
|
|
+
|
|
|
+# define isZero2x32(src) ((src[0u] | src[1u]) == 0u)
|
|
|
+#endif
|
|
|
|
|
|
Bool isPointLight()
|
|
|
{
|
|
|
@@ -132,12 +196,12 @@ void main()
|
|
|
const Vec3 rayDir = normalize(farWorldPos - rayOrigin);
|
|
|
|
|
|
// Zero shared memory
|
|
|
- s_tileMasks[localTileIdx] = 0ul;
|
|
|
+ zero2x32(s_tileMasks[localTileIdx]);
|
|
|
const U32 splitsPerInvocation = max(1u, Z_SPLIT_COUNT / WORKGROUP_SIZE);
|
|
|
for(U32 i = gl_LocalInvocationIndex * splitsPerInvocation;
|
|
|
i < (gl_LocalInvocationIndex + 1u) * splitsPerInvocation && i < Z_SPLIT_COUNT; ++i)
|
|
|
{
|
|
|
- s_zSplitMasks[i] = 0ul;
|
|
|
+ zero2x32(s_zSplitMasks[i]);
|
|
|
}
|
|
|
memoryBarrierShared();
|
|
|
barrier();
|
|
|
@@ -239,7 +303,7 @@ void main()
|
|
|
{
|
|
|
// Set the tile
|
|
|
const U64 mask = 1ul << U64(objectArrayIdx);
|
|
|
- atomicOr(s_tileMasks[localTileIdx], mask);
|
|
|
+ atomicOr2x32_64(s_tileMasks[localTileIdx], mask);
|
|
|
|
|
|
// Compute and set the Z splits
|
|
|
const Vec3 hitpointA = rayDir * t0 + rayOrigin;
|
|
|
@@ -267,7 +331,7 @@ void main()
|
|
|
clamp(I32(maxDistFromNearPlane * u_unis.m_zSplitCountOverFrustumLength), 0, I32(Z_SPLIT_COUNT) - 1);
|
|
|
for(I32 i = startZSplit; i <= endZSplit; ++i)
|
|
|
{
|
|
|
- atomicOr(s_zSplitMasks[i], mask);
|
|
|
+ atomicOr2x32_64(s_zSplitMasks[i], mask);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
@@ -276,31 +340,31 @@ void main()
|
|
|
barrier();
|
|
|
|
|
|
// First sample writes the tile
|
|
|
- if(sampleIdx == 0u && s_tileMasks[localTileIdx] != 0ul)
|
|
|
+ if(sampleIdx == 0u && !isZero2x32(s_tileMasks[localTileIdx]))
|
|
|
{
|
|
|
if(isPointLight())
|
|
|
{
|
|
|
- atomicOr(u_clusters[tileIdx].m_pointLightsMask, s_tileMasks[localTileIdx]);
|
|
|
+ atomicOr2x32_2x32(u_clusters[tileIdx].m_pointLightsMask, s_tileMasks[localTileIdx]);
|
|
|
}
|
|
|
else if(isSpotLight())
|
|
|
{
|
|
|
- atomicOr(u_clusters[tileIdx].m_spotLightsMask, s_tileMasks[localTileIdx]);
|
|
|
+ atomicOr2x32_2x32(u_clusters[tileIdx].m_spotLightsMask, s_tileMasks[localTileIdx]);
|
|
|
}
|
|
|
else if(isDecal())
|
|
|
{
|
|
|
- atomicOr(u_clusters[tileIdx].m_decalsMask, s_tileMasks[localTileIdx]);
|
|
|
+ atomicOr2x32_2x32(u_clusters[tileIdx].m_decalsMask, s_tileMasks[localTileIdx]);
|
|
|
}
|
|
|
else if(isFogVolume())
|
|
|
{
|
|
|
- atomicOr(u_clusters[tileIdx].m_fogDensityVolumesMask, U32(s_tileMasks[localTileIdx]));
|
|
|
+ atomicOr32_2X32(u_clusters[tileIdx].m_fogDensityVolumesMask, s_tileMasks[localTileIdx]);
|
|
|
}
|
|
|
else if(isReflectionProbe())
|
|
|
{
|
|
|
- atomicOr(u_clusters[tileIdx].m_reflectionProbesMask, U32(s_tileMasks[localTileIdx]));
|
|
|
+ atomicOr32_2X32(u_clusters[tileIdx].m_reflectionProbesMask, s_tileMasks[localTileIdx]);
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
- atomicOr(u_clusters[tileIdx].m_giProbesMask, U32(s_tileMasks[localTileIdx]));
|
|
|
+ atomicOr32_2X32(u_clusters[tileIdx].m_giProbesMask, s_tileMasks[localTileIdx]);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
@@ -308,31 +372,31 @@ void main()
|
|
|
for(U32 i = gl_LocalInvocationIndex * splitsPerInvocation;
|
|
|
i < (gl_LocalInvocationIndex + 1u) * splitsPerInvocation && i < Z_SPLIT_COUNT; ++i)
|
|
|
{
|
|
|
- if(s_zSplitMasks[i] != 0ul)
|
|
|
+ if(!isZero2x32(s_zSplitMasks[i]))
|
|
|
{
|
|
|
if(isPointLight())
|
|
|
{
|
|
|
- atomicOr(u_clusters[TILE_COUNT + i].m_pointLightsMask, s_zSplitMasks[i]);
|
|
|
+ atomicOr2x32_2x32(u_clusters[TILE_COUNT + i].m_pointLightsMask, s_zSplitMasks[i]);
|
|
|
}
|
|
|
else if(isSpotLight())
|
|
|
{
|
|
|
- atomicOr(u_clusters[TILE_COUNT + i].m_spotLightsMask, s_zSplitMasks[i]);
|
|
|
+ atomicOr2x32_2x32(u_clusters[TILE_COUNT + i].m_spotLightsMask, s_zSplitMasks[i]);
|
|
|
}
|
|
|
else if(isDecal())
|
|
|
{
|
|
|
- atomicOr(u_clusters[TILE_COUNT + i].m_decalsMask, s_zSplitMasks[i]);
|
|
|
+ atomicOr2x32_2x32(u_clusters[TILE_COUNT + i].m_decalsMask, s_zSplitMasks[i]);
|
|
|
}
|
|
|
else if(isFogVolume())
|
|
|
{
|
|
|
- atomicOr(u_clusters[TILE_COUNT + i].m_fogDensityVolumesMask, U32(s_zSplitMasks[i]));
|
|
|
+ atomicOr32_2X32(u_clusters[TILE_COUNT + i].m_fogDensityVolumesMask, s_zSplitMasks[i]);
|
|
|
}
|
|
|
else if(isReflectionProbe())
|
|
|
{
|
|
|
- atomicOr(u_clusters[TILE_COUNT + i].m_reflectionProbesMask, U32(s_zSplitMasks[i]));
|
|
|
+ atomicOr32_2X32(u_clusters[TILE_COUNT + i].m_reflectionProbesMask, s_zSplitMasks[i]);
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
- atomicOr(u_clusters[TILE_COUNT + i].m_giProbesMask, U32(s_zSplitMasks[i]));
|
|
|
+ atomicOr32_2X32(u_clusters[TILE_COUNT + i].m_giProbesMask, s_zSplitMasks[i]);
|
|
|
}
|
|
|
}
|
|
|
}
|