|
|
@@ -3,9 +3,6 @@
|
|
|
// Code licensed under the BSD License.
|
|
|
// http://www.anki3d.org/LICENSE
|
|
|
|
|
|
-// For those platforms that don't support 64bit atomics try to do the atomics in 32bit
|
|
|
-#pragma anki mutator SUPPORTS_64BIT_ATOMICS 0 1
|
|
|
-
|
|
|
ANKI_SPECIALIZATION_CONSTANT_U32(TILE_SIZE, 0u);
|
|
|
ANKI_SPECIALIZATION_CONSTANT_U32(TILE_COUNT_X, 1u);
|
|
|
ANKI_SPECIALIZATION_CONSTANT_U32(TILE_COUNT_Y, 2u);
|
|
|
@@ -26,22 +23,11 @@ layout(set = 0, binding = 0, scalar) uniform b_unis
|
|
|
ClusteredShadingUniforms u_unis;
|
|
|
};
|
|
|
|
|
|
-layout(set = 0, binding = 1, scalar) writeonly buffer b_clusters64
|
|
|
-{
|
|
|
- Cluster u_clusters64[];
|
|
|
-};
|
|
|
-
|
|
|
-layout(set = 0, binding = 1, scalar) writeonly buffer b_clusters32
|
|
|
+layout(set = 0, binding = 1, scalar) writeonly buffer b_clusters
|
|
|
{
|
|
|
- Cluster32 u_clusters32[];
|
|
|
+ Cluster u_clusters[];
|
|
|
};
|
|
|
|
|
|
-#if SUPPORTS_64BIT_ATOMICS
|
|
|
-# define u_clusters u_clusters64
|
|
|
-#else
|
|
|
-# define u_clusters u_clusters32
|
|
|
-#endif
|
|
|
-
|
|
|
layout(set = 0, binding = 2, scalar) uniform b_pointLights
|
|
|
{
|
|
|
PointLight u_pointLights[MAX_VISIBLE_POINT_LIGHTS];
|
|
|
@@ -82,60 +68,10 @@ UVec2 SAMPLE_LOCATIONS[SAMPLE_COUNT] = UVec2[](LOCATION(-2, -6), LOCATION(6, -2)
|
|
|
|
|
|
// A mask per tile of this workgroup for the clusterer object being processed by this workgroup
|
|
|
const U32 TILES_PER_WORKGROUP = WORKGROUP_SIZE / SAMPLE_COUNT;
|
|
|
-
|
|
|
-#if SUPPORTS_64BIT_ATOMICS
|
|
|
-shared U64 s_tileMasks[TILES_PER_WORKGROUP];
|
|
|
-#else
|
|
|
-shared U32 s_tileMasks[TILES_PER_WORKGROUP][2u];
|
|
|
-#endif
|
|
|
+shared ExtendedClusterObjectMask s_tileMasks[TILES_PER_WORKGROUP];
|
|
|
|
|
|
// A mask for each Z split for a specific clusterer object
|
|
|
-#if SUPPORTS_64BIT_ATOMICS
|
|
|
-shared U64 s_zSplitMasks[Z_SPLIT_COUNT];
|
|
|
-#else
|
|
|
-shared U32 s_zSplitMasks[Z_SPLIT_COUNT][2u];
|
|
|
-#endif
|
|
|
-
|
|
|
-#if SUPPORTS_64BIT_ATOMICS
|
|
|
-# define atomicOr2x32_64(dest, src) atomicOr(dest, src)
|
|
|
-
|
|
|
-# define atomicOr2x32_2x32(dest, src) atomicOr(dest, src)
|
|
|
-
|
|
|
-# define atomicOr32_2X32(dest, src) atomicOr(dest, U32(src))
|
|
|
-
|
|
|
-# define zero2x32(dest) \
|
|
|
- do \
|
|
|
- { \
|
|
|
- dest = 0ul; \
|
|
|
- } while(false)
|
|
|
-
|
|
|
-# define isZero2x32(src) (src == 0ul)
|
|
|
-#else // !SUPPORTS_64BIT_ATOMICS
|
|
|
-# define atomicOr2x32_64(dest, src) \
|
|
|
- do \
|
|
|
- { \
|
|
|
- atomicOr(dest[0u], U32(src)); \
|
|
|
- atomicOr(dest[1u], U32(src >> 32ul)); \
|
|
|
- } while(false)
|
|
|
-
|
|
|
-# define atomicOr2x32_2x32(dest, src) \
|
|
|
- do \
|
|
|
- { \
|
|
|
- atomicOr(dest[0u], src[0u]); \
|
|
|
- atomicOr(dest[1u], src[1u]); \
|
|
|
- } while(false)
|
|
|
-
|
|
|
-# define atomicOr32_2X32(dest, src) atomicOr(dest, src[1u])
|
|
|
-
|
|
|
-# define zero2x32(dest) \
|
|
|
- do \
|
|
|
- { \
|
|
|
- dest[0u] = 0u; \
|
|
|
- dest[1u] = 0u; \
|
|
|
- } while(false)
|
|
|
-
|
|
|
-# define isZero2x32(src) ((src[0u] | src[1u]) == 0u)
|
|
|
-#endif
|
|
|
+shared ExtendedClusterObjectMask s_zSplitMasks[Z_SPLIT_COUNT];
|
|
|
|
|
|
Bool isPointLight()
|
|
|
{
|
|
|
@@ -196,12 +132,12 @@ void main()
|
|
|
const Vec3 rayDir = normalize(farWorldPos - rayOrigin);
|
|
|
|
|
|
// Zero shared memory
|
|
|
- zero2x32(s_tileMasks[localTileIdx]);
|
|
|
+ s_tileMasks[localTileIdx] = ExtendedClusterObjectMask(0);
|
|
|
const U32 splitsPerInvocation = max(1u, Z_SPLIT_COUNT / WORKGROUP_SIZE);
|
|
|
for(U32 i = gl_LocalInvocationIndex * splitsPerInvocation;
|
|
|
i < (gl_LocalInvocationIndex + 1u) * splitsPerInvocation && i < Z_SPLIT_COUNT; ++i)
|
|
|
{
|
|
|
- zero2x32(s_zSplitMasks[i]);
|
|
|
+ s_zSplitMasks[i] = ExtendedClusterObjectMask(0);
|
|
|
}
|
|
|
memoryBarrierShared();
|
|
|
barrier();
|
|
|
@@ -302,8 +238,9 @@ void main()
|
|
|
if(collides)
|
|
|
{
|
|
|
// Set the tile
|
|
|
- const U64 mask = 1ul << U64(objectArrayIdx);
|
|
|
- atomicOr2x32_64(s_tileMasks[localTileIdx], mask);
|
|
|
+ const ExtendedClusterObjectMask mask = ExtendedClusterObjectMask(1)
|
|
|
+ << ExtendedClusterObjectMask(objectArrayIdx);
|
|
|
+ atomicOr(s_tileMasks[localTileIdx], mask);
|
|
|
|
|
|
// Compute and set the Z splits
|
|
|
const Vec3 hitpointA = rayDir * t0 + rayOrigin;
|
|
|
@@ -331,7 +268,7 @@ void main()
|
|
|
clamp(I32(maxDistFromNearPlane * u_unis.m_zSplitCountOverFrustumLength), 0, I32(Z_SPLIT_COUNT) - 1);
|
|
|
for(I32 i = startZSplit; i <= endZSplit; ++i)
|
|
|
{
|
|
|
- atomicOr2x32_64(s_zSplitMasks[i], mask);
|
|
|
+ atomicOr(s_zSplitMasks[i], mask);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
@@ -340,31 +277,31 @@ void main()
|
|
|
barrier();
|
|
|
|
|
|
// First sample writes the tile
|
|
|
- if(sampleIdx == 0u && !isZero2x32(s_tileMasks[localTileIdx]))
|
|
|
+ if(sampleIdx == 0u && s_tileMasks[localTileIdx] != ExtendedClusterObjectMask(0))
|
|
|
{
|
|
|
if(isPointLight())
|
|
|
{
|
|
|
- atomicOr2x32_2x32(u_clusters[tileIdx].m_pointLightsMask, s_tileMasks[localTileIdx]);
|
|
|
+ atomicOr(u_clusters[tileIdx].m_pointLightsMask, s_tileMasks[localTileIdx]);
|
|
|
}
|
|
|
else if(isSpotLight())
|
|
|
{
|
|
|
- atomicOr2x32_2x32(u_clusters[tileIdx].m_spotLightsMask, s_tileMasks[localTileIdx]);
|
|
|
+ atomicOr(u_clusters[tileIdx].m_spotLightsMask, s_tileMasks[localTileIdx]);
|
|
|
}
|
|
|
else if(isDecal())
|
|
|
{
|
|
|
- atomicOr2x32_2x32(u_clusters[tileIdx].m_decalsMask, s_tileMasks[localTileIdx]);
|
|
|
+ atomicOr(u_clusters[tileIdx].m_decalsMask, s_tileMasks[localTileIdx]);
|
|
|
}
|
|
|
else if(isFogVolume())
|
|
|
{
|
|
|
- atomicOr32_2X32(u_clusters[tileIdx].m_fogDensityVolumesMask, s_tileMasks[localTileIdx]);
|
|
|
+ atomicOr(u_clusters[tileIdx].m_fogDensityVolumesMask, U32(s_tileMasks[localTileIdx]));
|
|
|
}
|
|
|
else if(isReflectionProbe())
|
|
|
{
|
|
|
- atomicOr32_2X32(u_clusters[tileIdx].m_reflectionProbesMask, s_tileMasks[localTileIdx]);
|
|
|
+ atomicOr(u_clusters[tileIdx].m_reflectionProbesMask, U32(s_tileMasks[localTileIdx]));
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
- atomicOr32_2X32(u_clusters[tileIdx].m_giProbesMask, s_tileMasks[localTileIdx]);
|
|
|
+ atomicOr(u_clusters[tileIdx].m_giProbesMask, U32(s_tileMasks[localTileIdx]));
|
|
|
}
|
|
|
}
|
|
|
|
|
|
@@ -372,31 +309,31 @@ void main()
|
|
|
for(U32 i = gl_LocalInvocationIndex * splitsPerInvocation;
|
|
|
i < (gl_LocalInvocationIndex + 1u) * splitsPerInvocation && i < Z_SPLIT_COUNT; ++i)
|
|
|
{
|
|
|
- if(!isZero2x32(s_zSplitMasks[i]))
|
|
|
+ if(s_zSplitMasks[i] != ExtendedClusterObjectMask(0))
|
|
|
{
|
|
|
if(isPointLight())
|
|
|
{
|
|
|
- atomicOr2x32_2x32(u_clusters[TILE_COUNT + i].m_pointLightsMask, s_zSplitMasks[i]);
|
|
|
+ atomicOr(u_clusters[TILE_COUNT + i].m_pointLightsMask, s_zSplitMasks[i]);
|
|
|
}
|
|
|
else if(isSpotLight())
|
|
|
{
|
|
|
- atomicOr2x32_2x32(u_clusters[TILE_COUNT + i].m_spotLightsMask, s_zSplitMasks[i]);
|
|
|
+ atomicOr(u_clusters[TILE_COUNT + i].m_spotLightsMask, s_zSplitMasks[i]);
|
|
|
}
|
|
|
else if(isDecal())
|
|
|
{
|
|
|
- atomicOr2x32_2x32(u_clusters[TILE_COUNT + i].m_decalsMask, s_zSplitMasks[i]);
|
|
|
+ atomicOr(u_clusters[TILE_COUNT + i].m_decalsMask, s_zSplitMasks[i]);
|
|
|
}
|
|
|
else if(isFogVolume())
|
|
|
{
|
|
|
- atomicOr32_2X32(u_clusters[TILE_COUNT + i].m_fogDensityVolumesMask, s_zSplitMasks[i]);
|
|
|
+ atomicOr(u_clusters[TILE_COUNT + i].m_fogDensityVolumesMask, U32(s_zSplitMasks[i]));
|
|
|
}
|
|
|
else if(isReflectionProbe())
|
|
|
{
|
|
|
- atomicOr32_2X32(u_clusters[TILE_COUNT + i].m_reflectionProbesMask, s_zSplitMasks[i]);
|
|
|
+ atomicOr(u_clusters[TILE_COUNT + i].m_reflectionProbesMask, U32(s_zSplitMasks[i]));
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
- atomicOr32_2X32(u_clusters[TILE_COUNT + i].m_giProbesMask, s_zSplitMasks[i]);
|
|
|
+ atomicOr(u_clusters[TILE_COUNT + i].m_giProbesMask, U32(s_zSplitMasks[i]));
|
|
|
}
|
|
|
}
|
|
|
}
|