// Copyright (C) 2009-2021, Panagiotis Christopoulos Charitos and contributors.
// All rights reserved.
// Code licensed under the BSD License.
// http://www.anki3d.org/LICENSE

// Compute program that builds a reduced mip chain of a source texture using AMD's FidelityFX SPD. Each texel of a mip
// is the average, min or max (selected by SAMPLE_RESOLVE_TYPE) of the 4 texels it covers.

#pragma anki mutator SAMPLE_RESOLVE_TYPE 0 1 2 // 0: average, 1: min, 2: max
#pragma anki mutator WAVE_OPERATIONS 0 1
#pragma anki mutator REDUCTION_SAMPLER 0 1

// Symbolic names for the values of the SAMPLE_RESOLVE_TYPE mutator
#define AVG 0
#define MIN 1
#define MAX 2

#pragma anki start comp
// NOTE(review): the include target was missing from this copy of the file. The path below is an assumption (it should
// be the header that provides U32, F32, Vec2 etc) and must be confirmed against the repository layout.
#include <AnKi/Shaders/Common.glsl>

layout(local_size_x = 256) in;

layout(push_constant, std430) uniform b_pc
{
	U32 u_workgroupCount; ///< Forwarded as-is to SpdDownsample().
	U32 u_mipmapCount; ///< Forwarded to SpdDownsample(). Mip (u_mipmapCount - 1) is also copied to u_clientBuf.
	Vec2 u_srcTexSizeOverOne; ///< Scale that converts source texel coords to UVs. Presumably 1.0/size, confirm with the caller.
	U32 u_lastMipWidth; ///< Row pitch, in elements, used when writing the last mip to u_clientBuf.
	U32 u_padding[3u];
};

/// Destination mips, indexed by mip level. Mip 5 is never written through this array, it goes to u_dstImage5.
layout(set = 0, binding = 0) uniform image2D u_dstImages[12u];

/// Mip 5. It has its own coherent binding because it is both written (SpdStore) and read back (SpdLoad).
layout(set = 0, binding = 1) coherent uniform image2D u_dstImage5;

/// Global counter that the workgroups increment atomically.
layout(set = 0, binding = 2) coherent buffer b_atomic
{
	U32 u_spdCounter;
};

/// Receives a copy of the last mip, tightly packed with a row pitch of u_lastMipWidth.
layout(std430, set = 0, binding = 3) writeonly buffer b_cb
{
	F32 u_clientBuf[];
};

layout(set = 0, binding = 4) uniform sampler u_reductionSampler; ///< Special sampler that can do min/max reduction.
layout(set = 0, binding = 5) uniform texture2D u_srcTex;

// Include SPD
#define A_GPU 1
#define A_GLSL 1
// NOTE(review): the include target was missing from this copy of the file. SPD requires ffx_a.h to be included at this
// point (after A_GPU/A_GLSL). The directory part of the path is an assumption, confirm against the repository layout.
#include <ThirdParty/FidelityFX/ffx_a.h>

shared AU1 s_spdCounter; ///< Workgroup-wide copy of the value returned by the atomic increment of u_spdCounter.
shared AF1 s_spdIntermediateR[16][16]; ///< Scratch values exchanged through SpdLoadIntermediate/SpdStoreIntermediate.

/// Collapse 4 values into one using the operator selected by the SAMPLE_RESOLVE_TYPE mutator.
/// @param depths The 4 values to combine.
/// @return The min, the max or the arithmetic mean of the 4 components.
F32 reduce(Vec4 depths)
{
#if SAMPLE_RESOLVE_TYPE == MIN
	return min(depths.x, min(depths.y, min(depths.z, depths.w)));
#elif SAMPLE_RESOLVE_TYPE == MAX
	return max(depths.x, max(depths.y, max(depths.z, depths.w)));
#elif SAMPLE_RESOLVE_TYPE == AVG
	return (depths.x + depths.y + depths.z + depths.w) * 0.25;
#else
#	error See file
#endif
}

/// SPD callback: fetch one already reduced value from the source texture.
/// @param p Source texel coordinate.
/// @param slice Unused.
/// @return The reduced value in .x, the other components are zero.
AF4 SpdLoadSourceImage(AU2 p, AU1 slice)
{
	// Equal to (p + 1) * u_srcTexSizeOverOne. Presumably that is the corner shared by the 2x2 texels starting at p,
	// assuming u_srcTexSizeOverOne is 1.0/size. TODO confirm with the code that fills the push constants
	const AF2 textureCoord = Vec2(p) * u_srcTexSizeOverOne + u_srcTexSizeOverOne;

#if REDUCTION_SAMPLER == 1
	// A single fetch, the sampler is expected to do the reduction
	return AF4(textureLod(u_srcTex, u_reductionSampler, textureCoord, 0.0).r, 0.0, 0.0, 0.0);
#else
	// Gather the 4 texels and reduce them manually
	const Vec4 depths = textureGather(sampler2D(u_srcTex, u_reductionSampler), textureCoord, 0);
	return AF4(reduce(depths), 0.0, 0.0, 0.0);
#endif
}

/// SPD callback: read back a texel of mip 5.
/// @param p Texel coordinate inside u_dstImage5.
/// @param slice Unused.
/// @return The stored value in .x, the other components are zero.
AF4 SpdLoad(AU2 p, AU1 slice)
{
	return AF4(imageLoad(u_dstImage5, IVec2(p)).r, 0.0, 0.0, 0.0);
}

/// SPD callback: write one texel of a destination mip.
/// @param p Texel coordinate inside the destination mip.
/// @param value Only .x is stored.
/// @param mip Destination mip index.
/// @param slice Unused.
void SpdStore(AU2 p, AF4 value, AU1 mip, AU1 slice)
{
	if(mip == 5u)
	{
		// Mip 5 lives in its own coherent image because SpdLoad() reads it back
		imageStore(u_dstImage5, IVec2(p), Vec4(value.x, 0.0, 0.0, 0.0));
	}
	else
	{
		imageStore(u_dstImages[mip], IVec2(p), Vec4(value.x, 0.0, 0.0, 0.0));
	}

	// Store the last mip to the buffer as well
	if(mip == u_mipmapCount - 1u)
	{
		const U32 idx = p.y * u_lastMipWidth + p.x;
		u_clientBuf[idx] = value.x;
	}
}

/// SPD callback: atomically increment the global counter and keep the pre-increment value in shared memory.
/// @param slice Unused.
void SpdIncreaseAtomicCounter(AU1 slice)
{
	s_spdCounter = atomicAdd(u_spdCounter, 1u);
}

/// SPD callback: get the value cached by SpdIncreaseAtomicCounter().
AU1 SpdGetAtomicCounter()
{
	return s_spdCounter;
}

/// SPD callback: zero the global counter.
/// @param slice Unused.
void SpdResetAtomicCounter(AU1 slice)
{
	u_spdCounter = 0u;
}

/// SPD callback: read a value from the shared scratch memory.
/// @return The value in .x, the other components are zero.
AF4 SpdLoadIntermediate(AU1 x, AU1 y)
{
	return AF4(s_spdIntermediateR[x][y], 0.0, 0.0, 0.0);
}

/// SPD callback: write value.x to the shared scratch memory.
void SpdStoreIntermediate(AU1 x, AU1 y, AF4 value)
{
	s_spdIntermediateR[x][y] = value.x;
}

/// SPD callback: reduce the .x components of 4 values using reduce().
/// @return The reduced value in .x, the other components are zero.
AF4 SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3)
{
	return AF4(reduce(Vec4(v0.x, v1.x, v2.x, v3.x)), 0.0, 0.0, 0.0);
}

#define SPD_LINEAR_SAMPLER 1

#if WAVE_OPERATIONS == 0
#	define SPD_NO_WAVE_OPERATIONS 1
#endif

// NOTE(review): the include target was missing from this copy of the file. This has to be the SPD header because it
// must provide SpdDownsample() and it consumes the callbacks and defines above. The directory part of the path is an
// assumption, confirm against the repository layout.
#include <ThirdParty/FidelityFX/ffx_spd.h>

/// Entry point. Runs the downsampler for this workgroup on slice 0 with a zero offset.
void main()
{
	const U32 slice = 0u;
	const UVec2 offset = UVec2(0u);
	SpdDownsample(AU2(gl_WorkGroupID.xy), AU1(gl_LocalInvocationIndex), AU1(u_mipmapCount), AU1(u_workgroupCount), slice,
				  offset);
}

#pragma anki end