Browse Source

Compute: Rename Write Only suffix _WR to _WO.

Бранимир Караџић 1 year ago
parent
commit
932302d8f4
28 changed files with 130 additions and 128 deletions
  1. 1 1
      examples/08-update/cs_update.sc
  2. 1 1
      examples/24-nbody/cs_indirect.sc
  3. 2 2
      examples/24-nbody/cs_init_instances.sc
  4. 2 2
      examples/24-nbody/cs_update_instances.sc
  5. 1 1
      examples/37-gpudrivenrendering/cs_gdr_copy_z.sc
  6. 1 1
      examples/37-gpudrivenrendering/cs_gdr_downscale_hi_z.sc
  7. 1 1
      examples/37-gpudrivenrendering/cs_gdr_occlude_props.sc
  8. 1 1
      examples/37-gpudrivenrendering/cs_gdr_stream_compaction.sc
  9. 1 1
      examples/39-assao/cs_assao_apply.sc
  10. 1 1
      examples/39-assao/cs_assao_generate_importance_map.sc
  11. 1 1
      examples/39-assao/cs_assao_generate_q.sh
  12. 1 1
      examples/39-assao/cs_assao_load_counter_clear.sc
  13. 1 1
      examples/39-assao/cs_assao_non_smart_apply.sc
  14. 1 1
      examples/39-assao/cs_assao_non_smart_blur.sc
  15. 1 1
      examples/39-assao/cs_assao_non_smart_half_apply.sc
  16. 1 1
      examples/39-assao/cs_assao_postprocess_importance_map_a.sc
  17. 1 1
      examples/39-assao/cs_assao_postprocess_importance_map_b.sc
  18. 4 4
      examples/39-assao/cs_assao_prepare_depth_mip.sc
  19. 4 4
      examples/39-assao/cs_assao_prepare_depths.sc
  20. 5 5
      examples/39-assao/cs_assao_prepare_depths_and_normals.sc
  21. 3 3
      examples/39-assao/cs_assao_prepare_depths_and_normals_half.sc
  22. 2 2
      examples/39-assao/cs_assao_prepare_depths_half.sc
  23. 1 1
      examples/39-assao/cs_assao_smart_blur.sc
  24. 1 1
      examples/39-assao/cs_assao_smart_blur_wide.sc
  25. 2 2
      examples/41-tess/cs_terrain_init.sc
  26. 71 69
      examples/46-fsr/cs_fsr.h
  27. 3 3
      examples/48-drawindirect/cs_drawindirect.sc
  28. 15 15
      src/bgfx_compute.sh

+ 1 - 1
examples/08-update/cs_update.sc

@@ -5,7 +5,7 @@
 
 #include "bgfx_compute.sh"
 
-IMAGE2D_ARRAY_WR(s_texColor, rgba8, 0);
+IMAGE2D_ARRAY_WO(s_texColor, rgba8, 0);
 uniform vec4 u_time;
 
 NUM_THREADS(16, 16, 1)

+ 1 - 1
examples/24-nbody/cs_indirect.sc

@@ -6,7 +6,7 @@
 #include "bgfx_compute.sh"
 #include "uniforms.sh"
 
-BUFFER_WR(indirectBuffer, uvec4, 0);
+BUFFER_WO(indirectBuffer, uvec4, 0);
 
 NUM_THREADS(1, 1, 1)
 void main()

+ 2 - 2
examples/24-nbody/cs_init_instances.sc

@@ -6,8 +6,8 @@
 #include "bgfx_compute.sh"
 #include "uniforms.sh"
 
-BUFFER_WR(prevPositionBuffer, vec4, 0);
-BUFFER_WR(currPositionBuffer, vec4, 1);
+BUFFER_WO(prevPositionBuffer, vec4, 0);
+BUFFER_WO(currPositionBuffer, vec4, 1);
 
 uint rotl(uint _x, uint _r)
 {

+ 2 - 2
examples/24-nbody/cs_update_instances.sc

@@ -8,8 +8,8 @@
 
 BUFFER_RO(prevPositionBuffer,    vec4, 0);
 BUFFER_RO(currPositionBuffer,    vec4, 1);
-BUFFER_WR(outPrevPositionBuffer, vec4, 2);
-BUFFER_WR(outCurrPositionBuffer, vec4, 3);
+BUFFER_WO(outPrevPositionBuffer, vec4, 2);
+BUFFER_WO(outCurrPositionBuffer, vec4, 3);
 
 #define GROUP_SIZE 512
 SHARED vec3 otherEntries[GROUP_SIZE];

+ 1 - 1
examples/37-gpudrivenrendering/cs_gdr_copy_z.sc

@@ -6,7 +6,7 @@
 #include "bgfx_compute.sh"
 
 SAMPLER2D(s_texOcclusionDepth, 0);
-IMAGE2D_WR(s_texOcclusionDepthOut, r32f, 1);
+IMAGE2D_WO(s_texOcclusionDepthOut, r32f, 1);
 
 uniform vec4 u_inputRTSize;
 

+ 1 - 1
examples/37-gpudrivenrendering/cs_gdr_downscale_hi_z.sc

@@ -6,7 +6,7 @@
 #include "bgfx_compute.sh"
 
 IMAGE2D_RO(s_texOcclusionDepthIn, r32f, 0);
-IMAGE2D_WR(s_texOcclusionDepthOut, r32f, 1);
+IMAGE2D_WO(s_texOcclusionDepthOut, r32f, 1);
 
 uniform vec4 u_inputRTSize;
 

+ 1 - 1
examples/37-gpudrivenrendering/cs_gdr_occlude_props.sc

@@ -9,7 +9,7 @@ SAMPLER2D(s_texOcclusionDepth, 0);
 
 BUFFER_RO(instanceDataIn, vec4, 1);
 BUFFER_RW(drawcallInstanceCount, uint, 2);
-BUFFER_WR(instancePredicates, bool, 3);
+BUFFER_WO(instancePredicates, bool, 3);
 
 uniform vec4 u_inputRTSize;
 uniform vec4 u_cullingConfig;

+ 1 - 1
examples/37-gpudrivenrendering/cs_gdr_stream_compaction.sc

@@ -17,7 +17,7 @@ BUFFER_RW(drawcallInstanceCount, uint, 3);
 //drawcall data that will drive drawIndirect
 BUFFER_RW(drawcallData, uvec4, 4);
 //culled instance data
-BUFFER_WR(instanceDataOut, vec4, 5);
+BUFFER_WO(instanceDataOut, vec4, 5);
 
 uniform vec4 u_cullingConfig;
 

+ 1 - 1
examples/39-assao/cs_assao_apply.sc

@@ -6,7 +6,7 @@
 #include "bgfx_compute.sh" 
 #include "uniforms.sh"
 
-IMAGE2D_WR(s_target, r8, 0);
+IMAGE2D_WO(s_target, r8, 0);
 SAMPLER2DARRAY(s_finalSSAO,  1); 
 
 // unpacking for edges; 2 bits per edge mean 4 gradient values (0, 0.33, 0.66, 1) for smoother transitions!

+ 1 - 1
examples/39-assao/cs_assao_generate_importance_map.sc

@@ -6,7 +6,7 @@
 #include "bgfx_compute.sh" 
 #include "uniforms.sh"
 
-IMAGE2D_WR(s_target, r8, 0);
+IMAGE2D_WO(s_target, r8, 0);
 SAMPLER2DARRAY(s_finalSSAO,  1); 
 
 NUM_THREADS(8, 8, 1)

+ 1 - 1
examples/39-assao/cs_assao_generate_q.sh

@@ -62,7 +62,7 @@ IMAGE2D_RO(s_normalmapSource, rgba8, 2);
 BUFFER_RO(s_loadCounter, uint, 3); 
 SAMPLER2D(s_importanceMap,  4); 
 IMAGE2D_ARRAY_RO(s_baseSSAO, rg8, 5);
-IMAGE2D_ARRAY_WR(s_target, rg8, 6);
+IMAGE2D_ARRAY_WO(s_target, rg8, 6);
 
 // packing/unpacking for edges; 2 bits per edge mean 4 gradient values (0, 0.33, 0.66, 1) for smoother transitions!
 float PackEdges( vec4 edgesLRTB )

+ 1 - 1
examples/39-assao/cs_assao_load_counter_clear.sc

@@ -6,7 +6,7 @@
 #include "bgfx_compute.sh" 
 #include "uniforms.sh"
 
-BUFFER_WR(s_loadCounter, uint, 0);
+BUFFER_WO(s_loadCounter, uint, 0);
 
 NUM_THREADS(1, 1, 1)
 void main() 

+ 1 - 1
examples/39-assao/cs_assao_non_smart_apply.sc

@@ -6,7 +6,7 @@
 #include "bgfx_compute.sh" 
 #include "uniforms.sh"
 
-IMAGE2D_WR(s_target, r8, 0);
+IMAGE2D_WO(s_target, r8, 0);
 SAMPLER2DARRAY(s_finalSSAO,	1);
 
 // edge-ignorant blur & apply (for the lowest quality level 0)

+ 1 - 1
examples/39-assao/cs_assao_non_smart_blur.sc

@@ -6,7 +6,7 @@
 #include "bgfx_compute.sh" 
 #include "uniforms.sh"
 
-IMAGE2D_ARRAY_WR(s_target, rg8, 0);
+IMAGE2D_ARRAY_WO(s_target, rg8, 0);
 SAMPLER2DARRAY(s_blurInput,  1); 
 
 // edge-ignorant blur in x and y directions, 9 pixels touched (for the lowest quality level 0)

+ 1 - 1
examples/39-assao/cs_assao_non_smart_half_apply.sc

@@ -6,7 +6,7 @@
 #include "bgfx_compute.sh" 
 #include "uniforms.sh"
 
-IMAGE2D_WR(s_target, r8, 0);
+IMAGE2D_WO(s_target, r8, 0);
 SAMPLER2DARRAY(s_finalSSAO, 1);
 
 // edge-ignorant blur & apply, skipping half pixels in checkerboard pattern (for the Lowest quality level 0 and Settings::SkipHalfPixelsOnLowQualityLevel == true )

+ 1 - 1
examples/39-assao/cs_assao_postprocess_importance_map_a.sc

@@ -6,7 +6,7 @@
 #include "bgfx_compute.sh" 
 #include "uniforms.sh"
 
-IMAGE2D_WR(s_target, r8, 0);
+IMAGE2D_WO(s_target, r8, 0);
 SAMPLER2D(s_importanceMap, 1);
 
 // Shaders below only needed for adaptive quality level

+ 1 - 1
examples/39-assao/cs_assao_postprocess_importance_map_b.sc

@@ -6,7 +6,7 @@
 #include "bgfx_compute.sh" 
 #include "uniforms.sh"
 
-IMAGE2D_WR(s_target, r8, 0);
+IMAGE2D_WO(s_target, r8, 0);
 SAMPLER2D(s_importanceMap, 1);
 BUFFER_RW(s_loadCounter, uint, 2);
 

+ 4 - 4
examples/39-assao/cs_assao_prepare_depth_mip.sc

@@ -11,10 +11,10 @@ IMAGE2D_RO(s_viewspaceDepthSource1, r16f, 1);
 IMAGE2D_RO(s_viewspaceDepthSource2, r16f, 2);
 IMAGE2D_RO(s_viewspaceDepthSource3, r16f, 3);
 
-IMAGE2D_WR(s_target0, r16f, 4);
-IMAGE2D_WR(s_target1, r16f, 5);
-IMAGE2D_WR(s_target2, r16f, 6);
-IMAGE2D_WR(s_target3, r16f, 7);
+IMAGE2D_WO(s_target0, r16f, 4);
+IMAGE2D_WO(s_target1, r16f, 5);
+IMAGE2D_WO(s_target2, r16f, 6);
+IMAGE2D_WO(s_target3, r16f, 7);
 
 // calculate effect radius and fit our screen sampling pattern inside it
 void CalculateRadiusParameters( const float pixCenterLength, const vec2 pixelDirRBViewspaceSizeAtCenterZ, out float pixLookupRadiusMod, out float effectRadius, out float falloffCalcMulSq )

+ 4 - 4
examples/39-assao/cs_assao_prepare_depths.sc

@@ -8,10 +8,10 @@
 
 SAMPLER2D(s_depthSource, 0);
 
-IMAGE2D_WR(s_target0, r16f, 1); 
-IMAGE2D_WR(s_target1, r16f, 2);
-IMAGE2D_WR(s_target2, r16f, 3);
-IMAGE2D_WR(s_target3, r16f, 4);
+IMAGE2D_WO(s_target0, r16f, 1); 
+IMAGE2D_WO(s_target1, r16f, 2);
+IMAGE2D_WO(s_target2, r16f, 3);
+IMAGE2D_WO(s_target3, r16f, 4);
 
 float ScreenSpaceToViewSpaceDepth( float screenDepth )
 {

+ 5 - 5
examples/39-assao/cs_assao_prepare_depths_and_normals.sc

@@ -8,11 +8,11 @@
 
 SAMPLER2D(s_depthSource, 0);
 
-IMAGE2D_WR(s_target0, r16f, 1);
-IMAGE2D_WR(s_target1, r16f, 2);
-IMAGE2D_WR(s_target2, r16f, 3);
-IMAGE2D_WR(s_target3, r16f, 4);
-IMAGE2D_WR(s_normalsOutputUAV, rgba8, 5);
+IMAGE2D_WO(s_target0, r16f, 1);
+IMAGE2D_WO(s_target1, r16f, 2);
+IMAGE2D_WO(s_target2, r16f, 3);
+IMAGE2D_WO(s_target3, r16f, 4);
+IMAGE2D_WO(s_normalsOutputUAV, rgba8, 5);
 
 float ScreenSpaceToViewSpaceDepth( float screenDepth )
 {

+ 3 - 3
examples/39-assao/cs_assao_prepare_depths_and_normals_half.sc

@@ -8,9 +8,9 @@
 
 SAMPLER2D(s_depthSource, 0);
 
-IMAGE2D_WR(s_target0, r16f, 1);
-IMAGE2D_WR(s_target1, r16f, 2);
-IMAGE2D_WR(s_normalsOutputUAV, rgba8, 5);
+IMAGE2D_WO(s_target0, r16f, 1);
+IMAGE2D_WO(s_target1, r16f, 2);
+IMAGE2D_WO(s_normalsOutputUAV, rgba8, 5);
 
 float ScreenSpaceToViewSpaceDepth( float screenDepth )
 {

+ 2 - 2
examples/39-assao/cs_assao_prepare_depths_half.sc

@@ -7,8 +7,8 @@
 #include "uniforms.sh"
 
 SAMPLER2D(s_depthSource, 0);
-IMAGE2D_WR(s_target0, r16f, 1);
-IMAGE2D_WR(s_target1, r16f, 2);
+IMAGE2D_WO(s_target0, r16f, 1);
+IMAGE2D_WO(s_target1, r16f, 2);
 
 float ScreenSpaceToViewSpaceDepth( float screenDepth )
 {

+ 1 - 1
examples/39-assao/cs_assao_smart_blur.sc

@@ -6,7 +6,7 @@
 #include "bgfx_compute.sh" 
 #include "uniforms.sh"
 
-IMAGE2D_ARRAY_WR(s_target, rg8, 0);
+IMAGE2D_ARRAY_WO(s_target, rg8, 0);
 SAMPLER2DARRAY(s_blurInput, 1);
 
 // unpacking for edges; 2 bits per edge mean 4 gradient values (0, 0.33, 0.66, 1) for smoother transitions!

+ 1 - 1
examples/39-assao/cs_assao_smart_blur_wide.sc

@@ -6,7 +6,7 @@
 #include "bgfx_compute.sh" 
 #include "uniforms.sh"
 
-IMAGE2D_ARRAY_WR(s_target, rg8, 0);
+IMAGE2D_ARRAY_WO(s_target, rg8, 0);
 SAMPLER2DARRAY(s_blurInput, 1);
 
 // unpacking for edges; 2 bits per edge mean 4 gradient values (0, 0.33, 0.66, 1) for smoother transitions!

+ 2 - 2
examples/41-tess/cs_terrain_init.sc

@@ -2,11 +2,11 @@
 
 #include "uniforms.sh"
 
-BUFFER_WR(u_SubdBufferOut, uint, 1);
+BUFFER_WO(u_SubdBufferOut, uint, 1);
 BUFFER_RW(u_CulledSubdBuffer, uint, 2);
 BUFFER_RW(indirectBuffer, uvec4, 3);
 BUFFER_RW(atomicCounterBuffer, uint, 4);
-BUFFER_WR(u_SubdBufferIn, uint, 8);
+BUFFER_WO(u_SubdBufferIn, uint, 8);
 
 NUM_THREADS(1u, 1u, 1u)
 void main()

+ 71 - 69
examples/46-fsr/cs_fsr.h

@@ -24,36 +24,36 @@ uniform vec4 u_params[3];
 #endif // BGFX_SHADER_LANGUAGE_GLSL
 
 #if SAMPLE_SLOW_FALLBACK
-    #include "ffx_a.h"
-    SAMPLER2D(InputTexture, 0);
-    IMAGE2D_WR(OutputTexture, rgba32f, 1);
-    #if SAMPLE_EASU
-        #define FSR_EASU_F 1
-        AF4 FsrEasuRF(AF2 p) { AF4 res = textureGather(InputTexture, p, 0); return res; }
-        AF4 FsrEasuGF(AF2 p) { AF4 res = textureGather(InputTexture, p, 1); return res; }
-        AF4 FsrEasuBF(AF2 p) { AF4 res = textureGather(InputTexture, p, 2); return res; }
-    #endif
-    #if SAMPLE_RCAS
-        #define FSR_RCAS_F
-        AF4 FsrRcasLoadF(ASU2 p) { return texelFetch(InputTexture, ASU2(p), 0); }
-        void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {}
-    #endif
+#	include "ffx_a.h"
+	SAMPLER2D(InputTexture, 0);
+	IMAGE2D_WO(OutputTexture, rgba32f, 1);
+#	if SAMPLE_EASU
+		#define FSR_EASU_F 1
+		AF4 FsrEasuRF(AF2 p) { AF4 res = textureGather(InputTexture, p, 0); return res; }
+		AF4 FsrEasuGF(AF2 p) { AF4 res = textureGather(InputTexture, p, 1); return res; }
+		AF4 FsrEasuBF(AF2 p) { AF4 res = textureGather(InputTexture, p, 2); return res; }
+#	endif
+#	if SAMPLE_RCAS
+		#define FSR_RCAS_F
+		AF4 FsrRcasLoadF(ASU2 p) { return texelFetch(InputTexture, ASU2(p), 0); }
+		void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {}
+#	endif
 #else
-    #define A_HALF
-    #include "ffx_a.h"
-    SAMPLER2D(InputTexture, 0);
-    IMAGE2D_WR(OutputTexture, rgba16f, 1);
-    #if SAMPLE_EASU
-        #define FSR_EASU_H 1
-        AH4 FsrEasuRH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 0)); return res; }
-        AH4 FsrEasuGH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 1)); return res; }
-        AH4 FsrEasuBH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 2)); return res; }
-    #endif
-    #if SAMPLE_RCAS
-        #define FSR_RCAS_H
-        AH4 FsrRcasLoadH(ASW2 p) { return AH4(texelFetch(InputTexture, ASU2(p), 0)); }
-        void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b){}
-    #endif
+#	define A_HALF
+#	include "ffx_a.h"
+	SAMPLER2D(InputTexture, 0);
+	IMAGE2D_WO(OutputTexture, rgba16f, 1);
+#	if SAMPLE_EASU
+		#define FSR_EASU_H 1
+		AH4 FsrEasuRH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 0)); return res; }
+		AH4 FsrEasuGH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 1)); return res; }
+		AH4 FsrEasuBH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 2)); return res; }
+#	endif
+#	if SAMPLE_RCAS
+		#define FSR_RCAS_H
+		AH4 FsrRcasLoadH(ASW2 p) { return AH4(texelFetch(InputTexture, ASU2(p), 0)); }
+		void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b){}
+#	endif
 #endif
 
 #include "ffx_fsr1.h"
@@ -61,38 +61,40 @@ uniform vec4 u_params[3];
 void CurrFilter(AU2 pos, AU4 Const0, AU4 Const1, AU4 Const2, AU4 Const3, AU4 Sample)
 {
 #if SAMPLE_BILINEAR
-    AF2 pp = (AF2(pos) * AF2_AU2(Const0.xy) + AF2_AU2(Const0.zw)) * AF2_AU2(Const1.xy) + AF2(0.5, -0.5) * AF2_AU2(Const1.zw);
-    imageStore(OutputTexture, ASU2(pos), texture2DLod(InputTexture, pp, 0.0));
+	AF2 pp = (AF2(pos) * AF2_AU2(Const0.xy) + AF2_AU2(Const0.zw)) * AF2_AU2(Const1.xy) + AF2(0.5, -0.5) * AF2_AU2(Const1.zw);
+	imageStore(OutputTexture, ASU2(pos), texture2DLod(InputTexture, pp, 0.0));
 #endif
+
 #if SAMPLE_EASU
-    #if SAMPLE_SLOW_FALLBACK
-        AF3 c;
-        FsrEasuF(c, pos, Const0, Const1, Const2, Const3);
-        if( Sample.x == 1 )
-            c *= c;
-        imageStore(OutputTexture, ASU2(pos), AF4(c, 1));
-    #else
-        AH3 c;
-        FsrEasuH(c, pos, Const0, Const1, Const2, Const3);
-        if( Sample.x == 1 )
-            c *= c;
-        imageStore(OutputTexture, ASU2(pos), AH4(c, 1));
-    #endif
+#	if SAMPLE_SLOW_FALLBACK
+		AF3 c;
+		FsrEasuF(c, pos, Const0, Const1, Const2, Const3);
+		if( Sample.x == 1 )
+			c *= c;
+		imageStore(OutputTexture, ASU2(pos), AF4(c, 1));
+#	else
+		AH3 c;
+		FsrEasuH(c, pos, Const0, Const1, Const2, Const3);
+		if( Sample.x == 1 )
+			c *= c;
+		imageStore(OutputTexture, ASU2(pos), AH4(c, 1));
+#	endif
 #endif
+
 #if SAMPLE_RCAS
-    #if SAMPLE_SLOW_FALLBACK
-        AF3 c;
-        FsrRcasF(c.r, c.g, c.b, pos, Const0);
-        if( Sample.x == 1 )
-            c *= c;
-        imageStore(OutputTexture, ASU2(pos), AF4(c, 1));
-    #else
-        AH3 c;
-        FsrRcasH(c.r, c.g, c.b, pos, Const0);
-        if( Sample.x == 1 )
-            c *= c;
-        imageStore(OutputTexture, ASU2(pos), AH4(c, 1));
-    #endif
+#	if SAMPLE_SLOW_FALLBACK
+		AF3 c;
+		FsrRcasF(c.r, c.g, c.b, pos, Const0);
+		if( Sample.x == 1 )
+			c *= c;
+		imageStore(OutputTexture, ASU2(pos), AF4(c, 1));
+#	else
+		AH3 c;
+		FsrRcasH(c.r, c.g, c.b, pos, Const0);
+		if( Sample.x == 1 )
+			c *= c;
+		imageStore(OutputTexture, ASU2(pos), AH4(c, 1));
+#	endif
 #endif
 }
 
@@ -108,20 +110,20 @@ void main()
 		SrcSize.x, SrcSize.y,  // The size of the input image.
 		DstSize.x, DstSize.y); // The output resolution.
 	Sample.x = 0; // no HDR output
-#endif
+#endif // SAMPLE_EASU || SAMPLE_BILINEAR
+
 #if SAMPLE_RCAS
 	FsrRcasCon(Const0, ViewportSizeRcasAttenuation.z);
 	Sample.x = 0;  // no HDR output
-#endif
+#endif // SAMPLE_RCAS
 
-    // Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
-    AU2 gxy = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u);
-    CurrFilter(gxy, Const0, Const1, Const2, Const3, Sample);
-    gxy.x += 8u;
-    CurrFilter(gxy, Const0, Const1, Const2, Const3, Sample);
-    gxy.y += 8u;
-    CurrFilter(gxy, Const0, Const1, Const2, Const3, Sample);
-    gxy.x -= 8u;
-    CurrFilter(gxy, Const0, Const1, Const2, Const3, Sample);
+	// Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
+	AU2 gxy = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u);
+	CurrFilter(gxy, Const0, Const1, Const2, Const3, Sample);
+	gxy.x += 8u;
+	CurrFilter(gxy, Const0, Const1, Const2, Const3, Sample);
+	gxy.y += 8u;
+	CurrFilter(gxy, Const0, Const1, Const2, Const3, Sample);
+	gxy.x -= 8u;
+	CurrFilter(gxy, Const0, Const1, Const2, Const3, Sample);
 }
-

+ 3 - 3
examples/48-drawindirect/cs_drawindirect.sc

@@ -9,10 +9,10 @@
 BUFFER_RO(instanceDataIn, vec4, 0);
 
 // Output
-BUFFER_WR(indirectBuffer, uvec4, 1);
-BUFFER_WR(instanceBufferOut, vec4, 2);
+BUFFER_WO(indirectBuffer, uvec4, 1);
+BUFFER_WO(instanceBufferOut, vec4, 2);
 #ifdef INDIRECT_COUNT
-BUFFER_WR(indirectCountBuffer, int, 3);
+BUFFER_WO(indirectCountBuffer, int, 3);
 #endif
 
 uniform vec4 u_drawParams;

+ 15 - 15
src/bgfx_compute.sh

@@ -32,22 +32,22 @@
 #define readwrite
 #define IMAGE2D_RO( _name, _format, _reg) __IMAGE_XX(_name, _format, _reg, image2D,  readonly)
 #define UIMAGE2D_RO(_name, _format, _reg) __IMAGE_XX(_name, _format, _reg, uimage2D, readonly)
-#define IMAGE2D_WR( _name, _format, _reg) __IMAGE_XX(_name, _format, _reg, image2D,  writeonly)
-#define UIMAGE2D_WR(_name, _format, _reg) __IMAGE_XX(_name, _format, _reg, uimage2D, writeonly)
+#define IMAGE2D_WO( _name, _format, _reg) __IMAGE_XX(_name, _format, _reg, image2D,  writeonly)
+#define UIMAGE2D_WO(_name, _format, _reg) __IMAGE_XX(_name, _format, _reg, uimage2D, writeonly)
 #define IMAGE2D_RW( _name, _format, _reg) __IMAGE_XX(_name, _format, _reg, image2D,  readwrite)
 #define UIMAGE2D_RW(_name, _format, _reg) __IMAGE_XX(_name, _format, _reg, uimage2D, readwrite)
 
 #define IMAGE2D_ARRAY_RO( _name, _format, _reg) __IMAGE_XX(_name, _format, _reg, image2DArray,  readonly)
 #define UIMAGE2D_ARRAY_RO(_name, _format, _reg) __IMAGE_XX(_name, _format, _reg, uimage2DArray, readonly)
-#define IMAGE2D_ARRAY_WR( _name, _format, _reg) __IMAGE_XX(_name, _format, _reg, image2DArray,  writeonly)
-#define UIMAGE2D_ARRAY_WR(_name, _format, _reg) __IMAGE_XX(_name, _format, _reg, uimage2DArray, writeonly)
+#define IMAGE2D_ARRAY_WO( _name, _format, _reg) __IMAGE_XX(_name, _format, _reg, image2DArray,  writeonly)
+#define UIMAGE2D_ARRAY_WO(_name, _format, _reg) __IMAGE_XX(_name, _format, _reg, uimage2DArray, writeonly)
 #define IMAGE2D_ARRAY_RW( _name, _format, _reg) __IMAGE_XX(_name, _format, _reg, image2DArray,  readwrite)
 #define UIMAGE2D_ARRAY_RW(_name, _format, _reg) __IMAGE_XX(_name, _format, _reg, uimage2DArray, readwrite)
 
 #define IMAGE3D_RO( _name, _format, _reg) __IMAGE_XX(_name, _format, _reg, image3D,  readonly)
 #define UIMAGE3D_RO(_name, _format, _reg) __IMAGE_XX(_name, _format, _reg, uimage3D, readonly)
-#define IMAGE3D_WR( _name, _format, _reg) __IMAGE_XX(_name, _format, _reg, image3D,  writeonly)
-#define UIMAGE3D_WR(_name, _format, _reg) __IMAGE_XX(_name, _format, _reg, uimage3D, writeonly)
+#define IMAGE3D_WO( _name, _format, _reg) __IMAGE_XX(_name, _format, _reg, image3D,  writeonly)
+#define UIMAGE3D_WO(_name, _format, _reg) __IMAGE_XX(_name, _format, _reg, uimage3D, writeonly)
 #define IMAGE3D_RW( _name, _format, _reg) __IMAGE_XX(_name, _format, _reg, image3D,  readwrite)
 #define UIMAGE3D_RW(_name, _format, _reg) __IMAGE_XX(_name, _format, _reg, uimage3D, readwrite)
 
@@ -59,7 +59,7 @@
 
 #define BUFFER_RO(_name, _type, _reg) __BUFFER_XX(_name, _type, _reg, readonly)
 #define BUFFER_RW(_name, _type, _reg) __BUFFER_XX(_name, _type, _reg, readwrite)
-#define BUFFER_WR(_name, _type, _reg) __BUFFER_XX(_name, _type, _reg, writeonly)
+#define BUFFER_WO(_name, _type, _reg) __BUFFER_XX(_name, _type, _reg, writeonly)
 
 #define NUM_THREADS(_x, _y, _z) layout (local_size_x = _x, local_size_y = _y, local_size_z = _z) in;
 
@@ -99,10 +99,10 @@
 
 #define UIMAGE2D_RO(_name, _format, _reg) IMAGE2D_RO(_name, _format, _reg)
 
-#define IMAGE2D_WR( _name, _format, _reg)                                                 \
+#define IMAGE2D_WO( _name, _format, _reg)                                                 \
 	WRITEONLY FORMAT(_format) RWTexture2D<COMP_ ## _format> _name : REGISTER(u, _reg);  \
 	
-#define UIMAGE2D_WR(_name, _format, _reg) IMAGE2D_WR(_name, _format, _reg)
+#define UIMAGE2D_WO(_name, _format, _reg) IMAGE2D_WO(_name, _format, _reg)
 
 #define IMAGE2D_RW( _name, _format, _reg)                            \
 	FORMAT(_format) RWTexture2D<COMP_ ## _format> _name : REGISTER(u, _reg);  \
@@ -114,10 +114,10 @@
 
 #define UIMAGE2D_ARRAY_RO(_name, _format, _reg) IMAGE2D_ARRAY_RO(_name, _format, _reg)
 
-#define IMAGE2D_ARRAY_WR( _name, _format, _reg)                                       \
+#define IMAGE2D_ARRAY_WO( _name, _format, _reg)                                       \
 	WRITEONLY FORMAT(_format) RWTexture2DArray<COMP_ ## _format> _name : REGISTER(u, _reg);    \
 
-#define UIMAGE2D_ARRAY_WR(_name, _format, _reg) IMAGE2D_ARRAY_WR(_name, _format, _reg)
+#define UIMAGE2D_ARRAY_WO(_name, _format, _reg) IMAGE2D_ARRAY_WO(_name, _format, _reg)
 
 #define IMAGE2D_ARRAY_RW(_name, _format, _reg)                              \
 	FORMAT(_format) RWTexture2DArray<COMP_ ## _format> _name : REGISTER(u, _reg);    \
@@ -129,10 +129,10 @@
 
 #define UIMAGE3D_RO(_name, _format, _reg) IMAGE3D_RO(_name, _format, _reg)
 
-#define IMAGE3D_WR( _name, _format, _reg)                                      \
+#define IMAGE3D_WO( _name, _format, _reg)                                      \
 	WRITEONLY FORMAT(_format) RWTexture3D<COMP_ ## _format> _name : REGISTER(u, _reg);
 
-#define UIMAGE3D_WR(_name, _format, _reg) IMAGE3D_RW(_name, _format, _reg)
+#define UIMAGE3D_WO(_name, _format, _reg) IMAGE3D_WO(_name, _format, _reg)
 
 #define IMAGE3D_RW( _name, _format, _reg)                            \
 	FORMAT(_format) RWTexture3D<COMP_ ## _format> _name : REGISTER(u, _reg);  \
@@ -142,11 +142,11 @@
 #if BGFX_SHADER_LANGUAGE_METAL || BGFX_SHADER_LANGUAGE_SPIRV
 #define BUFFER_RO(_name, _struct, _reg) StructuredBuffer<_struct>   _name : REGISTER(t, _reg)
 #define BUFFER_RW(_name, _struct, _reg) RWStructuredBuffer <_struct> _name : REGISTER(u, _reg)
-#define BUFFER_WR(_name, _struct, _reg) BUFFER_RW(_name, _struct, _reg)
+#define BUFFER_WO(_name, _struct, _reg) BUFFER_RW(_name, _struct, _reg)
 #else
 #define BUFFER_RO(_name, _struct, _reg) Buffer<_struct>   _name : REGISTER(t, _reg)
 #define BUFFER_RW(_name, _struct, _reg) RWBuffer<_struct> _name : REGISTER(u, _reg)
-#define BUFFER_WR(_name, _struct, _reg) BUFFER_RW(_name, _struct, _reg)
+#define BUFFER_WO(_name, _struct, _reg) BUFFER_RW(_name, _struct, _reg)
 #endif
 
 #define NUM_THREADS(_x, _y, _z) [numthreads(_x, _y, _z)]