Browse Source

Add a workgraphs test

Panagiotis Christopoulos Charitos 1 year ago
parent
commit
aaf5345684

+ 1 - 1
AnKi/Gr/D3D/D3DDescriptor.cpp

@@ -576,7 +576,7 @@ void DescriptorState::flush(ID3D12GraphicsCommandList& cmdList)
 													  D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
 				}
 				else if(inDescriptor.m_type == DescriptorType::kStorageBuffer && !!(inDescriptor.m_flags & DescriptorFlag::kWrite)
-						&& !!(inDescriptor.m_flags | DescriptorFlag::kByteAddressBuffer))
+						&& !!(inDescriptor.m_flags & DescriptorFlag::kByteAddressBuffer))
 				{
 					// RWByteAddressBuffer
 

+ 2 - 0
AnKi/Gr/D3D/D3DGrManager.cpp

@@ -287,6 +287,8 @@ Error GrManagerImpl::initInternal(const GrManagerInitInfo& init)
 
 			debugInterface1->SetEnableGPUBasedValidation(true);
 		}
+
+		ANKI_D3D_LOGI("Validation is enabled (GPU validation %s)", (g_gpuValidationCVar.get()) ? "as well" : "no");
 	}
 
 	ComPtr<IDXGIFactory2> factory2;

+ 3 - 8
AnKi/ShaderCompiler/ShaderParser.cpp

@@ -15,14 +15,9 @@ namespace anki {
 	ANKI_SHADER_COMPILER_LOGE("%s: " msg_ ": %s", fname.cstr(), line.cstr()); \
 	return Error::kUserData
 
-inline constexpr Array<CString, U32(ShaderType::kCount)> kShaderStageNames = {{"VERTEX", "TESSELLATION_CONTROL", "TESSELLATION_EVALUATION",
-																			   "GEOMETRY", "TASK", "MESH", "FRAGMENT", "COMPUTE", "RAY_GEN",
-																			   "ANY_HIT", "CLOSEST_HIT", "MISS", "INTERSECTION", "CALLABLE"}};
-
-inline constexpr char kShaderHeader[] = R"(#define ANKI_%s_SHADER 1
-#define kMaxBindlessTextures %uu
-#define kMaxBindlessReadonlyTextureBuffers %uu
-)";
+/// Upper-case name of every shader stage. The array is sized by ShaderType::kCount, so it needs one entry per ShaderType
+/// (presumably listed in enum order -- confirm against the ShaderType declaration).
+inline constexpr Array<CString, U32(ShaderType::kCount)> kShaderStageNames = {
+	{"VERTEX", "TESSELLATION_CONTROL", "TESSELLATION_EVALUATION", "GEOMETRY", "TASK", "MESH", "FRAGMENT", "COMPUTE", "RAY_GEN", "ANY_HIT",
+	 "CLOSEST_HIT", "MISS", "INTERSECTION", "CALLABLE", "WORK_GRAPH"}};
 
 static ShaderType strToShaderType(CString str)
 {

+ 1 - 1
AnKi/Shaders/Functions.hlsl

@@ -675,7 +675,7 @@ vector<T, 3> filmGrain(vector<T, 3> color, Vec2 uv, T strength, F32 time)
 	return color * grain;
 }
 
-#if ANKI_COMPUTE_SHADER
+#if ANKI_COMPUTE_SHADER || ANKI_WORK_GRAPH_SHADER
 /// HLSL doesn't have SubgroupID so compute it. It's a macro because we can't have functions that InterlockedAdd on local variables (the compiler
 /// can't see it's groupshared).
 /// @param svGroupIndex Self explanatory.

+ 12 - 0
AnKi/Shaders/Intellisense.hlsl

@@ -292,3 +292,15 @@ template<typename T>
 void DispatchMesh(U32 groupSizeX, U32 groupSizeY, U32 groupSizeZ, T payload);
 
 void SetMeshOutputCounts(U32 vertexCount, U32 primitiveCount);
+
+// Workgraphs
+
+/// Stand-in declaration of the work graph DispatchNodeInputRecord<T> type for this Intellisense header.
+template<typename T>
+struct DispatchNodeInputRecord
+{
+	/// Returns a default-constructed T (presumably only the signature matters for code completion).
+	T Get() const
+	{
+		T t;
+		return t;
+	}
+};

+ 41 - 15
Tests/Gr/GrCommon.h

@@ -11,7 +11,7 @@
 
 namespace anki {
 
-inline ShaderPtr createShader(CString src, ShaderType type)
+inline ShaderPtr createShader(CString src, ShaderType type, ConstWeakArray<CString> extraCompilerArgs = {})
 {
 	ShaderCompilerString header;
 	ShaderParser::generateAnkiShaderHeader(type, header);
@@ -20,9 +20,9 @@ inline ShaderPtr createShader(CString src, ShaderType type)
 	ShaderCompilerString errorLog;
 
 #if ANKI_GR_BACKEND_VULKAN
-	Error err = compileHlslToSpirv(header, type, false, true, {}, bin, errorLog);
+	Error err = compileHlslToSpirv(header, type, false, true, extraCompilerArgs, bin, errorLog);
 #else
-	Error err = compileHlslToDxil(header, type, false, true, {}, bin, errorLog);
+	Error err = compileHlslToDxil(header, type, false, true, extraCompilerArgs, bin, errorLog);
 #endif
 	if(err)
 	{
@@ -48,10 +48,10 @@ inline ShaderPtr createShader(CString src, ShaderType type)
 	return GrManager::getSingleton().newShader(initInf);
 }
 
-inline ShaderProgramPtr createVertFragProg(CString vert, CString frag)
+inline ShaderProgramPtr createVertFragProg(CString vert, CString frag, ConstWeakArray<CString> extraCompilerArgs = {})
 {
-	ShaderPtr vertS = createShader(vert, ShaderType::kVertex);
-	ShaderPtr fragS = createShader(frag, ShaderType::kFragment);
+	ShaderPtr vertS = createShader(vert, ShaderType::kVertex, extraCompilerArgs);
+	ShaderPtr fragS = createShader(frag, ShaderType::kFragment, extraCompilerArgs);
 
 	ShaderProgramInitInfo init;
 	init.m_graphicsShaders[ShaderType::kVertex] = vertS.get();
@@ -62,6 +62,15 @@ inline ShaderProgramPtr createVertFragProg(CString vert, CString frag)
 	return prog;
 }
 
+/// Read the whole text file at @a filename and compile it with createShader().
+/// @param filename Path of the shader source file.
+/// @param type The shader stage to compile for.
+/// @param extraCompilerArgs Extra arguments forwarded untouched to createShader().
+/// @note Failures to open or read the file are reported through ANKI_TEST_EXPECT_NO_ERR.
+inline ShaderPtr loadShader(CString filename, ShaderType type, ConstWeakArray<CString> extraCompilerArgs = {})
+{
+	File sourceFile;
+	ANKI_TEST_EXPECT_NO_ERR(sourceFile.open(filename, FileOpenFlag::kRead));
+
+	String sourceText;
+	ANKI_TEST_EXPECT_NO_ERR(sourceFile.readAllText(sourceText));
+
+	return createShader(sourceText, type, extraCompilerArgs);
+}
+
 const U kWidth = 1024;
 const U kHeight = 768;
 
@@ -75,8 +84,8 @@ inline void commonInit(Bool validation = true)
 	g_debugMarkersCVar.set(true);
 	if(validation)
 	{
-		g_validationCVar.set(true);
-		[[maybe_unused]] const Error err = CVarSet::getSingleton().setMultiple(Array<const Char*, 2>{"GpuValidation", "1"});
+		[[maybe_unused]] Error err = CVarSet::getSingleton().setMultiple(Array<const Char*, 2>{"GpuValidation", "1"});
+		err = CVarSet::getSingleton().setMultiple(Array<const Char*, 2>{"DebugMarkers", "1"});
 	}
 
 	initWindow();
@@ -133,21 +142,22 @@ inline BufferPtr createBuffer(BufferUsageBit usage, T pattern, U32 count, CStrin
 }
 
 template<typename T>
-inline TexturePtr createTexture2d(const TextureInitInfo& texInit, T initialValue)
+inline TexturePtr createTexture2d(const TextureInitInfo texInit_, ConstWeakArray<T> data)
 {
+	TextureInitInfo texInit = texInit_;
+	texInit.m_usage |= TextureUsageBit::kTransferDestination;
+
 	BufferInitInfo buffInit;
 	buffInit.m_mapAccess = BufferMapAccessBit::kWrite;
 	buffInit.m_size = texInit.m_height * texInit.m_width * getFormatInfo(texInit.m_format).m_texelSize;
+	ANKI_ASSERT(getFormatInfo(texInit.m_format).m_texelSize == sizeof(T));
+	ANKI_ASSERT(buffInit.m_size == data.getSizeInBytes());
 	buffInit.m_usage = BufferUsageBit::kTransferSource;
 
 	BufferPtr staging = GrManager::getSingleton().newBuffer(buffInit);
 
-	T* inData = static_cast<T*>(staging->map(0, kMaxPtrSize, BufferMapAccessBit::kWrite));
-	const T* endData = inData + (buffInit.m_size / sizeof(T));
-	for(; inData < endData; ++inData)
-	{
-		*inData = initialValue;
-	}
+	void* inData = staging->map(0, kMaxPtrSize, BufferMapAccessBit::kWrite);
+	memcpy(inData, data.getBegin(), buffInit.m_size);
 	staging->unmap();
 
 	TexturePtr tex = GrManager::getSingleton().newTexture(texInit);
@@ -167,6 +177,15 @@ inline TexturePtr createTexture2d(const TextureInitInfo& texInit, T initialValue
 	return tex;
 };
 
+/// Convenience overload: create a 2D texture where every texel equals @a initialValue.
+/// It builds a width*height array of T and hands it to the ConstWeakArray overload of createTexture2d().
+template<typename T>
+inline TexturePtr createTexture2d(const TextureInitInfo& texInit, T initialValue)
+{
+	const auto texelCount = texInit.m_height * texInit.m_width;
+
+	DynamicArray<T> texels;
+	texels.resize(texelCount, initialValue);
+
+	const ConstWeakArray<T> texelView(texels);
+	return createTexture2d(texInit, texelView);
+};
+
 template<typename T>
 inline void readBuffer(BufferPtr buff, DynamicArray<T>& out)
 {
@@ -216,4 +235,11 @@ inline void validateBuffer(BufferPtr buff, ConstWeakArray<T> values)
 	}
 }
 
+/// Single-value flavour of validateBuffer(): wraps @a value in a one-element array and validates @a buff against it.
+template<typename T>
+inline void validateBuffer2(BufferPtr buff, T value)
+{
+	const Array<T, 1> expected = {value};
+	const ConstWeakArray expectedView(expected);
+	validateBuffer(buff, expectedView);
+}
+
 } // end namespace anki

+ 150 - 0
Tests/Gr/GrWorkGraphs.cpp

@@ -472,3 +472,153 @@ void main(uint svDispatchThreadId : SV_DispatchThreadId, uint svGroupIndex : SV_
 
 	commonDestroy();
 }
+
+// Computes the per-channel max of a 4096x4096 RGBA32F texture on the GPU and validates the result. The reduction runs either as a
+// work graph (WorkDrainWg.hlsl) or as two compute dispatches (WorkDrainCompute.hlsl), selected by the WORKGRAPHS environment
+// variable and the device capabilities.
+ANKI_TEST(Gr, WorkGraphsWorkDrain)
+{
+	// BENCHMARK=1 turns validation off (see commonInit below) and repeats the submission many times
+	const Bool bBenchmark = getenv("BENCHMARK") && CString(getenv("BENCHMARK")) == "1";
+
+	[[maybe_unused]] Error err = CVarSet::getSingleton().setMultiple(Array<const Char*, 2>{"WorkGraphs", "1"});
+
+	commonInit(!bBenchmark);
+
+	// The work graph path needs both the WORKGRAPHS=1 opt-in and device support
+	const Bool bWorkgraphs =
+		getenv("WORKGRAPHS") && CString(getenv("WORKGRAPHS")) == "1" && GrManager::getSingleton().getDeviceCapabilities().m_workGraphs;
+
+	ANKI_TEST_LOGI("Testing with BENCHMARK=%u WORKGRAPHS=%u", bBenchmark, bWorkgraphs);
+
+	// These mirror the defines at the top of WorkDrainWg.hlsl and WorkDrainCompute.hlsl
+#define TEX_SIZE_X 4096u
+#define TEX_SIZE_Y 4096u
+#define TILE_SIZE_X 32u
+#define TILE_SIZE_Y 32u
+#define TILE_COUNT_X (TEX_SIZE_X / TILE_SIZE_X)
+#define TILE_COUNT_Y (TEX_SIZE_Y / TILE_SIZE_Y)
+#define TILE_COUNT (TILE_COUNT_X * TILE_COUNT_Y)
+
+	{
+		// Create WG prog
+		ShaderProgramPtr wgProg;
+		if(bWorkgraphs)
+		{
+			ShaderPtr wgShader = loadShader(ANKI_SOURCE_DIRECTORY "/Tests/Gr/WorkDrainWg.hlsl", ShaderType::kWorkGraph);
+
+			ShaderProgramInitInfo progInit;
+			progInit.m_workGraph.m_shader = wgShader.get();
+			// Specialize the "main" node with a grid of one threadgroup per tile
+			Array<WorkGraphNodeSpecialization, 1> specializations = {{{"main", UVec3(TILE_COUNT_X, TILE_COUNT_Y, 1)}}};
+			progInit.m_workGraph.m_nodeSpecializations = specializations;
+			wgProg = GrManager::getSingleton().newShaderProgram(progInit);
+		}
+
+		// Scratch buff
+		BufferPtr scratchBuff;
+		if(bWorkgraphs)
+		{
+			BufferInitInfo scratchInit("scratch");
+			scratchInit.m_size = wgProg->getWorkGraphMemoryRequirements();
+			scratchInit.m_usage = BufferUsageBit::kAllStorage;
+			scratchBuff = GrManager::getSingleton().newBuffer(scratchInit);
+		}
+
+		// Create compute progs
+		ShaderProgramPtr compProg0, compProg1;
+		{
+			// Same source file compiled twice: with -DFIRST for the per-tile pass and without it for the final pass
+			ShaderPtr shader =
+				loadShader(ANKI_SOURCE_DIRECTORY "/Tests/Gr/WorkDrainCompute.hlsl", ShaderType::kCompute, Array<CString, 1>{"-DFIRST"});
+			ShaderProgramInitInfo progInit;
+			progInit.m_computeShader = shader.get();
+			compProg0 = GrManager::getSingleton().newShaderProgram(progInit);
+
+			shader = loadShader(ANKI_SOURCE_DIRECTORY "/Tests/Gr/WorkDrainCompute.hlsl", ShaderType::kCompute);
+			progInit.m_computeShader = shader.get();
+			compProg1 = GrManager::getSingleton().newShaderProgram(progInit);
+		}
+
+		// Create texture 2D
+		TexturePtr tex;
+		{
+			DynamicArray<Vec4> data;
+			data.resize(TEX_SIZE_X * TEX_SIZE_Y, Vec4(1.0f));
+			// A single outlier texel. Combined with the 1.0 fill it produces the max that is validated at the end
+			data[10] = Vec4(1.1f, 2.06f, 3.88f, 0.5f);
+
+			TextureInitInfo texInit("Tex");
+			texInit.m_width = TEX_SIZE_X;
+			texInit.m_height = TEX_SIZE_Y;
+			texInit.m_format = Format::kR32G32B32A32_Sfloat;
+			texInit.m_usage = TextureUsageBit::kAllUav | TextureUsageBit::kAllSrv;
+
+			tex = createTexture2d(texInit, ConstWeakArray(data));
+		}
+
+		// Create counter buff
+		BufferPtr threadgroupCountBuff;
+		{
+			threadgroupCountBuff = createBuffer(BufferUsageBit::kStorageComputeWrite, U32(0u), 1);
+		}
+
+		// Result buffers
+		BufferPtr tileMax = createBuffer(BufferUsageBit::kAllStorage, Vec4(0.1f), TILE_COUNT);
+		BufferPtr finalMax = createBuffer(BufferUsageBit::kAllStorage, Vec4(0.1f), 1);
+
+		const U32 iterationCount = (!bBenchmark) ? 1 : 10000u;
+		Second avgTimeMs = 0.0;
+		for(U32 i = 0; i < iterationCount; ++i)
+		{
+			CommandBufferPtr cmdb =
+				GrManager::getSingleton().newCommandBuffer(CommandBufferInitInfo(CommandBufferFlag::kSmallBatch | CommandBufferFlag::kGeneralWork));
+
+			BufferBarrierInfo barr = {BufferView(tileMax.get()), BufferUsageBit::kStorageComputeWrite, BufferUsageBit::kStorageComputeWrite};
+			cmdb->setPipelineBarrier({}, {&barr, 1}, {});
+
+			// The registers match the declarations in the two HLSL files (t0, u0, u1 and, for the work graph only, u2)
+			cmdb->bindTexture(ANKI_REG(t0), TextureView(tex.get(), TextureSubresourceDesc::all()));
+			cmdb->bindStorageBuffer(ANKI_REG(u0), BufferView(tileMax.get()));
+			cmdb->bindStorageBuffer(ANKI_REG(u1), BufferView(finalMax.get()));
+			cmdb->bindStorageBuffer(ANKI_REG(u2), BufferView(threadgroupCountBuff.get()));
+
+			if(bWorkgraphs)
+			{
+				cmdb->bindShaderProgram(wgProg.get());
+
+				// CPU-side layout of the entry node's input record (FirstNodeInput in WorkDrainWg.hlsl)
+				struct FirstNodeRecord
+				{
+					UVec3 m_gridSize;
+				};
+
+				Array<FirstNodeRecord, 1> records;
+				records[0].m_gridSize = UVec3(TILE_COUNT_X, TILE_COUNT_Y, 1);
+
+				cmdb->dispatchGraph(BufferView(scratchBuff.get()), records.getBegin(), records.getSize(), sizeof(records[0]));
+			}
+			else
+			{
+				// Pass 1: one threadgroup per tile writes the per-tile max
+				cmdb->bindShaderProgram(compProg0.get());
+				cmdb->dispatchCompute(TILE_COUNT_X, TILE_COUNT_Y, 1);
+
+				barr = {BufferView(tileMax.get()), BufferUsageBit::kStorageComputeWrite, BufferUsageBit::kStorageComputeRead};
+				cmdb->setPipelineBarrier({}, {&barr, 1}, {});
+
+				// Pass 2: a single threadgroup reduces the per-tile results
+				cmdb->bindShaderProgram(compProg1.get());
+				cmdb->dispatchCompute(1, 1, 1);
+			}
+
+			cmdb->endRecording();
+
+			// The measurement covers submit + wait as seen from the CPU
+			const U64 start = HighRezTimer::getCurrentTimeUs();
+
+			FencePtr fence;
+			GrManager::getSingleton().submit(cmdb.get(), {}, &fence);
+			fence->clientWait(kMaxSecond);
+
+			const U64 end = HighRezTimer::getCurrentTimeUs();
+
+			// Microseconds to milliseconds, accumulated as a running average
+			avgTimeMs += (Second(end - start) * 0.001) / Second(iterationCount);
+		}
+
+		// Per-channel max of the 1.0 fill and the outlier texel written above
+		validateBuffer2(finalMax, Vec4(1.1f, 2.06f, 3.88f, 1.0f));
+
+		if(bBenchmark)
+		{
+			ANKI_TEST_LOGI("Benchmark: avg time: %fms", avgTimeMs);
+		}
+	}
+
+	commonDestroy();
+}

+ 77 - 0
Tests/Gr/WorkDrainCompute.hlsl

@@ -0,0 +1,77 @@
+// Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#define TEX_SIZE_X 4096u
+#define TEX_SIZE_Y 4096u
+#define TILE_SIZE_X 32u
+#define TILE_SIZE_Y 32u
+#define TILE_COUNT_X (TEX_SIZE_X / TILE_SIZE_X)
+#define TILE_COUNT_Y (TEX_SIZE_Y / TILE_SIZE_Y)
+#define TILE_COUNT (TILE_COUNT_X * TILE_COUNT_Y)
+
+Texture2D g_inputTex : register(t0);
+RWStructuredBuffer<float4> g_tileMaxColors : register(u0);
+RWStructuredBuffer<float4> g_result : register(u1);
+
+#if defined(FIRST)
+
+groupshared float4 g_tileMax[TILE_SIZE_X * TILE_SIZE_Y];
+
+// First pass (compiled with FIRST defined). Every threadgroup loads one TILE_SIZE_X x TILE_SIZE_Y tile of g_inputTex into groupshared
+// memory, reduces it to its per-channel max and stores that max in g_tileMaxColors at the tile's linear index.
+[numthreads(TILE_SIZE_X, TILE_SIZE_Y, 1)] void main(uint2 svDispatchThreadId : SV_DispatchThreadID, uint svGroupIndex : SV_GroupIndex,
+													uint2 svGroupId : SV_GroupID)
+{
+	// One texel per thread
+	const float4 texel = g_inputTex[svDispatchThreadId];
+	g_tileMax[svGroupIndex] = texel;
+
+	GroupMemoryBarrierWithGroupSync();
+
+	// Tree reduction: each step folds the upper half of the active range into the lower half
+	uint stride = TILE_SIZE_X * TILE_SIZE_Y / 2u;
+	[loop] while(stride > 0u)
+	{
+		if(svGroupIndex < stride)
+		{
+			const float4 lower = g_tileMax[svGroupIndex];
+			const float4 upper = g_tileMax[svGroupIndex + stride];
+			g_tileMax[svGroupIndex] = max(lower, upper);
+		}
+
+		GroupMemoryBarrierWithGroupSync();
+		stride >>= 1u;
+	}
+
+	// Every thread of the group stores the same value to the same slot
+	g_tileMaxColors[svGroupId.y * TILE_COUNT_X + svGroupId.x] = g_tileMax[0];
+}
+
+#else
+
+groupshared float4 g_maxColor[64];
+
+// Second pass (compiled without FIRST). 64 threads each take the max of a contiguous slice of g_tileMaxColors, then the 64 partial
+// results are reduced in groupshared memory and written to g_result[0].
+[numthreads(64, 1, 1)] void main(uint svGroupIndex : SV_GroupIndex)
+{
+	const uint tilesPerThread = TILE_COUNT / 64;
+	const uint firstTile = svGroupIndex * tilesPerThread;
+
+	// Serial max over this thread's slice
+	float4 threadMax = 0.0;
+	for(uint i = 0u; i < tilesPerThread; ++i)
+	{
+		threadMax = max(threadMax, g_tileMaxColors[firstTile + i]);
+	}
+
+	g_maxColor[svGroupIndex] = threadMax;
+
+	GroupMemoryBarrierWithGroupSync();
+
+	// Tree reduction of the 64 partial results
+	uint stride = 64 / 2u;
+	[loop] while(stride > 0u)
+	{
+		if(svGroupIndex < stride)
+		{
+			g_maxColor[svGroupIndex] = max(g_maxColor[svGroupIndex], g_maxColor[svGroupIndex + stride]);
+		}
+
+		GroupMemoryBarrierWithGroupSync();
+		stride >>= 1u;
+	}
+
+	// Every thread stores the same value
+	g_result[0] = g_maxColor[0];
+}
+
+#endif

+ 110 - 0
Tests/Gr/WorkDrainWg.hlsl

@@ -0,0 +1,110 @@
+// Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#define TEX_SIZE_X 4096u
+#define TEX_SIZE_Y 4096u
+#define TILE_SIZE_X 32u
+#define TILE_SIZE_Y 32u
+#define TILE_COUNT_X (TEX_SIZE_X / TILE_SIZE_X)
+#define TILE_COUNT_Y (TEX_SIZE_Y / TILE_SIZE_Y)
+#define TILE_COUNT (TILE_COUNT_X * TILE_COUNT_Y)
+
+// Input record of the entry node ("main"). Its only member carries the dispatch grid of that node.
+struct FirstNodeInput
+{
+	uint3 m_svDispatchGrid : SV_DispatchGrid;
+};
+
+// Record that "main" outputs and "secondNode" consumes. Its only member carries the dispatch grid of secondNode.
+struct SecondNodeInput
+{
+	uint3 m_svDispatchGrid : SV_DispatchGrid;
+};
+
+Texture2D g_inputTex : register(t0);
+RWStructuredBuffer<float4> g_tileMaxColors : register(u0);
+RWStructuredBuffer<float4> g_result : register(u1);
+RWStructuredBuffer<uint> g_threadgroupCount : register(u2);
+
+groupshared float4 g_tileMax[TILE_SIZE_X * TILE_SIZE_Y];
+
+// Written by thread 0 and read by every thread of the group, so that the output record request further down is group-uniform.
+groupshared uint g_isLastThreadgroup;
+
+// Entry node. Every threadgroup reduces one TILE_SIZE_X x TILE_SIZE_Y tile of g_inputTex to its per-channel max and stores it in
+// g_tileMaxColors. The threadgroup that finishes last submits a single record that launches secondNode.
+// The host test dispatches TILE_COUNT_X x TILE_COUNT_Y threadgroups (one per tile), which is what the completion check assumes.
+[Shader("node")][NodeLaunch("broadcasting")][NodeIsProgramEntry][NodeMaxDispatchGrid(1, 1, 1)][numthreads(TILE_SIZE_X, TILE_SIZE_Y, 1)] void
+main(DispatchNodeInputRecord<FirstNodeInput> input, [MaxRecords(1)] NodeOutput<SecondNodeInput> secondNode,
+	 uint2 svDispatchThreadId : SV_DispatchThreadID, uint svGroupIndex : SV_GroupIndex, uint2 svGroupId : SV_GROUPID)
+{
+	g_tileMax[svGroupIndex] = g_inputTex[svDispatchThreadId];
+
+	GroupMemoryBarrierWithGroupSync();
+
+	// Tree reduction in groupshared memory, g_tileMax[0] ends up holding the max of the tile
+	[loop] for(uint s = TILE_SIZE_X * TILE_SIZE_Y / 2u; s > 0u; s >>= 1u)
+	{
+		if(svGroupIndex < s)
+		{
+			g_tileMax[svGroupIndex] = max(g_tileMax[svGroupIndex], g_tileMax[svGroupIndex + s]);
+		}
+
+		GroupMemoryBarrierWithGroupSync();
+	}
+
+	const uint tileIdx = svGroupId.y * TILE_COUNT_X + svGroupId.x;
+	g_tileMaxColors[tileIdx] = g_tileMax[0];
+
+	// NOTE(review): this only synchronizes the group. Confirm that the g_tileMaxColors writes of all groups are visible to secondNode
+	// (it may need a globallycoherent resource and a device-scope barrier).
+	GroupMemoryBarrierWithGroupSync();
+
+	// Check if it's the last threadgroup executing. Only thread 0 touches the global counter
+	if(svGroupIndex == 0)
+	{
+		uint orig;
+		InterlockedAdd(g_threadgroupCount[0], 1, orig);
+
+		// The counter gets one increment per threadgroup, so compare against the number of threadgroups (TILE_COUNT) and not against
+		// the number of threads in a group
+		const bool last = (orig + 1u) == TILE_COUNT;
+
+		if(last)
+		{
+			// Leave the counter ready for the next dispatch
+			g_threadgroupCount[0] = 0;
+		}
+
+		g_isLastThreadgroup = last ? 1u : 0u;
+	}
+
+	// Broadcast thread 0's decision to the whole group
+	GroupMemoryBarrierWithGroupSync();
+	const bool lastThreadgroup = g_isLastThreadgroup != 0u;
+
+	// Submit (or not) new work. The requested record count has to be the same for all threads of the group
+	GroupNodeOutputRecords<SecondNodeInput> recs = secondNode.GetGroupNodeOutputRecords(lastThreadgroup ? 1 : 0);
+
+	if(lastThreadgroup)
+	{
+		// secondNode declares NodeMaxDispatchGrid(1, 1, 1) and does the whole final reduction with one 64-thread group
+		recs.Get().m_svDispatchGrid = uint3(1, 1, 1);
+	}
+
+	recs.OutputComplete();
+}
+
+groupshared float4 g_maxColor[64];
+
+// Final reduction node. 64 threads each take the max of a contiguous slice of g_tileMaxColors, then the 64 partial results are
+// reduced in groupshared memory and written to g_result[0]. The input record is not read.
+[Shader("node")][NodeLaunch("broadcasting")][NodeMaxDispatchGrid(1, 1, 1)][numthreads(64, 1, 1)] void
+secondNode(DispatchNodeInputRecord<SecondNodeInput> inp, uint svGroupIndex : SV_GroupIndex)
+{
+	const uint tilesPerThread = TILE_COUNT / 64;
+	const uint firstTile = svGroupIndex * tilesPerThread;
+
+	// Serial max over this thread's slice
+	float4 threadMax = 0.0;
+	for(uint i = 0u; i < tilesPerThread; ++i)
+	{
+		threadMax = max(threadMax, g_tileMaxColors[firstTile + i]);
+	}
+
+	g_maxColor[svGroupIndex] = threadMax;
+
+	GroupMemoryBarrierWithGroupSync();
+
+	// Tree reduction of the 64 partial results
+	uint stride = 64 / 2u;
+	[loop] while(stride > 0u)
+	{
+		if(svGroupIndex < stride)
+		{
+			g_maxColor[svGroupIndex] = max(g_maxColor[svGroupIndex], g_maxColor[svGroupIndex + stride]);
+		}
+
+		GroupMemoryBarrierWithGroupSync();
+		stride >>= 1u;
+	}
+
+	// Every thread stores the same value
+	g_result[0] = g_maxColor[0];
+}

+ 3 - 3
Tools/FormatSource.py

@@ -15,9 +15,9 @@ import platform
 
 file_extensions = ["h", "hpp", "c", "cpp", "glsl", "hlsl", "ankiprog"]
 directories = ["AnKi", "Tests", "Sandbox", "Tools", "Samples"]
-hlsl_semantics = ["TEXCOORD", "SV_POSITION", "SV_TARGET0", "SV_TARGET1", "SV_TARGET2", "SV_TARGET3", "SV_TARGET4",
-                  "SV_TARGET5", "SV_TARGET6", "SV_TARGET7", "SV_DISPATCHTHREADID", "SV_GROUPINDEX", "SV_GROUPID",
-                  "SV_GROUPTHREADID"]
+hlsl_semantics = ["TEXCOORD", "SV_POSITION", "SV_Position", "SV_TARGET0", "SV_TARGET1", "SV_TARGET2", "SV_TARGET3", "SV_TARGET4",
+                  "SV_TARGET5", "SV_TARGET6", "SV_TARGET7", "SV_DISPATCHTHREADID", "SV_DispatchThreadID", "SV_GROUPINDEX", "SV_GroupIndex",
+                  "SV_GROUPID", "SV_GroupID", "SV_GROUPTHREADID", "SV_GroupThreadID"]
 hlsl_attribs = ["[shader(\"closesthit\")]", "[shader(\"anyhit\")]", "[shader(\"raygeneration\")]", "[shader(\"miss\")]",
                 "[raypayload]", "[outputtopology(\"triangle\")]"]
 hlsl_attribs_fake = ["______shaderclosesthit", "______shaderanyhit", "______shaderraygeneration", "______shadermiss",