Browse Source

D3D: Fix validation errors

Panagiotis Christopoulos Charitos 1 year ago
parent
commit
7087b31d60
58 changed files with 637 additions and 378 deletions
  1. 1 1
      AnKi/Gr/BackendCommon/Format.def.h
  2. 2 2
      AnKi/Gr/BackendCommon/GraphicsStateTracker.h
  3. 1 5
      AnKi/Gr/Common.h
  4. 8 3
      AnKi/Gr/D3D/D3DBuffer.cpp
  5. 61 22
      AnKi/Gr/D3D/D3DCommandBuffer.cpp
  6. 30 0
      AnKi/Gr/D3D/D3DCommon.cpp
  7. 14 2
      AnKi/Gr/D3D/D3DCommon.h
  8. 26 19
      AnKi/Gr/D3D/D3DDescriptor.cpp
  9. 7 7
      AnKi/Gr/D3D/D3DDescriptor.h
  10. 1 0
      AnKi/Gr/D3D/D3DFence.h
  11. 1 1
      AnKi/Gr/D3D/D3DFrameGarbageCollector.cpp
  12. 68 67
      AnKi/Gr/D3D/D3DGrManager.cpp
  13. 4 0
      AnKi/Gr/D3D/D3DGrManager.h
  14. 30 12
      AnKi/Gr/D3D/D3DGraphicsState.cpp
  15. 2 2
      AnKi/Gr/D3D/D3DQueryFactory.cpp
  16. 5 5
      AnKi/Gr/D3D/D3DQueryFactory.h
  17. 1 1
      AnKi/Gr/D3D/D3DSampler.cpp
  18. 2 0
      AnKi/Gr/D3D/D3DShaderProgram.cpp
  19. 156 104
      AnKi/Gr/D3D/D3DTexture.cpp
  20. 20 10
      AnKi/Gr/D3D/D3DTexture.h
  21. 10 1
      AnKi/Gr/RenderGraph.cpp
  22. 2 2
      AnKi/Renderer/Bloom.cpp
  23. 0 2
      AnKi/Renderer/Bloom.h
  24. 14 2
      AnKi/Renderer/ClusterBinning.cpp
  25. 4 4
      AnKi/Renderer/Sky.cpp
  26. 1 1
      AnKi/Scene/Components/GlobalIlluminationProbeComponent.cpp
  27. 3 1
      AnKi/ShaderCompiler/ShaderCompiler.cpp
  28. 5 2
      AnKi/Shaders/Blit.ankiprog
  29. 4 2
      AnKi/Shaders/Bloom.ankiprog
  30. 4 2
      AnKi/Shaders/BloomUpscale.ankiprog
  31. 7 5
      AnKi/Shaders/DepthAwareBlur.ankiprog
  32. 3 3
      AnKi/Shaders/DepthDownscale.ankiprog
  33. 4 2
      AnKi/Shaders/DownscaleBlur.ankiprog
  34. 4 2
      AnKi/Shaders/FinalComposite.ankiprog
  35. 4 3
      AnKi/Shaders/Fsr.ankiprog
  36. 6 3
      AnKi/Shaders/GBufferPost.ankiprog
  37. 5 3
      AnKi/Shaders/LightShading.ankiprog
  38. 4 3
      AnKi/Shaders/LightShadingApplyFog.ankiprog
  39. 5 20
      AnKi/Shaders/LightShadingSkybox.ankiprog
  40. 4 5
      AnKi/Shaders/MipmapGenerator.ankiprog
  41. 4 3
      AnKi/Shaders/MotionVectors.ankiprog
  42. 8 2
      AnKi/Shaders/QuadVert.hlsl
  43. 4 2
      AnKi/Shaders/ShadowmapsResolve.ankiprog
  44. 10 8
      AnKi/Shaders/Ssao.ankiprog
  45. 4 8
      AnKi/Shaders/Ssr.ankiprog
  46. 4 2
      AnKi/Shaders/TemporalAA.ankiprog
  47. 4 2
      AnKi/Shaders/Tonemap.ankiprog
  48. 4 2
      AnKi/Shaders/TraditionalDeferredShading.ankiprog
  49. 4 2
      AnKi/Shaders/TraditionalDeferredShadingSkybox.ankiprog
  50. 4 3
      AnKi/Shaders/VisualizeGBufferNormal.ankiprog
  51. 4 3
      AnKi/Shaders/VisualizeHdrRenderTarget.ankiprog
  52. 4 3
      AnKi/Shaders/VisualizeRenderTarget.ankiprog
  53. 4 3
      AnKi/Shaders/VrsSriVisualizeRenderTarget.ankiprog
  54. 4 1
      AnKi/Util/Assert.cpp
  55. 10 1
      AnKi/Util/Logger.cpp
  56. 18 0
      AnKi/Util/String.h
  57. 6 0
      AnKi/Util/Win32Minimal.h
  58. 4 2
      Tests/Gr/Gr.cpp

+ 1 - 1
AnKi/Gr/BackendCommon/Format.def.h

@@ -80,7 +80,7 @@ ANKI_FORMAT_DEF(A2R10G10B10_Uscaled_Pack32,           60, 1041, 4,  4,  0,  0,
 ANKI_FORMAT_DEF(A2R10G10B10_Sscaled_Pack32,           61, 1042, 4,  4,  0,  0,  0,  2,         None)
 ANKI_FORMAT_DEF(A2R10G10B10_Uint_Pack32,              62, 1043, 4,  4,  0,  0,  0,  1,         None)
 ANKI_FORMAT_DEF(A2R10G10B10_Sint_Pack32,              63, 1044, 4,  4,  0,  0,  0,  2,         None)
-ANKI_FORMAT_DEF(A2B10G10R10_Unorm_Pack32,             64, 1045, 4,  4,  0,  0,  0,  0,         None)
+ANKI_FORMAT_DEF(A2B10G10R10_Unorm_Pack32,             64,   24, 4,  4,  0,  0,  0,  0,         None)
 ANKI_FORMAT_DEF(A2B10G10R10_Snorm_Pack32,             65, 1046, 4,  4,  0,  0,  0,  0,         None)
 ANKI_FORMAT_DEF(A2B10G10R10_Uscaled_Pack32,           66, 1047, 4,  4,  0,  0,  0,  1,         None)
 ANKI_FORMAT_DEF(A2B10G10R10_Sscaled_Pack32,           67, 1048, 4,  4,  0,  0,  0,  2,         None)

+ 2 - 2
AnKi/Gr/BackendCommon/GraphicsStateTracker.h

@@ -322,6 +322,7 @@ public:
 				m_hashes.m_vert = 0;
 			}
 
+#if ANKI_GR_BACKEND_VULKAN
 			if(!!(prog->getShaderTypes() & ShaderTypeBit::kVertex) && refl.m_vertex.m_vertexAttributeMask.getSetBitCount())
 			{
 				if(m_staticState.m_shaderProg)
@@ -334,13 +335,12 @@ public:
 					}
 				}
 
-#if ANKI_GR_BACKEND_VULKAN
 				for(VertexAttributeSemantic s : EnumIterable<VertexAttributeSemantic>())
 				{
 					m_staticState.m_vert.m_attribs[s].m_semanticToVertexAttributeLocation = refl.m_vertex.m_vkVertexAttributeLocations[s];
 				}
-#endif
 			}
+#endif
 
 			if(m_staticState.m_misc.m_colorRtMask != refl.m_fragment.m_colorAttachmentWritemask)
 			{

+ 1 - 5
AnKi/Gr/Common.h

@@ -403,11 +403,7 @@ enum class Format : U32
 {
 	kNone = 0,
 
-#if ANKI_GR_BACKEND_VULKAN
-#	define ANKI_FORMAT_DEF(type, vk, d3d, componentCount, texelSize, blockWidth, blockHeight, blockSize, shaderType, depthStencil) k##type = vk,
-#else
-#	define ANKI_FORMAT_DEF(type, vk, d3d, componentCount, texelSize, blockWidth, blockHeight, blockSize, shaderType, depthStencil) k##type = d3d,
-#endif
+#define ANKI_FORMAT_DEF(type, vk, d3d, componentCount, texelSize, blockWidth, blockHeight, blockSize, shaderType, depthStencil) k##type = vk,
 #include <AnKi/Gr/BackendCommon/Format.def.h>
 #undef ANKI_FORMAT_DEF
 };

+ 8 - 3
AnKi/Gr/D3D/D3DBuffer.cpp

@@ -159,7 +159,12 @@ Error BufferImpl::init(const BufferInitInfo& inf)
 	const D3D12_RESOURCE_STATES initialState = D3D12_RESOURCE_STATE_COMMON;
 	ANKI_D3D_CHECK(getDevice().CreateCommittedResource(&heapProperties, heapFlags, &resourceDesc, initialState, nullptr, IID_PPV_ARGS(&m_resource)));
 
-	ANKI_D3D_CHECK(m_resource->SetName(s2ws(inf.getName().cstr()).c_str()));
+	GrDynamicArray<WChar> wstr;
+	wstr.resize(getName().getLength() + 1);
+	getName().toWideChars(wstr.getBegin(), wstr.getSize());
+	ANKI_D3D_CHECK(m_resource->SetName(wstr.getBegin()));
+
+	m_gpuAddress = m_resource->GetGPUVirtualAddress();
 
 	return Error::kNone;
 }
@@ -195,9 +200,9 @@ D3D12_BARRIER_SYNC BufferImpl::computeSync(BufferUsageBit usage) const
 		sync |= D3D12_BARRIER_SYNC_EXECUTE_INDIRECT;
 	}
 
-	if(!!(usage & (BufferUsageBit::kIndex | BufferUsageBit::kVertex)))
+	if(!!(usage & BufferUsageBit::kIndex))
 	{
-		sync |= D3D12_BARRIER_SYNC_INDEX_INPUT | D3D12_BARRIER_SYNC_VERTEX_SHADING;
+		sync |= D3D12_BARRIER_SYNC_INDEX_INPUT;
 	}
 
 	if(!!(usage & BufferUsageBit::kAllGeometry))

+ 61 - 22
AnKi/Gr/D3D/D3DCommandBuffer.cpp

@@ -37,7 +37,7 @@ void CommandBuffer::endRecording()
 		const QueryInfo qinfo = TimestampQueryFactory::getSingleton().getQueryInfo(handle);
 
 		self.m_cmdList->ResolveQueryData(qinfo.m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, qinfo.m_indexInHeap, 1, qinfo.m_resultsBuffer,
-										 qinfo.m_resultsBufferOffset / sizeof(U64));
+										 qinfo.m_resultsBufferOffset);
 	}
 
 	for(QueryHandle handle : self.m_pipelineQueries)
@@ -45,7 +45,7 @@ void CommandBuffer::endRecording()
 		const QueryInfo qinfo = PrimitivesPassedClippingFactory::getSingleton().getQueryInfo(handle);
 
 		self.m_cmdList->ResolveQueryData(qinfo.m_queryHeap, D3D12_QUERY_TYPE_PIPELINE_STATISTICS, qinfo.m_indexInHeap, 1, qinfo.m_resultsBuffer,
-										 qinfo.m_resultsBufferOffset / sizeof(U64));
+										 qinfo.m_resultsBufferOffset);
 	}
 
 	self.m_cmdList->Close();
@@ -275,12 +275,12 @@ void CommandBuffer::bindTexelBuffer(Register reg, const BufferView& buff, Format
 	const BufferImpl& impl = static_cast<const BufferImpl&>(buff.getBuffer());
 	if(reg.m_resourceType == HlslResourceType::kUav)
 	{
-		self.m_descriptors.bindUav(reg.m_space, reg.m_bindPoint, &impl.getD3DResource(), buff.getOffset(), buff.getRange(), DXGI_FORMAT(fmt));
+		self.m_descriptors.bindUav(reg.m_space, reg.m_bindPoint, &impl.getD3DResource(), buff.getOffset(), buff.getRange(), fmt);
 	}
 	else
 	{
 		ANKI_ASSERT(reg.m_resourceType == HlslResourceType::kSrv);
-		self.m_descriptors.bindSrv(reg.m_space, reg.m_bindPoint, &impl.getD3DResource(), buff.getOffset(), buff.getRange(), DXGI_FORMAT(fmt));
+		self.m_descriptors.bindSrv(reg.m_space, reg.m_bindPoint, &impl.getD3DResource(), buff.getOffset(), buff.getRange(), fmt);
 	}
 }
 
@@ -331,6 +331,7 @@ void CommandBuffer::beginRenderPass(ConstWeakArray<RenderTarget> colorRts, Rende
 
 	U32 rtWidth = 0;
 	U32 rtHeight = 0;
+	D3D12_RENDER_PASS_FLAGS flags = D3D12_RENDER_PASS_FLAG_ALLOW_UAV_WRITES;
 
 	Array<D3D12_RENDER_PASS_RENDER_TARGET_DESC, kMaxColorRenderTargets> colorRtDescs;
 	Array<Format, kMaxColorRenderTargets> colorRtFormats;
@@ -359,7 +360,9 @@ void CommandBuffer::beginRenderPass(ConstWeakArray<RenderTarget> colorRts, Rende
 		const TextureImpl& tex = static_cast<const TextureImpl&>(depthStencilRt->m_textureView.getTexture());
 
 		dsDesc = {};
-		dsDesc.cpuDescriptor = tex.getOrCreateDsv(depthStencilRt->m_textureView.getSubresource(), depthStencilRt->m_usage).getCpuOffset();
+		dsDesc.cpuDescriptor =
+			tex.getOrCreateDsv(depthStencilRt->m_textureView.getSubresource(), !(depthStencilRt->m_usage & TextureUsageBit::kFramebufferWrite))
+				.getCpuOffset();
 
 		dsDesc.DepthBeginningAccess.Type = convertLoadOp(depthStencilRt->m_loadOperation);
 		dsDesc.DepthBeginningAccess.Clear.ClearValue.DepthStencil.Depth, depthStencilRt->m_clearValue.m_depthStencil.m_depth;
@@ -373,12 +376,22 @@ void CommandBuffer::beginRenderPass(ConstWeakArray<RenderTarget> colorRts, Rende
 
 		rtWidth = tex.getWidth() >> depthStencilRt->m_textureView.getFirstMipmap();
 		rtHeight = tex.getHeight() >> depthStencilRt->m_textureView.getFirstMipmap();
+
+		if(!(depthStencilRt->m_usage & TextureUsageBit::kFramebufferWrite))
+		{
+			flags |= !!(depthStencilRt->m_textureView.getDepthStencilAspect() & DepthStencilAspectBit::kDepth)
+						 ? D3D12_RENDER_PASS_FLAG_BIND_READ_ONLY_DEPTH
+						 : D3D12_RENDER_PASS_FLAG_NONE;
+
+			flags |= !!(depthStencilRt->m_textureView.getDepthStencilAspect() & DepthStencilAspectBit::kStencil)
+						 ? D3D12_RENDER_PASS_FLAG_BIND_READ_ONLY_STENCIL
+						 : D3D12_RENDER_PASS_FLAG_NONE;
+		}
 	}
 
 	self.m_graphicsState.beginRenderPass(ConstWeakArray(colorRtFormats.getBegin(), colorRts.getSize()), dsFormat, UVec2(rtWidth, rtHeight));
 
-	self.m_cmdList->BeginRenderPass(colorRts.getSize(), colorRtDescs.getBegin(), (depthStencilRt) ? &dsDesc : nullptr,
-									D3D12_RENDER_PASS_FLAG_ALLOW_UAV_WRITES);
+	self.m_cmdList->BeginRenderPass(colorRts.getSize(), colorRtDescs.getBegin(), (depthStencilRt) ? &dsDesc : nullptr, flags);
 }
 
 void CommandBuffer::endRenderPass()
@@ -589,6 +602,8 @@ void CommandBuffer::copyBufferToTexture(const BufferView& buff, const TextureVie
 	const U32 depth = (texImpl.getTextureType() == TextureType::k3D) ? (texImpl.getDepth() >> texView.getFirstMipmap()) : 1u;
 	ANKI_ASSERT(width && height && depth);
 
+	const FormatInfo& formatInfo = getFormatInfo(texImpl.getFormat());
+
 	D3D12_TEXTURE_COPY_LOCATION srcLocation = {};
 	srcLocation.pResource = &buffImpl.getD3DResource();
 	srcLocation.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
@@ -597,7 +612,8 @@ void CommandBuffer::copyBufferToTexture(const BufferView& buff, const TextureVie
 	srcLocation.PlacedFootprint.Footprint.Width = width;
 	srcLocation.PlacedFootprint.Footprint.Height = height;
 	srcLocation.PlacedFootprint.Footprint.Depth = depth;
-	srcLocation.PlacedFootprint.Footprint.RowPitch = width * getFormatInfo(texImpl.getFormat()).m_texelSize;
+	srcLocation.PlacedFootprint.Footprint.RowPitch = (formatInfo.isCompressed()) ? (width / formatInfo.m_blockWidth * formatInfo.m_blockSize)
+																				 : (width * getFormatInfo(texImpl.getFormat()).m_texelSize);
 
 	D3D12_TEXTURE_COPY_LOCATION dstLocation = {};
 	dstLocation.pResource = &texImpl.getD3DResource();
@@ -688,8 +704,32 @@ void CommandBuffer::setPipelineBarrier(ConstWeakArray<TextureBarrierInfo> textur
 	for(const BufferBarrierInfo& barrier : buffers)
 	{
 		const BufferImpl& impl = static_cast<const BufferImpl&>(barrier.m_bufferView.getBuffer());
-		D3D12_BUFFER_BARRIER& d3dBarrier = *bufferBarriers.emplaceBack();
-		d3dBarrier = impl.computeBarrier(barrier.m_previousUsage, barrier.m_nextUsage);
+		D3D12_BUFFER_BARRIER b = impl.computeBarrier(barrier.m_previousUsage, barrier.m_nextUsage);
+
+		if(bufferBarriers.getSize() && bufferBarriers.getBack().pResource == b.pResource)
+		{
+			// Merge barriers
+
+			if(bufferBarriers.getBack().AccessBefore == D3D12_BARRIER_ACCESS_NO_ACCESS && b.AccessBefore != D3D12_BARRIER_ACCESS_NO_ACCESS)
+			{
+				bufferBarriers.getBack().AccessBefore = D3D12_BARRIER_ACCESS(0);
+			}
+			else if(bufferBarriers.getBack().AccessBefore != D3D12_BARRIER_ACCESS_NO_ACCESS && b.AccessBefore == D3D12_BARRIER_ACCESS_NO_ACCESS)
+			{
+				b.AccessBefore = D3D12_BARRIER_ACCESS(0);
+			}
+
+			bufferBarriers.getBack().AccessBefore |= b.AccessBefore;
+			bufferBarriers.getBack().AccessAfter |= b.AccessAfter;
+			bufferBarriers.getBack().SyncBefore |= b.SyncBefore;
+			bufferBarriers.getBack().SyncAfter |= b.SyncAfter;
+		}
+		else
+		{
+			// New barrier
+			D3D12_BUFFER_BARRIER& d3dBarrier = *bufferBarriers.emplaceBack();
+			d3dBarrier = b;
+		}
 	}
 
 	ANKI_ASSERT(accelerationStructures.getSize() == 0 && "TODO");
@@ -768,16 +808,6 @@ void CommandBuffer::writeTimestamp(TimestampQuery* query)
 	self.m_timestampQueries.emplaceBack(impl.m_handle);
 
 	const QueryInfo qinfo = TimestampQueryFactory::getSingleton().getQueryInfo(impl.m_handle);
-
-	// Make sure all the work has finished (mesa's dozen does that)
-	const D3D12_GLOBAL_BARRIER barrier = {.SyncBefore = D3D12_BARRIER_SYNC_ALL,
-										  .SyncAfter = D3D12_BARRIER_SYNC_NONE,
-										  .AccessBefore = D3D12_BARRIER_ACCESS_COMMON,
-										  .AccessAfter = D3D12_BARRIER_ACCESS_NO_ACCESS};
-
-	const D3D12_BARRIER_GROUP barrierGroup = {.Type = D3D12_BARRIER_TYPE_GLOBAL, .NumBarriers = 1, .pGlobalBarriers = &barrier};
-	self.m_cmdList->Barrier(1, &barrierGroup);
-
 	self.m_cmdList->EndQuery(qinfo.m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, qinfo.m_indexInHeap);
 }
 
@@ -809,8 +839,8 @@ void CommandBuffer::pushDebugMarker(CString name, Vec3 color)
 
 	if(self.m_debugMarkersEnabled)
 	{
-		const UVec3 coloru(color * 255.0f);
-		const U64 val = (U64(coloru.x()) << 48) | (U64(coloru.x()) << 32) | (U64(coloru.x()) << 16);
+		const U8Vec3 coloru(color * 255.0f);
+		const U32 val = PIX_COLOR(coloru.x(), coloru.y(), coloru.z());
 
 		PIXBeginEvent(self.m_cmdList, val, "%s", name.cstr());
 	}
@@ -835,12 +865,21 @@ Error CommandBufferImpl::init(const CommandBufferInitInfo& init)
 	ANKI_CHECK(CommandBufferFactory::getSingleton().newCommandBuffer(init.m_flags, m_mcmdb));
 
 	m_cmdList = &m_mcmdb->getCmdList();
+
+	GrDynamicArray<WChar> wstr;
+	wstr.resize(getName().getLength() + 1);
+	getName().toWideChars(wstr.getBegin(), wstr.getSize());
+	m_cmdList->SetName(wstr.getBegin());
+
 	m_fastPool = &m_mcmdb->getFastMemoryPool();
 
 	m_descriptors.init(m_fastPool);
 
 	m_debugMarkersEnabled = g_debugMarkersCVar.get();
 
+	m_timestampQueries = {m_fastPool};
+	m_pipelineQueries = {m_fastPool};
+
 	return Error::kNone;
 }
 

+ 30 - 0
AnKi/Gr/D3D/D3DCommon.cpp

@@ -63,4 +63,34 @@ D3D12_FILTER convertFilter(SamplingFilter minMagFilter, SamplingFilter mipFilter
 	return out;
 }
 
+void invokeDred()
+{
+	getGrManagerImpl().invokeDred();
+}
+
+DXGI_FORMAT convertFormat(Format fmt)
+{
+	DXGI_FORMAT out = DXGI_FORMAT_UNKNOWN;
+
+	switch(fmt)
+	{
+	case Format::kNone:
+		out = DXGI_FORMAT_UNKNOWN;
+		break;
+
+#define ANKI_FORMAT_DEF(type, vk, d3d, componentCount, texelSize, blockWidth, blockHeight, blockSize, shaderType, depthStencil) \
+	case Format::k##type: \
+		out = DXGI_FORMAT(d3d); \
+		break;
+#include <AnKi/Gr/BackendCommon/Format.def.h>
+#undef ANKI_FORMAT_DEF
+
+	default:
+		ANKI_ASSERT(0);
+	}
+
+	ANKI_ASSERT(out < DXGI_FORMAT(1000));
+	return out;
+}
+
 } // end namespace anki

+ 14 - 2
AnKi/Gr/D3D/D3DCommon.h

@@ -44,13 +44,19 @@ namespace anki {
 #define ANKI_D3D_SELF(class_) class_& self = *static_cast<class_*>(this)
 #define ANKI_D3D_SELF_CONST(class_) const class_& self = *static_cast<const class_*>(this)
 
+void invokeDred();
+
 #define ANKI_D3D_CHECKF(x) \
 	do \
 	{ \
 		HRESULT rez; \
 		if((rez = (x)) < 0) [[unlikely]] \
 		{ \
-			ANKI_D3D_LOGF("D3D function failed (HRESULT: %d message: %s): %s", rez, errorMessageToString(GetLastError()).cstr(), #x); \
+			if(rez == DXGI_ERROR_DEVICE_REMOVED) \
+			{ \
+				invokeDred(); \
+			} \
+			ANKI_D3D_LOGF("D3D function failed (HRESULT: 0x%X message: %s): %s", rez, errorMessageToString(GetLastError()).cstr(), #x); \
 		} \
 	} while(0)
 
@@ -60,7 +66,11 @@ namespace anki {
 		HRESULT rez; \
 		if((rez = (x)) < 0) [[unlikely]] \
 		{ \
-			ANKI_D3D_LOGE("D3D function failed (HRESULT: %d message: %s): %s", rez, errorMessageToString(GetLastError()).cstr(), #x); \
+			ANKI_D3D_LOGE("D3D function failed (HRESULT: 0x%X message: %s): %s", rez, errorMessageToString(GetLastError()).cstr(), #x); \
+			if(rez == DXGI_ERROR_DEVICE_REMOVED) \
+			{ \
+				invokeDred(); \
+			} \
 			return Error::kFunctionFailed; \
 		} \
 	} while(0)
@@ -484,6 +494,8 @@ inline [[nodiscard]] D3D12_CULL_MODE convertCullMode(FaceSelectionBit c)
 
 	return out;
 }
+
+[[nodiscard]] DXGI_FORMAT convertFormat(Format fmt);
 /// @}
 
 } // end namespace anki

+ 26 - 19
AnKi/Gr/D3D/D3DDescriptor.cpp

@@ -355,7 +355,7 @@ Error RootSignatureFactory::getOrCreateRootSignature(const ShaderReflection& ref
 	D3D12_ROOT_SIGNATURE_DESC1& sigDesc = verSigDesc.Desc_1_1;
 	sigDesc.NumParameters = rootParameters.getSize();
 	sigDesc.pParameters = rootParameters.getBegin();
-	sigDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED;
+	sigDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED | D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT;
 
 	ComPtr<ID3DBlob> signatureBlob;
 	ComPtr<ID3DBlob> errorBlob;
@@ -402,13 +402,17 @@ Error RootSignatureFactory::getOrCreateRootSignature(const ShaderReflection& ref
 			for(U32 arrayIdx = 0; arrayIdx < inBinding.m_arraySize; ++arrayIdx)
 			{
 				const U32 idxInDescriptorsArr = inBinding.m_registerBindingPoint + arrayIdx;
-				signature->m_spaces[spaceIdx].m_descriptors[hlslResourceType].resize(idxInDescriptorsArr + 1); // Account for holes
+				if(idxInDescriptorsArr >= signature->m_spaces[spaceIdx].m_descriptors[hlslResourceType].getSize())
+				{
+					signature->m_spaces[spaceIdx].m_descriptors[hlslResourceType].resize(idxInDescriptorsArr + 1); // Account for holes
+				}
 
 				RootSignature::Descriptor& outDescriptor = signature->m_spaces[spaceIdx].m_descriptors[hlslResourceType][idxInDescriptorsArr];
 				outDescriptor.m_flags = inBinding.m_flags;
 				outDescriptor.m_type = inBinding.m_type;
 				if(outDescriptor.m_type == DescriptorType::kStorageBuffer)
 				{
+					ANKI_ASSERT(inBinding.m_d3dStructuredBufferStride < kMaxU16);
 					outDescriptor.m_structuredBufferStride = inBinding.m_d3dStructuredBufferStride;
 				}
 			}
@@ -540,8 +544,8 @@ void DescriptorState::flush(ID3D12GraphicsCommandList& cmdList)
 				}
 
 				const Descriptor& outDescriptor = stateSpace.m_descriptors[hlslResourceType][registerBinding];
-				ANKI_ASSERT(inDescriptor.m_flags == outDescriptor.m_flags && inDescriptor.m_type == outDescriptor.m_type
-							&& "Have bound the wrong thing");
+				ANKI_ASSERT((inDescriptor.m_flags & ~DescriptorFlag::kByteAddressBuffer) == outDescriptor.m_flags
+							&& inDescriptor.m_type == outDescriptor.m_type && "Have bound the wrong thing");
 
 				if(inDescriptor.m_type == DescriptorType::kUniformBuffer)
 				{
@@ -551,7 +555,7 @@ void DescriptorState::flush(ID3D12GraphicsCommandList& cmdList)
 					const BufferView& view = outDescriptor.m_bufferView;
 					D3D12_CONSTANT_BUFFER_VIEW_DESC desc = {};
 					desc.BufferLocation = view.m_resource->GetGPUVirtualAddress() + view.m_offset;
-					desc.SizeInBytes = getAlignedRoundUp(view.m_range, 256);
+					desc.SizeInBytes = U32(getAlignedRoundUp(256, view.m_range));
 
 					getDevice().CreateConstantBufferView(&desc, cbvSrvUavHeapOffset.getCpuOffset());
 				}
@@ -573,40 +577,43 @@ void DescriptorState::flush(ID3D12GraphicsCommandList& cmdList)
 				}
 				else if(inDescriptor.m_type == DescriptorType::kStorageBuffer && !!(inDescriptor.m_flags & DescriptorFlag::kWrite))
 				{
-					// RWStructuredBuffer
+					// RWStructuredBuffer or RWByteAddressBuffer
 
 					ANKI_ASSERT(!outDescriptor.m_isHandle);
+
 					const BufferView& view = outDescriptor.m_bufferView;
 					D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {};
+					uavDesc.Format = DXGI_FORMAT_R32_TYPELESS;
 					uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
+					uavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
 
-					ANKI_ASSERT((view.m_offset % inDescriptor.m_structuredBufferStride) == 0);
-					uavDesc.Buffer.FirstElement = view.m_offset / inDescriptor.m_structuredBufferStride;
+					ANKI_ASSERT((view.m_offset % sizeof(U32)) == 0);
+					uavDesc.Buffer.FirstElement = view.m_offset / sizeof(U32);
 
 					ANKI_ASSERT((view.m_range % inDescriptor.m_structuredBufferStride) == 0);
-					uavDesc.Buffer.NumElements = U32(view.m_range / inDescriptor.m_structuredBufferStride);
-
-					uavDesc.Buffer.StructureByteStride = inDescriptor.m_structuredBufferStride;
+					ANKI_ASSERT((view.m_range % sizeof(U32)) == 0);
+					uavDesc.Buffer.NumElements = U32(view.m_range / sizeof(U32));
 
 					getDevice().CreateUnorderedAccessView(view.m_resource, nullptr, &uavDesc, cbvSrvUavHeapOffset.getCpuOffset());
 				}
 				else if(inDescriptor.m_type == DescriptorType::kStorageBuffer && !(inDescriptor.m_flags & DescriptorFlag::kWrite))
 				{
-					// StructuredBuffer
+					// StructuredBuffer or ByteAddressBuffer
 
 					ANKI_ASSERT(!outDescriptor.m_isHandle);
 					const BufferView& view = outDescriptor.m_bufferView;
 					D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
-					srvDesc.Format = DXGI_FORMAT_UNKNOWN;
+					srvDesc.Format = DXGI_FORMAT_R32_TYPELESS;
 					srvDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
+					srvDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW;
 					srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
 
-					ANKI_ASSERT((view.m_offset % inDescriptor.m_structuredBufferStride) == 0);
-					srvDesc.Buffer.FirstElement = view.m_offset / inDescriptor.m_structuredBufferStride;
+					ANKI_ASSERT((view.m_offset % sizeof(U32)) == 0);
+					srvDesc.Buffer.FirstElement = view.m_offset / sizeof(U32);
 
 					ANKI_ASSERT((view.m_range % inDescriptor.m_structuredBufferStride) == 0);
-					srvDesc.Buffer.NumElements = U32(view.m_range / inDescriptor.m_structuredBufferStride);
-					srvDesc.Buffer.StructureByteStride = inDescriptor.m_structuredBufferStride;
+					ANKI_ASSERT((view.m_range % sizeof(U32)) == 0);
+					srvDesc.Buffer.NumElements = U32(view.m_range / sizeof(U32));
 
 					getDevice().CreateShaderResourceView(view.m_resource, &srvDesc, cbvSrvUavHeapOffset.getCpuOffset());
 				}
@@ -617,7 +624,7 @@ void DescriptorState::flush(ID3D12GraphicsCommandList& cmdList)
 					ANKI_ASSERT(!outDescriptor.m_isHandle);
 					const BufferView& view = outDescriptor.m_bufferView;
 					D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {};
-					uavDesc.Format = view.m_format;
+					uavDesc.Format = convertFormat(view.m_format);
 					uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
 
 					const U32 texelSize = getFormatInfo(Format(view.m_format)).m_texelSize;
@@ -637,7 +644,7 @@ void DescriptorState::flush(ID3D12GraphicsCommandList& cmdList)
 					const BufferView& view = outDescriptor.m_bufferView;
 					D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
 
-					srvDesc.Format = view.m_format;
+					srvDesc.Format = convertFormat(view.m_format);
 					srvDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
 					srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
 

+ 7 - 7
AnKi/Gr/D3D/D3DDescriptor.h

@@ -175,6 +175,7 @@ public:
 		RingDescriptorAllocator& alloc = (type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) ? m_gpuRing.m_cbvSrvUav : m_gpuRing.m_sampler;
 		DescriptorHeapHandle out = alloc.allocate(descriptorCount);
 		out.m_heapType = type;
+		out.validate();
 		return out;
 	}
 
@@ -318,7 +319,7 @@ public:
 
 	void bindRootSignature(const RootSignature* rootSignature, Bool isCompute);
 
-	void bindUav(U32 space, U32 registerBinding, ID3D12Resource* resource, PtrSize offset, PtrSize range, DXGI_FORMAT fmt = DXGI_FORMAT_UNKNOWN)
+	void bindUav(U32 space, U32 registerBinding, ID3D12Resource* resource, PtrSize offset, PtrSize range, Format fmt = Format::kNone)
 	{
 		Descriptor& descriptor = getDescriptor(HlslResourceType::kUav, space, registerBinding);
 		descriptor.m_bufferView.m_resource = resource;
@@ -327,7 +328,7 @@ public:
 		descriptor.m_bufferView.m_format = fmt;
 		descriptor.m_isHandle = false;
 #if ANKI_ASSERTIONS_ENABLED
-		if(fmt == DXGI_FORMAT_UNKNOWN)
+		if(fmt == Format::kNone)
 		{
 			descriptor.m_type = DescriptorType::kStorageBuffer;
 			descriptor.m_flags = DescriptorFlag::kReadWrite;
@@ -355,7 +356,7 @@ public:
 		m_spaces[space].m_cbvSrvUavDirty = true;
 	}
 
-	void bindSrv(U32 space, U32 registerBinding, ID3D12Resource* resource, PtrSize offset, PtrSize range, DXGI_FORMAT fmt = DXGI_FORMAT_UNKNOWN)
+	void bindSrv(U32 space, U32 registerBinding, ID3D12Resource* resource, PtrSize offset, PtrSize range, Format fmt = Format::kNone)
 	{
 		Descriptor& descriptor = getDescriptor(HlslResourceType::kSrv, space, registerBinding);
 		descriptor.m_bufferView.m_resource = resource;
@@ -364,7 +365,7 @@ public:
 		descriptor.m_bufferView.m_format = fmt;
 		descriptor.m_isHandle = false;
 #if ANKI_ASSERTIONS_ENABLED
-		if(fmt == DXGI_FORMAT_UNKNOWN)
+		if(fmt == Format::kNone)
 		{
 			descriptor.m_type = DescriptorType::kStorageBuffer;
 			descriptor.m_flags = DescriptorFlag::kRead;
@@ -398,7 +399,7 @@ public:
 		descriptor.m_bufferView.m_resource = resource;
 		descriptor.m_bufferView.m_offset = offset;
 		descriptor.m_bufferView.m_range = range;
-		descriptor.m_bufferView.m_format = DXGI_FORMAT_UNKNOWN;
+		descriptor.m_bufferView.m_format = Format::kNone;
 		descriptor.m_isHandle = false;
 #if ANKI_ASSERTIONS_ENABLED
 		descriptor.m_type = DescriptorType::kUniformBuffer;
@@ -438,7 +439,7 @@ private:
 		ID3D12Resource* m_resource;
 		PtrSize m_offset;
 		PtrSize m_range;
-		DXGI_FORMAT m_format;
+		Format m_format;
 	};
 
 	class Descriptor
@@ -514,7 +515,6 @@ private:
 inline void DescriptorHeapHandle::increment(U32 descriptorCount)
 {
 	m_cpuHandle.ptr += DescriptorFactory::getSingleton().getDescriptorHandleIncrementSize(m_heapType) * descriptorCount;
-	validate();
 }
 /// @}
 

+ 1 - 0
AnKi/Gr/D3D/D3DFence.h

@@ -48,6 +48,7 @@ public:
 	Bool done() const
 	{
 		const U64 cval = m_fence->GetCompletedValue();
+		ANKI_D3D_CHECKF((cval < kMaxU64) ? S_OK : DXGI_ERROR_DEVICE_REMOVED);
 		const U64 val = m_value.load();
 		ANKI_ASSERT(cval <= val);
 		return cval == val;

+ 1 - 1
AnKi/Gr/D3D/D3DFrameGarbageCollector.cpp

@@ -69,7 +69,7 @@ void FrameGarbageCollector::collectGarbage()
 
 		if(frame.m_fence.isCreated() && !frame.m_fence->done())
 		{
-			ANKI_ASSERT(!frame.m_textureGarbage.isEmpty());
+			ANKI_ASSERT(!frame.m_textureGarbage.isEmpty() || !frame.m_bufferGarbage.isEmpty());
 			newFrames.pushBack(&frame);
 			continue;
 		}

+ 68 - 67
AnKi/Gr/D3D/D3DGrManager.cpp

@@ -42,73 +42,18 @@ BoolCVar g_debugMarkersCVar(CVarSubsystem::kGr, "DebugMarkers", false, "Enable o
 BoolCVar g_meshShadersCVar(CVarSubsystem::kGr, "MeshShaders", false, "Enable or not mesh shaders");
 static NumericCVar<U8> g_deviceCVar(CVarSubsystem::kGr, "Device", 0, 0, 16, "Choose an available device. Devices are sorted by performance");
 static BoolCVar g_rayTracingCVar(CVarSubsystem::kGr, "RayTracing", false, "Try enabling ray tracing");
+static BoolCVar g_dredCVar(CVarSubsystem::kGr, "Dred", false, "Enable DRED");
+static BoolCVar g_vrsCVar(CVarSubsystem::kGr, "Vrs", false, "Enable or not VRS");
 
-static LONG NTAPI vexHandler(PEXCEPTION_POINTERS exceptionInfo)
+static void NTAPI d3dDebugMessageCallback([[maybe_unused]] D3D12_MESSAGE_CATEGORY category, D3D12_MESSAGE_SEVERITY severity,
+										  [[maybe_unused]] D3D12_MESSAGE_ID id, LPCSTR pDescription, [[maybe_unused]] void* pContext)
 {
-	PEXCEPTION_RECORD exceptionRecord = exceptionInfo->ExceptionRecord;
-
-	switch(exceptionRecord->ExceptionCode)
+	if(id == D3D12_MESSAGE_ID_INCOMPATIBLE_BARRIER_LAYOUT)
 	{
-	case DBG_PRINTEXCEPTION_WIDE_C:
-	case DBG_PRINTEXCEPTION_C:
-
-		if(exceptionRecord->NumberParameters >= 2)
-		{
-			ULONG len = exceptionRecord->ExceptionInformation[0];
-
-			union
-			{
-				ULONG_PTR up;
-				PCWSTR pwz;
-				PCSTR psz;
-			};
-
-			up = exceptionRecord->ExceptionInformation[1];
-
-			if(exceptionRecord->ExceptionCode == DBG_PRINTEXCEPTION_C)
-			{
-				const ULONG n = MultiByteToWideChar(CP_ACP, 0, psz, len, 0, 0);
-				if(n)
-				{
-					WCHAR* wz = static_cast<WCHAR*>(_malloca(n * sizeof(WCHAR)));
-
-					len = MultiByteToWideChar(CP_ACP, 0, psz, len, wz, n);
-					if(len)
-					{
-						pwz = wz;
-					}
-				}
-			}
-
-			if(len)
-			{
-				const std::wstring wstring(pwz, len - 1);
-				std::string str = ws2s(wstring);
-				str.erase(std::remove(str.begin(), str.end(), '\n'), str.cend());
-				str.erase(std::remove(str.begin(), str.end(), '\r'), str.cend());
-
-				if(str.find("D3D12 INFO") == std::string::npos)
-				{
-					if(GrMemoryPool::isAllocated())
-					{
-						ANKI_D3D_LOGE("%s", str.c_str());
-					}
-					else
-					{
-						printf("D3D12 validation error: %s", str.c_str());
-					}
-				}
-			}
-		}
-		return EXCEPTION_CONTINUE_EXECUTION;
+		// Skip for now
+		return;
 	}
 
-	return EXCEPTION_CONTINUE_SEARCH;
-}
-
-static void NTAPI d3dDebugMessageCallback([[maybe_unused]] D3D12_MESSAGE_CATEGORY category, D3D12_MESSAGE_SEVERITY severity,
-										  [[maybe_unused]] D3D12_MESSAGE_ID id, LPCSTR pDescription, [[maybe_unused]] void* pContext)
-{
 	if(!Logger::isAllocated())
 	{
 		printf("d3dDebugMessageCallback : %s", pDescription);
@@ -213,7 +158,15 @@ void GrManager::swapBuffers()
 
 void GrManager::finish()
 {
-	ANKI_ASSERT(!"TODO");
+	if(FenceFactory::isAllocated())
+	{
+		for(GpuQueueType qType : EnumIterable<GpuQueueType>())
+		{
+			MicroFencePtr fence = FenceFactory::getSingleton().newInstance();
+			fence->gpuSignal(qType);
+			fence->clientWait(kMaxSecond);
+		}
+	}
 }
 
 #define ANKI_NEW_GR_OBJECT(type) \
@@ -333,8 +286,6 @@ Error GrManagerImpl::initInternal(const GrManagerInitInfo& init)
 
 			debugInterface1->SetEnableGPUBasedValidation(true);
 		}
-
-		AddVectoredExceptionHandler(true, vexHandler);
 	}
 
 	ComPtr<IDXGIFactory2> factory2;
@@ -427,6 +378,19 @@ Error GrManagerImpl::initInternal(const GrManagerInitInfo& init)
 		}
 	}
 
+	if(g_dredCVar.get())
+	{
+		ComPtr<ID3D12DeviceRemovedExtendedDataSettings> dredSettings;
+		ANKI_D3D_CHECK(D3D12GetDebugInterface(IID_PPV_ARGS(&dredSettings)));
+
+		// Turn on AutoBreadcrumbs and Page Fault reporting
+		dredSettings->SetAutoBreadcrumbsEnablement(D3D12_DRED_ENABLEMENT_FORCED_ON);
+		dredSettings->SetPageFaultEnablement(D3D12_DRED_ENABLEMENT_FORCED_ON);
+
+		ANKI_D3D_LOGI("DRED is enabled");
+		m_canInvokeDred = true;
+	}
+
 	// Create queues
 	{
 		D3D12_COMMAND_QUEUE_DESC queueDesc = {};
@@ -467,11 +431,11 @@ Error GrManagerImpl::initInternal(const GrManagerInitInfo& init)
 		m_capabilities.m_majorApiVersion = 12;
 		m_capabilities.m_rayTracingEnabled = g_rayTracingCVar.get();
 		m_capabilities.m_64bitAtomics = true;
-		m_capabilities.m_vrs = true;
+		m_capabilities.m_vrs = g_vrsCVar.get();
 		m_capabilities.m_samplingFilterMinMax = true;
 		m_capabilities.m_unalignedBbpTextureFormats = false;
 		m_capabilities.m_dlss = false;
-		m_capabilities.m_meshShaders = true;
+		m_capabilities.m_meshShaders = g_meshShadersCVar.get();
 		m_capabilities.m_pipelineQuery = true;
 		m_capabilities.m_barycentrics = true;
 	}
@@ -491,6 +455,9 @@ Error GrManagerImpl::initInternal(const GrManagerInitInfo& init)
 	IndirectCommandSignatureFactory::allocateSingleton();
 	ANKI_CHECK(IndirectCommandSignatureFactory::getSingleton().init());
 
+	TimestampQueryFactory::allocateSingleton();
+	PrimitivesPassedClippingFactory::allocateSingleton();
+
 	return Error::kNone;
 }
 
@@ -506,6 +473,8 @@ void GrManagerImpl::destroy()
 
 	// Destroy systems
 	CommandBufferFactory::freeSingleton();
+	PrimitivesPassedClippingFactory::freeSingleton();
+	TimestampQueryFactory::freeSingleton();
 	SwapchainFactory::freeSingleton();
 	FrameGarbageCollector::freeSingleton();
 	RootSignatureFactory::freeSingleton();
@@ -542,4 +511,36 @@ void GrManagerImpl::waitAllQueues()
 	}
 }
 
+void GrManagerImpl::invokeDred() const
+{
+	Bool error = false;
+
+	do
+	{
+		if(m_canInvokeDred)
+		{
+			ComPtr<ID3D12DeviceRemovedExtendedData> pDred;
+			if(!SUCCEEDED(m_device->QueryInterface(IID_PPV_ARGS(&pDred))))
+			{
+				error = true;
+				break;
+			}
+
+			D3D12_DRED_AUTO_BREADCRUMBS_OUTPUT dredAutoBreadcrumbsOutput;
+			if(!SUCCEEDED(pDred->GetAutoBreadcrumbsOutput(&dredAutoBreadcrumbsOutput)))
+			{
+				error = true;
+				break;
+			}
+
+			D3D12_DRED_PAGE_FAULT_OUTPUT dredPageFaultOutput;
+			if(!SUCCEEDED(pDred->GetPageFaultAllocationOutput(&dredPageFaultOutput)))
+			{
+				error = true;
+				break;
+			}
+		}
+	} while(false);
+}
+
 } // end namespace anki

+ 4 - 0
AnKi/Gr/D3D/D3DGrManager.h

@@ -57,6 +57,8 @@ public:
 		return m_timestampFrequency;
 	}
 
+	void invokeDred() const;
+
 private:
 	ID3D12DeviceX* m_device = nullptr;
 	Array<ID3D12CommandQueue*, U32(GpuQueueType::kCount)> m_queues = {};
@@ -82,6 +84,8 @@ private:
 
 	U64 m_timestampFrequency = 0;
 
+	Bool m_canInvokeDred = false;
+
 	void destroy();
 
 	void waitAllQueues();

+ 30 - 12
AnKi/Gr/D3D/D3DGraphicsState.cpp

@@ -148,7 +148,7 @@ void GraphicsPipelineFactory::flushState(GraphicsStateTracker& state, D3D12Graph
 			D3D12_INPUT_ELEMENT_DESC& elem = inputElementDescs[inputElementDescCount++];
 
 			getVertexAttributeSemanticInfo(i, elem.SemanticName, elem.SemanticIndex);
-			elem.Format = DXGI_FORMAT(staticState.m_vert.m_attribs[i].m_fmt);
+			elem.Format = convertFormat(staticState.m_vert.m_attribs[i].m_fmt);
 			elem.InputSlot = staticState.m_vert.m_attribs[i].m_binding;
 			elem.AlignedByteOffset = staticState.m_vert.m_attribs[i].m_relativeOffset;
 			elem.InputSlotClass = (staticState.m_vert.m_bindings[staticState.m_vert.m_attribs[i].m_binding].m_stepRate == VertexStepRate::kVertex)
@@ -182,16 +182,34 @@ void GraphicsPipelineFactory::flushState(GraphicsStateTracker& state, D3D12Graph
 	if(staticState.m_misc.m_depthStencilFormat != Format::kNone)
 	{
 		Array<D3D12_DEPTH_STENCILOP_DESC1, 2> stencilDescs;
-		for(U32 w = 0; w < 2; ++w)
+		if(hasStencilRt)
 		{
-			stencilDescs[w].StencilFailOp = convertStencilOperation(staticState.m_stencil.m_face[w].m_fail);
-			stencilDescs[w].StencilDepthFailOp = convertStencilOperation(staticState.m_stencil.m_face[w].m_stencilPassDepthFail);
-			stencilDescs[w].StencilPassOp = convertStencilOperation(staticState.m_stencil.m_face[w].m_stencilPassDepthPass);
-			stencilDescs[w].StencilFunc = convertComparisonFunc(staticState.m_stencil.m_face[w].m_compare);
-
-			ANKI_ASSERT(staticState.m_stencil.m_face[w].m_compareMask != 0x5A5A5A5A && staticState.m_stencil.m_face[w].m_writeMask != 0x5A5A5A5A);
-			stencilDescs[w].StencilReadMask = U8(staticState.m_stencil.m_face[w].m_compareMask);
-			stencilDescs[w].StencilWriteMask = U8(staticState.m_stencil.m_face[w].m_writeMask);
+			for(U32 w = 0; w < 2; ++w)
+			{
+				stencilDescs[w].StencilFailOp = convertStencilOperation(staticState.m_stencil.m_face[w].m_fail);
+				stencilDescs[w].StencilDepthFailOp = convertStencilOperation(staticState.m_stencil.m_face[w].m_stencilPassDepthFail);
+				stencilDescs[w].StencilPassOp = convertStencilOperation(staticState.m_stencil.m_face[w].m_stencilPassDepthPass);
+				stencilDescs[w].StencilFunc = convertComparisonFunc(staticState.m_stencil.m_face[w].m_compare);
+
+				ANKI_ASSERT(
+					!stencilTestEnabled
+					|| (staticState.m_stencil.m_face[w].m_compareMask != 0x5A5A5A5A && staticState.m_stencil.m_face[w].m_writeMask != 0x5A5A5A5A));
+				stencilDescs[w].StencilReadMask = U8(staticState.m_stencil.m_face[w].m_compareMask);
+				stencilDescs[w].StencilWriteMask = U8(staticState.m_stencil.m_face[w].m_writeMask);
+			}
+		}
+		else
+		{
+			for(U32 w = 0; w < 2; ++w)
+			{
+				stencilDescs[w].StencilFailOp = D3D12_STENCIL_OP_KEEP;
+				stencilDescs[w].StencilDepthFailOp = D3D12_STENCIL_OP_KEEP;
+				stencilDescs[w].StencilPassOp = D3D12_STENCIL_OP_KEEP;
+				stencilDescs[w].StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS;
+
+				stencilDescs[w].StencilReadMask = 0;
+				stencilDescs[w].StencilWriteMask = 0;
+			}
 		}
 
 		dsDesc = {.DepthEnable = depthTestEnabled(staticState.m_depth.m_compare, staticState.m_depth.m_writeEnabled),
@@ -217,7 +235,7 @@ void GraphicsPipelineFactory::flushState(GraphicsStateTracker& state, D3D12Graph
 	{
 		if(staticState.m_misc.m_colorRtMask.get(i))
 		{
-			rtFormats.RTFormats[i] = DXGI_FORMAT(staticState.m_misc.m_colorRtFormats[i]);
+			rtFormats.RTFormats[i] = convertFormat(staticState.m_misc.m_colorRtFormats[i]);
 			rtFormats.NumRenderTargets = i + 1;
 		}
 	}
@@ -245,7 +263,7 @@ void GraphicsPipelineFactory::flushState(GraphicsStateTracker& state, D3D12Graph
 	ANKI_SET_IR(MS, kMesh)
 	desc.BlendState = CD3DX12_BLEND_DESC(blendDesc);
 	desc.DepthStencilState = CD3DX12_DEPTH_STENCIL_DESC2(dsDesc);
-	desc.DSVFormat = DXGI_FORMAT(staticState.m_misc.m_depthStencilFormat);
+	desc.DSVFormat = convertFormat(staticState.m_misc.m_depthStencilFormat);
 	desc.RasterizerState = CD3DX12_RASTERIZER_DESC2(rastDesc);
 	desc.RTVFormats = rtFormats;
 	desc.SampleDesc = sampleDesc;

+ 2 - 2
AnKi/Gr/D3D/D3DQueryFactory.cpp

@@ -43,7 +43,7 @@ Error QueryFactory::newQuery(QueryHandle& handle)
 		BufferInitInfo buffInit("QueryBuffer");
 		buffInit.m_mapAccess = BufferMapAccessBit::kRead;
 		buffInit.m_usage = BufferUsageBit::kTransferDestination;
-		buffInit.m_size = kMaxQueriesPerQueryChunk * m_resultStructSize64 * sizeof(U64);
+		buffInit.m_size = kMaxQueriesPerQueryChunk * m_resultStructSize;
 		chunk.m_resultsBuffer = GrManager::getSingleton().newBuffer(buffInit);
 
 		chunk.m_resultsBufferCpuAddr = static_cast<U64*>(chunk.m_resultsBuffer->map(0, buffInit.m_size, BufferMapAccessBit::kRead));
@@ -111,7 +111,7 @@ Bool QueryFactory::getResult(QueryHandle handle, U64& result)
 	const Bool available = (it->m_fenceArr[handle.m_queryIndex].isCreated()) ? it->m_fenceArr[handle.m_queryIndex]->done() : true;
 	if(available)
 	{
-		result = it->m_resultsBufferCpuAddr[handle.m_queryIndex * m_resultStructSize64 + m_resultMemberOffset64];
+		result = it->m_resultsBufferCpuAddr[(handle.m_queryIndex * m_resultStructSize + m_resultMemberOffset) / sizeof(U64)];
 		it->m_fenceArr[handle.m_queryIndex].reset(nullptr);
 	}
 	else

+ 5 - 5
AnKi/Gr/D3D/D3DQueryFactory.h

@@ -50,8 +50,8 @@ class QueryFactory
 public:
 	QueryFactory(D3D12_QUERY_HEAP_TYPE type, U32 resultStructSize, U32 resultMemberOffset)
 		: m_type(type)
-		, m_resultStructSize64(resultStructSize / sizeof(U64))
-		, m_resultMemberOffset64(resultMemberOffset / sizeof(U64))
+		, m_resultStructSize(resultStructSize)
+		, m_resultMemberOffset(resultMemberOffset)
 	{
 		ANKI_ASSERT(resultStructSize > 0);
 		ANKI_ASSERT((resultStructSize % sizeof(U64)) == 0);
@@ -79,7 +79,7 @@ public:
 		auto it = m_chunkArray.indexToIterator(handle.m_chunkIndex);
 
 		info.m_resultsBuffer = &static_cast<const BufferImpl&>(*it->m_resultsBuffer).getD3DResource();
-		info.m_resultsBufferOffset = (handle.m_queryIndex * m_resultStructSize64 + m_resultMemberOffset64) / sizeof(U64);
+		info.m_resultsBufferOffset = handle.m_queryIndex * m_resultStructSize + m_resultMemberOffset;
 		info.m_queryHeap = it->m_heap;
 		info.m_indexInHeap = handle.m_queryIndex;
 		return info;
@@ -131,8 +131,8 @@ private:
 	Mutex m_mtx;
 
 	D3D12_QUERY_HEAP_TYPE m_type;
-	U32 m_resultStructSize64;
-	U32 m_resultMemberOffset64;
+	U32 m_resultStructSize;
+	U32 m_resultMemberOffset;
 };
 
 class OcclusionQueryFactory : public QueryFactory, public MakeSingleton<OcclusionQueryFactory>

+ 1 - 1
AnKi/Gr/D3D/D3DSampler.cpp

@@ -35,7 +35,7 @@ Error SamplerImpl::init(const SamplerInitInfo& inf)
 	desc.AddressW = desc.AddressU;
 	desc.MipLODBias = inf.m_lodBias;
 	desc.MaxAnisotropy = inf.m_anisotropyLevel;
-	desc.ComparisonFunc = convertComparisonFunc(inf.m_compareOperation);
+	desc.ComparisonFunc = D3D12_DECODE_IS_COMPARISON_FILTER(desc.Filter) ? convertComparisonFunc(inf.m_compareOperation) : D3D12_COMPARISON_FUNC_NONE;
 
 	if(inf.m_addressing == SamplingAddressing::kBlack)
 	{

+ 2 - 0
AnKi/Gr/D3D/D3DShaderProgram.cpp

@@ -97,6 +97,8 @@ Error ShaderProgramImpl::init(const ShaderProgramInitInfo& inf)
 		refl.validate();
 	}
 
+	m_refl = refl;
+
 	// Create root signature
 	ANKI_CHECK(RootSignatureFactory::getSingleton().getOrCreateRootSignature(refl, m_rootSignature));
 

+ 156 - 104
AnKi/Gr/D3D/D3DTexture.cpp

@@ -26,7 +26,7 @@ U32 Texture::getOrCreateBindlessTextureIndex(const TextureSubresourceDesc& subre
 {
 	ANKI_D3D_SELF(TextureImpl);
 
-	const TextureImpl::View& view = self.getOrCreateView(subresource, TextureUsageBit::kAllSampled);
+	const TextureImpl::View& view = self.getOrCreateView(subresource, TextureImpl::ViewType::kSrv);
 
 	LockGuard lock(view.m_bindlessLock);
 
@@ -127,7 +127,7 @@ Error TextureImpl::initInternal(ID3D12Resource* external, const TextureInitInfo&
 		desc.Width = m_width;
 		desc.Height = m_height;
 		desc.MipLevels = U16(m_mipCount);
-		desc.Format = DXGI_FORMAT(m_format);
+		desc.Format = convertFormat(m_format);
 		desc.SampleDesc.Count = 1;
 		desc.SampleDesc.Quality = 0;
 		desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
@@ -165,42 +165,38 @@ Error TextureImpl::initInternal(ID3D12Resource* external, const TextureInitInfo&
 		const D3D12_RESOURCE_STATES initialState = D3D12_RESOURCE_STATE_COMMON;
 		ANKI_D3D_CHECK(getDevice().CreateCommittedResource(&heapProperties, heapFlags, &desc, initialState, nullptr, IID_PPV_ARGS(&m_resource)));
 
-		ANKI_D3D_CHECK(m_resource->SetName(s2ws(init.getName().cstr()).c_str()));
+		GrDynamicArray<WChar> wstr;
+		wstr.resize(getName().getLength() + 1);
+		getName().toWideChars(wstr.getBegin(), wstr.getSize());
+		ANKI_D3D_CHECK(m_resource->SetName(wstr.getBegin()));
 	}
 
 	// Create the default views
 	if(!!(m_usage & TextureUsageBit::kAllFramebuffer))
 	{
 		const TextureView tview(this, TextureSubresourceDesc::firstSurface());
-		initView(tview.getSubresource(), TextureUsageBit::kAllFramebuffer, m_firstSurfaceRtvOrDsv);
+		initView(tview.getSubresource(), !!(m_aspect & DepthStencilAspectBit::kDepthStencil) ? ViewType::kDsv : ViewType::kRtv,
+				 m_firstSurfaceRtvOrDsv);
 		m_firstSurfaceRtvOrDsvSubresource = tview.getSubresource();
 	}
 
 	if(!!(m_usage & TextureUsageBit::kAllSampled))
 	{
 		const TextureView tview(this, TextureSubresourceDesc::all());
-		initView(tview.getSubresource(), TextureUsageBit::kAllSampled, m_wholeTextureSrv);
+		initView(tview.getSubresource(), ViewType::kSrv, m_wholeTextureSrv);
 		m_wholeTextureSrvSubresource = tview.getSubresource();
 	}
 
 	return Error::kNone;
 }
 
-void TextureImpl::initView(const TextureSubresourceDesc& subresource, TextureUsageBit usage, View& view) const
+void TextureImpl::initView(const TextureSubresourceDesc& subresource, ViewType type, View& view) const
 {
-	ANKI_ASSERT(!!(m_usage & usage));
+	view.m_type = type;
 
-	const Bool rtv = !!(usage & TextureUsageBit::kAllFramebuffer) && m_aspect == DepthStencilAspectBit::kNone;
-	const Bool dsv = !!(usage & TextureUsageBit::kAllFramebuffer) && m_aspect != DepthStencilAspectBit::kNone;
-	const Bool srv = !!(usage & TextureUsageBit::kAllSampled);
-	const Bool uav = !!(usage & TextureUsageBit::kAllStorage);
-
-	ANKI_ASSERT(rtv + dsv + srv + uav == 1 && "Only enable one");
-
-	view.m_usage = usage;
-
-	if(rtv)
+	if(type == ViewType::kRtv)
 	{
+		ANKI_ASSERT(!!(m_usage & TextureUsageBit::kAllFramebuffer));
 		ANKI_ASSERT(TextureView(this, subresource).isGoodForRenderTarget() && m_aspect == DepthStencilAspectBit::kNone);
 
 		D3D12_RENDER_TARGET_VIEW_DESC desc = {};
@@ -222,19 +218,20 @@ void TextureImpl::initView(const TextureSubresourceDesc& subresource, TextureUsa
 		view.m_handle = DescriptorFactory::getSingleton().allocatePersistent(D3D12_DESCRIPTOR_HEAP_TYPE_RTV, false);
 		getDevice().CreateRenderTargetView(m_resource, (isExternal()) ? nullptr : &desc, view.m_handle.getCpuOffset());
 	}
-	else if(dsv)
+	else if(type == ViewType::kDsv || type == ViewType::kReadOnlyDsv)
 	{
+		ANKI_ASSERT(!!(m_usage & TextureUsageBit::kAllFramebuffer));
 		ANKI_ASSERT(TextureView(this, subresource).isGoodForRenderTarget() && m_aspect != DepthStencilAspectBit::kNone);
 
 		D3D12_DEPTH_STENCIL_VIEW_DESC desc = {};
 
-		desc.Format = DXGI_FORMAT(m_format);
+		desc.Format = convertFormat(m_format);
 
-		const Bool readOnlyDsv = !(usage & TextureUsageBit::kFramebufferWrite);
-		if(readOnlyDsv)
+		if(type == ViewType::kReadOnlyDsv)
 		{
-			desc.Flags |= D3D12_DSV_FLAG_READ_ONLY_DEPTH;
-			desc.Flags |= D3D12_DSV_FLAG_READ_ONLY_STENCIL;
+			desc.Flags |= !!(subresource.m_depthStencilAspect & DepthStencilAspectBit::kDepth) ? D3D12_DSV_FLAG_READ_ONLY_DEPTH : D3D12_DSV_FLAG_NONE;
+			desc.Flags |=
+				!!(subresource.m_depthStencilAspect & DepthStencilAspectBit::kStencil) ? D3D12_DSV_FLAG_READ_ONLY_STENCIL : D3D12_DSV_FLAG_NONE;
 		}
 
 		if(m_texType == TextureType::k2D)
@@ -253,8 +250,9 @@ void TextureImpl::initView(const TextureSubresourceDesc& subresource, TextureUsa
 		view.m_handle = DescriptorFactory::getSingleton().allocatePersistent(D3D12_DESCRIPTOR_HEAP_TYPE_DSV, false);
 		getDevice().CreateDepthStencilView(m_resource, &desc, view.m_handle.getCpuOffset());
 	}
-	else if(srv)
+	else if(type == ViewType::kSrv)
 	{
+		ANKI_ASSERT(!!(m_usage & TextureUsageBit::kAllSrv));
 		const TextureView tview(this, subresource);
 
 		ANKI_ASSERT(tview.getSubresource().m_depthStencilAspect != DepthStencilAspectBit::kDepthStencil && "Can only create a single plane SRV");
@@ -263,7 +261,7 @@ void TextureImpl::initView(const TextureSubresourceDesc& subresource, TextureUsa
 
 		if(!m_aspect)
 		{
-			desc.Format = DXGI_FORMAT(m_format);
+			desc.Format = convertFormat(m_format);
 		}
 		else
 		{
@@ -271,10 +269,10 @@ void TextureImpl::initView(const TextureSubresourceDesc& subresource, TextureUsa
 			switch(m_format)
 			{
 			case Format::kD16_Unorm:
-				desc.Format = DXGI_FORMAT(Format::kR16_Unorm);
+				desc.Format = convertFormat(Format::kR16_Unorm);
 				break;
 			case Format::kD32_Sfloat:
-				desc.Format = DXGI_FORMAT(Format::kR32_Sfloat);
+				desc.Format = convertFormat(Format::kR32_Sfloat);
 				break;
 			case Format::kD24_Unorm_S8_Uint:
 				if(tview.getSubresource().m_depthStencilAspect == DepthStencilAspectBit::kDepth)
@@ -390,11 +388,12 @@ void TextureImpl::initView(const TextureSubresourceDesc& subresource, TextureUsa
 		view.m_handle = DescriptorFactory::getSingleton().allocatePersistent(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, false);
 		getDevice().CreateShaderResourceView(m_resource, &desc, view.m_handle.getCpuOffset());
 	}
-	else if(uav)
+	else if(type == ViewType::kUav)
 	{
+		ANKI_ASSERT(!!(m_usage & TextureUsageBit::kAllUav));
 		D3D12_UNORDERED_ACCESS_VIEW_DESC desc = {};
 
-		desc.Format = DXGI_FORMAT(m_format);
+		desc.Format = convertFormat(m_format);
 
 		if(m_texType == TextureType::k1D)
 		{
@@ -452,17 +451,16 @@ void TextureImpl::initView(const TextureSubresourceDesc& subresource, TextureUsa
 	}
 }
 
-const TextureImpl::View& TextureImpl::getOrCreateView(const TextureSubresourceDesc& subresource, TextureUsageBit usage) const
+const TextureImpl::View& TextureImpl::getOrCreateView(const TextureSubresourceDesc& subresource, ViewType type) const
 {
 	ANKI_ASSERT(subresource == TextureView(this, subresource).getSubresource() && "Should have been sanitized");
-	ANKI_ASSERT(!!(usage & m_usage));
 
 	// Check some pre-created
-	if(!!(usage & TextureUsageBit::kAllSampled) && subresource == m_wholeTextureSrvSubresource)
+	if(type == m_wholeTextureSrv.m_type && subresource == m_wholeTextureSrvSubresource)
 	{
 		return m_wholeTextureSrv;
 	}
-	else if(usage == TextureUsageBit::kAllFramebuffer && subresource == m_firstSurfaceRtvOrDsvSubresource)
+	else if(m_firstSurfaceRtvOrDsv.m_type == type && subresource == m_firstSurfaceRtvOrDsvSubresource)
 	{
 		return m_firstSurfaceRtvOrDsv;
 	}
@@ -474,8 +472,8 @@ const TextureImpl::View& TextureImpl::getOrCreateView(const TextureSubresourceDe
 	{
 	public:
 		TextureSubresourceDesc m_subresource;
-		TextureUsageBit m_usage;
-	} toHash = {subresource, usage};
+		ViewType m_type;
+	} toHash = {subresource, type};
 	ANKI_END_PACKED_STRUCT
 
 	const U64 hash = computeHash(&toHash, sizeof(toHash));
@@ -508,7 +506,7 @@ const TextureImpl::View& TextureImpl::getOrCreateView(const TextureSubresourceDe
 
 	// Need to create it
 	View& nview = *m_viewsMap.emplace(hash);
-	initView(subresource, usage, nview);
+	initView(subresource, type, nview);
 
 	return nview;
 }
@@ -575,99 +573,153 @@ void TextureImpl::computeBarrierInfo(TextureUsageBit usage, D3D12_BARRIER_SYNC&
 	const Bool depthStencil = !!m_aspect;
 	const Bool rt = getGrManagerImpl().getDeviceCapabilities().m_rayTracingEnabled;
 
-	if(!!(usage & (TextureUsageBit::kSampledGeometry | TextureUsageBit::kStorageGeometryRead)))
+	if(depthStencil)
 	{
-		stages |= D3D12_BARRIER_SYNC_VERTEX_SHADING;
-		accesses |= D3D12_BARRIER_ACCESS_SHADER_RESOURCE;
-	}
+		// DS is a little bit special, it has 3 states
 
-	if(!!(usage & TextureUsageBit::kStorageGeometryWrite))
-	{
-		stages |= D3D12_BARRIER_SYNC_VERTEX_SHADING;
-		accesses |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
-	}
+		if(!!(usage & TextureUsageBit::kFramebufferWrite))
+		{
+			// Writing to DS, can't be anything else
+			stages |= D3D12_BARRIER_SYNC_DEPTH_STENCIL;
+			accesses |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE;
+		}
+		else if(!!(usage & TextureUsageBit::kFramebufferRead) && !!(usage & TextureUsageBit::kAllSampled))
+		{
+			// Reading in the renderpass and sampling at the same time
 
-	if(!!(usage & (TextureUsageBit::kSampledFragment | TextureUsageBit::kStorageFragmentRead)))
-	{
-		stages |= D3D12_BARRIER_SYNC_PIXEL_SHADING;
-		accesses |= D3D12_BARRIER_ACCESS_SHADER_RESOURCE;
-	}
+			if(!!(usage & (TextureUsageBit::kSampledGeometry | TextureUsageBit::kSampledFragment)))
+			{
+				stages |= D3D12_BARRIER_SYNC_DRAW;
+			}
 
-	if(!!(usage & TextureUsageBit::kStorageFragmentWrite))
-	{
-		stages |= D3D12_BARRIER_SYNC_PIXEL_SHADING;
-		accesses |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
-	}
+			if(!!(usage & TextureUsageBit::kSampledCompute))
+			{
+				stages |= D3D12_BARRIER_SYNC_COMPUTE_SHADING;
+			}
 
-	if(!!(usage & (TextureUsageBit::kSampledCompute | TextureUsageBit::kStorageComputeRead)))
-	{
-		stages |= D3D12_BARRIER_SYNC_COMPUTE_SHADING;
-		accesses |= D3D12_BARRIER_ACCESS_SHADER_RESOURCE;
-	}
+			if(!!(usage & TextureUsageBit::kSampledTraceRays) && rt)
+			{
+				stages |= D3D12_BARRIER_SYNC_RAYTRACING;
+			}
 
-	if(!!(usage & TextureUsageBit::kStorageComputeWrite))
-	{
-		stages |= D3D12_BARRIER_SYNC_COMPUTE_SHADING;
-		accesses |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
-	}
+			accesses |= D3D12_BARRIER_ACCESS_COMMON; // Include all
+		}
+		else if(!!(usage & TextureUsageBit::kAllSampled))
+		{
+			// Only sampled
 
-	if(!!(usage & (TextureUsageBit::kSampledTraceRays | TextureUsageBit::kStorageTraceRaysRead)) && rt)
-	{
-		stages |= D3D12_BARRIER_SYNC_RAYTRACING;
-		accesses |= D3D12_BARRIER_ACCESS_SHADER_RESOURCE;
-	}
+			if(!!(usage & TextureUsageBit::kSampledGeometry))
+			{
+				stages |= D3D12_BARRIER_SYNC_VERTEX_SHADING;
+			}
 
-	if(!!(usage & TextureUsageBit::kStorageTraceRaysWrite) && rt)
-	{
-		stages |= D3D12_BARRIER_SYNC_COMPUTE_SHADING;
-		accesses |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
-	}
+			if(!!(usage & TextureUsageBit::kSampledFragment))
+			{
+				stages |= D3D12_BARRIER_SYNC_PIXEL_SHADING;
+			}
 
-	if(!!(usage & TextureUsageBit::kFramebufferRead))
-	{
-		if(depthStencil)
+			if(!!(usage & TextureUsageBit::kSampledCompute))
+			{
+				stages |= D3D12_BARRIER_SYNC_COMPUTE_SHADING;
+			}
+
+			if(!!(usage & TextureUsageBit::kSampledTraceRays) && rt)
+			{
+				stages |= D3D12_BARRIER_SYNC_RAYTRACING;
+			}
+
+			accesses |= D3D12_BARRIER_ACCESS_SHADER_RESOURCE;
+		}
+		else
 		{
+			// Only renderpass read
+			ANKI_ASSERT(!!(usage & TextureUsageBit::kFramebufferRead));
 			stages |= D3D12_BARRIER_SYNC_DEPTH_STENCIL;
 			accesses |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ;
 		}
-		else
+	}
+	else
+	{
+		if(!!(usage & (TextureUsageBit::kSampledGeometry | TextureUsageBit::kStorageGeometryRead)))
 		{
-			stages |= D3D12_BARRIER_SYNC_RENDER_TARGET;
-			accesses |= D3D12_BARRIER_ACCESS_RENDER_TARGET;
+			stages |= D3D12_BARRIER_SYNC_VERTEX_SHADING;
+			accesses |= D3D12_BARRIER_ACCESS_SHADER_RESOURCE;
 		}
-	}
 
-	if(!!(usage & TextureUsageBit::kFramebufferWrite))
-	{
-		if(depthStencil)
+		if(!!(usage & TextureUsageBit::kStorageGeometryWrite))
 		{
-			stages |= D3D12_BARRIER_SYNC_DEPTH_STENCIL;
-			accesses |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE;
+			stages |= D3D12_BARRIER_SYNC_VERTEX_SHADING;
+			accesses |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
 		}
-		else
+
+		if(!!(usage & (TextureUsageBit::kSampledFragment | TextureUsageBit::kStorageFragmentRead)))
+		{
+			stages |= D3D12_BARRIER_SYNC_PIXEL_SHADING;
+			accesses |= D3D12_BARRIER_ACCESS_SHADER_RESOURCE;
+		}
+
+		if(!!(usage & TextureUsageBit::kStorageFragmentWrite))
+		{
+			stages |= D3D12_BARRIER_SYNC_PIXEL_SHADING;
+			accesses |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
+		}
+
+		if(!!(usage & (TextureUsageBit::kSampledCompute | TextureUsageBit::kStorageComputeRead)))
+		{
+			stages |= D3D12_BARRIER_SYNC_COMPUTE_SHADING;
+			accesses |= D3D12_BARRIER_ACCESS_SHADER_RESOURCE;
+		}
+
+		if(!!(usage & TextureUsageBit::kStorageComputeWrite))
+		{
+			stages |= D3D12_BARRIER_SYNC_COMPUTE_SHADING;
+			accesses |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
+		}
+
+		if(!!(usage & (TextureUsageBit::kSampledTraceRays | TextureUsageBit::kStorageTraceRaysRead)) && rt)
+		{
+			stages |= D3D12_BARRIER_SYNC_RAYTRACING;
+			accesses |= D3D12_BARRIER_ACCESS_SHADER_RESOURCE;
+		}
+
+		if(!!(usage & TextureUsageBit::kStorageTraceRaysWrite) && rt)
+		{
+			stages |= D3D12_BARRIER_SYNC_COMPUTE_SHADING;
+			accesses |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
+		}
+
+		if(!!(usage & TextureUsageBit::kFramebufferWrite))
 		{
 			stages |= D3D12_BARRIER_SYNC_RENDER_TARGET;
 			accesses |= D3D12_BARRIER_ACCESS_RENDER_TARGET;
 		}
-	}
+		else if(!!(usage & TextureUsageBit::kFramebufferRead))
+		{
+			// Read only
+			stages |= D3D12_BARRIER_SYNC_RENDER_TARGET;
+			accesses |= D3D12_BARRIER_ACCESS_RENDER_TARGET;
+		}
 
-	if(!!(usage & TextureUsageBit::kFramebufferShadingRate))
-	{
-		stages |= D3D12_BARRIER_SYNC_PIXEL_SHADING;
-		accesses |= D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE;
-	}
+		if(!!(usage & TextureUsageBit::kFramebufferShadingRate))
+		{
+			stages |= D3D12_BARRIER_SYNC_PIXEL_SHADING;
+			accesses |= D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE;
+		}
 
-	if(!!(usage & TextureUsageBit::kTransferDestination))
-	{
-		stages |= D3D12_BARRIER_SYNC_COPY;
-		accesses |= D3D12_BARRIER_ACCESS_COPY_DEST;
-	}
+		if(!!(usage & TextureUsageBit::kTransferDestination))
+		{
+			stages |= D3D12_BARRIER_SYNC_COPY;
+			accesses |= D3D12_BARRIER_ACCESS_COPY_DEST;
+		}
 
-	if(!!(usage & TextureUsageBit::kPresent))
-	{
-		stages |= D3D12_BARRIER_SYNC_COPY;
-		accesses |= D3D12_BARRIER_ACCESS_COPY_SOURCE;
+		if(!!(usage & TextureUsageBit::kPresent))
+		{
+			stages |= D3D12_BARRIER_SYNC_COPY;
+			accesses |= D3D12_BARRIER_ACCESS_COPY_SOURCE;
+		}
 	}
+
+	ANKI_ASSERT(!!stages);
 }
 
 D3D12_BARRIER_LAYOUT TextureImpl::computeLayout(TextureUsageBit usage) const
@@ -694,7 +746,7 @@ D3D12_BARRIER_LAYOUT TextureImpl::computeLayout(TextureUsageBit usage) const
 		else if((usage & (TextureUsageBit::kAllSampled | TextureUsageBit::kFramebufferRead)) == usage)
 		{
 			// Only depth tests and sampled
-			out = D3D12_BARRIER_LAYOUT_COMMON;
+			out = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ;
 		}
 		else
 		{

+ 20 - 10
AnKi/Gr/D3D/D3DTexture.h

@@ -39,28 +39,28 @@ public:
 
 	DescriptorHeapHandle getOrCreateRtv(const TextureSubresourceDesc& subresource) const
 	{
-		const View& e = getOrCreateView(subresource, TextureUsageBit::kAllFramebuffer);
+		const View& e = getOrCreateView(subresource, ViewType::kRtv);
 		ANKI_ASSERT(e.m_handle.isCreated());
 		return e.m_handle;
 	}
 
-	DescriptorHeapHandle getOrCreateDsv(const TextureSubresourceDesc& subresource, TextureUsageBit usage) const
+	DescriptorHeapHandle getOrCreateDsv(const TextureSubresourceDesc& subresource, Bool readOnly) const
 	{
-		const View& e = getOrCreateView(subresource, usage);
+		const View& e = getOrCreateView(subresource, (readOnly) ? ViewType::kReadOnlyDsv : ViewType::kDsv);
 		ANKI_ASSERT(e.m_handle.isCreated());
 		return e.m_handle;
 	}
 
 	DescriptorHeapHandle getOrCreateSrv(const TextureSubresourceDesc& subresource) const
 	{
-		const View& e = getOrCreateView(subresource, TextureUsageBit::kAllSampled);
+		const View& e = getOrCreateView(subresource, ViewType::kSrv);
 		ANKI_ASSERT(e.m_handle.isCreated());
 		return e.m_handle;
 	}
 
 	DescriptorHeapHandle getOrCreateUav(const TextureSubresourceDesc& subresource) const
 	{
-		const View& e = getOrCreateView(subresource, TextureUsageBit::kAllStorage);
+		const View& e = getOrCreateView(subresource, ViewType::kUav);
 		ANKI_ASSERT(e.m_handle.isCreated());
 		return e.m_handle;
 	}
@@ -86,10 +86,20 @@ public:
 
 	DXGI_FORMAT getDxgiFormat() const
 	{
-		return DXGI_FORMAT(m_format);
+		return convertFormat(m_format);
 	}
 
 private:
+	enum class ViewType : U8
+	{
+		kNone,
+		kRtv,
+		kDsv,
+		kReadOnlyDsv,
+		kSrv,
+		kUav
+	};
+
 	class View
 	{
 	public:
@@ -99,7 +109,7 @@ private:
 		mutable U32 m_bindlessIndex = kMaxU32;
 		mutable SpinLock m_bindlessLock;
 
-		TextureUsageBit m_usage = TextureUsageBit::kNone;
+		ViewType m_type = ViewType::kNone;
 
 		View() = default;
 
@@ -113,7 +123,7 @@ private:
 			m_handle = b.m_handle;
 			m_bindlessHandle = b.m_bindlessHandle;
 			m_bindlessIndex = b.m_bindlessIndex;
-			m_usage = b.m_usage;
+			m_type = b.m_type;
 			return *this;
 		}
 	};
@@ -131,9 +141,9 @@ private:
 
 	Error initInternal(ID3D12Resource* external, const TextureInitInfo& init);
 
-	const View& getOrCreateView(const TextureSubresourceDesc& subresource, TextureUsageBit usage) const;
+	const View& getOrCreateView(const TextureSubresourceDesc& subresource, ViewType type) const;
 
-	void initView(const TextureSubresourceDesc& subresource, TextureUsageBit usage, View& view) const;
+	void initView(const TextureSubresourceDesc& subresource, ViewType type, View& view) const;
 
 	void computeBarrierInfo(TextureUsageBit usage, D3D12_BARRIER_SYNC& stages, D3D12_BARRIER_ACCESS& accesses) const;
 

+ 10 - 1
AnKi/Gr/RenderGraph.cpp

@@ -1115,7 +1115,9 @@ void RenderGraph::recordAndSubmitCommandBuffers(FencePtr* optionalFence)
 			[this, start, end, pool, &cmdbs, &cmdbsMtx, group, batchGroupCount]([[maybe_unused]] U32 tid) {
 				ANKI_TRACE_SCOPED_EVENT(GrRenderGraphTask);
 
-				CommandBufferInitInfo cmdbInit("RenderGraph cmdb");
+				Array<Char, 32> name;
+				snprintf(name.getBegin(), name.getSize(), "RenderGraph cmdb %u-%u", start, end);
+				CommandBufferInitInfo cmdbInit(name.getBegin());
 				cmdbInit.m_flags = CommandBufferFlag::kGeneralWork;
 				CommandBufferPtr cmdb = GrManager::getSingleton().newCommandBuffer(cmdbInit);
 
@@ -1169,6 +1171,7 @@ void RenderGraph::recordAndSubmitCommandBuffers(FencePtr* optionalFence)
 						inf.m_nextUsage = barrier.m_usageAfter;
 						inf.m_textureView = TextureView(&tex, barrier.m_subresource);
 					}
+
 					DynamicArray<BufferBarrierInfo, MemoryPoolPtrWrapper<StackMemoryPool>> buffBarriers(pool);
 					buffBarriers.resizeStorage(batch.m_bufferBarriersBefore.getSize());
 					for(const BufferBarrier& barrier : batch.m_bufferBarriersBefore)
@@ -1179,6 +1182,12 @@ void RenderGraph::recordAndSubmitCommandBuffers(FencePtr* optionalFence)
 						inf.m_bufferView = BufferView(m_ctx->m_buffers[barrier.m_idx].m_buffer.get(), m_ctx->m_buffers[barrier.m_idx].m_offset,
 													  m_ctx->m_buffers[barrier.m_idx].m_range);
 					}
+
+					// Sort them for the command buffer to merge as many as possible
+					std::sort(buffBarriers.getBegin(), buffBarriers.getEnd(), [](const BufferBarrierInfo& a, const BufferBarrierInfo& b) {
+						return a.m_bufferView.getBuffer().getUuid() < b.m_bufferView.getBuffer().getUuid();
+					});
+
 					DynamicArray<AccelerationStructureBarrierInfo, MemoryPoolPtrWrapper<StackMemoryPool>> asBarriers(pool);
 					for(const ASBarrier& barrier : batch.m_asBarriersBefore)
 					{

+ 2 - 2
AnKi/Renderer/Bloom.cpp

@@ -40,7 +40,7 @@ Error Bloom::initExposure()
 	const U32 height = getRenderer().getDownscaleBlur().getPassHeight(kMaxU32) * 2;
 
 	// Create RT info
-	m_exposure.m_rtDescr = getRenderer().create2DRenderTargetDescription(width, height, kRtPixelFormat, "Bloom Exp");
+	m_exposure.m_rtDescr = getRenderer().create2DRenderTargetDescription(width, height, getRenderer().getHdrFormat(), "Bloom Exp");
 	m_exposure.m_rtDescr.bake();
 
 	// init shaders
@@ -55,7 +55,7 @@ Error Bloom::initUpscale()
 	const U32 height = getRenderer().getPostProcessResolution().y() / kBloomFraction;
 
 	// Create RT descr
-	m_upscale.m_rtDescr = getRenderer().create2DRenderTargetDescription(width, height, kRtPixelFormat, "Bloom Upscale");
+	m_upscale.m_rtDescr = getRenderer().create2DRenderTargetDescription(width, height, getRenderer().getHdrFormat(), "Bloom Upscale");
 	m_upscale.m_rtDescr.bake();
 
 	// init shaders

+ 0 - 2
AnKi/Renderer/Bloom.h

@@ -44,8 +44,6 @@ public:
 	}
 
 private:
-	static constexpr Format kRtPixelFormat = Format::kA2B10G10R10_Unorm_Pack32;
-
 	class
 	{
 	public:

+ 14 - 2
AnKi/Renderer/ClusterBinning.cpp

@@ -120,27 +120,33 @@ void ClusterBinning::populateRenderGraph(RenderingContext& ctx)
 
 				PtrSize objBufferOffset = 0;
 				PtrSize objBufferRange = 0;
+				U32 elementSize = 0;
 				switch(type)
 				{
 				case GpuSceneNonRenderableObjectType::kLight:
 					objBufferOffset = GpuSceneArrays::Light::getSingleton().getGpuSceneOffsetOfArrayBase();
 					objBufferRange = GpuSceneArrays::Light::getSingleton().getBufferRange();
+					elementSize = GpuSceneArrays::Light::getElementSize();
 					break;
 				case GpuSceneNonRenderableObjectType::kDecal:
 					objBufferOffset = GpuSceneArrays::Decal::getSingleton().getGpuSceneOffsetOfArrayBase();
 					objBufferRange = GpuSceneArrays::Decal::getSingleton().getBufferRange();
+					elementSize = GpuSceneArrays::Decal::getElementSize();
 					break;
 				case GpuSceneNonRenderableObjectType::kFogDensityVolume:
 					objBufferOffset = GpuSceneArrays::FogDensityVolume::getSingleton().getGpuSceneOffsetOfArrayBase();
 					objBufferRange = GpuSceneArrays::FogDensityVolume::getSingleton().getBufferRange();
+					elementSize = GpuSceneArrays::FogDensityVolume::getElementSize();
 					break;
 				case GpuSceneNonRenderableObjectType::kGlobalIlluminationProbe:
 					objBufferOffset = GpuSceneArrays::GlobalIlluminationProbe::getSingleton().getGpuSceneOffsetOfArrayBase();
 					objBufferRange = GpuSceneArrays::GlobalIlluminationProbe::getSingleton().getBufferRange();
+					elementSize = GpuSceneArrays::GlobalIlluminationProbe::getElementSize();
 					break;
 				case GpuSceneNonRenderableObjectType::kReflectionProbe:
 					objBufferOffset = GpuSceneArrays::ReflectionProbe::getSingleton().getGpuSceneOffsetOfArrayBase();
 					objBufferRange = GpuSceneArrays::ReflectionProbe::getSingleton().getBufferRange();
+					elementSize = GpuSceneArrays::ReflectionProbe::getElementSize();
 					break;
 				default:
 					ANKI_ASSERT(0);
@@ -149,7 +155,7 @@ void ClusterBinning::populateRenderGraph(RenderingContext& ctx)
 				if(objBufferRange == 0)
 				{
 					objBufferOffset = 0;
-					objBufferRange = GpuSceneBuffer::getSingleton().getBufferView().getRange();
+					objBufferRange = getAlignedRoundDown(elementSize, GpuSceneBuffer::getSingleton().getBufferView().getRange());
 				}
 
 				cmdb.bindStorageBuffer(ANKI_REG(t1), BufferView(&GpuSceneBuffer::getSingleton().getBuffer(), objBufferOffset, objBufferRange));
@@ -226,27 +232,33 @@ void ClusterBinning::populateRenderGraph(RenderingContext& ctx)
 
 				PtrSize objBufferOffset = 0;
 				PtrSize objBufferRange = 0;
+				U32 objSize = 0;
 				switch(type)
 				{
 				case GpuSceneNonRenderableObjectType::kLight:
 					objBufferOffset = GpuSceneArrays::Light::getSingleton().getGpuSceneOffsetOfArrayBase();
 					objBufferRange = GpuSceneArrays::Light::getSingleton().getBufferRange();
+					objSize = GpuSceneArrays::Light::getSingleton().getElementSize();
 					break;
 				case GpuSceneNonRenderableObjectType::kDecal:
 					objBufferOffset = GpuSceneArrays::Decal::getSingleton().getGpuSceneOffsetOfArrayBase();
 					objBufferRange = GpuSceneArrays::Decal::getSingleton().getBufferRange();
+					objSize = GpuSceneArrays::Decal::getSingleton().getElementSize();
 					break;
 				case GpuSceneNonRenderableObjectType::kFogDensityVolume:
 					objBufferOffset = GpuSceneArrays::FogDensityVolume::getSingleton().getGpuSceneOffsetOfArrayBase();
 					objBufferRange = GpuSceneArrays::FogDensityVolume::getSingleton().getBufferRange();
+					objSize = GpuSceneArrays::FogDensityVolume::getSingleton().getElementSize();
 					break;
 				case GpuSceneNonRenderableObjectType::kGlobalIlluminationProbe:
 					objBufferOffset = GpuSceneArrays::GlobalIlluminationProbe::getSingleton().getGpuSceneOffsetOfArrayBase();
 					objBufferRange = GpuSceneArrays::GlobalIlluminationProbe::getSingleton().getBufferRange();
+					objSize = GpuSceneArrays::GlobalIlluminationProbe::getSingleton().getElementSize();
 					break;
 				case GpuSceneNonRenderableObjectType::kReflectionProbe:
 					objBufferOffset = GpuSceneArrays::ReflectionProbe::getSingleton().getGpuSceneOffsetOfArrayBase();
 					objBufferRange = GpuSceneArrays::ReflectionProbe::getSingleton().getBufferRange();
+					objSize = GpuSceneArrays::ReflectionProbe::getSingleton().getElementSize();
 					break;
 				default:
 					ANKI_ASSERT(0);
@@ -255,7 +267,7 @@ void ClusterBinning::populateRenderGraph(RenderingContext& ctx)
 				if(objBufferRange == 0)
 				{
 					objBufferOffset = 0;
-					objBufferRange = GpuSceneBuffer::getSingleton().getBufferView().getRange();
+					objBufferRange = getAlignedRoundDown(objSize, GpuSceneBuffer::getSingleton().getBufferView().getRange());
 				}
 
 				cmdb.bindStorageBuffer(ANKI_REG(t0), BufferView(&GpuSceneBuffer::getSingleton().getBuffer(), objBufferOffset, objBufferRange));

+ 4 - 4
AnKi/Renderer/Sky.cpp

@@ -32,7 +32,7 @@ Error Sky::initInternal()
 	ANKI_CHECK(loadShaderProgram("ShaderBinaries/Sky.ankiprogbin", {}, m_prog, m_computeSunColorGrProg, "ComputeSunColor"));
 
 	const TextureUsageBit usage = TextureUsageBit::kAllCompute;
-	const TextureUsageBit initialUsage = TextureUsageBit::kAllCompute;
+	const TextureUsageBit initialUsage = TextureUsageBit::kSampledCompute;
 	const Format formatB =
 		(GrManager::getSingleton().getDeviceCapabilities().m_unalignedBbpTextureFormats) ? Format::kR16G16B16_Unorm : Format::kR16G16B16A16_Unorm;
 
@@ -78,8 +78,8 @@ void Sky::populateRenderGraph(RenderingContext& ctx)
 	RenderTargetHandle multipleScatteringLutRt;
 	if(renderTransAndMultiScatLuts)
 	{
-		transmittanceLutRt = rgraph.importRenderTarget(m_transmittanceLut.get(), TextureUsageBit::kAllCompute);
-		multipleScatteringLutRt = rgraph.importRenderTarget(m_multipleScatteringLut.get(), TextureUsageBit::kAllCompute);
+		transmittanceLutRt = rgraph.importRenderTarget(m_transmittanceLut.get(), TextureUsageBit::kSampledCompute);
+		multipleScatteringLutRt = rgraph.importRenderTarget(m_multipleScatteringLut.get(), TextureUsageBit::kSampledCompute);
 		m_transmittanceAndMultiScatterLutsGenerated = true;
 	}
 	else
@@ -94,7 +94,7 @@ void Sky::populateRenderGraph(RenderingContext& ctx)
 	}
 	else
 	{
-		m_runCtx.m_skyLutRt = rgraph.importRenderTarget(m_skyLut.get(), TextureUsageBit::kAllCompute);
+		m_runCtx.m_skyLutRt = rgraph.importRenderTarget(m_skyLut.get(), TextureUsageBit::kSampledCompute);
 		m_skyLutImportedOnce = true;
 	}
 

+ 1 - 1
AnKi/Scene/Components/GlobalIlluminationProbeComponent.cpp

@@ -85,7 +85,7 @@ Error GlobalIlluminationProbeComponent::update(SceneComponentUpdateInfo& info, B
 		cmdb->dispatchCompute(wgSize.x(), wgSize.y(), wgSize.z());
 
 		texBarrier.m_previousUsage = TextureUsageBit::kStorageComputeWrite;
-		texBarrier.m_nextUsage = m_volTex->getTextureUsage();
+		texBarrier.m_nextUsage = TextureUsageBit::kAllSampled; // Put something random, the renderer will start from kNone
 		cmdb->setPipelineBarrier({&texBarrier, 1}, {}, {});
 
 		cmdb->endRecording();

+ 3 - 1
AnKi/ShaderCompiler/ShaderCompiler.cpp

@@ -519,12 +519,14 @@ Error doReflectionDxil(ConstWeakArray<U8> dxil, ShaderType type, ShaderReflectio
 			// ByteAddressBuffer
 			akBinding.m_type = DescriptorType::kStorageBuffer;
 			akBinding.m_flags = DescriptorFlag::kRead | DescriptorFlag::kByteAddressBuffer;
+			akBinding.m_d3dStructuredBufferStride = sizeof(U32);
 		}
 		else if(bindDesc.Type == D3D_SIT_UAV_RWBYTEADDRESS)
 		{
 			// RWByteAddressBuffer
 			akBinding.m_type = DescriptorType::kStorageBuffer;
 			akBinding.m_flags = DescriptorFlag::kReadWrite | DescriptorFlag::kByteAddressBuffer;
+			akBinding.m_d3dStructuredBufferStride = sizeof(U32);
 		}
 		else if(bindDesc.Type == D3D_SIT_RTACCELERATIONSTRUCTURE)
 		{
@@ -615,7 +617,7 @@ Error doReflectionDxil(ConstWeakArray<U8> dxil, ShaderType type, ShaderReflectio
 #	undef ANKI_ATTRIB_NAME
 
 			refl.m_vertex.m_vertexAttributeMask.set(a);
-			refl.m_vertex.m_vkVertexAttributeLocations[a] = U8(i);
+			refl.m_vertex.m_vkVertexAttributeLocations[a] = U8(i); // Just set something
 		}
 	}
 

+ 5 - 2
AnKi/Shaders/Blit.ankiprog

@@ -3,6 +3,8 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
+#include <AnKi/Shaders/QuadVert.hlsl>
+
 #if ANKI_COMPUTE_SHADER || ANKI_FRAGMENT_SHADER
 #	include <AnKi/Shaders/Functions.hlsl>
 
@@ -29,7 +31,7 @@ ANKI_PUSH_CONSTANTS(Uniforms, g_unis)
 #	if USE_COMPUTE
 [numthreads(8, 8, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
 #	else
-RVec3 main(Vec2 uv : TEXCOORD) : SV_TARGET0
+RVec3 main(VertOut input) : SV_TARGET0
 #	endif
 {
 #	if USE_COMPUTE
@@ -39,6 +41,8 @@ RVec3 main(Vec2 uv : TEXCOORD) : SV_TARGET0
 	}
 
 	const Vec2 uv = (Vec2(svDispatchThreadId.xy) + 0.5) / g_unis.m_viewportSize;
+#	else
+	const Vec2 uv = input.m_uv;
 #	endif
 
 	const RVec3 color = g_inputTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).rgb;
@@ -52,7 +56,6 @@ RVec3 main(Vec2 uv : TEXCOORD) : SV_TARGET0
 #endif // ANKI_COMPUTE_SHADER || ANKI_FRAGMENT_SHADER
 
 #pragma anki technique_start vert
-#include <AnKi/Shaders/QuadVert.hlsl>
 #pragma anki technique_end vert
 
 #pragma anki technique_start frag

+ 4 - 2
AnKi/Shaders/Bloom.ankiprog

@@ -3,6 +3,7 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
+#include <AnKi/Shaders/QuadVert.hlsl>
 #if ANKI_COMPUTE_SHADER || ANKI_FRAGMENT_SHADER
 #	include <AnKi/Shaders/TonemappingFunctions.hlsl>
 #	include <AnKi/Shaders/Functions.hlsl>
@@ -31,7 +32,7 @@ RWTexture2D<RVec4> g_storageTex : register(u1);
 #	if ANKI_COMPUTE_SHADER
 [numthreads(THREADGROUP_SIZE_X, THREADGROUP_SIZE_Y, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
 #	else
-RVec3 main(Vec2 uv : TEXCOORD) : SV_TARGET0
+RVec3 main(VertOut input) : SV_TARGET0
 #	endif
 {
 #	if ANKI_COMPUTE_SHADER
@@ -39,6 +40,8 @@ RVec3 main(Vec2 uv : TEXCOORD) : SV_TARGET0
 	g_storageTex.GetDimensions(imgSize.x, imgSize.y);
 
 	const Vec2 uv = (Vec2(svDispatchThreadId) + 0.5) / imgSize;
+#	else
+	const Vec2 uv = input.m_uv;
 #	endif
 
 	const RF32 weight = 1.0 / 5.0;
@@ -59,7 +62,6 @@ RVec3 main(Vec2 uv : TEXCOORD) : SV_TARGET0
 #endif // ANKI_COMPUTE_SHADER || ANKI_FRAGMENT_SHADER
 
 #pragma anki technique_start vert
-#include <AnKi/Shaders/QuadVert.hlsl>
 #pragma anki technique_end vert
 
 #pragma anki technique_start frag

+ 4 - 2
AnKi/Shaders/BloomUpscale.ankiprog

@@ -3,6 +3,7 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
+#include <AnKi/Shaders/QuadVert.hlsl>
 #if ANKI_FRAGMENT_SHADER || ANKI_COMPUTE_SHADER
 #	include <AnKi/Shaders/Functions.hlsl>
 
@@ -92,7 +93,7 @@ RVec3 upscale(Vec2 uv)
 #	if ANKI_COMPUTE_SHADER
 [numthreads(THREADGROUP_SIZE_XY, THREADGROUP_SIZE_XY, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
 #	else
-RVec3 main(Vec2 uv : TEXCOORD) : SV_TARGET0
+RVec3 main(VertOut input) : SV_TARGET0
 #	endif
 {
 #	if ANKI_COMPUTE_SHADER
@@ -100,6 +101,8 @@ RVec3 main(Vec2 uv : TEXCOORD) : SV_TARGET0
 	g_storageTex.GetDimensions(storageTexSize.x, storageTexSize.y);
 
 	const Vec2 uv = (Vec2(svDispatchThreadId) + 0.5) / storageTexSize;
+#	else
+	const Vec2 uv = input.m_uv;
 #	endif
 
 	const RVec3 outColor = ssLensFlare(uv) + upscale(uv);
@@ -113,7 +116,6 @@ RVec3 main(Vec2 uv : TEXCOORD) : SV_TARGET0
 #endif // ANKI_FRAGMENT_SHADER || ANKI_COMPUTE_SHADER
 
 #pragma anki technique_start vert
-#include <AnKi/Shaders/QuadVert.hlsl>
 #pragma anki technique_end vert
 
 #pragma anki technique_start frag

+ 7 - 5
AnKi/Shaders/DepthAwareBlur.ankiprog

@@ -3,13 +3,14 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
-#include <AnKi/Shaders/Common.hlsl>
-#include <AnKi/Shaders/BilateralFilter.hlsl>
-
 #pragma anki mutator ORIENTATION 0 1 2 // 0: VERTICAL, 1: HORIZONTAL, 2: BOX
 #pragma anki mutator SAMPLE_COUNT 3 5 7 9 11 13 15
 #pragma anki mutator COLOR_COMPONENTS 4 3 1
 
+#include <AnKi/Shaders/QuadVert.hlsl>
+#include <AnKi/Shaders/Common.hlsl>
+#include <AnKi/Shaders/BilateralFilter.hlsl>
+
 #if ANKI_COMPUTE_SHADER || ANKI_FRAGMENT_SHADER
 #	define ORIENTATION_VERTICAL 0
 #	define ORIENTATION_HORIZONTAL 1
@@ -62,7 +63,7 @@ void sampleTex(Vec2 uv, F32 refDepth, inout ColorType col, inout F32 weight)
 #	if ANKI_COMPUTE_SHADER
 [numthreads(THREADGROUP_SQRT_SIZE, THREADGROUP_SQRT_SIZE, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
 #	else
-ColorType main(Vec2 uv : TEXCOORD) : SV_TARGET0
+ColorType main(VertOut input) : SV_TARGET0
 #	endif
 {
 	UVec2 textureSize;
@@ -72,6 +73,8 @@ ColorType main(Vec2 uv : TEXCOORD) : SV_TARGET0
 	// Set UVs
 #	if ANKI_COMPUTE_SHADER
 	const Vec2 uv = (Vec2(svDispatchThreadId) + 0.5) / Vec2(textureSize);
+#	else
+	const Vec2 uv = input.m_uv;
 #	endif
 
 	const Vec2 texelSize = 1.0 / Vec2(textureSize);
@@ -128,7 +131,6 @@ ColorType main(Vec2 uv : TEXCOORD) : SV_TARGET0
 #endif // ANKI_COMPUTE_SHADER || ANKI_FRAGMENT_SHADER
 
 #pragma anki technique_start vert
-#include <AnKi/Shaders/QuadVert.hlsl>
 #pragma anki technique_end vert
 
 #pragma anki technique_start frag

+ 3 - 3
AnKi/Shaders/DepthDownscale.ankiprog

@@ -99,13 +99,13 @@ AF4 SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3)
 #pragma anki technique_end vert
 
 #pragma anki technique_start frag
-#include <AnKi/Shaders/Common.hlsl>
+#include <AnKi/Shaders/QuadVert.hlsl>
 
 Texture2D<Vec4> g_inputTex : register(t0);
 SamplerState g_linearAnyClampSampler : register(s0);
 
-F32 main(Vec2 uv : TEXCOORD) : SV_TARGET0
+F32 main(VertOut input) : SV_TARGET0
 {
-	return g_inputTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).x;
+	return g_inputTex.SampleLevel(g_linearAnyClampSampler, input.m_uv, 0.0).x;
 }
 #pragma anki technique_end frag

+ 4 - 2
AnKi/Shaders/DownscaleBlur.ankiprog

@@ -3,6 +3,7 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
+#include <AnKi/Shaders/QuadVert.hlsl>
 #if ANKI_COMPUTE_SHADER || ANKI_FRAGMENT_SHADER
 #	include <AnKi/Shaders/TonemappingFunctions.hlsl>
 #	include <AnKi/Shaders/Functions.hlsl>
@@ -27,11 +28,13 @@ RWTexture2D<RVec4> g_storageTex : register(u1);
 #	if ANKI_COMPUTE_SHADER
 [numthreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
 #	else
-RVec3 main(Vec2 uv : TEXCOORD) : SV_TARGET0
+RVec3 main(VertOut input) : SV_TARGET0
 #	endif
 {
 #	if ANKI_COMPUTE_SHADER
 	const Vec2 uv = (Vec2(svDispatchThreadId) + 0.5) / g_unis.m_fbSize;
+#	else
+	const Vec2 uv = input.m_uv;
 #	endif
 
 	RVec3 output;
@@ -51,7 +54,6 @@ RVec3 main(Vec2 uv : TEXCOORD) : SV_TARGET0
 #endif // ANKI_COMPUTE_SHADER || ANKI_FRAGMENT_SHADER
 
 #pragma anki technique_start vert
-#include <AnKi/Shaders/QuadVert.hlsl>
 #pragma anki technique_end vert
 
 #pragma anki technique_start frag

+ 4 - 2
AnKi/Shaders/FinalComposite.ankiprog

@@ -7,8 +7,9 @@
 #pragma anki mutator BLOOM_ENABLED 0 1
 #pragma anki mutator DBG_ENABLED 0 1
 
-#pragma anki technique_start vert
 #include <AnKi/Shaders/QuadVert.hlsl>
+
+#pragma anki technique_start vert
 #pragma anki technique_end vert
 
 #pragma anki technique_start frag
@@ -49,8 +50,9 @@ RVec3 colorGrading(RVec3 color)
 	return g_lut.SampleLevel(g_trilinearRepeatSampler, lutCoords, 0.0).rgb;
 }
 
-RVec3 main(Vec2 uv : TEXCOORD) : SV_TARGET0
+RVec3 main(VertOut input) : SV_TARGET0
 {
+	const Vec2 uv = input.m_uv;
 	RVec3 outColor;
 
 	if(g_unis.m_motionBlurSamples > 0u)

+ 4 - 3
AnKi/Shaders/Fsr.ankiprog

@@ -7,6 +7,8 @@
 #pragma anki mutator SHARPEN 0 1
 #pragma anki mutator FSR_QUALITY 0 1
 
+#include <AnKi/Shaders/QuadVert.hlsl>
+
 #if ANKI_COMPUTE_SHADER || ANKI_FRAGMENT_SHADER
 #	include <AnKi/Shaders/Functions.hlsl>
 
@@ -79,7 +81,7 @@ AH3 FsrEasuSampleH(AF2 p)
 #	if ANKI_COMPUTE_SHADER
 [numthreads(8, 8, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
 #	else
-Vec3 main(Vec4 svPosition : SV_POSITION) : SV_TARGET0
+Vec3 main(VertOut input) : SV_TARGET0
 #	endif
 {
 #	if ANKI_COMPUTE_SHADER
@@ -90,7 +92,7 @@ Vec3 main(Vec4 svPosition : SV_POSITION) : SV_TARGET0
 
 	const UVec2 uv = svDispatchThreadId.xy;
 #	else
-	const UVec2 uv = UVec2(svPosition.xy);
+	const UVec2 uv = UVec2(input.m_svPosition.xy);
 #	endif
 
 	HVec3 color;
@@ -111,7 +113,6 @@ Vec3 main(Vec4 svPosition : SV_POSITION) : SV_TARGET0
 #endif // ANKI_COMPUTE_SHADER || ANKI_FRAGMENT_SHADER
 
 #pragma anki technique_start vert
-#include <AnKi/Shaders/QuadVert.hlsl>
 #pragma anki technique_end vert
 
 #pragma anki technique_start frag

+ 6 - 3
AnKi/Shaders/GBufferPost.ankiprog

@@ -3,8 +3,9 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
-#pragma anki technique_start vert
 #include <AnKi/Shaders/QuadVert.hlsl>
+
+#pragma anki technique_start vert
 #pragma anki technique_end vert
 
 #pragma anki technique_start frag
@@ -27,8 +28,10 @@ struct FragOut
 	RVec4 m_roughnessAndOther : SV_TARGET1;
 };
 
-FragOut main(Vec2 uv : TEXCOORD, Vec4 svPosition : SV_POSITION)
+FragOut main(VertOut input)
 {
+	const Vec2 uv = input.m_uv;
+
 	// This code blends the diffuse and the specular+roughness of the decals with GBuffer render targets.
 	// Normally the blending is done as follows ('D' is the decal diffuse and 'f' is decal blend factor):
 	// d=gbuffer.diff
@@ -46,7 +49,7 @@ FragOut main(Vec2 uv : TEXCOORD, Vec4 svPosition : SV_POSITION)
 	const Vec3 worldPos = worldPos4.xyz / worldPos4.w;
 
 	// Get the cluster
-	Cluster cluster = getClusterFragCoord(g_clusters, g_globalUniforms, Vec3(svPosition.xy, depth));
+	Cluster cluster = getClusterFragCoord(g_clusters, g_globalUniforms, Vec3(input.m_svPosition.xy, depth));
 
 	// Make the decalsMask uniform across the wave because we are accessing bindless textures later on
 	U32 decalsMask = cluster.m_decalsMask[0];

+ 5 - 3
AnKi/Shaders/LightShading.ankiprog

@@ -3,8 +3,9 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
-#pragma anki technique_start vert
 #include <AnKi/Shaders/QuadVert.hlsl>
+
+#pragma anki technique_start vert
 #pragma anki technique_end vert
 
 #pragma anki technique_start frag
@@ -41,8 +42,9 @@ Texture2D<RVec4> g_integrationLut : register(t12);
 	const RF32 att = computeAttenuationFactor(light.m_radius, frag2Light); \
 	RF32 lambert = max(0.0, dot(gbuffer.m_normal, l));
 
-RVec3 main(Vec4 svPosition : SV_POSITION, Vec2 uv : TEXCOORD) : SV_TARGET0
+RVec3 main(VertOut input) : SV_TARGET0
 {
+	const Vec2 uv = input.m_uv;
 	const F32 depth = g_depthTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0).r;
 	const Vec2 ndc = uvToNdc(uv);
 
@@ -58,7 +60,7 @@ RVec3 main(Vec4 svPosition : SV_POSITION, Vec2 uv : TEXCOORD) : SV_TARGET0
 	const RVec3 viewDir = normalize(g_globalUniforms.m_cameraPosition - worldPos);
 
 	// Get the cluster
-	Cluster cluster = getClusterFragCoord(g_clusters, g_globalUniforms, Vec3(svPosition.xy, depth));
+	Cluster cluster = getClusterFragCoord(g_clusters, g_globalUniforms, Vec3(input.m_svPosition.xy, depth));
 
 	// return clusterHeatmap(cluster, 1u << (U32)GpuSceneNonRenderableObjectType::kLight, 3);
 

+ 4 - 3
AnKi/Shaders/LightShadingApplyFog.ankiprog

@@ -3,12 +3,12 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
-#pragma anki technique_start vert
 #include <AnKi/Shaders/QuadVert.hlsl>
+
+#pragma anki technique_start vert
 #pragma anki technique_end vert
 
 #pragma anki technique_start frag
-
 #include <AnKi/Shaders/Functions.hlsl>
 
 SamplerState g_nearestAnyClampSampler : register(s0);
@@ -25,8 +25,9 @@ struct Uniforms
 };
 ANKI_PUSH_CONSTANTS(Uniforms, g_unis)
 
-RVec4 main(Vec2 uv : TEXCOORD) : SV_TARGET0
+RVec4 main(VertOut input) : SV_TARGET0
 {
+	const Vec2 uv = input.m_uv;
 	Vec3 uvw;
 
 	// Compute W coordinate

+ 5 - 20
AnKi/Shaders/LightShadingSkybox.ankiprog

@@ -5,26 +5,10 @@
 
 #pragma anki mutator METHOD 0 1 2 // 0: solid color, 1: 2D image, 2: generated
 
-#pragma anki technique_start vert
-
-#include <AnKi/Shaders/Common.hlsl>
-
-struct VertOut
-{
-	Vec4 m_position : SV_POSITION;
-	Vec2 m_uv : TEXCOORD;
-};
-
-VertOut main(U32 vertId : SV_VERTEXID)
-{
-	VertOut output;
-	output.m_uv = Vec2(vertId & 1, vertId >> 1) * 2.0;
-
-	output.m_position = Vec4(output.m_uv * 2.0 - 1.0, 1.0, 1.0);
-
-	return output;
-}
+#define CUSTOM_DEPTH 1.0
+#include <AnKi/Shaders/QuadVert.hlsl>
 
+#pragma anki technique_start vert
 #pragma anki technique_end vert
 
 #pragma anki technique_start frag
@@ -67,8 +51,9 @@ Texture2D<Vec4> g_skyLut : register(t0);
 ConstantBuffer<GlobalRendererUniforms> g_unis : register(b0);
 #endif
 
-RVec3 main(Vec2 uv : TEXCOORD) : SV_TARGET0
+RVec3 main(VertOut input) : SV_TARGET0
 {
+	const Vec2 uv = input.m_uv;
 #if METHOD == 0
 	ANKI_MAYBE_UNUSED(uv);
 	return g_unis.m_solidColor;

+ 4 - 5
AnKi/Shaders/MipmapGenerator.ankiprog

@@ -3,18 +3,17 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
-#pragma anki technique_start vert
 #include <AnKi/Shaders/QuadVert.hlsl>
+
+#pragma anki technique_start vert
 #pragma anki technique_end vert
 
 #pragma anki technique_start frag
-#include <AnKi/Shaders/Common.hlsl>
-
 Texture2D<Vec4> g_inputTex : register(t0);
 SamplerState g_linearAnyClampSampler : register(s0);
 
-Vec4 main(Vec2 uv : TEXCOORD) : SV_TARGET0
+Vec4 main(VertOut input) : SV_TARGET0
 {
-	return g_inputTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0f);
+	return g_inputTex.SampleLevel(g_linearAnyClampSampler, input.m_uv, 0.0f);
 }
 #pragma anki technique_end frag

+ 4 - 3
AnKi/Shaders/MotionVectors.ankiprog

@@ -5,7 +5,7 @@
 
 // Calculates the motion vectors that will be used to sample from the previous frame
 
-#include <AnKi/Shaders/Common.hlsl>
+#include <AnKi/Shaders/QuadVert.hlsl>
 
 #if ANKI_COMPUTE_SHADER || ANKI_FRAGMENT_SHADER
 #	include <AnKi/Shaders/Functions.hlsl>
@@ -37,13 +37,15 @@ struct FragOut
 	Vec2 m_motionVectors : SV_TARGET0;
 };
 
-FragOut main(Vec2 uv : TEXCOORD)
+FragOut main(VertOut input)
 #	endif
 {
 #	if ANKI_COMPUTE_SHADER
 	Vec2 texSize;
 	g_motionVectorsStorageTex.GetDimensions(texSize.x, texSize.y);
 	const Vec2 uv = (Vec2(svDispatchThreadId) + 0.5f) / texSize;
+#	else
+	const Vec2 uv = input.m_uv;
 #	endif
 
 	const Vec2 velocity = g_velocityTex.SampleLevel(g_nearesetAnyClampSampler, uv, 0.0).rg;
@@ -83,7 +85,6 @@ FragOut main(Vec2 uv : TEXCOORD)
 #endif // ANKI_COMPUTE_SHADER || ANKI_FRAGMENT_SHADER
 
 #pragma anki technique_start vert
-#include <AnKi/Shaders/QuadVert.hlsl>
 #pragma anki technique_end vert
 
 #pragma anki technique_start frag

+ 8 - 2
AnKi/Shaders/QuadVert.hlsl

@@ -5,18 +5,24 @@
 
 #include <AnKi/Shaders/Common.hlsl>
 
+#if !defined(CUSTOM_DEPTH)
+#	define CUSTOM_DEPTH 0.0
+#endif
+
 struct VertOut
 {
-	Vec4 m_position : SV_POSITION;
+	Vec4 m_svPosition : SV_POSITION;
 	Vec2 m_uv : TEXCOORD;
 };
 
+#if ANKI_VERTEX_SHADER
 VertOut main(U32 vertId : SV_VERTEXID)
 {
 	VertOut output;
 	output.m_uv = Vec2(vertId & 1, vertId >> 1) * 2.0;
 
-	output.m_position = Vec4(output.m_uv * 2.0 - 1.0, 0.0, 1.0);
+	output.m_svPosition = Vec4(output.m_uv * 2.0 - 1.0, CUSTOM_DEPTH, 1.0);
 
 	return output;
 }
+#endif

+ 4 - 2
AnKi/Shaders/ShadowmapsResolve.ankiprog

@@ -6,6 +6,7 @@
 #pragma anki mutator PCF 0 1
 #pragma anki mutator DIRECTIONAL_LIGHT_SHADOW_RESOLVED 0 1
 
+#include <AnKi/Shaders/QuadVert.hlsl>
 #if ANKI_COMPUTE_SHADER || ANKI_FRAGMENT_SHADER
 #	include <AnKi/Shaders/ClusteredShadingFunctions.hlsl>
 
@@ -63,12 +64,14 @@ Vec3 computeDebugShadowCascadeColor(U32 cascade)
 #	if ANKI_COMPUTE_SHADER
 [numthreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
 #	else
-RVec4 main(Vec2 uv : TEXCOORD) : SV_TARGET0
+RVec4 main(VertOut input) : SV_TARGET0
 #	endif
 {
 #	if ANKI_COMPUTE_SHADER
 	svDispatchThreadId = min(svDispatchThreadId, UVec2(g_unis.m_framebufferSize - 1.0f)); // Just to be sure
 	const Vec2 uv = (Vec2(svDispatchThreadId) + 0.5) / g_unis.m_framebufferSize;
+#	else
+	const Vec2 uv = input.m_uv;
 #	endif
 
 #	if PCF
@@ -217,7 +220,6 @@ RVec4 main(Vec2 uv : TEXCOORD) : SV_TARGET0
 #endif // ANKI_COMPUTE_SHADER || ANKI_FRAGMENT_SHADER
 
 #pragma anki technique_start vert
-#include <AnKi/Shaders/QuadVert.hlsl>
 #pragma anki technique_end vert
 
 #pragma anki technique_start frag

+ 10 - 8
AnKi/Shaders/Ssao.ankiprog

@@ -7,7 +7,7 @@
 
 #pragma anki mutator SPATIAL_DENOISE_QUALITY 0 1
 
-#include <AnKi/Shaders/Common.hlsl>
+#include <AnKi/Shaders/QuadVert.hlsl>
 
 // ===========================================================================
 // SSAO                                                                      =
@@ -51,14 +51,15 @@ RF32 computeFalloff(RF32 len)
 #	if ANKI_COMPUTE_SHADER
 [numthreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
 #	else
-RVec4 main(Vec2 uv : TEXCOORD, Vec4 svPosition : SV_POSITION) : SV_TARGET0
+RVec4 main(VertOut input) : SV_TARGET0
 #	endif
 {
 #	if ANKI_COMPUTE_SHADER
 	const Vec2 uv = (Vec2(svDispatchThreadId) + 0.5) / g_unis.m_viewportSizef;
 #	else
-	const UVec2 svDispatchThreadId = svPosition;
+	const UVec2 svDispatchThreadId = input.m_svPosition;
 	ANKI_MAYBE_UNUSED(svDispatchThreadId);
+	const Vec2 uv = input.m_uv;
 #	endif
 
 	const Vec2 ndc = uvToNdc(uv);
@@ -201,7 +202,7 @@ void sampleTex(Vec2 uv, IVec2 offset, F32 refDepth, inout RF32 ssao, inout RVec3
 #	if ANKI_COMPUTE_SHADER
 [numthreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
 #	else
-RVec4 main(Vec2 uv : TEXCOORD) : SV_TARGET0
+RVec4 main(VertOut input) : SV_TARGET0
 #	endif
 {
 // Set UVs
@@ -210,6 +211,8 @@ RVec4 main(Vec2 uv : TEXCOORD) : SV_TARGET0
 	U32 mipCount;
 	g_bentNormalsAndSsaoTex.GetDimensions(0, textureSize.x, textureSize.y, mipCount);
 	const Vec2 uv = (Vec2(svDispatchThreadId) + 0.5f) / textureSize;
+#	else
+	const Vec2 uv = input.m_uv;
 #	endif
 
 	// Sample ref
@@ -276,7 +279,7 @@ RWTexture2D<RVec4> g_bentNormalsAndSsaoStorageTex : register(u0);
 #	if ANKI_COMPUTE_SHADER
 [numthreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
 #	else
-RVec4 main(Vec2 uv : TEXCOORD) : SV_TARGET0
+RVec4 main(VertOut input) : SV_TARGET0
 #	endif
 {
 #	if ANKI_COMPUTE_SHADER
@@ -284,6 +287,8 @@ RVec4 main(Vec2 uv : TEXCOORD) : SV_TARGET0
 	U32 mipCount;
 	g_bentNormalsAndSsaoTex.GetDimensions(0, textureSize.x, textureSize.y, mipCount);
 	const Vec2 uv = (Vec2(svDispatchThreadId) + 0.5f) / textureSize;
+#	else
+	const Vec2 uv = input.m_uv;
 #	endif
 
 	const Vec2 historyUv = uv + g_motionVectorsTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0f).xy;
@@ -343,7 +348,6 @@ RVec4 main(Vec2 uv : TEXCOORD) : SV_TARGET0
 // Techniques                                                                =
 // ===========================================================================
 #pragma anki technique_start vert Ssao uses_mutators
-#include <AnKi/Shaders/QuadVert.hlsl>
 #pragma anki technique_end vert Ssao
 
 #pragma anki technique_start frag Ssao uses_mutators
@@ -353,7 +357,6 @@ RVec4 main(Vec2 uv : TEXCOORD) : SV_TARGET0
 #pragma anki technique_end comp Ssao
 
 #pragma anki technique_start vert SsaoSpatialDenoise
-#include <AnKi/Shaders/QuadVert.hlsl>
 #pragma anki technique_end vert SsaoSpatialDenoise
 
 #pragma anki technique_start frag SsaoSpatialDenoise
@@ -363,7 +366,6 @@ RVec4 main(Vec2 uv : TEXCOORD) : SV_TARGET0
 #pragma anki technique_end comp SsaoSpatialDenoise
 
 #pragma anki technique_start vert SsaoTemporalDenoise
-#include <AnKi/Shaders/QuadVert.hlsl>
 #pragma anki technique_end vert SsaoTemporalDenoise
 
 #pragma anki technique_start frag SsaoTemporalDenoise

+ 4 - 8
AnKi/Shaders/Ssr.ankiprog

@@ -5,15 +5,11 @@
 
 // Screen space reflections
 
-#include <AnKi/Shaders/Common.hlsl>
+#include <AnKi/Shaders/QuadVert.hlsl>
 
 // ===========================================================================
 // SSR                                                                       =
 // ===========================================================================
-#if defined(__INTELLISENSE__)
-#	define ANKI_TECHNIQUE_Ssr 1
-#endif
-
 #if ANKI_TECHNIQUE_Ssr && (ANKI_COMPUTE_SHADER || ANKI_FRAGMENT_SHADER)
 #	include <AnKi/Shaders/Include/MiscRendererTypes.h>
 #	include <AnKi/Shaders/Functions.hlsl>
@@ -39,13 +35,14 @@ RWTexture2D<RVec4> g_storageTex : register(u0);
 #	if ANKI_COMPUTE_SHADER
 [numthreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
 #	else
-RVec4 main(Vec2 uv : TEXCOORD, Vec4 svPosition : SV_POSITION) : SV_TARGET0
+RVec4 main(VertOut input) : SV_TARGET0
 #	endif
 {
 #	if ANKI_COMPUTE_SHADER
 	const Vec2 uv = (Vec2(svDispatchThreadId) + 0.5f) / g_unis.m_viewportSizef;
 #	else
-	const UVec2 svDispatchThreadId = UVec2(svPosition.xy);
+	const UVec2 svDispatchThreadId = UVec2(input.m_svPosition.xy);
+	const Vec2 uv = input.m_uv;
 #	endif
 
 	// Read part of the G-buffer
@@ -160,7 +157,6 @@ RVec4 main(Vec2 uv : TEXCOORD, Vec4 svPosition : SV_POSITION) : SV_TARGET0
 // Techniques                                                                =
 // ===========================================================================
 #pragma anki technique_start vert Ssr
-#include <AnKi/Shaders/QuadVert.hlsl>
 #pragma anki technique_end vert Ssr
 
 #pragma anki technique_start frag Ssr

+ 4 - 2
AnKi/Shaders/TemporalAA.ankiprog

@@ -6,6 +6,7 @@
 #pragma anki mutator VARIANCE_CLIPPING 0 1
 #pragma anki mutator YCBCR 0 1
 
+#include <AnKi/Shaders/QuadVert.hlsl>
 #if ANKI_COMPUTE_SHADER || ANKI_FRAGMENT_SHADER
 #	include <AnKi/Shaders/Functions.hlsl>
 #	include <AnKi/Shaders/PackFunctions.hlsl>
@@ -44,7 +45,7 @@ struct FragOut
 #	if ANKI_COMPUTE_SHADER
 [numthreads(8, 8, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
 #	else
-FragOut main(Vec2 uv : TEXCOORD)
+FragOut main(VertOut input)
 #	endif
 {
 #	if ANKI_COMPUTE_SHADER
@@ -52,6 +53,8 @@ FragOut main(Vec2 uv : TEXCOORD)
 	g_storageTex.GetDimensions(outStorageTexSize.x, outStorageTexSize.y);
 
 	const Vec2 uv = (Vec2(svDispatchThreadId.xy) + 0.5) / outStorageTexSize;
+#	else
+	const Vec2 uv = input.m_uv;
 #	endif
 
 	// Get prev uv coords
@@ -118,7 +121,6 @@ FragOut main(Vec2 uv : TEXCOORD)
 #endif // ANKI_COMPUTE_SHADER || ANKI_FRAGMENT_SHADER
 
 #pragma anki technique_start vert
-#include <AnKi/Shaders/QuadVert.hlsl>
 #pragma anki technique_end vert
 
 #pragma anki technique_start frag

+ 4 - 2
AnKi/Shaders/Tonemap.ankiprog

@@ -5,6 +5,7 @@
 
 // Does tonemapping
 
+#include <AnKi/Shaders/QuadVert.hlsl>
 #if ANKI_COMPUTE_SHADER || ANKI_FRAGMENT_SHADER
 #	include <AnKi/Shaders/Functions.hlsl>
 #	include <AnKi/Shaders/TonemappingFunctions.hlsl>
@@ -31,7 +32,7 @@ ANKI_PUSH_CONSTANTS(Uniforms, g_unis)
 #	if ANKI_COMPUTE_SHADER
 [numthreads(THREADGROUP_SIZE_SQRT, THREADGROUP_SIZE_SQRT, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
 #	else
-RVec3 main(Vec2 uv : TEXCOORD) : SV_TARGET0
+RVec3 main(VertOut input) : SV_TARGET0
 #	endif
 {
 #	if ANKI_COMPUTE_SHADER
@@ -41,6 +42,8 @@ RVec3 main(Vec2 uv : TEXCOORD) : SV_TARGET0
 	}
 
 	const Vec2 uv = (Vec2(svDispatchThreadId.xy) + 0.5f) * g_unis.m_viewportSizeOverOne;
+#	else
+	const Vec2 uv = input.m_uv;
 #	endif
 
 	const RVec3 hdr = g_inputRt.SampleLevel(g_nearestAnyClampSampler, uv, 0.0f).rgb;
@@ -55,7 +58,6 @@ RVec3 main(Vec2 uv : TEXCOORD) : SV_TARGET0
 #endif // ANKI_COMPUTE_SHADER || ANKI_FRAGMENT_SHADER
 
 #pragma anki technique_start vert
-#include <AnKi/Shaders/QuadVert.hlsl>
 #pragma anki technique_end vert
 
 #pragma anki technique_start frag

+ 4 - 2
AnKi/Shaders/TraditionalDeferredShading.ankiprog

@@ -7,9 +7,10 @@
 
 #pragma anki mutator SPECULAR 0 1
 
+#include <AnKi/Shaders/QuadVert.hlsl>
+
 // VERT
 #pragma anki technique_start vert
-#include <AnKi/Shaders/QuadVert.hlsl>
 #pragma anki technique_end vert
 
 // FRAG
@@ -36,8 +37,9 @@ Texture2D<RVec4> g_shadowMap : register(t6);
 
 ConstantBuffer<GlobalRendererUniforms> g_globalRendererConsts : register(b1);
 
-RVec3 main(Vec2 uv : TEXCOORD) : SV_TARGET0
+RVec3 main(VertOut input) : SV_TARGET0
 {
+	const Vec2 uv = input.m_uv;
 	const F32 depth = g_depthTex.SampleLevel(g_gbufferSampler, uv, 0.0).r;
 	if(depth == 1.0f)
 	{

+ 4 - 2
AnKi/Shaders/TraditionalDeferredShadingSkybox.ankiprog

@@ -5,8 +5,9 @@
 
 #pragma anki mutator METHOD 0 1 2 // 0: solid color, 1: 2D image, 2: Generated
 
-#pragma anki technique_start vert
 #include <AnKi/Shaders/QuadVert.hlsl>
+
+#pragma anki technique_start vert
 #pragma anki technique_end vert
 
 #pragma anki technique_start frag
@@ -32,8 +33,9 @@ Texture2D<Vec4> g_skyLut : register(t1);
 
 ConstantBuffer<GlobalRendererUniforms> g_globalRendererConsts : register(b0);
 
-RVec3 main(Vec2 uv : TEXCOORD) : SV_TARGET0
+RVec3 main(VertOut input) : SV_TARGET0
 {
+	const Vec2 uv = input.m_uv;
 	const F32 depth = g_depthTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0).r;
 	if(depth != 1.0f)
 	{

+ 4 - 3
AnKi/Shaders/VisualizeGBufferNormal.ankiprog

@@ -3,8 +3,9 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
-#pragma anki technique_start vert
 #include <AnKi/Shaders/QuadVert.hlsl>
+
+#pragma anki technique_start vert
 #pragma anki technique_end vert
 
 #pragma anki technique_start frag
@@ -13,8 +14,8 @@
 SamplerState g_nearestAnyClampSampler : register(s0);
 Texture2D g_inTex : register(t0);
 
-Vec3 main(Vec2 uv : TEXCOORD) : SV_TARGET0
+Vec3 main(VertOut input) : SV_TARGET0
 {
-	return unpackNormalFromGBuffer(g_inTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0)) / 2.0f + 0.5f;
+	return unpackNormalFromGBuffer(g_inTex.SampleLevel(g_nearestAnyClampSampler, input.m_uv, 0.0)) / 2.0f + 0.5f;
 }
 #pragma anki technique_end frag

+ 4 - 3
AnKi/Shaders/VisualizeHdrRenderTarget.ankiprog

@@ -3,8 +3,9 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
-#pragma anki technique_start vert
 #include <AnKi/Shaders/QuadVert.hlsl>
+
+#pragma anki technique_start vert
 #pragma anki technique_end vert
 
 #pragma anki technique_start frag
@@ -13,8 +14,8 @@
 SamplerState g_nearestAnyClampSampler : register(s0);
 Texture2D g_inTex : register(t0);
 
-Vec3 main(Vec2 uv : TEXCOORD) : SV_TARGET0
+Vec3 main(VertOut input) : SV_TARGET0
 {
-	return reinhardTonemap(g_inTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0).rgb);
+	return reinhardTonemap(g_inTex.SampleLevel(g_nearestAnyClampSampler, input.m_uv, 0.0).rgb);
 }
 #pragma anki technique_end frag

+ 4 - 3
AnKi/Shaders/VisualizeRenderTarget.ankiprog

@@ -3,8 +3,9 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
-#pragma anki technique_start vert
 #include <AnKi/Shaders/QuadVert.hlsl>
+
+#pragma anki technique_start vert
 #pragma anki technique_end vert
 
 #pragma anki technique_start frag
@@ -13,9 +14,9 @@
 SamplerState g_nearestAnyClampSampler : register(s0);
 Texture2D g_inTex : register(t0);
 
-Vec3 main(Vec2 uv : TEXCOORD) : SV_TARGET0
+Vec3 main(VertOut input) : SV_TARGET0
 {
-	const Vec4 rgba = g_inTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0);
+	const Vec4 rgba = g_inTex.SampleLevel(g_nearestAnyClampSampler, input.m_uv, 0.0);
 	return rgba.xyz;
 }
 #pragma anki technique_end frag

+ 4 - 3
AnKi/Shaders/VrsSriVisualizeRenderTarget.ankiprog

@@ -3,8 +3,9 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
-#pragma anki technique_start vert
 #include <AnKi/Shaders/QuadVert.hlsl>
+
+#pragma anki technique_start vert
 #pragma anki technique_end vert
 
 #pragma anki technique_start frag
@@ -13,9 +14,9 @@
 SamplerState g_nearestAnyClampSampler : register(s0);
 Texture2D<U32> g_inTex : register(t0);
 
-Vec3 main(Vec2 uv : TEXCOORD) : SV_TARGET0
+Vec3 main(VertOut input) : SV_TARGET0
 {
-	const U32 texel = g_inTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0);
+	const U32 texel = g_inTex.SampleLevel(g_nearestAnyClampSampler, input.m_uv, 0.0);
 	const UVec2 rate = decodeVrsRate(texel);
 	return visualizeVrsRate(rate);
 }

+ 4 - 1
AnKi/Util/Assert.cpp

@@ -42,7 +42,10 @@ void akassert(const char* exprTxt, const char* file, int line, const char* func)
 #	if ANKI_OS_WINDOWS
 	Array<Char, 512> msg;
 	snprintf(msg.getBegin(), msg.getSize(), "%s\n\n%s:%d %s", exprTxt, file, line, func);
-	MessageBoxA(nullptr, msg.getBegin(), "Assert failed", MB_OK | MB_ICONERROR);
+	if(!IsDebuggerPresent())
+	{
+		MessageBoxA(nullptr, msg.getBegin(), "Assert failed", MB_OK | MB_ICONERROR);
+	}
 #	endif
 	ANKI_DEBUG_BREAK();
 }

+ 10 - 1
AnKi/Util/Logger.cpp

@@ -91,7 +91,16 @@ void Logger::write(const Char* file, int line, const Char* func, const Char* sub
 
 	if(type == LoggerMessageType::kFatal)
 	{
-		abort();
+#if ANKI_OS_WINDOWS
+		if(!IsDebuggerPresent())
+		{
+			abort();
+		}
+		else
+#endif
+		{
+			ANKI_DEBUG_BREAK();
+		}
 	}
 }
 

+ 18 - 0
AnKi/Util/String.h

@@ -258,6 +258,24 @@ public:
 		return anki::computeHash(m_ptr, getLength());
 	}
 
+	void toWideChars(WChar* arr, U32 arrSize) const
+	{
+		checkInit();
+		const U32 len = getLength();
+		ANKI_ASSERT(arrSize >= len + 1);
+
+		if(len > 0)
+		{
+			PtrSize outSize;
+			[[maybe_unused]] const errno_t err = mbstowcs_s(&outSize, arr, arrSize, m_ptr, len);
+			ANKI_ASSERT(err == 0 && outSize == len + 1);
+		}
+		else
+		{
+			*arr = L'\0';
+		}
+	}
+
 private:
 	const Char* m_ptr = nullptr;
 

+ 6 - 0
AnKi/Util/Win32Minimal.h

@@ -129,6 +129,7 @@ ANKI_WINBASEAPI int ANKI_WINAPI MessageBoxA(HWND hWnd, LPCSTR lpText, LPCSTR lpC
 ANKI_WINBASEAPI DWORD ANKI_WINAPI FormatMessageA(DWORD dwFlags, LPCVOID lpSource, DWORD dwMessageId, DWORD dwLanguageId, LPSTR lpBuffer, DWORD nSize,
 												 va_list* Arguments);
 ANKI_WINBASEAPI HLOCAL ANKI_WINAPI LocalFree(HLOCAL hMem);
+ANKI_WINBASEAPI BOOL ANKI_WINAPI IsDebuggerPresent();
 
 #undef ANKI_WINBASEAPI
 #undef ANKI_DECLARE_HANDLE
@@ -486,4 +487,9 @@ inline HLOCAL LocalFree(HLOCAL hMem)
 	return ::LocalFree(hMem);
 }
 
+inline BOOL IsDebuggerPresent()
+{
+	return ::IsDebuggerPresent();
+}
+
 } // end namespace anki

+ 4 - 2
Tests/Gr/Gr.cpp

@@ -323,7 +323,7 @@ SamplerState g_sampler : register(s2);
 [numthreads(1, 1, 1)]
 void main()
 {
-	g_rwstructured[0] = g_structured[0];
+	g_rwstructured[0] = g_structured[0] + g_structured[1];
 
 	g_rwtex[0][uint2(0, 0)] = g_consts.m_val;
 
@@ -358,6 +358,7 @@ void main()
 		const Vec4 kInvalidVec(1.0f, 2.0f, 3.0f, 4.0f);
 
 		buffInit.m_usage = BufferUsageBit::kAllStorage;
+		buffInit.m_size = sizeof(kMagicVec) * 2;
 		BufferPtr structured = createBuffer(buffInit, kMagicVec, "structured");
 
 		texInit.m_usage = TextureUsageBit::kSampledCompute | TextureUsageBit::kTransferDestination;
@@ -367,6 +368,7 @@ void main()
 		BufferPtr buff = createBuffer(buffInit, kMagicVec * 2.0f, "buff");
 
 		buffInit.m_usage = BufferUsageBit::kAllStorage;
+		buffInit.m_size = sizeof(kInvalidVec);
 		BufferPtr rwstructured = createBuffer(buffInit, kInvalidVec, "rwstructured");
 
 		buffInit.m_usage = BufferUsageBit::kAllTexel;
@@ -415,7 +417,7 @@ void main()
 		signalFence->clientWait(kMaxSecond);
 
 		// Check
-		validateBuffer(rwstructured, kMagicVec);
+		validateBuffer(rwstructured, kMagicVec + kMagicVec);
 		validateBuffer(rwbuff, kMagicVec * 2.0f);
 	}