Panagiotis Christopoulos Charitos 4 лет назад
Родитель
Сommit
e92bbb5c03

+ 8 - 0
AnKi/Math/Vec.h

@@ -96,6 +96,14 @@ public:
 		m_simd = _mm_load_ps(arr);
 		m_simd = _mm_load_ps(arr);
 	}
 	}
 
 
+	explicit TVec(const Array<T, N>& arr)
+	{
+		for(U i = 0; i < N; ++i)
+		{
+			m_arr[i] = arr[i];
+		}
+	}
+
 	explicit TVec(const Simd& simd)
 	explicit TVec(const Simd& simd)
 	{
 	{
 		m_simd = simd;
 		m_simd = simd;

+ 86 - 67
AnKi/Renderer/ShadowMapping.cpp

@@ -9,6 +9,7 @@
 #include <AnKi/Core/ConfigSet.h>
 #include <AnKi/Core/ConfigSet.h>
 #include <AnKi/Util/ThreadHive.h>
 #include <AnKi/Util/ThreadHive.h>
 #include <AnKi/Util/Tracer.h>
 #include <AnKi/Util/Tracer.h>
+#include <AnKi/Shaders/Include/ShadowMappingTypes.h>
 
 
 namespace anki
 namespace anki
 {
 {
@@ -16,7 +17,7 @@ namespace anki
 class ShadowMapping::Scratch::WorkItem
 class ShadowMapping::Scratch::WorkItem
 {
 {
 public:
 public:
-	Array<U32, 4> m_viewport;
+	UVec4 m_viewport;
 	RenderQueue* m_renderQueue;
 	RenderQueue* m_renderQueue;
 	U32 m_firstRenderableElement;
 	U32 m_firstRenderableElement;
 	U32 m_renderableElementCount;
 	U32 m_renderableElementCount;
@@ -27,7 +28,7 @@ public:
 class ShadowMapping::Scratch::LightToRenderToScratchInfo
 class ShadowMapping::Scratch::LightToRenderToScratchInfo
 {
 {
 public:
 public:
-	Array<U32, 4> m_viewport;
+	UVec4 m_viewport;
 	RenderQueue* m_renderQueue;
 	RenderQueue* m_renderQueue;
 	U32 m_drawcallCount;
 	U32 m_drawcallCount;
 	U32 m_renderQueueElementsLod;
 	U32 m_renderQueueElementsLod;
@@ -36,8 +37,9 @@ public:
 class ShadowMapping::Atlas::ResolveWorkItem
 class ShadowMapping::Atlas::ResolveWorkItem
 {
 {
 public:
 public:
+	Vec4 m_uvInBounds; ///< Bounds used to avoid blurring neighbour tiles.
 	Vec4 m_uvIn; ///< UV + size that point to the scratch buffer.
 	Vec4 m_uvIn; ///< UV + size that point to the scratch buffer.
-	Array<U32, 4> m_viewportOut; ///< Viewport in the atlas RT.
+	UVec4 m_viewportOut; ///< Viewport in the atlas RT.
 	Bool m_blur;
 	Bool m_blur;
 };
 };
 
 
@@ -147,36 +149,38 @@ void ShadowMapping::runAtlas(RenderPassWorkContext& rgraphCtx)
 
 
 	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
 	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
 
 
-	cmdb->bindShaderProgram(m_atlas.m_resolveGrProg);
+	// Allocate and populate uniforms
+	ShadowMappingUniforms* uniforms = allocateAndBindStorage<ShadowMappingUniforms*>(
+		m_atlas.m_resolveWorkItems.getSize() * sizeof(ShadowMappingUniforms), cmdb, 0, 0);
+	for(U32 i = 0; i < m_atlas.m_resolveWorkItems.getSize(); ++i)
+	{
+		ShadowMappingUniforms& uni = uniforms[i];
+		const Atlas::ResolveWorkItem& workItem = m_atlas.m_resolveWorkItems[i];
 
 
-	cmdb->bindSampler(0, 0, m_r->getSamplers().m_trilinearClamp);
-	rgraphCtx.bindTexture(0, 1, m_scratch.m_rt, TextureSubresourceInfo(DepthStencilAspectBit::DEPTH));
-	rgraphCtx.bindImage(0, 2, m_atlas.m_rt, {});
+		uni.m_viewportXY = IVec2(workItem.m_viewportOut.xy());
+		uni.m_viewportZW = Vec2(workItem.m_viewportOut.zw());
 
 
-	for(const Atlas::ResolveWorkItem& workItem : m_atlas.m_resolveWorkItems)
-	{
-		ANKI_TRACE_INC_COUNTER(R_SHADOW_PASSES, 1);
+		uni.m_uvScale = workItem.m_uvIn.zw();
+		uni.m_uvTranslation = workItem.m_uvIn.xy();
 
 
-		struct Uniforms
-		{
-			UVec4 m_viewport;
-			Vec2 m_uvScale;
-			Vec2 m_uvTranslation;
-			U32 m_blur;
-			U32 m_padding0;
-			U32 m_padding1;
-			U32 m_padding2;
-		} unis;
-		unis.m_uvScale = workItem.m_uvIn.zw();
-		unis.m_uvTranslation = workItem.m_uvIn.xy();
-		unis.m_viewport = UVec4(workItem.m_viewportOut[0], workItem.m_viewportOut[1], workItem.m_viewportOut[2],
-								workItem.m_viewportOut[3]);
-		unis.m_blur = workItem.m_blur;
-
-		cmdb->setPushConstants(&unis, sizeof(unis));
-
-		dispatchPPCompute(cmdb, 8, 8, workItem.m_viewportOut[2], workItem.m_viewportOut[3]);
+		uni.m_uvMin = workItem.m_uvInBounds.xy();
+		uni.m_uvMax = workItem.m_uvInBounds.xy() + workItem.m_uvInBounds.zw();
+
+		uni.m_blur = workItem.m_blur;
 	}
 	}
+
+	cmdb->bindShaderProgram(m_atlas.m_resolveGrProg);
+
+	// Continue
+	cmdb->bindSampler(0, 1, m_r->getSamplers().m_trilinearClamp);
+	rgraphCtx.bindTexture(0, 2, m_scratch.m_rt, TextureSubresourceInfo(DepthStencilAspectBit::DEPTH));
+	rgraphCtx.bindImage(0, 3, m_atlas.m_rt);
+
+	constexpr U32 workgroupSize = 8;
+	ANKI_ASSERT(m_atlas.m_tileResolution >= workgroupSize && (m_atlas.m_tileResolution % workgroupSize) == 0);
+
+	cmdb->dispatchCompute(m_atlas.m_tileResolution / workgroupSize, m_atlas.m_tileResolution / workgroupSize,
+						  m_atlas.m_resolveWorkItems.getSize());
 }
 }
 
 
 void ShadowMapping::runShadowMapping(RenderPassWorkContext& rgraphCtx)
 void ShadowMapping::runShadowMapping(RenderPassWorkContext& rgraphCtx)
@@ -262,7 +266,7 @@ void ShadowMapping::populateRenderGraph(RenderingContext& ctx)
 	}
 	}
 }
 }
 
 
-Mat4 ShadowMapping::createSpotLightTextureMatrix(const Viewport& viewport) const
+Mat4 ShadowMapping::createSpotLightTextureMatrix(const UVec4& viewport) const
 {
 {
 	const F32 atlasSize = F32(m_atlas.m_tileResolution * m_atlas.m_tileCountBothAxis);
 	const F32 atlasSize = F32(m_atlas.m_tileResolution * m_atlas.m_tileCountBothAxis);
 #if ANKI_COMPILER_GCC_COMPATIBLE
 #if ANKI_COMPILER_GCC_COMPATIBLE
@@ -337,8 +341,8 @@ void ShadowMapping::chooseLod(const Vec4& cameraOrigin, const SpotLightQueueElem
 
 
 TileAllocatorResult ShadowMapping::allocateTilesAndScratchTiles(U64 lightUuid, U32 faceCount, const U64* faceTimestamps,
 TileAllocatorResult ShadowMapping::allocateTilesAndScratchTiles(U64 lightUuid, U32 faceCount, const U64* faceTimestamps,
 																const U32* faceIndices, const U32* drawcallsCount,
 																const U32* faceIndices, const U32* drawcallsCount,
-																const U32* lods, Viewport* atlasTileViewports,
-																Viewport* scratchTileViewports,
+																const U32* lods, UVec4* atlasTileViewports,
+																UVec4* scratchTileViewports,
 																TileAllocatorResult* subResults)
 																TileAllocatorResult* subResults)
 {
 {
 	ANKI_ASSERT(lightUuid > 0);
 	ANKI_ASSERT(lightUuid > 0);
@@ -353,8 +357,9 @@ TileAllocatorResult ShadowMapping::allocateTilesAndScratchTiles(U64 lightUuid, U
 	// Allocate atlas tiles first. They may be cached and that will affect how many scratch tiles we'll need
 	// Allocate atlas tiles first. They may be cached and that will affect how many scratch tiles we'll need
 	for(U i = 0; i < faceCount; ++i)
 	for(U i = 0; i < faceCount; ++i)
 	{
 	{
+		Array<U32, 4> tileRanges;
 		res = m_atlas.m_tileAlloc.allocate(m_r->getGlobalTimestamp(), faceTimestamps[i], lightUuid, faceIndices[i],
 		res = m_atlas.m_tileAlloc.allocate(m_r->getGlobalTimestamp(), faceTimestamps[i], lightUuid, faceIndices[i],
-										   drawcallsCount[i], lods[i], atlasTileViewports[i]);
+										   drawcallsCount[i], lods[i], tileRanges);
 
 
 		if(res == TileAllocatorResult::ALLOCATION_FAILED)
 		if(res == TileAllocatorResult::ALLOCATION_FAILED)
 		{
 		{
@@ -372,11 +377,8 @@ TileAllocatorResult ShadowMapping::allocateTilesAndScratchTiles(U64 lightUuid, U
 
 
 		subResults[i] = res;
 		subResults[i] = res;
 
 
-		// Fix viewport
-		atlasTileViewports[i][0] *= m_atlas.m_tileResolution;
-		atlasTileViewports[i][1] *= m_atlas.m_tileResolution;
-		atlasTileViewports[i][2] *= m_atlas.m_tileResolution;
-		atlasTileViewports[i][3] *= m_atlas.m_tileResolution;
+		// Set viewport
+		atlasTileViewports[i] = UVec4(tileRanges) * m_atlas.m_tileResolution;
 	}
 	}
 
 
 	// Allocate scratch tiles
 	// Allocate scratch tiles
@@ -389,8 +391,9 @@ TileAllocatorResult ShadowMapping::allocateTilesAndScratchTiles(U64 lightUuid, U
 
 
 		ANKI_ASSERT(subResults[i] == TileAllocatorResult::ALLOCATION_SUCCEEDED);
 		ANKI_ASSERT(subResults[i] == TileAllocatorResult::ALLOCATION_SUCCEEDED);
 
 
+		Array<U32, 4> tileRanges;
 		res = m_scratch.m_tileAlloc.allocate(m_r->getGlobalTimestamp(), faceTimestamps[i], lightUuid, faceIndices[i],
 		res = m_scratch.m_tileAlloc.allocate(m_r->getGlobalTimestamp(), faceTimestamps[i], lightUuid, faceIndices[i],
-											 drawcallsCount[i], lods[i], scratchTileViewports[i]);
+											 drawcallsCount[i], lods[i], tileRanges);
 
 
 		if(res == TileAllocatorResult::ALLOCATION_FAILED)
 		if(res == TileAllocatorResult::ALLOCATION_FAILED)
 		{
 		{
@@ -407,10 +410,7 @@ TileAllocatorResult ShadowMapping::allocateTilesAndScratchTiles(U64 lightUuid, U
 		}
 		}
 
 
 		// Fix viewport
 		// Fix viewport
-		scratchTileViewports[i][0] *= m_scratch.m_tileResolution;
-		scratchTileViewports[i][1] *= m_scratch.m_tileResolution;
-		scratchTileViewports[i][2] *= m_scratch.m_tileResolution;
-		scratchTileViewports[i][3] *= m_scratch.m_tileResolution;
+		scratchTileViewports[i] = UVec4(tileRanges) * m_scratch.m_tileResolution;
 
 
 		// Update the max view width
 		// Update the max view width
 		m_scratch.m_maxViewportWidth =
 		m_scratch.m_maxViewportWidth =
@@ -435,10 +435,13 @@ void ShadowMapping::processLights(RenderingContext& ctx, U32& threadCountForScra
 	DynamicArrayAuto<Atlas::ResolveWorkItem> atlasWorkItems(ctx.m_tempAllocator);
 	DynamicArrayAuto<Atlas::ResolveWorkItem> atlasWorkItems(ctx.m_tempAllocator);
 
 
 	// First thing, allocate an empty tile for empty faces of point lights
 	// First thing, allocate an empty tile for empty faces of point lights
-	Viewport emptyTileViewport;
+	UVec4 emptyTileViewport;
 	{
 	{
-		const TileAllocatorResult res = m_atlas.m_tileAlloc.allocate(m_r->getGlobalTimestamp(), 1, MAX_U64, 0, 1,
-																	 m_pointLightsMaxLod, emptyTileViewport);
+		Array<U32, 4> tileRange;
+		const TileAllocatorResult res =
+			m_atlas.m_tileAlloc.allocate(m_r->getGlobalTimestamp(), 1, MAX_U64, 0, 1, m_pointLightsMaxLod, tileRange);
+
+		emptyTileViewport = UVec4(tileRange);
 
 
 		(void)res;
 		(void)res;
 #if ANKI_ENABLE_ASSERTIONS
 #if ANKI_ENABLE_ASSERTIONS
@@ -463,8 +466,8 @@ void ShadowMapping::processLights(RenderingContext& ctx, U32& threadCountForScra
 		Array<U64, MAX_SHADOW_CASCADES2> timestamps;
 		Array<U64, MAX_SHADOW_CASCADES2> timestamps;
 		Array<U32, MAX_SHADOW_CASCADES2> cascadeIndices;
 		Array<U32, MAX_SHADOW_CASCADES2> cascadeIndices;
 		Array<U32, MAX_SHADOW_CASCADES2> drawcallCounts;
 		Array<U32, MAX_SHADOW_CASCADES2> drawcallCounts;
-		Array<Viewport, MAX_SHADOW_CASCADES2> atlasViewports;
-		Array<Viewport, MAX_SHADOW_CASCADES2> scratchViewports;
+		Array<UVec4, MAX_SHADOW_CASCADES2> atlasViewports;
+		Array<UVec4, MAX_SHADOW_CASCADES2> scratchViewports;
 		Array<TileAllocatorResult, MAX_SHADOW_CASCADES2> subResults;
 		Array<TileAllocatorResult, MAX_SHADOW_CASCADES2> subResults;
 		Array<U32, MAX_SHADOW_CASCADES2> lods;
 		Array<U32, MAX_SHADOW_CASCADES2> lods;
 		Array<U32, MAX_SHADOW_CASCADES2> renderQueueElementsLods;
 		Array<U32, MAX_SHADOW_CASCADES2> renderQueueElementsLods;
@@ -550,8 +553,8 @@ void ShadowMapping::processLights(RenderingContext& ctx, U32& threadCountForScra
 		Array<U64, 6> timestamps;
 		Array<U64, 6> timestamps;
 		Array<U32, 6> faceIndices;
 		Array<U32, 6> faceIndices;
 		Array<U32, 6> drawcallCounts;
 		Array<U32, 6> drawcallCounts;
-		Array<Viewport, 6> atlasViewports;
-		Array<Viewport, 6> scratchViewports;
+		Array<UVec4, 6> atlasViewports;
+		Array<UVec4, 6> scratchViewports;
 		Array<TileAllocatorResult, 6> subResults;
 		Array<TileAllocatorResult, 6> subResults;
 		Array<U32, 6> lods;
 		Array<U32, 6> lods;
 		U32 numOfFacesThatHaveDrawcalls = 0;
 		U32 numOfFacesThatHaveDrawcalls = 0;
@@ -603,8 +606,8 @@ void ShadowMapping::processLights(RenderingContext& ctx, U32& threadCountForScra
 				{
 				{
 					// Has drawcalls, asigned it to a tile
 					// Has drawcalls, asigned it to a tile
 
 
-					const Viewport& atlasViewport = atlasViewports[numOfFacesThatHaveDrawcalls];
-					const Viewport& scratchViewport = scratchViewports[numOfFacesThatHaveDrawcalls];
+					const UVec4& atlasViewport = atlasViewports[numOfFacesThatHaveDrawcalls];
+					const UVec4& scratchViewport = scratchViewports[numOfFacesThatHaveDrawcalls];
 
 
 					// Add a half texel to the viewport's start to avoid bilinear filtering bleeding
 					// Add a half texel to the viewport's start to avoid bilinear filtering bleeding
 					light.m_shadowAtlasTileOffsets[face].x() = (F32(atlasViewport[0]) + 0.5f) / atlasResolution;
 					light.m_shadowAtlasTileOffsets[face].x() = (F32(atlasViewport[0]) + 0.5f) / atlasResolution;
@@ -622,7 +625,7 @@ void ShadowMapping::processLights(RenderingContext& ctx, U32& threadCountForScra
 				else
 				else
 				{
 				{
 					// Doesn't have renderables, point the face to the empty tile
 					// Doesn't have renderables, point the face to the empty tile
-					Viewport atlasViewport = emptyTileViewport;
+					UVec4 atlasViewport = emptyTileViewport;
 					ANKI_ASSERT(F32(atlasViewport[2]) <= superTileSize && F32(atlasViewport[3]) <= superTileSize);
 					ANKI_ASSERT(F32(atlasViewport[2]) <= superTileSize && F32(atlasViewport[3]) <= superTileSize);
 					atlasViewport[2] = U32(superTileSize);
 					atlasViewport[2] = U32(superTileSize);
 					atlasViewport[3] = U32(superTileSize);
 					atlasViewport[3] = U32(superTileSize);
@@ -650,8 +653,8 @@ void ShadowMapping::processLights(RenderingContext& ctx, U32& threadCountForScra
 		// Allocate tiles
 		// Allocate tiles
 		U32 faceIdx = 0;
 		U32 faceIdx = 0;
 		TileAllocatorResult subResult;
 		TileAllocatorResult subResult;
-		Viewport atlasViewport;
-		Viewport scratchViewport;
+		UVec4 atlasViewport;
+		UVec4 scratchViewport;
 		const U32 localDrawcallCount = light.m_shadowRenderQueue->m_renderables.getSize();
 		const U32 localDrawcallCount = light.m_shadowRenderQueue->m_renderables.getSize();
 
 
 		Bool blurAtlas;
 		Bool blurAtlas;
@@ -762,7 +765,7 @@ void ShadowMapping::processLights(RenderingContext& ctx, U32& threadCountForScra
 }
 }
 
 
 void ShadowMapping::newScratchAndAtlasResloveRenderWorkItems(
 void ShadowMapping::newScratchAndAtlasResloveRenderWorkItems(
-	const Viewport& atlasViewport, const Viewport& scratchVewport, Bool blurAtlas, RenderQueue* lightRenderQueue,
+	const UVec4& atlasViewport, const UVec4& scratchVewport, Bool blurAtlas, RenderQueue* lightRenderQueue,
 	U32 renderQueueElementsLod, DynamicArrayAuto<Scratch::LightToRenderToScratchInfo>& scratchWorkItem,
 	U32 renderQueueElementsLod, DynamicArrayAuto<Scratch::LightToRenderToScratchInfo>& scratchWorkItem,
 	DynamicArrayAuto<Atlas::ResolveWorkItem>& atlasResolveWorkItem, U32& drawcallCount) const
 	DynamicArrayAuto<Atlas::ResolveWorkItem>& atlasResolveWorkItem, U32& drawcallCount) const
 {
 {
@@ -778,21 +781,37 @@ void ShadowMapping::newScratchAndAtlasResloveRenderWorkItems(
 		drawcallCount += lightRenderQueue->m_renderables.getSize();
 		drawcallCount += lightRenderQueue->m_renderables.getSize();
 	}
 	}
 
 
-	// Atlas resolve work item
+	// Atlas resolve work items
+	const U32 tilesX = scratchVewport[2] / m_scratch.m_tileResolution;
+	const U32 tilesY = scratchVewport[3] / m_scratch.m_tileResolution;
+	for(U32 x = 0; x < tilesX; ++x)
 	{
 	{
-		const F32 scratchAtlasWidth = F32(m_scratch.m_tileCountX * m_scratch.m_tileResolution);
-		const F32 scratchAtlasHeight = F32(m_scratch.m_tileCountY * m_scratch.m_tileResolution);
+		for(U32 y = 0; y < tilesY; ++y)
+		{
+			const F32 scratchAtlasWidth = F32(m_scratch.m_tileCountX * m_scratch.m_tileResolution);
+			const F32 scratchAtlasHeight = F32(m_scratch.m_tileCountY * m_scratch.m_tileResolution);
 
 
-		Atlas::ResolveWorkItem atlasItem;
-		atlasItem.m_uvIn[0] = F32(scratchVewport[0]) / scratchAtlasWidth;
-		atlasItem.m_uvIn[1] = F32(scratchVewport[1]) / scratchAtlasHeight;
-		atlasItem.m_uvIn[2] = F32(scratchVewport[2]) / scratchAtlasWidth;
-		atlasItem.m_uvIn[3] = F32(scratchVewport[3]) / scratchAtlasHeight;
+			Atlas::ResolveWorkItem atlasItem;
 
 
-		atlasItem.m_viewportOut = atlasViewport;
-		atlasItem.m_blur = blurAtlas;
+			atlasItem.m_uvInBounds[0] = F32(scratchVewport[0]) / scratchAtlasWidth;
+			atlasItem.m_uvInBounds[1] = F32(scratchVewport[1]) / scratchAtlasHeight;
+			atlasItem.m_uvInBounds[2] = F32(scratchVewport[2]) / scratchAtlasWidth;
+			atlasItem.m_uvInBounds[3] = F32(scratchVewport[3]) / scratchAtlasHeight;
 
 
-		atlasResolveWorkItem.emplaceBack(atlasItem);
+			atlasItem.m_uvIn[0] = F32(scratchVewport[0] + scratchVewport[2] / tilesX * x) / scratchAtlasWidth;
+			atlasItem.m_uvIn[1] = F32(scratchVewport[1] + scratchVewport[3] / tilesY * y) / scratchAtlasHeight;
+			atlasItem.m_uvIn[2] = F32(scratchVewport[2] / tilesX) / scratchAtlasWidth;
+			atlasItem.m_uvIn[3] = F32(scratchVewport[3] / tilesY) / scratchAtlasHeight;
+
+			atlasItem.m_viewportOut[0] = atlasViewport[0] + atlasViewport[2] / tilesX * x;
+			atlasItem.m_viewportOut[1] = atlasViewport[1] + atlasViewport[3] / tilesY * y;
+			atlasItem.m_viewportOut[2] = atlasViewport[2] / tilesX;
+			atlasItem.m_viewportOut[3] = atlasViewport[3] / tilesY;
+
+			atlasItem.m_blur = blurAtlas;
+
+			atlasResolveWorkItem.emplaceBack(atlasItem);
+		}
 	}
 	}
 }
 }
 
 

+ 3 - 5
AnKi/Renderer/ShadowMapping.h

@@ -38,8 +38,6 @@ public:
 	}
 	}
 
 
 private:
 private:
-	using Viewport = Array<U32, 4>;
-
 	/// @name Atlas stuff
 	/// @name Atlas stuff
 	/// @{
 	/// @{
 
 
@@ -64,7 +62,7 @@ private:
 
 
 	ANKI_USE_RESULT Error initAtlas(const ConfigSet& cfg);
 	ANKI_USE_RESULT Error initAtlas(const ConfigSet& cfg);
 
 
-	inline Mat4 createSpotLightTextureMatrix(const Viewport& viewport) const;
+	inline Mat4 createSpotLightTextureMatrix(const UVec4& viewport) const;
 
 
 	void runAtlas(RenderPassWorkContext& rgraphCtx);
 	void runAtlas(RenderPassWorkContext& rgraphCtx);
 	/// @}
 	/// @}
@@ -114,12 +112,12 @@ private:
 	/// Try to allocate a number of scratch tiles and regular tiles.
 	/// Try to allocate a number of scratch tiles and regular tiles.
 	TileAllocatorResult allocateTilesAndScratchTiles(U64 lightUuid, U32 faceCount, const U64* faceTimestamps,
 	TileAllocatorResult allocateTilesAndScratchTiles(U64 lightUuid, U32 faceCount, const U64* faceTimestamps,
 													 const U32* faceIndices, const U32* drawcallsCount, const U32* lods,
 													 const U32* faceIndices, const U32* drawcallsCount, const U32* lods,
-													 Viewport* atlasTileViewports, Viewport* scratchTileViewports,
+													 UVec4* atlasTileViewports, UVec4* scratchTileViewports,
 													 TileAllocatorResult* subResults);
 													 TileAllocatorResult* subResults);
 
 
 	/// Add new work to render to scratch buffer and atlas buffer.
 	/// Add new work to render to scratch buffer and atlas buffer.
 	void newScratchAndAtlasResloveRenderWorkItems(
 	void newScratchAndAtlasResloveRenderWorkItems(
-		const Viewport& atlasViewport, const Viewport& scratchVewport, Bool blurAtlas, RenderQueue* lightRenderQueue,
+		const UVec4& atlasViewport, const UVec4& scratchVewport, Bool blurAtlas, RenderQueue* lightRenderQueue,
 		U32 renderQueueElementsLod, DynamicArrayAuto<Scratch::LightToRenderToScratchInfo>& scratchWorkItem,
 		U32 renderQueueElementsLod, DynamicArrayAuto<Scratch::LightToRenderToScratchInfo>& scratchWorkItem,
 		DynamicArrayAuto<Atlas::ResolveWorkItem>& atlasResolveWorkItem, U32& drawcallCount) const;
 		DynamicArrayAuto<Atlas::ResolveWorkItem>& atlasResolveWorkItem, U32& drawcallCount) const;
 
 

+ 14 - 28
AnKi/Shaders/ExponentialShadowmappingResolve.ankiprog

@@ -8,31 +8,21 @@ ANKI_SPECIALIZATION_CONSTANT_UVEC2(INPUT_TEXTURE_SIZE, 0u);
 #pragma anki start comp
 #pragma anki start comp
 #include <AnKi/Shaders/GaussianBlurCommon.glsl>
 #include <AnKi/Shaders/GaussianBlurCommon.glsl>
 #include <AnKi/Shaders/LightFunctions.glsl>
 #include <AnKi/Shaders/LightFunctions.glsl>
+#include <AnKi/Shaders/Include/ShadowMappingTypes.h>
 
 
-layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+layout(local_size_x = 8, local_size_y = 8) in;
 
 
 const F32 OFFSET = 1.25;
 const F32 OFFSET = 1.25;
 
 
-struct Uniforms
+layout(set = 0, binding = 0, std430) readonly buffer b_unis
 {
 {
-	UVec4 m_viewport;
-	Vec2 m_uvScale;
-	Vec2 m_uvTranslation;
-	U32 m_blur;
-	U32 m_padding0;
-	U32 m_padding1;
-	U32 m_padding2;
+	ShadowMappingUniforms u_uniforms[];
 };
 };
 
 
-layout(push_constant, std430) uniform pc_
-{
-	Uniforms u_uniforms;
-};
-
-layout(set = 0, binding = 0) uniform sampler u_linearAnyClampSampler;
-layout(set = 0, binding = 1) uniform texture2D u_inputTex;
+layout(set = 0, binding = 1) uniform sampler u_linearAnyClampSampler;
+layout(set = 0, binding = 2) uniform texture2D u_inputTex;
 
 
-layout(set = 0, binding = 2) uniform writeonly image2D u_outImg;
+layout(set = 0, binding = 3) uniform writeonly image2D u_outImg;
 
 
 Vec4 computeMoments(Vec2 uv)
 Vec4 computeMoments(Vec2 uv)
 {
 {
@@ -43,21 +33,17 @@ Vec4 computeMoments(Vec2 uv)
 
 
 void main()
 void main()
 {
 {
-	if(gl_GlobalInvocationID.x >= u_uniforms.m_viewport.z || gl_GlobalInvocationID.y >= u_uniforms.m_viewport.w)
-	{
-		// Skip if it's out of bounds
-		return;
-	}
+	const ShadowMappingUniforms uni = u_uniforms[gl_GlobalInvocationID.z];
 
 
 	// Compute the read UV
 	// Compute the read UV
-	Vec2 uv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(u_uniforms.m_viewport.zw);
-	uv = uv * u_uniforms.m_uvScale + u_uniforms.m_uvTranslation;
+	Vec2 uv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / uni.m_viewportZW;
+	uv = uv * uni.m_uvScale + uni.m_uvTranslation;
 
 
 	// Compute the UV limits. We can't sample beyond those
 	// Compute the UV limits. We can't sample beyond those
 	const Vec2 TEXEL_SIZE = 1.0 / Vec2(INPUT_TEXTURE_SIZE);
 	const Vec2 TEXEL_SIZE = 1.0 / Vec2(INPUT_TEXTURE_SIZE);
 	const Vec2 HALF_TEXEL_SIZE = TEXEL_SIZE / 2.0;
 	const Vec2 HALF_TEXEL_SIZE = TEXEL_SIZE / 2.0;
-	const Vec2 maxUv = (Vec2(1.0) * u_uniforms.m_uvScale + u_uniforms.m_uvTranslation) - HALF_TEXEL_SIZE;
-	const Vec2 minUv = (Vec2(0.0) * u_uniforms.m_uvScale + u_uniforms.m_uvTranslation) + HALF_TEXEL_SIZE;
+	const Vec2 maxUv = uni.m_uvMax - HALF_TEXEL_SIZE;
+	const Vec2 minUv = uni.m_uvMin + HALF_TEXEL_SIZE;
 
 
 	// Sample
 	// Sample
 	const Vec2 UV_OFFSET = OFFSET * TEXEL_SIZE;
 	const Vec2 UV_OFFSET = OFFSET * TEXEL_SIZE;
@@ -65,7 +51,7 @@ void main()
 	const F32 w1 = BOX_WEIGHTS[1u];
 	const F32 w1 = BOX_WEIGHTS[1u];
 	const F32 w2 = BOX_WEIGHTS[2u];
 	const F32 w2 = BOX_WEIGHTS[2u];
 	Vec4 moments;
 	Vec4 moments;
-	if(u_uniforms.m_blur != 0u)
+	if(uni.m_blur != 0u)
 	{
 	{
 		moments = computeMoments(uv) * w0;
 		moments = computeMoments(uv) * w0;
 		moments += computeMoments(clamp(uv + Vec2(UV_OFFSET.x, 0.0), minUv, maxUv)) * w1;
 		moments += computeMoments(clamp(uv + Vec2(UV_OFFSET.x, 0.0), minUv, maxUv)) * w1;
@@ -88,6 +74,6 @@ void main()
 #else
 #else
 	const Vec4 outColor = Vec4(moments.xy, 0.0, 0.0);
 	const Vec4 outColor = Vec4(moments.xy, 0.0, 0.0);
 #endif
 #endif
-	imageStore(u_outImg, IVec2(gl_GlobalInvocationID.xy) + IVec2(u_uniforms.m_viewport.xy), outColor);
+	imageStore(u_outImg, IVec2(gl_GlobalInvocationID.xy) + uni.m_viewportXY, outColor);
 }
 }
 #pragma anki end
 #pragma anki end

+ 26 - 0
AnKi/Shaders/Include/ShadowMappingTypes.h

@@ -0,0 +1,26 @@
+// Copyright (C) 2009-2021, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma once
+
+#include <AnKi/Shaders/Include/Common.h>
+
+ANKI_BEGIN_NAMESPACE
+
+struct ShadowMappingUniforms
+{
+	IVec2 m_viewportXY;
+	Vec2 m_viewportZW;
+	Vec2 m_uvScale;
+	Vec2 m_uvTranslation;
+	Vec2 m_uvMin;
+	Vec2 m_uvMax;
+	U32 m_blur;
+	U32 m_padding0;
+	U32 m_padding1;
+	U32 m_padding2;
+};
+
+ANKI_END_NAMESPACE

+ 18 - 14
Tools/Android/GenerateAndroidProject.py

@@ -54,28 +54,32 @@ def main():
     """ The main """
     """ The main """
 
 
     ctx = parse_commandline()
     ctx = parse_commandline()
+    this_script_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
 
 
     # Copy dir
     # Copy dir
     project_dir = os.path.join(ctx.out_dir, "AndroidProject_%s" % ctx.target)
     project_dir = os.path.join(ctx.out_dir, "AndroidProject_%s" % ctx.target)
-    if os.path.isdir(project_dir):
-        raise Exception("Directory already exists: %s" % project_dir)
-
-    this_script_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
-    shutil.copytree(this_script_dir, project_dir)
+    if not os.path.isdir(project_dir):
+        shutil.copytree(this_script_dir, project_dir)
 
 
     # RM the script
     # RM the script
-    os.remove(os.path.join(project_dir, "GenerateAndroidProject.py"))
+    try:
+        os.remove(os.path.join(project_dir, "GenerateAndroidProject.py"))
+    except OSError:
+        pass
 
 
     # Create the assets dir structure
     # Create the assets dir structure
     assets_dir = os.path.join(project_dir, "assets")
     assets_dir = os.path.join(project_dir, "assets")
-    os.mkdir(assets_dir)
-    os.mkdir(os.path.join(project_dir, "assets/AnKi/"))
-    os.symlink(os.path.join(this_script_dir, "../../AnKi/Shaders"), os.path.join(project_dir, "assets/AnKi/Shaders"))
-    os.symlink(os.path.join(this_script_dir, "../../EngineAssets"), os.path.join(project_dir, "assets/EngineAssets"))
-    os.mkdir(os.path.join(project_dir, "assets/ThirdParty/"))
-    os.symlink(os.path.join(this_script_dir, "../../ThirdParty/Fsr"), os.path.join(project_dir,
-                                                                                   "assets/ThirdParty/Fsr"))
-    os.symlink(ctx.asserts_dir, os.path.join(project_dir, "assets/Assets"))
+    if not os.path.isdir(assets_dir):
+        os.mkdir(assets_dir)
+        os.mkdir(os.path.join(project_dir, "assets/AnKi/"))
+        os.symlink(os.path.join(this_script_dir, "../../AnKi/Shaders"),
+                   os.path.join(project_dir, "assets/AnKi/Shaders"))
+        os.symlink(os.path.join(this_script_dir, "../../EngineAssets"),
+                   os.path.join(project_dir, "assets/EngineAssets"))
+        os.mkdir(os.path.join(project_dir, "assets/ThirdParty/"))
+        os.symlink(os.path.join(this_script_dir, "../../ThirdParty/Fsr"),
+                   os.path.join(project_dir, "assets/ThirdParty/Fsr"))
+        os.symlink(ctx.asserts_dir, os.path.join(project_dir, "assets/Assets"))
 
 
     # Write the asset directory structure to a file
     # Write the asset directory structure to a file
     dir_structure_file = open(os.path.join(assets_dir, "DirStructure.txt"), "w", newline="\n")
     dir_structure_file = open(os.path.join(assets_dir, "DirStructure.txt"), "w", newline="\n")