浏览代码

Letting pixel shaders select their own permutations before rendering each triangle.

David Piuva 2 年之前
父节点
当前提交
d901a326eb

+ 1 - 97
Source/DFPSR/render/renderCore.cpp

@@ -30,10 +30,6 @@
 
 
 using namespace dsr;
 using namespace dsr;
 
 
-//#define DISABLE_VERTEX_COLOR
-//#define DISABLE_DIFFUSE_MAP
-//#define DISABLE_LIGHT_MAP
-
 class SubVertex {
 class SubVertex {
 public:
 public:
 	FVector3D cs; // Camera space position based on the weights
 	FVector3D cs; // Camera space position based on the weights
@@ -201,26 +197,6 @@ Visibility dsr::getTriangleVisibility(const ITriangle2D &triangle, const Camera
 	return Visibility::Full;
 	return Visibility::Full;
 }
 }
 
 
-static bool almostZero(float value) {
-	return value > -0.001f && value < 0.001f;
-}
-
-static bool almostZero(const FVector3D &channel) {
-	return almostZero(channel.x) && almostZero(channel.y) && almostZero(channel.z);
-}
-
-static bool almostOne(float value) {
-	return value > 0.999f && value < 1.001f;
-}
-
-static bool almostOne(const FVector3D &channel) {
-	return almostOne(channel.x) && almostOne(channel.y) && almostOne(channel.z);
-}
-
-static bool almostSame(const FVector3D &channel) {
-	return almostZero(channel.x - channel.y) && almostZero(channel.x - channel.z) && almostZero(channel.y - channel.z);
-}
-
 static const int alignX = 2;
 static const int alignX = 2;
 static const int alignY = 2;
 static const int alignY = 2;
 
 
@@ -338,81 +314,9 @@ void dsr::renderTriangleFromData(
 	// Only draw visible triangles
 	// Only draw visible triangles
 	Visibility visibility = getTriangleVisibility(triangle, camera, false);
 	Visibility visibility = getTriangleVisibility(triangle, camera, false);
 	if (visibility != Visibility::Hidden) {
 	if (visibility != Visibility::Hidden) {
-		// Disable features when debugging
-		#ifdef DISABLE_VERTEX_COLOR
-			colors = TriangleColors(1.0f);
-		#endif
-		#ifdef DISABLE_DIFFUSE_MAP
-			diffuse = nullptr;
-		#endif
-		#ifdef DISABLE_LIGHT_MAP
-			light = nullptr;
-		#endif
 		// Select an instance of the default shader
 		// Select an instance of the default shader
 		if (!(filter == Filter::Alpha && almostZero(colors.alpha))) {
 		if (!(filter == Filter::Alpha && almostZero(colors.alpha))) {
-			bool hasVertexFade = !(almostSame(colors.red) && almostSame(colors.green) && almostSame(colors.blue) && almostSame(colors.alpha));
-			bool colorless = almostOne(colors.red) && almostOne(colors.green) && almostOne(colors.blue) && almostOne(colors.alpha);
-			// Get the function pointer to the correct shader
-			DRAW_CALLBACK_TYPE drawTask = &drawCallbackTemplate;
-			if (diffuse) {
-				bool hasDiffusePyramid = diffuse->texture.hasMipBuffer();
-				if (light) {
-					if (hasVertexFade) { // DiffuseLightVertex
-						if (hasDiffusePyramid) { // With mipmap
-							drawTask = &(Shader_RgbaMultiply<true, true, true, false, false>::processTriangle);
-						} else { // Without mipmap
-							drawTask = &(Shader_RgbaMultiply<true, true, true, false, true>::processTriangle);
-						}
-					} else { // DiffuseLight
-						if (hasDiffusePyramid) { // With mipmap
-							drawTask = &(Shader_RgbaMultiply<true, true, false, false, false>::processTriangle);
-						} else { // Without mipmap
-							drawTask = &(Shader_RgbaMultiply<true, true, false, false, true>::processTriangle);
-						}
-					}
-				} else {
-					if (hasVertexFade) { // DiffuseVertex
-						if (hasDiffusePyramid) { // With mipmap
-							drawTask = &(Shader_RgbaMultiply<true, false, true, false, false>::processTriangle);
-						} else { // Without mipmap
-							drawTask = &(Shader_RgbaMultiply<true, false, true, false, true>::processTriangle);
-						}
-					} else {
-						if (colorless) { // Diffuse without normalization
-							if (hasDiffusePyramid) { // With mipmap
-								drawTask = &(Shader_RgbaMultiply<true, false, false, true, false>::processTriangle);
-							} else { // Without mipmap
-								drawTask = &(Shader_RgbaMultiply<true, false, false, true, true>::processTriangle);
-							}
-						} else { // Diffuse
-							if (hasDiffusePyramid) { // With mipmap
-								drawTask = &(Shader_RgbaMultiply<true, false, false, false, false>::processTriangle);
-							} else { // Without mipmap
-								drawTask = &(Shader_RgbaMultiply<true, false, false, false, true>::processTriangle);
-							}
-						}
-					}
-				}
-			} else {
-				if (light) {
-					if (hasVertexFade) { // LightVertex
-						drawTask = &(Shader_RgbaMultiply<false, true, true, false, false>::processTriangle);
-					} else {
-						if (colorless) { // Light without normalization
-							drawTask = &(Shader_RgbaMultiply<false, true, false, true, false>::processTriangle);
-						} else { // Light
-							drawTask = &(Shader_RgbaMultiply<false, true, false, false, false>::processTriangle);
-						}
-					}
-				} else {
-					if (hasVertexFade) { // Vertex
-						drawTask = &(Shader_RgbaMultiply<false, false, true, false, false>::processTriangle);
-					} else { // Single color
-						drawTask = &(Shader_RgbaMultiply<false, false, false, false, false>::processTriangle);
-					}
-				}
-			}
-			renderTriangleWithShader(commandQueue, TriangleDrawData(targetImage, depthBuffer, camera.perspective, filter, TriangleInput(diffuse, light, texCoords, colors), drawTask), camera, triangle, clipBound);
+			renderTriangleWithShader(commandQueue, TriangleDrawData(targetImage, depthBuffer, camera.perspective, filter, TriangleInput(diffuse, light, texCoords, colors), &processTriangle_RgbaMultiply), camera, triangle, clipBound);
 		}
 		}
 	}
 	}
 }
 }

+ 101 - 41
Source/DFPSR/render/shader/RgbaMultiply.h

@@ -29,13 +29,12 @@
 #include <cassert>
 #include <cassert>
 #include <algorithm>
 #include <algorithm>
 #include "Shader.h"
 #include "Shader.h"
+#include "fillerTemplates.h"
 #include "../../image/ImageRgbaU8.h"
 #include "../../image/ImageRgbaU8.h"
 
 
 namespace dsr {
 namespace dsr {
 
 
-template <bool HAS_DIFFUSE_MAP, bool HAS_LIGHT_MAP, bool HAS_VERTEX_FADING, bool COLORLESS, bool DISABLE_MIPMAP>
-class Shader_RgbaMultiply : public Shader {
-private:
+struct RgbaMultiply_data {
 	const TextureRgba *diffuseMap; // Mip-mapping is allowed for diffuse textures.
 	const TextureRgba *diffuseMap; // Mip-mapping is allowed for diffuse textures.
 	const TextureRgba *lightMap; // Mip-mapping is not allowed for lightmaps, because it would increase the number of shaders to compile and still look worse.
 	const TextureRgba *lightMap; // Mip-mapping is not allowed for lightmaps, because it would increase the number of shaders to compile and still look worse.
 	// Planar format with each vector representing the three triangle corners
 	// Planar format with each vector representing the three triangle corners
@@ -44,67 +43,128 @@ private:
 	// Normalize the color product by pre-multiplying the vertex colors
 	// Normalize the color product by pre-multiplying the vertex colors
 	float getVertexScale() {
 	float getVertexScale() {
 		float result = 255.0f; // Scale from normalized to byte for the output
 		float result = 255.0f; // Scale from normalized to byte for the output
-		if (HAS_DIFFUSE_MAP) {
+		if (this->diffuseMap) {
 			result *= 1.0f / 255.0f; // Normalize the diffuse map from 0..255 to 0..1 by dividing the vertex color
 			result *= 1.0f / 255.0f; // Normalize the diffuse map from 0..255 to 0..1 by dividing the vertex color
 		}
 		}
-		if (HAS_LIGHT_MAP) {
+		if (this->lightMap) {
 			result *= 1.0f / 255.0f; // Normalize the light map from 0..255 to 0..1 by dividing the vertex color
 			result *= 1.0f / 255.0f; // Normalize the light map from 0..255 to 0..1 by dividing the vertex color
 		}
 		}
 		return result;
 		return result;
 	}
 	}
-	explicit Shader_RgbaMultiply(const TriangleInput &triangleInput) :
+	explicit RgbaMultiply_data(const TriangleInput &triangleInput) :
 	  diffuseMap(triangleInput.diffuseImage ? &(triangleInput.diffuseImage->texture) : nullptr),
 	  diffuseMap(triangleInput.diffuseImage ? &(triangleInput.diffuseImage->texture) : nullptr),
 	  lightMap(triangleInput.lightImage ? &(triangleInput.lightImage->texture) : nullptr),
 	  lightMap(triangleInput.lightImage ? &(triangleInput.lightImage->texture) : nullptr),
 	  texCoords(triangleInput.texCoords), colors(triangleInput.colors.getScaled(getVertexScale())) {
 	  texCoords(triangleInput.texCoords), colors(triangleInput.colors.getScaled(getVertexScale())) {
 		// Texture coordinates must be on the positive side to allow using truncation as a floor function
 		// Texture coordinates must be on the positive side to allow using truncation as a floor function
-		if (HAS_DIFFUSE_MAP) {
+		if (this->diffuseMap) {
 			assert(this->diffuseMap != nullptr); // Cannot sample null
 			assert(this->diffuseMap != nullptr); // Cannot sample null
 			assert(this->diffuseMap->exists()); // Cannot sample regular images
 			assert(this->diffuseMap->exists()); // Cannot sample regular images
 		}
 		}
-		if (HAS_LIGHT_MAP) {
+		if (this->lightMap) {
 			assert(this->lightMap != nullptr); // Cannot sample null
 			assert(this->lightMap != nullptr); // Cannot sample null
 			assert(this->lightMap->exists()); // Cannot sample regular images
 			assert(this->lightMap->exists()); // Cannot sample regular images
 		}
 		}
 	}
 	}
-public:
-	// The process method to take a function pointer to.
-	//    Must have the same signature as drawCallbackTemplate in Shader.h.
-	static void processTriangle(const TriangleInput &triangleInput, ImageRgbaU8Impl *colorBuffer, ImageF32Impl *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape, Filter filter) {
-		Shader_RgbaMultiply tempShader(triangleInput);
-		tempShader.fillShape(colorBuffer, depthBuffer, triangle, projection, shape, filter);
+};
+
+template <bool HAS_DIFFUSE_MAP, bool HAS_LIGHT_MAP, bool HAS_VERTEX_FADING, bool COLORLESS, bool DISABLE_MIPMAP>
+static Rgba_F32 getPixels_2x2(void *data, const F32x4x3 &vertexWeights) {
+	if (HAS_DIFFUSE_MAP && !HAS_LIGHT_MAP && COLORLESS) {
+		// Optimized for diffuse only
+		F32x4 u1 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.u1, vertexWeights);
+		F32x4 v1 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.v1, vertexWeights);
+		return shaderMethods::sample_F32<Interpolation::BL, DISABLE_MIPMAP, false>(((RgbaMultiply_data*)data)->diffuseMap, u1, v1);
+	} else if (HAS_LIGHT_MAP && !HAS_DIFFUSE_MAP && COLORLESS) {
+		// Optimized for light only
+		F32x4 u2 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.u2, vertexWeights);
+		F32x4 v2 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.v2, vertexWeights);
+		return shaderMethods::sample_F32<Interpolation::BL, true, false>(((RgbaMultiply_data*)data)->lightMap, u2, v2);
+	} else {
+		// Interpolate the vertex color
+		Rgba_F32 color = HAS_VERTEX_FADING ?
+		  shaderMethods::interpolateVertexColor(((RgbaMultiply_data*)data)->colors.red, ((RgbaMultiply_data*)data)->colors.green, ((RgbaMultiply_data*)data)->colors.blue, ((RgbaMultiply_data*)data)->colors.alpha, vertexWeights) :
+		  Rgba_F32(F32x4(((RgbaMultiply_data*)data)->colors.red.x), F32x4(((RgbaMultiply_data*)data)->colors.green.x), F32x4(((RgbaMultiply_data*)data)->colors.blue.x), F32x4(((RgbaMultiply_data*)data)->colors.alpha.x));
+		// Sample diffuse
+		if (HAS_DIFFUSE_MAP) {
+			F32x4 u1 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.u1, vertexWeights);
+			F32x4 v1 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.v1, vertexWeights);
+			color = color * shaderMethods::sample_F32<Interpolation::BL, DISABLE_MIPMAP, false>(((RgbaMultiply_data*)data)->diffuseMap, u1, v1);
+		}
+		// Sample lightmap
+		if (HAS_LIGHT_MAP) {
+			F32x4 u2 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.u2, vertexWeights);
+			F32x4 v2 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.v2, vertexWeights);
+			color = color * shaderMethods::sample_F32<Interpolation::BL, true, false>(((RgbaMultiply_data*)data)->lightMap, u2, v2);
+		}
+		return color;
 	}
 	}
-	Rgba_F32 getPixels_2x2(const F32x4x3 &vertexWeights) const override {
-		if (HAS_DIFFUSE_MAP && !HAS_LIGHT_MAP && COLORLESS) {
-			// Optimized for diffuse only
-			F32x4 u1(shaderMethods::interpolate(this->texCoords.u1, vertexWeights));
-			F32x4 v1(shaderMethods::interpolate(this->texCoords.v1, vertexWeights));
-			return shaderMethods::sample_F32<Interpolation::BL, DISABLE_MIPMAP, false>(this->diffuseMap, u1, v1);
-		} else if (HAS_LIGHT_MAP && !HAS_DIFFUSE_MAP && COLORLESS) {
-			// Optimized for light only
-			F32x4 u2(shaderMethods::interpolate(this->texCoords.u2, vertexWeights));
-			F32x4 v2(shaderMethods::interpolate(this->texCoords.v2, vertexWeights));
-			return shaderMethods::sample_F32<Interpolation::BL, true, false>(this->lightMap, u2, v2);
+}
+
+// The process method to take a function pointer to.
+//    Must have the same signature as drawCallbackTemplate in Shader.h.
+static void processTriangle_RgbaMultiply(const TriangleInput &triangleInput, ImageRgbaU8Impl *colorBuffer, ImageF32Impl *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape, Filter filter) {
+	RgbaMultiply_data data = RgbaMultiply_data(triangleInput);
+	bool hasVertexFade = !(almostSame(triangleInput.colors.red) && almostSame(triangleInput.colors.green) && almostSame(triangleInput.colors.blue) && almostSame(triangleInput.colors.alpha)); // Classify the unscaled input colors, because data.colors is pre-multiplied by getVertexScale() while the tolerances are absolute
+	bool colorless = almostOne(triangleInput.colors.red) && almostOne(triangleInput.colors.green) && almostOne(triangleInput.colors.blue) && almostOne(triangleInput.colors.alpha); // Same unscaled input as hasVertexFade
+	if (data.diffuseMap) {
+		bool hasDiffusePyramid = data.diffuseMap->hasMipBuffer();
+		if (data.lightMap) {
+			if (hasVertexFade) { // DiffuseLightVertex
+				if (hasDiffusePyramid) { // With mipmap
+					fillShape(&data, getPixels_2x2<true, true, true, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
+				} else { // Without mipmap
+					fillShape(&data, getPixels_2x2<true, true, true, false, true>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
+				}
+			} else { // DiffuseLight
+				if (hasDiffusePyramid) { // With mipmap
+					fillShape(&data, getPixels_2x2<true, true, false, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
+				} else { // Without mipmap
+					fillShape(&data, getPixels_2x2<true, true, false, false, true>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
+				}
+			}
 		} else {
 		} else {
-			// Interpolate the vertex color
-			Rgba_F32 color = HAS_VERTEX_FADING ?
-			  shaderMethods::interpolateVertexColor(this->colors.red, this->colors.green, this->colors.blue, this->colors.alpha, vertexWeights) :
-			  Rgba_F32(F32x4(this->colors.red.x), F32x4(this->colors.green.x), F32x4(this->colors.blue.x), F32x4(this->colors.alpha.x));
-			// Sample diffuse
-			if (HAS_DIFFUSE_MAP) {
-				F32x4 u1(shaderMethods::interpolate(this->texCoords.u1, vertexWeights));
-				F32x4 v1(shaderMethods::interpolate(this->texCoords.v1, vertexWeights));
-				color = color * shaderMethods::sample_F32<Interpolation::BL, DISABLE_MIPMAP, false>(this->diffuseMap, u1, v1);
+			if (hasVertexFade) { // DiffuseVertex
+				if (hasDiffusePyramid) { // With mipmap
+					fillShape(&data, getPixels_2x2<true, false, true, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
+				} else { // Without mipmap
+					fillShape(&data, getPixels_2x2<true, false, true, false, true>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
+				}
+			} else {
+				if (colorless) { // Diffuse without normalization
+					if (hasDiffusePyramid) { // With mipmap
+						fillShape(&data, getPixels_2x2<true, false, false, true, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
+					} else { // Without mipmap
+						fillShape(&data, getPixels_2x2<true, false, false, true, true>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
+					}
+				} else { // Diffuse
+					if (hasDiffusePyramid) { // With mipmap
+						fillShape(&data, getPixels_2x2<true, false, false, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
+					} else { // Without mipmap
+						fillShape(&data, getPixels_2x2<true, false, false, false, true>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
+					}
+				}
 			}
 			}
-			// Sample lightmap
-			if (HAS_LIGHT_MAP) {
-				F32x4 u2(shaderMethods::interpolate(this->texCoords.u2, vertexWeights));
-				F32x4 v2(shaderMethods::interpolate(this->texCoords.v2, vertexWeights));
-				color = color * shaderMethods::sample_F32<Interpolation::BL, true, false>(this->lightMap, u2, v2);
+		}
+	} else {
+		if (data.lightMap) {
+			if (hasVertexFade) { // LightVertex
+				fillShape(&data, getPixels_2x2<false, true, true, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
+			} else {
+				if (colorless) { // Light without normalization
+					fillShape(&data, getPixels_2x2<false, true, false, true, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
+				} else { // Light
+					fillShape(&data, getPixels_2x2<false, true, false, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
+				}
+			}
+		} else {
+			if (hasVertexFade) { // Vertex
+				fillShape(&data, getPixels_2x2<false, false, true, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
+			} else { // Single color
+				fillShape(&data, getPixels_2x2<false, false, false, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
 			}
 			}
-			return color;
 		}
 		}
 	}
 	}
-};
+}
 
 
 }
 }
 
 

+ 2 - 10
Source/DFPSR/render/shader/Shader.h

@@ -1,6 +1,6 @@
 // zlib open source license
 // zlib open source license
 //
 //
-// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// Copyright (c) 2017 to 2023 David Forsgren Piuva
 // 
 // 
 // This software is provided 'as-is', without any express or implied
 // This software is provided 'as-is', without any express or implied
 // warranty. In no event will the authors be held liable for any damages
 // warranty. In no event will the authors be held liable for any damages
@@ -66,15 +66,7 @@ struct TriangleInput {
 
 
 // The template for function pointers doing the work
 // The template for function pointers doing the work
 inline void drawCallbackTemplate(const TriangleInput &triangleInput, ImageRgbaU8Impl *colorBuffer, ImageF32Impl *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape, Filter filter) {}
 inline void drawCallbackTemplate(const TriangleInput &triangleInput, ImageRgbaU8Impl *colorBuffer, ImageF32Impl *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape, Filter filter) {}
-#define DRAW_CALLBACK_TYPE decltype(&drawCallbackTemplate)
-
-// Inherit this class for pixel shaders
-class Shader {
-public:
-	void fillShape(ImageRgbaU8Impl *colorBuffer, ImageF32Impl *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape, Filter filter);
-	// The main call that defines the pixel shader
-	virtual Rgba_F32 getPixels_2x2(const F32x4x3 &vertexWeights) const = 0;
-};
+using DRAW_CALLBACK_TYPE = decltype(&drawCallbackTemplate);
 
 
 }
 }
 
 

+ 463 - 434
Source/DFPSR/render/shader/Shader.cpp → Source/DFPSR/render/shader/fillerTemplates.h

@@ -1,434 +1,463 @@
-// zlib open source license
-//
-// Copyright (c) 2017 to 2019 David Forsgren Piuva
-// 
-// This software is provided 'as-is', without any express or implied
-// warranty. In no event will the authors be held liable for any damages
-// arising from the use of this software.
-// 
-// Permission is granted to anyone to use this software for any purpose,
-// including commercial applications, and to alter it and redistribute it
-// freely, subject to the following restrictions:
-// 
-//    1. The origin of this software must not be misrepresented; you must not
-//    claim that you wrote the original software. If you use this software
-//    in a product, an acknowledgment in the product documentation would be
-//    appreciated but is not required.
-// 
-//    2. Altered source versions must be plainly marked as such, and must not be
-//    misrepresented as being the original software.
-// 
-//    3. This notice may not be removed or altered from any source
-//    distribution.
-
-#include "Shader.h"
-#include <stdio.h>
-#include <algorithm>
-#include "../../image/internal/imageInternal.h"
-#include "../../image/ImageRgbaU8.h"
-#include "../../image/ImageF32.h"
-
-using namespace dsr;
-
-inline static const uint32_t roundUpEven(uint32_t x) {
-	return (x + 1u) & ~1u;
-}
-
-inline static const uint32_t roundDownEven(uint32_t x) {
-	return x & ~1u;
-}
-
-template<bool CLIP_SIDES>
-static inline U32x4 clippedRead(SafePointer<uint32_t> upperLeft, SafePointer<uint32_t> lowerLeft, bool vis0, bool vis1, bool vis2, bool vis3) {
-	if (CLIP_SIDES) {
-		return U32x4(vis0 ? upperLeft[0] : 0, vis1 ? upperLeft[1] : 0, vis2 ? lowerLeft[0] : 0, vis3 ? lowerLeft[1] : 0);
-	} else {
-		return U32x4(upperLeft[0], upperLeft[1], lowerLeft[0], lowerLeft[1]);
-	}
-}
-
-static inline void clippedWrite(SafePointer<uint32_t> upperLeft, SafePointer<uint32_t> lowerLeft, bool vis0, bool vis1, bool vis2, bool vis3, U32x4 vColor) {
-	// Read back SIMD vector to scalar type
-	UVector4D color = vColor.get();
-	// Write colors for visible pixels
-	if (vis0) { upperLeft[0] = color.x; }
-	if (vis1) { upperLeft[1] = color.y; }
-	if (vis2) { lowerLeft[0] = color.z; }
-	if (vis3) { lowerLeft[1] = color.w; }
-}
-
-static inline void clippedWrite(SafePointer<float> upperLeft, SafePointer<float> lowerLeft, bool vis0, bool vis1, bool vis2, bool vis3, FVector4D depth) {
-	// Write colors for visible pixels
-	if (vis0) { upperLeft[0] = depth.x; }
-	if (vis1) { upperLeft[1] = depth.y; }
-	if (vis2) { lowerLeft[0] = depth.z; }
-	if (vis3) { lowerLeft[1] = depth.w; }
-}
-
-template<bool CLIP_SIDES>
-static inline void clipPixels(int x, const RowInterval &upperRow, const RowInterval &lowerRow, bool &clip0, bool &clip1, bool &clip2, bool &clip3) {
-	if (CLIP_SIDES) {
-		int x2 = x + 1;
-		clip0 = x >= upperRow.left && x < upperRow.right;
-		clip1 = x2 >= upperRow.left && x2 < upperRow.right;
-		clip2 = x >= lowerRow.left && x < lowerRow.right;
-		clip3 = x2 >= lowerRow.left && x2 < lowerRow.right;
-	} else {
-		clip0 = true;
-		clip1 = true;
-		clip2 = true;
-		clip3 = true;
-	}
-}
-
-template<bool CLIP_SIDES, bool DEPTH_READ, bool AFFINE>
-static inline void getVisibility(int x, const RowInterval &upperRow, const RowInterval &lowerRow, const FVector4D &depth, const SafePointer<float> depthDataUpper, const SafePointer<float> depthDataLower, bool &vis0, bool &vis1, bool &vis2, bool &vis3) {
-	// Clip pixels
-	bool clip0, clip1, clip2, clip3;
-	clipPixels<CLIP_SIDES>(x, upperRow, lowerRow, clip0, clip1, clip2, clip3);
-	// Compare to depth buffer
-	bool front0, front1, front2, front3;
-	if (DEPTH_READ) {
-		if (AFFINE) {
-			if (CLIP_SIDES) {
-				front0 = clip0 ? depth.x < depthDataUpper[0] : false;
-				front1 = clip1 ? depth.y < depthDataUpper[1] : false;
-				front2 = clip2 ? depth.z < depthDataLower[0] : false;
-				front3 = clip3 ? depth.w < depthDataLower[1] : false;
-			} else {
-				front0 = depth.x < depthDataUpper[0];
-				front1 = depth.y < depthDataUpper[1];
-				front2 = depth.z < depthDataLower[0];
-				front3 = depth.w < depthDataLower[1];
-			}
-		} else {
-			if (CLIP_SIDES) {
-				front0 = clip0 ? depth.x > depthDataUpper[0] : false;
-				front1 = clip1 ? depth.y > depthDataUpper[1] : false;
-				front2 = clip2 ? depth.z > depthDataLower[0] : false;
-				front3 = clip3 ? depth.w > depthDataLower[1] : false;
-			} else {
-				front0 = depth.x > depthDataUpper[0];
-				front1 = depth.y > depthDataUpper[1];
-				front2 = depth.z > depthDataLower[0];
-				front3 = depth.w > depthDataLower[1];
-			}
-		}
-	} else {
-		front0 = true;
-		front1 = true;
-		front2 = true;
-		front3 = true;
-	}
-	// Decide visibility
-	vis0 = clip0 && front0;
-	vis1 = clip1 && front1;
-	vis2 = clip2 && front2;
-	vis3 = clip3 && front3;
-}
-
-template<bool CLIP_SIDES, bool COLOR_WRITE, bool DEPTH_READ, bool DEPTH_WRITE, Filter FILTER, bool AFFINE>
-inline static void fillQuadSuper(const Shader& shader, int x, SafePointer<uint32_t> pixelDataUpper, SafePointer<uint32_t> pixelDataLower, SafePointer<float> depthDataUpper, SafePointer<float> depthDataLower, const RowInterval &upperRow, const RowInterval &lowerRow, const PackOrder &targetPackingOrder, const FVector4D &depth, const F32x4x3 &weights) {
-	// Get visibility
-	bool vis0, vis1, vis2, vis3;
-	getVisibility<CLIP_SIDES, DEPTH_READ, AFFINE>(x, upperRow, lowerRow, depth, depthDataUpper, depthDataLower, vis0, vis1, vis2, vis3);
-	// Draw if something is visible
-	if (vis0 || vis1 || vis2 || vis3) {
-		if (COLOR_WRITE) {
-			// Get the color
-			U32x4 packedColor(0u); // Allow uninitialized memory?
-			// Execute the shader
-			Rgba_F32 planarSourceColor = shader.getPixels_2x2(weights);
-			// Apply alpha filtering
-			if (FILTER == Filter::Alpha) {
-				// Get opacity from the source color
-				F32x4 opacity = planarSourceColor.alpha * (1.0f / 255.0f);
-				// Read the packed colors for alpha blending
-				U32x4 packedTargetColor = clippedRead<CLIP_SIDES>(pixelDataUpper, pixelDataLower, vis0, vis1, vis2, vis3);
-				// Unpack the target color into planar RGBA format so that it can be mixed with the source color
-				Rgba_F32 planarTargetColor(packedTargetColor, targetPackingOrder);
-				// Blend linearly using floats
-				planarSourceColor = (planarSourceColor * opacity) + (planarTargetColor * (1.0f - opacity));
-			}
-			// Apply channel swapping while packing to bytes
-			packedColor = planarSourceColor.toSaturatedByte(targetPackingOrder);
-			// Write colors
-			clippedWrite(pixelDataUpper, pixelDataLower, vis0, vis1, vis2, vis3, packedColor);
-		}
-		// Write depth for visible pixels
-		if (DEPTH_WRITE) {
-			clippedWrite(depthDataUpper, depthDataLower, vis0, vis1, vis2, vis3, depth);
-		}
-	}
-}
-
-// CLIP_SIDES will use upperRow and lowerRow to clip pixels based on the x value. Only x values inside the ranges can be drawn.
-//   This is used along the triangle edges.
-// COLOR_WRITE can be disabled to skip writing to the color buffer. Usually when none is given.
-// DEPTH_READ can be disabled to draw without caring if there is something already closer in the depth buffer.
-// DEPTH_WRITE can be disabled to skip writing to the depth buffer so that it does not occlude following draw calls.
-// FILTER can be set to Filter::Alpha to use the output alpha as the opacity.
-template<bool CLIP_SIDES, bool COLOR_WRITE, bool DEPTH_READ, bool DEPTH_WRITE, Filter FILTER, bool AFFINE>
-static inline void fillRowSuper(const Shader& shader, SafePointer<uint32_t> pixelDataUpper, SafePointer<uint32_t> pixelDataLower, SafePointer<float> depthDataUpper, SafePointer<float> depthDataLower, FVector3D pWeightUpper, FVector3D pWeightLower, const FVector3D &pWeightDx, int startX, int endX, const RowInterval &upperRow, const RowInterval &lowerRow, const PackOrder &targetPackingOrder) {
-	if (AFFINE) {
-		FVector3D dx2 = pWeightDx * 2.0f;
-		F32x4 vLinearDepth(pWeightUpper.x, pWeightUpper.x + pWeightDx.x, pWeightLower.x, pWeightLower.x + pWeightDx.x);
-		F32x4 weightB(pWeightUpper.y, pWeightUpper.y + pWeightDx.y, pWeightLower.y, pWeightLower.y + pWeightDx.y);
-		F32x4 weightC(pWeightUpper.z, pWeightUpper.z + pWeightDx.z, pWeightLower.z, pWeightLower.z + pWeightDx.z);
-		for (int x = startX; x < endX; x += 2) {
-			// Get the linear depth
-			FVector4D depth = vLinearDepth.get();
-			// Calculate the weight of the first vertex from the other two
-			F32x4 weightA = 1.0f - (weightB + weightC);
-			F32x4x3 weights(weightA, weightB, weightC);
-			fillQuadSuper<CLIP_SIDES, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>(shader, x, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, upperRow, lowerRow, targetPackingOrder, depth, weights);
-			// Iterate projection
-			vLinearDepth = vLinearDepth + dx2.x;
-			weightB = weightB + dx2.y;
-			weightC = weightC + dx2.z;
-			// Iterate buffer pointers
-			pixelDataUpper += 2; pixelDataLower += 2;
-			depthDataUpper += 2; depthDataLower += 2;
-		}
-	} else {
-		FVector3D dx2 = pWeightDx * 2.0f;
-		F32x4 vRecDepth(pWeightUpper.x, pWeightUpper.x + pWeightDx.x, pWeightLower.x, pWeightLower.x + pWeightDx.x);
-		F32x4 vRecU(pWeightUpper.y, pWeightUpper.y + pWeightDx.y, pWeightLower.y, pWeightLower.y + pWeightDx.y);
-		F32x4 vRecV(pWeightUpper.z, pWeightUpper.z + pWeightDx.z, pWeightLower.z, pWeightLower.z + pWeightDx.z);
-		for (int x = startX; x < endX; x += 2) {
-			// Get the reciprocal depth
-			FVector4D depth = vRecDepth.get();
-			// After linearly interpolating (1 / W, U / W, V / W) based on the affine weights...
-			// Divide 1 by 1 / W to get the linear depth W
-			F32x4 vLinearDepth = vRecDepth.reciprocal();
-			// Multiply the vertex weights to the second and third edges with the depth to compensate for that we divided them by depth before interpolating.
-			F32x4 weightB = vRecU * vLinearDepth;
-			F32x4 weightC = vRecV * vLinearDepth;
-			// Calculate the weight of the first vertex from the other two
-			F32x4 weightA = 1.0f - (weightB + weightC);
-			F32x4x3 weights(weightA, weightB, weightC);
-			fillQuadSuper<CLIP_SIDES, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>(shader, x, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, upperRow, lowerRow, targetPackingOrder, depth, weights);
-			// Iterate projection
-			vRecDepth = vRecDepth + dx2.x;
-			vRecU = vRecU + dx2.y;
-			vRecV = vRecV + dx2.z;
-			// Iterate buffer pointers
-			pixelDataUpper += 2; pixelDataLower += 2;
-			depthDataUpper += 2; depthDataLower += 2;
-		}
-	}
-}
-
-template<bool COLOR_WRITE, bool DEPTH_READ, bool DEPTH_WRITE, Filter FILTER, bool AFFINE> // Fills the row intervals in shape, two rows at a time, by running fillRowSuper over 2x2 pixel quads.
-inline static void fillShapeSuper(const Shader& shader, ImageRgbaU8Impl *colorBuffer, ImageF32Impl *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape) {
-	// Prepare constants
-	const int targetStride = imageInternal::getStride(colorBuffer);
-	const int depthBufferStride = imageInternal::getStride(depthBuffer);
-	const FVector3D doublePWeightDx = projection.pWeightDx * 2.0f;
-	const int colorRowSize = imageInternal::getRowSize(colorBuffer);
-	const int depthRowSize = imageInternal::getRowSize(depthBuffer);
-	const PackOrder& targetPackingOrder = imageInternal::getPackOrder(colorBuffer);
-	const int colorHeight = imageInternal::getHeight(colorBuffer);
-	const int depthHeight = imageInternal::getHeight(depthBuffer);
-	const int maxHeight = colorHeight > depthHeight ? colorHeight : depthHeight;
-
-	// Initialize row pointers for color buffer
-	SafePointer<uint32_t> pixelDataUpper, pixelDataLower, pixelDataUpperRow, pixelDataLowerRow;
-	if (COLOR_WRITE) {
-		SafePointer<uint32_t> targetData = imageInternal::getSafeData<uint32_t>(colorBuffer);
-		pixelDataUpperRow = targetData;
-		pixelDataUpperRow.increaseBytes(shape.startRow * targetStride);
-		pixelDataLowerRow = targetData;
-		pixelDataLowerRow.increaseBytes((shape.startRow + 1) * targetStride);
-	} else {
-		pixelDataUpperRow = SafePointer<uint32_t>();
-		pixelDataLowerRow = SafePointer<uint32_t>();
-	}
-
-	// Initialize row pointers for depth buffer
-	SafePointer<float> depthDataUpper, depthDataLower, depthDataUpperRow, depthDataLowerRow;
-	if (DEPTH_READ || DEPTH_WRITE) {
-		SafePointer<float> depthBufferData = imageInternal::getSafeData<float>(depthBuffer);
-		depthDataUpperRow = depthBufferData;
-		depthDataUpperRow.increaseBytes(shape.startRow * depthBufferStride);
-		depthDataLowerRow = depthBufferData;
-		depthDataLowerRow.increaseBytes((shape.startRow + 1) * depthBufferStride);
-	} else {
-		depthDataUpperRow = SafePointer<float>();
-		depthDataLowerRow = SafePointer<float>();
-	}
-	for (int32_t y1 = shape.startRow; y1 < shape.startRow + shape.rowCount; y1 += 2) {
-		int y2 = y1 + 1;
-		RowInterval upperRow = shape.rows[y1 - shape.startRow];
-		RowInterval lowerRow = shape.rows[y2 - shape.startRow]; // Assumes that rows has an entry for y2 even when rowCount is odd - TODO confirm
-		int outerStart = min(upperRow.left, lowerRow.left);
-		int outerEnd = max(upperRow.right, lowerRow.right);
-		int innerStart = max(upperRow.left, lowerRow.left);
-		int innerEnd = min(upperRow.right, lowerRow.right);
-		// Round exclusive intervals to multiples of two pixels
-		int outerBlockStart = roundDownEven(outerStart);
-		int outerBlockEnd = roundUpEven(outerEnd);
-		int innerBlockStart = roundUpEven(innerStart);
-		int innerBlockEnd = roundDownEven(innerEnd);
-		// Clip the lower row to an empty interval if it is outside of the tallest buffer on odd height. NOTE(review): the block bounds above were computed from the unclipped lowerRow - confirm that shape cannot hold a non-empty interval for a row outside of the image.
-		if (y2 >= maxHeight) {
-			lowerRow.right = lowerRow.left;
-		}
-		// Avoid reading outside of the given bound
-		bool hasTop = upperRow.right > upperRow.left;
-		bool hasBottom = lowerRow.right > lowerRow.left;
-		if (hasTop || hasBottom) {
-			// Initialize pointers
-			if (COLOR_WRITE) {
-				if (hasTop) {
-					pixelDataUpper = pixelDataUpperRow.slice("pixelDataUpper", 0, colorRowSize);
-				} else {
-					// Repeat the lower row to avoid reading outside
-					pixelDataUpper = pixelDataLowerRow.slice("pixelDataUpper (from lower)", 0, colorRowSize);
-				}
-				if (hasBottom) {
-					pixelDataLower = pixelDataLowerRow.slice("pixelDataLower", 0, colorRowSize);
-				} else {
-					// Repeat the upper row to avoid reading outside
-					pixelDataLower = pixelDataUpperRow.slice("pixelDataLower (from upper)", 0, colorRowSize);
-				}
-				int startColorOffset = outerBlockStart * sizeof(uint32_t);
-				pixelDataUpper.increaseBytes(startColorOffset);
-				pixelDataLower.increaseBytes(startColorOffset);
-			}
-			if (DEPTH_READ || DEPTH_WRITE) {
-				if (hasTop) {
-					depthDataUpper = depthDataUpperRow.slice("depthDataUpper", 0, depthRowSize);
-				} else {
-					// Repeat the lower row to avoid reading outside
-					depthDataUpper = depthDataLowerRow.slice("depthDataUpper (from lower)", 0, depthRowSize);
-				}
-				if (hasBottom) {
-					depthDataLower = depthDataLowerRow.slice("depthDataLower", 0, depthRowSize);
-				} else {
-					// Repeat the upper row to avoid reading outside
-					depthDataLower = depthDataUpperRow.slice("depthDataLower (from upper)", 0, depthRowSize);
-				}
-				depthDataUpper += outerBlockStart;
-				depthDataLower += outerBlockStart;
-			} else {
-				depthDataUpper = SafePointer<float>();
-				depthDataLower = SafePointer<float>();
-			}
-			// Initialize projection at the upper left pixel of the first quad, the lower row is one pWeightDy step further
-			FVector3D pWeightUpperRow;
-			if (AFFINE) {
-				pWeightUpperRow = projection.getWeight_affine(IVector2D(outerBlockStart, y1));
-			} else {
-				pWeightUpperRow = projection.getDepthDividedWeight_perspective(IVector2D(outerBlockStart, y1));
-			}
-			FVector3D pWeightUpper = pWeightUpperRow;
-			FVector3D pWeightLowerRow = pWeightUpperRow + projection.pWeightDy;
-			FVector3D pWeightLower = pWeightLowerRow;
-			// Render the pixels
-			if (innerBlockEnd <= innerBlockStart) {
-				// Clipped from left and right, so every quad is drawn with side clipping
-				for (int32_t x = outerBlockStart; x < outerBlockEnd; x += 2) {
-					fillRowSuper<true, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>
-					  (shader, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, pWeightUpper, pWeightLower, projection.pWeightDx, x, x + 2, upperRow, lowerRow, targetPackingOrder);
-					if (COLOR_WRITE) { pixelDataUpper += 2; pixelDataLower += 2; }
-					if (DEPTH_READ || DEPTH_WRITE) { depthDataUpper += 2; depthDataLower += 2; }
-					pWeightUpper = pWeightUpper + doublePWeightDx; pWeightLower = pWeightLower + doublePWeightDx;
-				}
-			} else {
-				// Left edge
-				for (int32_t x = outerBlockStart; x < innerBlockStart; x += 2) {
-					fillRowSuper<true, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>
-					  (shader, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, pWeightUpper, pWeightLower, projection.pWeightDx, x, x + 2, upperRow, lowerRow, targetPackingOrder);
-					if (COLOR_WRITE) { pixelDataUpper += 2; pixelDataLower += 2; }
-					if (DEPTH_READ || DEPTH_WRITE) { depthDataUpper += 2; depthDataLower += 2; }
-					pWeightUpper = pWeightUpper + doublePWeightDx; pWeightLower = pWeightLower + doublePWeightDx;
-				}
-				// Full quads drawn in a single call without side clipping
-				int width = innerBlockEnd - innerBlockStart;
-				int quadCount = width / 2;
-				fillRowSuper<false, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>
-				  (shader, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, pWeightUpper, pWeightLower, projection.pWeightDx, innerBlockStart, innerBlockEnd, RowInterval(), RowInterval(), targetPackingOrder);
-				if (COLOR_WRITE) { pixelDataUpper += 2 * quadCount; pixelDataLower += 2 * quadCount; }
-				if (DEPTH_READ || DEPTH_WRITE) { depthDataUpper += 2 * quadCount; depthDataLower += 2 * quadCount; }
-				pWeightUpper = pWeightUpper + (doublePWeightDx * quadCount); pWeightLower = pWeightLower + (doublePWeightDx * quadCount);
-				// Right edge
-				for (int32_t x = innerBlockEnd; x < outerBlockEnd; x += 2) {
-					fillRowSuper<true, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>
-					  (shader, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, pWeightUpper, pWeightLower, projection.pWeightDx, x, x + 2, upperRow, lowerRow, targetPackingOrder);
-					if (COLOR_WRITE) { pixelDataUpper += 2; pixelDataLower += 2; }
-					if (DEPTH_READ || DEPTH_WRITE) { depthDataUpper += 2; depthDataLower += 2; }
-					pWeightUpper = pWeightUpper + doublePWeightDx; pWeightLower = pWeightLower + doublePWeightDx;
-				}
-			}
-		}
-		// Iterate to the next pair of rows
-		if (COLOR_WRITE) {
-			pixelDataUpperRow.increaseBytes(targetStride * 2);
-			pixelDataLowerRow.increaseBytes(targetStride * 2);
-		}
-		if (DEPTH_READ || DEPTH_WRITE) {
-			depthDataUpperRow.increaseBytes(depthBufferStride * 2);
-			depthDataLowerRow.increaseBytes(depthBufferStride * 2);
-		}
-	}
-}
-
-void Shader::fillShape(ImageRgbaU8Impl *colorBuffer, ImageF32Impl *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape, Filter filter) { // Selects the fillShapeSuper permutation from projection.affine, which buffers are given and the filter.
-	bool hasColorBuffer = colorBuffer != nullptr;
-	bool hasDepthBuffer = depthBuffer != nullptr;
-	if (projection.affine) {
-		if (hasDepthBuffer) {
-			if (hasColorBuffer) {
-				if (filter != Filter::Solid) { // Any filter other than Solid is drawn as Alpha
-					// Alpha filtering with read only depth buffer
-					fillShapeSuper<true, true, false, Filter::Alpha, true>(*this, colorBuffer, depthBuffer, triangle, projection, shape);
-				} else {
-					// Solid with depth buffer
-					fillShapeSuper<true, true, true, Filter::Solid, true>(*this, colorBuffer, depthBuffer, triangle, projection, shape);
-				}
-			} else {
-				// Solid depth
-				// TODO: Use for orthogonal depth based shadows
-				fillShapeSuper<false, true, true, Filter::Solid, true>(*this, nullptr, depthBuffer, triangle, projection, shape);
-			}
-		} else {
-			if (hasColorBuffer) { // Nothing is drawn when neither buffer is given
-				if (filter != Filter::Solid) {
-					// Alpha filtering without depth buffer
-					fillShapeSuper<true, false, false, Filter::Alpha, true>(*this, colorBuffer, nullptr, triangle, projection, shape);
-				} else {
-					// Solid without depth buffer
-					fillShapeSuper<true, false, false, Filter::Solid, true>(*this, colorBuffer, nullptr, triangle, projection, shape);
-				}
-			}
-		}
-	} else {
-		if (hasDepthBuffer) {
-			if (hasColorBuffer) {
-				if (filter != Filter::Solid) { // Any filter other than Solid is drawn as Alpha
-					// Alpha filtering with read only depth buffer
-					fillShapeSuper<true, true, false, Filter::Alpha, false>(*this, colorBuffer, depthBuffer, triangle, projection, shape);
-				} else {
-					// Solid with depth buffer
-					fillShapeSuper<true, true, true, Filter::Solid, false>(*this, colorBuffer, depthBuffer, triangle, projection, shape);
-				}
-			} else {
-				// Solid depth
-				// TODO: Use for depth based shadows with perspective projection
-				fillShapeSuper<false, true, true, Filter::Solid, false>(*this, nullptr, depthBuffer, triangle, projection, shape);
-			}
-		} else {
-			if (hasColorBuffer) { // Nothing is drawn when neither buffer is given
-				if (filter != Filter::Solid) {
-					// Alpha filtering without depth buffer
-					fillShapeSuper<true, false, false, Filter::Alpha, false>(*this, colorBuffer, nullptr, triangle, projection, shape);
-				} else {
-					// Solid without depth buffer
-					fillShapeSuper<true, false, false, Filter::Solid, false>(*this, colorBuffer, nullptr, triangle, projection, shape);
-				}
-			}
-		}
-	}
-}
-
+// zlib open source license
+//
+// Copyright (c) 2017 to 2023 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_RENDER_FILLER_TEMPLATES
+#define DFPSR_RENDER_FILLER_TEMPLATES
+
+#include <stdint.h>
+#include "../../image/PackOrder.h"
+#include "../../image/ImageRgbaU8.h"
+#include "../../image/ImageF32.h"
+#include "../ITriangle2D.h"
+#include "shaderTypes.h"
+
+namespace dsr {
+
+// Function for shading pixels: called by fillQuadSuper once per 2x2 quad with the caller's opaque data pointer and the vertex weights of the quad, returning the planar color to write.
+using PixelShadingCallback = std::function<Rgba_F32(void *data, const F32x4x3 &vertexWeights)>; // NOTE(review): <functional> is not among the includes listed in this header, presumably it arrives through another include - confirm
+
+inline bool almostZero(float value) { // True when value is strictly between -0.001 and 0.001
+	return value > -0.001f && value < 0.001f;
+}
+
+inline bool almostZero(const FVector3D &channel) { // True when the x, y and z elements of channel are all almost zero
+	return almostZero(channel.x) && almostZero(channel.y) && almostZero(channel.z);
+}
+
+inline bool almostOne(float value) { // True when value is strictly between 0.999 and 1.001
+	return value > 0.999f && value < 1.001f;
+}
+
+inline bool almostOne(const FVector3D &channel) { // True when the x, y and z elements of channel are all almost one
+	return almostOne(channel.x) && almostOne(channel.y) && almostOne(channel.z);
+}
+
+inline bool almostSame(const FVector3D &channel) { // True when all pairwise differences between the x, y and z elements are almost zero
+	return almostZero(channel.x - channel.y) && almostZero(channel.x - channel.z) && almostZero(channel.y - channel.z);
+}
+
+inline const uint32_t roundUpEven(uint32_t x) { // Rounds x up to the closest even number, leaving even values unchanged. NOTE(review): const on a by-value return type has no effect.
+	return (x + 1u) & ~1u; // Adding one before clearing the lowest bit only moves odd values up
+}
+
+inline const uint32_t roundDownEven(uint32_t x) { // Rounds x down to the closest even number, leaving even values unchanged. NOTE(review): const on a by-value return type has no effect.
+	return x & ~1u; // Clearing the lowest bit removes the odd remainder
+}
+
+template<bool CLIP_SIDES> // Reads the packed colors of a 2x2 quad in the order upper left, upper right, lower left, lower right.
+inline U32x4 clippedRead(SafePointer<uint32_t> upperLeft, SafePointer<uint32_t> lowerLeft, bool vis0, bool vis1, bool vis2, bool vis3) {
+	if (CLIP_SIDES) { // Pixels that are not visible are returned as zero without being read from memory
+		return U32x4(vis0 ? upperLeft[0] : 0, vis1 ? upperLeft[1] : 0, vis2 ? lowerLeft[0] : 0, vis3 ? lowerLeft[1] : 0);
+	} else { // All four pixels are read no matter what the visibility flags say
+		return U32x4(upperLeft[0], upperLeft[1], lowerLeft[0], lowerLeft[1]);
+	}
+}
+
+inline void clippedWrite(SafePointer<uint32_t> upperLeft, SafePointer<uint32_t> lowerLeft, bool vis0, bool vis1, bool vis2, bool vis3, U32x4 vColor) { // Writes the packed colors of a 2x2 quad to the pixels flagged as visible.
+	// Read back SIMD vector to scalar type
+	UVector4D color = vColor.get();
+	// Write colors for visible pixels (x and y go to the upper row, z and w to the lower row)
+	if (vis0) { upperLeft[0] = color.x; }
+	if (vis1) { upperLeft[1] = color.y; }
+	if (vis2) { lowerLeft[0] = color.z; }
+	if (vis3) { lowerLeft[1] = color.w; }
+}
+
+inline void clippedWrite(SafePointer<float> upperLeft, SafePointer<float> lowerLeft, bool vis0, bool vis1, bool vis2, bool vis3, FVector4D depth) { // Writes the depth values of a 2x2 quad to the pixels flagged as visible.
+	// Write depth values for visible pixels (x and y go to the upper row, z and w to the lower row)
+	if (vis0) { upperLeft[0] = depth.x; }
+	if (vis1) { upperLeft[1] = depth.y; }
+	if (vis2) { lowerLeft[0] = depth.z; }
+	if (vis3) { lowerLeft[1] = depth.w; }
+}
+
+template<bool CLIP_SIDES> // Tells which pixels of the 2x2 quad starting at x are inside of their row's interval, where left is inclusive and right is exclusive.
+inline void clipPixels(int x, const RowInterval &upperRow, const RowInterval &lowerRow, bool &clip0, bool &clip1, bool &clip2, bool &clip3) {
+	if (CLIP_SIDES) {
+		int x2 = x + 1; // The right column of the quad
+		clip0 = x >= upperRow.left && x < upperRow.right;
+		clip1 = x2 >= upperRow.left && x2 < upperRow.right;
+		clip2 = x >= lowerRow.left && x < lowerRow.right;
+		clip3 = x2 >= lowerRow.left && x2 < lowerRow.right;
+	} else { // Without side clipping, the row intervals are ignored and all four pixels count as inside
+		clip0 = true;
+		clip1 = true;
+		clip2 = true;
+		clip3 = true;
+	}
+}
+
+template<bool CLIP_SIDES, bool DEPTH_READ, bool AFFINE> // Decides which pixels of the 2x2 quad at x are both inside of the row intervals and in front of the depth buffer.
+inline void getVisibility(int x, const RowInterval &upperRow, const RowInterval &lowerRow, const FVector4D &depth, const SafePointer<float> depthDataUpper, const SafePointer<float> depthDataLower, bool &vis0, bool &vis1, bool &vis2, bool &vis3) {
+	// Clip pixels
+	bool clip0, clip1, clip2, clip3;
+	clipPixels<CLIP_SIDES>(x, upperRow, lowerRow, clip0, clip1, clip2, clip3);
+	// Compare to depth buffer (with CLIP_SIDES, the depth buffer is not read for clipped pixels)
+	bool front0, front1, front2, front3;
+	if (DEPTH_READ) {
+		if (AFFINE) { // Affine depth passes when smaller than the stored value
+			if (CLIP_SIDES) {
+				front0 = clip0 ? depth.x < depthDataUpper[0] : false;
+				front1 = clip1 ? depth.y < depthDataUpper[1] : false;
+				front2 = clip2 ? depth.z < depthDataLower[0] : false;
+				front3 = clip3 ? depth.w < depthDataLower[1] : false;
+			} else {
+				front0 = depth.x < depthDataUpper[0];
+				front1 = depth.y < depthDataUpper[1];
+				front2 = depth.z < depthDataLower[0];
+				front3 = depth.w < depthDataLower[1];
+			}
+		} else { // Perspective depth passes when greater than the stored value, fillRowSuper gives the reciprocal depth on this path
+			if (CLIP_SIDES) {
+				front0 = clip0 ? depth.x > depthDataUpper[0] : false;
+				front1 = clip1 ? depth.y > depthDataUpper[1] : false;
+				front2 = clip2 ? depth.z > depthDataLower[0] : false;
+				front3 = clip3 ? depth.w > depthDataLower[1] : false;
+			} else {
+				front0 = depth.x > depthDataUpper[0];
+				front1 = depth.y > depthDataUpper[1];
+				front2 = depth.z > depthDataLower[0];
+				front3 = depth.w > depthDataLower[1];
+			}
+		}
+	} else { // Without DEPTH_READ, every pixel counts as being in front
+		front0 = true;
+		front1 = true;
+		front2 = true;
+		front3 = true;
+	}
+	// Decide visibility, a pixel must pass both the clipping and the depth test
+	vis0 = clip0 && front0;
+	vis1 = clip1 && front1;
+	vis2 = clip2 && front2;
+	vis3 = clip3 && front3;
+}
+
+template<bool CLIP_SIDES, bool COLOR_WRITE, bool DEPTH_READ, bool DEPTH_WRITE, Filter FILTER, bool AFFINE> // Shades one 2x2 quad of pixels and writes color and depth to the pixels that are visible.
+inline void fillQuadSuper(void *data, PixelShadingCallback pixelShaderFunction, int x, SafePointer<uint32_t> pixelDataUpper, SafePointer<uint32_t> pixelDataLower, SafePointer<float> depthDataUpper, SafePointer<float> depthDataLower, const RowInterval &upperRow, const RowInterval &lowerRow, const PackOrder &targetPackingOrder, const FVector4D &depth, const F32x4x3 &weights) {
+	// Get visibility for each pixel in the quad (0: upper left, 1: upper right, 2: lower left, 3: lower right)
+	bool vis0, vis1, vis2, vis3;
+	getVisibility<CLIP_SIDES, DEPTH_READ, AFFINE>(x, upperRow, lowerRow, depth, depthDataUpper, depthDataLower, vis0, vis1, vis2, vis3);
+	// Draw if something is visible, so that the shader is skipped for quads that are entirely hidden
+	if (vis0 || vis1 || vis2 || vis3) {
+		if (COLOR_WRITE) {
+			// Get the color
+			U32x4 packedColor(0u); // Allow uninitialized memory?
+			// Execute the shader once for the whole quad. NOTE(review): pixelShaderFunction is a std::function taken by value, so it is copied on every call - consider passing it by const reference.
+			Rgba_F32 planarSourceColor = pixelShaderFunction(data, weights);
+			// Apply alpha filtering
+			if (FILTER == Filter::Alpha) {
+				// Get opacity from the source color by scaling alpha with 1 / 255
+				F32x4 opacity = planarSourceColor.alpha * (1.0f / 255.0f);
+				// Read the packed colors for alpha blending
+				U32x4 packedTargetColor = clippedRead<CLIP_SIDES>(pixelDataUpper, pixelDataLower, vis0, vis1, vis2, vis3);
+				// Unpack the target color into planar RGBA format so that it can be mixed with the source color
+				Rgba_F32 planarTargetColor(packedTargetColor, targetPackingOrder);
+				// Blend linearly using floats
+				planarSourceColor = (planarSourceColor * opacity) + (planarTargetColor * (1.0f - opacity));
+			}
+			// Apply channel swapping while packing to bytes
+			packedColor = planarSourceColor.toSaturatedByte(targetPackingOrder);
+			// Write colors
+			clippedWrite(pixelDataUpper, pixelDataLower, vis0, vis1, vis2, vis3, packedColor);
+		}
+		// Write depth for visible pixels
+		if (DEPTH_WRITE) {
+			clippedWrite(depthDataUpper, depthDataLower, vis0, vis1, vis2, vis3, depth);
+		}
+	}
+}
+
+// CLIP_SIDES will use upperRow and lowerRow to clip pixels based on the x value. Only x values inside the ranges can be drawn.
+//   This is used along the triangle edges.
+// COLOR_WRITE can be disabled to skip writing to the color buffer. Usually when none is given.
+// DEPTH_READ can be disabled to draw without caring if there is something already closer in the depth buffer.
+// DEPTH_WRITE can be disabled to skip writing to the depth buffer so that it does not occlude following draw calls.
+// FILTER can be set to Filter::Alpha to use the output alpha as the opacity.
+template<bool CLIP_SIDES, bool COLOR_WRITE, bool DEPTH_READ, bool DEPTH_WRITE, Filter FILTER, bool AFFINE> // AFFINE treats pWeight as (linear depth, weight B, weight C), otherwise as (1 / W, U / W, V / W) to be divided by 1 / W for each quad.
+inline void fillRowSuper(void *data, PixelShadingCallback pixelShaderFunction, SafePointer<uint32_t> pixelDataUpper, SafePointer<uint32_t> pixelDataLower, SafePointer<float> depthDataUpper, SafePointer<float> depthDataLower, FVector3D pWeightUpper, FVector3D pWeightLower, const FVector3D &pWeightDx, int startX, int endX, const RowInterval &upperRow, const RowInterval &lowerRow, const PackOrder &targetPackingOrder) {
+	if (AFFINE) {
+		FVector3D dx2 = pWeightDx * 2.0f; // The step between two quads, which are two pixels wide
+		F32x4 vLinearDepth(pWeightUpper.x, pWeightUpper.x + pWeightDx.x, pWeightLower.x, pWeightLower.x + pWeightDx.x);
+		F32x4 weightB(pWeightUpper.y, pWeightUpper.y + pWeightDx.y, pWeightLower.y, pWeightLower.y + pWeightDx.y);
+		F32x4 weightC(pWeightUpper.z, pWeightUpper.z + pWeightDx.z, pWeightLower.z, pWeightLower.z + pWeightDx.z);
+		for (int x = startX; x < endX; x += 2) {
+			// Get the linear depth
+			FVector4D depth = vLinearDepth.get();
+			// Calculate the weight of the first vertex from the other two
+			F32x4 weightA = 1.0f - (weightB + weightC);
+			F32x4x3 weights(weightA, weightB, weightC);
+			fillQuadSuper<CLIP_SIDES, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>(data, pixelShaderFunction, x, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, upperRow, lowerRow, targetPackingOrder, depth, weights);
+			// Iterate projection
+			vLinearDepth = vLinearDepth + dx2.x;
+			weightB = weightB + dx2.y;
+			weightC = weightC + dx2.z;
+			// Iterate buffer pointers
+			pixelDataUpper += 2; pixelDataLower += 2;
+			depthDataUpper += 2; depthDataLower += 2;
+		}
+	} else {
+		FVector3D dx2 = pWeightDx * 2.0f; // The step between two quads, which are two pixels wide
+		F32x4 vRecDepth(pWeightUpper.x, pWeightUpper.x + pWeightDx.x, pWeightLower.x, pWeightLower.x + pWeightDx.x);
+		F32x4 vRecU(pWeightUpper.y, pWeightUpper.y + pWeightDx.y, pWeightLower.y, pWeightLower.y + pWeightDx.y);
+		F32x4 vRecV(pWeightUpper.z, pWeightUpper.z + pWeightDx.z, pWeightLower.z, pWeightLower.z + pWeightDx.z);
+		for (int x = startX; x < endX; x += 2) {
+			// Get the reciprocal depth, which is what gets compared against and written to the depth buffer on this path
+			FVector4D depth = vRecDepth.get();
+			// After linearly interpolating (1 / W, U / W, V / W) based on the affine weights...
+			// Divide 1 by 1 / W to get the linear depth W
+			F32x4 vLinearDepth = vRecDepth.reciprocal();
+			// Multiply the vertex weights to the second and third edges with the depth to compensate for that we divided them by depth before interpolating.
+			F32x4 weightB = vRecU * vLinearDepth;
+			F32x4 weightC = vRecV * vLinearDepth;
+			// Calculate the weight of the first vertex from the other two
+			F32x4 weightA = 1.0f - (weightB + weightC);
+			F32x4x3 weights(weightA, weightB, weightC);
+			fillQuadSuper<CLIP_SIDES, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>(data, pixelShaderFunction, x, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, upperRow, lowerRow, targetPackingOrder, depth, weights);
+			// Iterate projection
+			vRecDepth = vRecDepth + dx2.x;
+			vRecU = vRecU + dx2.y;
+			vRecV = vRecV + dx2.z;
+			// Iterate buffer pointers
+			pixelDataUpper += 2; pixelDataLower += 2;
+			depthDataUpper += 2; depthDataLower += 2;
+		}
+	}
+}
+
+template<bool COLOR_WRITE, bool DEPTH_READ, bool DEPTH_WRITE, Filter FILTER, bool AFFINE> // Fills the row intervals in shape, two rows at a time, by running fillRowSuper over 2x2 pixel quads.
+inline void fillShapeSuper(void *data, PixelShadingCallback pixelShaderFunction, ImageRgbaU8Impl *colorBuffer, ImageF32Impl *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape) {
+	// Prepare constants
+	const int targetStride = imageInternal::getStride(colorBuffer);
+	const int depthBufferStride = imageInternal::getStride(depthBuffer);
+	const FVector3D doublePWeightDx = projection.pWeightDx * 2.0f;
+	const int colorRowSize = imageInternal::getRowSize(colorBuffer);
+	const int depthRowSize = imageInternal::getRowSize(depthBuffer);
+	const PackOrder& targetPackingOrder = imageInternal::getPackOrder(colorBuffer);
+	const int colorHeight = imageInternal::getHeight(colorBuffer);
+	const int depthHeight = imageInternal::getHeight(depthBuffer);
+	const int maxHeight = colorHeight > depthHeight ? colorHeight : depthHeight;
+
+	// Initialize row pointers for color buffer
+	SafePointer<uint32_t> pixelDataUpper, pixelDataLower, pixelDataUpperRow, pixelDataLowerRow;
+	if (COLOR_WRITE) {
+		SafePointer<uint32_t> targetData = imageInternal::getSafeData<uint32_t>(colorBuffer);
+		pixelDataUpperRow = targetData;
+		pixelDataUpperRow.increaseBytes(shape.startRow * targetStride);
+		pixelDataLowerRow = targetData;
+		pixelDataLowerRow.increaseBytes((shape.startRow + 1) * targetStride);
+	} else {
+		pixelDataUpperRow = SafePointer<uint32_t>();
+		pixelDataLowerRow = SafePointer<uint32_t>();
+	}
+
+	// Initialize row pointers for depth buffer
+	SafePointer<float> depthDataUpper, depthDataLower, depthDataUpperRow, depthDataLowerRow;
+	if (DEPTH_READ || DEPTH_WRITE) {
+		SafePointer<float> depthBufferData = imageInternal::getSafeData<float>(depthBuffer);
+		depthDataUpperRow = depthBufferData;
+		depthDataUpperRow.increaseBytes(shape.startRow * depthBufferStride);
+		depthDataLowerRow = depthBufferData;
+		depthDataLowerRow.increaseBytes((shape.startRow + 1) * depthBufferStride);
+	} else {
+		depthDataUpperRow = SafePointer<float>();
+		depthDataLowerRow = SafePointer<float>();
+	}
+	for (int32_t y1 = shape.startRow; y1 < shape.startRow + shape.rowCount; y1 += 2) {
+		int y2 = y1 + 1;
+		RowInterval upperRow = shape.rows[y1 - shape.startRow];
+		RowInterval lowerRow = shape.rows[y2 - shape.startRow]; // Assumes that rows has an entry for y2 even when rowCount is odd - TODO confirm
+		int outerStart = min(upperRow.left, lowerRow.left);
+		int outerEnd = max(upperRow.right, lowerRow.right);
+		int innerStart = max(upperRow.left, lowerRow.left);
+		int innerEnd = min(upperRow.right, lowerRow.right);
+		// Round exclusive intervals to multiples of two pixels
+		int outerBlockStart = roundDownEven(outerStart);
+		int outerBlockEnd = roundUpEven(outerEnd);
+		int innerBlockStart = roundUpEven(innerStart);
+		int innerBlockEnd = roundDownEven(innerEnd);
+		// Clip the lower row to an empty interval if it is outside of the tallest buffer on odd height. NOTE(review): the block bounds above were computed from the unclipped lowerRow - confirm that shape cannot hold a non-empty interval for a row outside of the image.
+		if (y2 >= maxHeight) {
+			lowerRow.right = lowerRow.left;
+		}
+		// Avoid reading outside of the given bound
+		bool hasTop = upperRow.right > upperRow.left;
+		bool hasBottom = lowerRow.right > lowerRow.left;
+		if (hasTop || hasBottom) {
+			// Initialize pointers
+			if (COLOR_WRITE) {
+				if (hasTop) {
+					pixelDataUpper = pixelDataUpperRow.slice("pixelDataUpper", 0, colorRowSize);
+				} else {
+					// Repeat the lower row to avoid reading outside
+					pixelDataUpper = pixelDataLowerRow.slice("pixelDataUpper (from lower)", 0, colorRowSize);
+				}
+				if (hasBottom) {
+					pixelDataLower = pixelDataLowerRow.slice("pixelDataLower", 0, colorRowSize);
+				} else {
+					// Repeat the upper row to avoid reading outside
+					pixelDataLower = pixelDataUpperRow.slice("pixelDataLower (from upper)", 0, colorRowSize);
+				}
+				int startColorOffset = outerBlockStart * sizeof(uint32_t);
+				pixelDataUpper.increaseBytes(startColorOffset);
+				pixelDataLower.increaseBytes(startColorOffset);
+			}
+			if (DEPTH_READ || DEPTH_WRITE) {
+				if (hasTop) {
+					depthDataUpper = depthDataUpperRow.slice("depthDataUpper", 0, depthRowSize);
+				} else {
+					// Repeat the lower row to avoid reading outside
+					depthDataUpper = depthDataLowerRow.slice("depthDataUpper (from lower)", 0, depthRowSize);
+				}
+				if (hasBottom) {
+					depthDataLower = depthDataLowerRow.slice("depthDataLower", 0, depthRowSize);
+				} else {
+					// Repeat the upper row to avoid reading outside
+					depthDataLower = depthDataUpperRow.slice("depthDataLower (from upper)", 0, depthRowSize);
+				}
+				depthDataUpper += outerBlockStart;
+				depthDataLower += outerBlockStart;
+			} else {
+				depthDataUpper = SafePointer<float>();
+				depthDataLower = SafePointer<float>();
+			}
+			// Initialize projection at the upper left pixel of the first quad, the lower row is one pWeightDy step further
+			FVector3D pWeightUpperRow;
+			if (AFFINE) {
+				pWeightUpperRow = projection.getWeight_affine(IVector2D(outerBlockStart, y1));
+			} else {
+				pWeightUpperRow = projection.getDepthDividedWeight_perspective(IVector2D(outerBlockStart, y1));
+			}
+			FVector3D pWeightUpper = pWeightUpperRow;
+			FVector3D pWeightLowerRow = pWeightUpperRow + projection.pWeightDy;
+			FVector3D pWeightLower = pWeightLowerRow;
+			// Render the pixels
+			if (innerBlockEnd <= innerBlockStart) {
+				// Clipped from left and right, so every quad is drawn with side clipping
+				for (int32_t x = outerBlockStart; x < outerBlockEnd; x += 2) {
+					fillRowSuper<true, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>
+					  (data, pixelShaderFunction, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, pWeightUpper, pWeightLower, projection.pWeightDx, x, x + 2, upperRow, lowerRow, targetPackingOrder);
+					if (COLOR_WRITE) { pixelDataUpper += 2; pixelDataLower += 2; }
+					if (DEPTH_READ || DEPTH_WRITE) { depthDataUpper += 2; depthDataLower += 2; }
+					pWeightUpper = pWeightUpper + doublePWeightDx; pWeightLower = pWeightLower + doublePWeightDx;
+				}
+			} else {
+				// Left edge
+				for (int32_t x = outerBlockStart; x < innerBlockStart; x += 2) {
+					fillRowSuper<true, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>
+					  (data, pixelShaderFunction, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, pWeightUpper, pWeightLower, projection.pWeightDx, x, x + 2, upperRow, lowerRow, targetPackingOrder);
+					if (COLOR_WRITE) { pixelDataUpper += 2; pixelDataLower += 2; }
+					if (DEPTH_READ || DEPTH_WRITE) { depthDataUpper += 2; depthDataLower += 2; }
+					pWeightUpper = pWeightUpper + doublePWeightDx; pWeightLower = pWeightLower + doublePWeightDx;
+				}
+				// Full quads drawn in a single call without side clipping
+				int width = innerBlockEnd - innerBlockStart;
+				int quadCount = width / 2;
+				fillRowSuper<false, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>
+				  (data, pixelShaderFunction, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, pWeightUpper, pWeightLower, projection.pWeightDx, innerBlockStart, innerBlockEnd, RowInterval(), RowInterval(), targetPackingOrder);
+				if (COLOR_WRITE) { pixelDataUpper += 2 * quadCount; pixelDataLower += 2 * quadCount; }
+				if (DEPTH_READ || DEPTH_WRITE) { depthDataUpper += 2 * quadCount; depthDataLower += 2 * quadCount; }
+				pWeightUpper = pWeightUpper + (doublePWeightDx * quadCount); pWeightLower = pWeightLower + (doublePWeightDx * quadCount);
+				// Right edge
+				for (int32_t x = innerBlockEnd; x < outerBlockEnd; x += 2) {
+					fillRowSuper<true, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>
+					  (data, pixelShaderFunction, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, pWeightUpper, pWeightLower, projection.pWeightDx, x, x + 2, upperRow, lowerRow, targetPackingOrder);
+					if (COLOR_WRITE) { pixelDataUpper += 2; pixelDataLower += 2; }
+					if (DEPTH_READ || DEPTH_WRITE) { depthDataUpper += 2; depthDataLower += 2; }
+					pWeightUpper = pWeightUpper + doublePWeightDx; pWeightLower = pWeightLower + doublePWeightDx;
+				}
+			}
+		}
+		// Iterate to the next pair of rows
+		if (COLOR_WRITE) {
+			pixelDataUpperRow.increaseBytes(targetStride * 2);
+			pixelDataLowerRow.increaseBytes(targetStride * 2);
+		}
+		if (DEPTH_READ || DEPTH_WRITE) {
+			depthDataUpperRow.increaseBytes(depthBufferStride * 2);
+			depthDataLowerRow.increaseBytes(depthBufferStride * 2);
+		}
+	}
+}
+
+inline void fillShape(void *data, PixelShadingCallback pixelShaderFunction, ImageRgbaU8Impl *colorBuffer, ImageF32Impl *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape, Filter filter) { // Selects the fillShapeSuper permutation from projection.affine, which buffers are given and the filter.
+	bool hasColorBuffer = colorBuffer != nullptr;
+	bool hasDepthBuffer = depthBuffer != nullptr;
+	if (projection.affine) {
+		if (hasDepthBuffer) {
+			if (hasColorBuffer) {
+				if (filter != Filter::Solid) { // Any filter other than Solid is drawn as Alpha
+					// Alpha filtering with read only depth buffer
+					fillShapeSuper<true, true, false, Filter::Alpha, true>(data, pixelShaderFunction, colorBuffer, depthBuffer, triangle, projection, shape);
+				} else {
+					// Solid with depth buffer
+					fillShapeSuper<true, true, true, Filter::Solid, true>(data, pixelShaderFunction, colorBuffer, depthBuffer, triangle, projection, shape);
+				}
+			} else {
+				// Solid depth
+				// TODO: Use for orthogonal depth based shadows
+				fillShapeSuper<false, true, true, Filter::Solid, true>(data, pixelShaderFunction, nullptr, depthBuffer, triangle, projection, shape);
+			}
+		} else {
+			if (hasColorBuffer) { // Nothing is drawn when neither buffer is given
+				if (filter != Filter::Solid) {
+					// Alpha filtering without depth buffer
+					fillShapeSuper<true, false, false, Filter::Alpha, true>(data, pixelShaderFunction, colorBuffer, nullptr, triangle, projection, shape);
+				} else {
+					// Solid without depth buffer
+					fillShapeSuper<true, false, false, Filter::Solid, true>(data, pixelShaderFunction, colorBuffer, nullptr, triangle, projection, shape);
+				}
+			}
+		}
+	} else {
+		if (hasDepthBuffer) {
+			if (hasColorBuffer) {
+				if (filter != Filter::Solid) { // Any filter other than Solid is drawn as Alpha
+					// Alpha filtering with read only depth buffer
+					fillShapeSuper<true, true, false, Filter::Alpha, false>(data, pixelShaderFunction, colorBuffer, depthBuffer, triangle, projection, shape);
+				} else {
+					// Solid with depth buffer
+					fillShapeSuper<true, true, true, Filter::Solid, false>(data, pixelShaderFunction, colorBuffer, depthBuffer, triangle, projection, shape);
+				}
+			} else {
+				// Solid depth
+				// TODO: Use for depth based shadows with perspective projection
+				fillShapeSuper<false, true, true, Filter::Solid, false>(data, pixelShaderFunction, nullptr, depthBuffer, triangle, projection, shape);
+			}
+		} else {
+			if (hasColorBuffer) { // Nothing is drawn when neither buffer is given
+				if (filter != Filter::Solid) {
+					// Alpha filtering without depth buffer
+					fillShapeSuper<true, false, false, Filter::Alpha, false>(data, pixelShaderFunction, colorBuffer, nullptr, triangle, projection, shape);
+				} else {
+					// Solid without depth buffer
+					fillShapeSuper<true, false, false, Filter::Solid, false>(data, pixelShaderFunction, colorBuffer, nullptr, triangle, projection, shape);
+				}
+			}
+		}
+	}
+}
+
+}
+
+#endif