2 anni fa · d901a326eb
--- a/Source/DFPSR/render/renderCore.cpp
+++ b/Source/DFPSR/render/renderCore.cpp
@@ -30,10 +30,6 @@
 
				 

			
 
				 using namespace dsr;

			
 
				 

			
 
				-//#define DISABLE_VERTEX_COLOR

			
 
				-//#define DISABLE_DIFFUSE_MAP

			
 
				-//#define DISABLE_LIGHT_MAP

			
 
				-

			
 
				 class SubVertex {

			
 
				 public:

			
 
				 	FVector3D cs; // Camera space position based on the weights

			
@@ -201,26 +197,6 @@ Visibility dsr::getTriangleVisibility(const ITriangle2D &triangle, const Camera
 
				 	return Visibility::Full;

			
 
				 }

			
 
				 

			
 
				-static bool almostZero(float value) {

			
 
				-	return value > -0.001f && value < 0.001f;

			
 
				-}

			
 
				-

			
 
				-static bool almostZero(const FVector3D &channel) {

			
 
				-	return almostZero(channel.x) && almostZero(channel.y) && almostZero(channel.z);

			
 
				-}

			
 
				-

			
 
				-static bool almostOne(float value) {

			
 
				-	return value > 0.999f && value < 1.001f;

			
 
				-}

			
 
				-

			
 
				-static bool almostOne(const FVector3D &channel) {

			
 
				-	return almostOne(channel.x) && almostOne(channel.y) && almostOne(channel.z);

			
 
				-}

			
 
				-

			
 
				-static bool almostSame(const FVector3D &channel) {

			
 
				-	return almostZero(channel.x - channel.y) && almostZero(channel.x - channel.z) && almostZero(channel.y - channel.z);

			
 
				-}

			
 
				-

			
 
				 static const int alignX = 2;

			
 
				 static const int alignY = 2;

			
 
				 

			
@@ -338,81 +314,9 @@ void dsr::renderTriangleFromData(
 
				 	// Only draw visible triangles

			
 
				 	Visibility visibility = getTriangleVisibility(triangle, camera, false);

			
 
				 	if (visibility != Visibility::Hidden) {

			
 
				-		// Disable features when debugging

			
 
				-		#ifdef DISABLE_VERTEX_COLOR

			
 
				-			colors = TriangleColors(1.0f);

			
 
				-		#endif

			
 
				-		#ifdef DISABLE_DIFFUSE_MAP

			
 
				-			diffuse = nullptr;

			
 
				-		#endif

			
 
				-		#ifdef DISABLE_LIGHT_MAP

			
 
				-			light = nullptr;

			
 
				-		#endif

			
 
				 		// Select an instance of the default shader

			
 
				 		if (!(filter == Filter::Alpha && almostZero(colors.alpha))) {

			
 
				-			bool hasVertexFade = !(almostSame(colors.red) && almostSame(colors.green) && almostSame(colors.blue) && almostSame(colors.alpha));

			
 
				-			bool colorless = almostOne(colors.red) && almostOne(colors.green) && almostOne(colors.blue) && almostOne(colors.alpha);

			
 
				-			// Get the function pointer to the correct shader

			
 
				-			DRAW_CALLBACK_TYPE drawTask = &drawCallbackTemplate;

			
 
				-			if (diffuse) {

			
 
				-				bool hasDiffusePyramid = diffuse->texture.hasMipBuffer();

			
 
				-				if (light) {

			
 
				-					if (hasVertexFade) { // DiffuseLightVertex

			
 
				-						if (hasDiffusePyramid) { // With mipmap

			
 
				-							drawTask = &(Shader_RgbaMultiply<true, true, true, false, false>::processTriangle);

			
 
				-						} else { // Without mipmap

			
 
				-							drawTask = &(Shader_RgbaMultiply<true, true, true, false, true>::processTriangle);

			
 
				-						}

			
 
				-					} else { // DiffuseLight

			
 
				-						if (hasDiffusePyramid) { // With mipmap

			
 
				-							drawTask = &(Shader_RgbaMultiply<true, true, false, false, false>::processTriangle);

			
 
				-						} else { // Without mipmap

			
 
				-							drawTask = &(Shader_RgbaMultiply<true, true, false, false, true>::processTriangle);

			
 
				-						}

			
 
				-					}

			
 
				-				} else {

			
 
				-					if (hasVertexFade) { // DiffuseVertex

			
 
				-						if (hasDiffusePyramid) { // With mipmap

			
 
				-							drawTask = &(Shader_RgbaMultiply<true, false, true, false, false>::processTriangle);

			
 
				-						} else { // Without mipmap

			
 
				-							drawTask = &(Shader_RgbaMultiply<true, false, true, false, true>::processTriangle);

			
 
				-						}

			
 
				-					} else {

			
 
				-						if (colorless) { // Diffuse without normalization

			
 
				-							if (hasDiffusePyramid) { // With mipmap

			
 
				-								drawTask = &(Shader_RgbaMultiply<true, false, false, true, false>::processTriangle);

			
 
				-							} else { // Without mipmap

			
 
				-								drawTask = &(Shader_RgbaMultiply<true, false, false, true, true>::processTriangle);

			
 
				-							}

			
 
				-						} else { // Diffuse

			
 
				-							if (hasDiffusePyramid) { // With mipmap

			
 
				-								drawTask = &(Shader_RgbaMultiply<true, false, false, false, false>::processTriangle);

			
 
				-							} else { // Without mipmap

			
 
				-								drawTask = &(Shader_RgbaMultiply<true, false, false, false, true>::processTriangle);

			
 
				-							}

			
 
				-						}

			
 
				-					}

			
 
				-				}

			
 
				-			} else {

			
 
				-				if (light) {

			
 
				-					if (hasVertexFade) { // LightVertex

			
 
				-						drawTask = &(Shader_RgbaMultiply<false, true, true, false, false>::processTriangle);

			
 
				-					} else {

			
 
				-						if (colorless) { // Light without normalization

			
 
				-							drawTask = &(Shader_RgbaMultiply<false, true, false, true, false>::processTriangle);

			
 
				-						} else { // Light

			
 
				-							drawTask = &(Shader_RgbaMultiply<false, true, false, false, false>::processTriangle);

			
 
				-						}

			
 
				-					}

			
 
				-				} else {

			
 
				-					if (hasVertexFade) { // Vertex

			
 
				-						drawTask = &(Shader_RgbaMultiply<false, false, true, false, false>::processTriangle);

			
 
				-					} else { // Single color

			
 
				-						drawTask = &(Shader_RgbaMultiply<false, false, false, false, false>::processTriangle);

			
 
				-					}

			
 
				-				}

			
 
				-			}

			
 
				-			renderTriangleWithShader(commandQueue, TriangleDrawData(targetImage, depthBuffer, camera.perspective, filter, TriangleInput(diffuse, light, texCoords, colors), drawTask), camera, triangle, clipBound);

			
 
				+			renderTriangleWithShader(commandQueue, TriangleDrawData(targetImage, depthBuffer, camera.perspective, filter, TriangleInput(diffuse, light, texCoords, colors), &processTriangle_RgbaMultiply), camera, triangle, clipBound);

			
 
				 		}

			
 
				 	}

			
 
				 }

			
--- a/Source/DFPSR/render/shader/RgbaMultiply.h
+++ b/Source/DFPSR/render/shader/RgbaMultiply.h
@@ -29,13 +29,12 @@
 
				 #include <cassert>

			
 
				 #include <algorithm>

			
 
				 #include "Shader.h"

			
 
				+#include "fillerTemplates.h"

			
 
				 #include "../../image/ImageRgbaU8.h"

			
 
				 

			
 
				 namespace dsr {

			
 
				 

			
 
				-template <bool HAS_DIFFUSE_MAP, bool HAS_LIGHT_MAP, bool HAS_VERTEX_FADING, bool COLORLESS, bool DISABLE_MIPMAP>

			
 
				-class Shader_RgbaMultiply : public Shader {

			
 
				-private:

			
 
				+struct RgbaMultiply_data {

			
 
				 	const TextureRgba *diffuseMap; // Mip-mapping is allowed for diffuse textures.

			
 
				 	const TextureRgba *lightMap; // Mip-mapping is not allowed for lightmaps, because it would increase the number of shaders to compile and still look worse.

			
 
				 	// Planar format with each vector representing the three triangle corners

			
@@ -44,67 +43,128 @@ private:
 
				 	// Normalize the color product by pre-multiplying the vertex colors

			
 
				 	float getVertexScale() {

			
 
				 		float result = 255.0f; // Scale from normalized to byte for the output

			
 
				-		if (HAS_DIFFUSE_MAP) {

			
 
				+		if (this->diffuseMap) {

			
 
				 			result *= 1.0f / 255.0f; // Normalize the diffuse map from 0..255 to 0..1 by dividing the vertex color

			
 
				 		}

			
 
				-		if (HAS_LIGHT_MAP) {

			
 
				+		if (this->lightMap) {

			
 
				 			result *= 1.0f / 255.0f; // Normalize the light map from 0..255 to 0..1 by dividing the vertex color

			
 
				 		}

			
 
				 		return result;

			
 
				 	}

			
 
				-	explicit Shader_RgbaMultiply(const TriangleInput &triangleInput) :

			
 
				+	explicit RgbaMultiply_data(const TriangleInput &triangleInput) :

			
 
				 	  diffuseMap(triangleInput.diffuseImage ? &(triangleInput.diffuseImage->texture) : nullptr),

			
 
				 	  lightMap(triangleInput.lightImage ? &(triangleInput.lightImage->texture) : nullptr),

			
 
				 	  texCoords(triangleInput.texCoords), colors(triangleInput.colors.getScaled(getVertexScale())) {

			
 
				 		// Texture coordinates must be on the positive side to allow using truncation as a floor function

			
 
				-		if (HAS_DIFFUSE_MAP) {

			
 
				+		if (this->diffuseMap) {

			
 
				 			assert(this->diffuseMap != nullptr); // Cannot sample null

			
 
				 			assert(this->diffuseMap->exists()); // Cannot sample regular images

			
 
				 		}

			
 
				-		if (HAS_LIGHT_MAP) {

			
 
				+		if (this->lightMap) {

			
 
				 			assert(this->lightMap != nullptr); // Cannot sample null

			
 
				 			assert(this->lightMap->exists()); // Cannot sample regular images

			
 
				 		}

			
 
				 	}

			
 
				-public:

			
 
				-	// The process method to take a function pointer to.

			
 
				-	//    Must have the same signature as drawCallbackTemplate in Shader.h.

			
 
				-	static void processTriangle(const TriangleInput &triangleInput, ImageRgbaU8Impl *colorBuffer, ImageF32Impl *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape, Filter filter) {

			
 
				-		Shader_RgbaMultiply tempShader(triangleInput);

			
 
				-		tempShader.fillShape(colorBuffer, depthBuffer, triangle, projection, shape, filter);

			
 
				+};

			
 
				+

			
 
				+template <bool HAS_DIFFUSE_MAP, bool HAS_LIGHT_MAP, bool HAS_VERTEX_FADING, bool COLORLESS, bool DISABLE_MIPMAP>

			
 
				+static Rgba_F32 getPixels_2x2(void *data, const F32x4x3 &vertexWeights) {

			
 
				+	if (HAS_DIFFUSE_MAP && !HAS_LIGHT_MAP && COLORLESS) {

			
 
				+		// Optimized for diffuse only

			
 
				+		F32x4 u1 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.u1, vertexWeights);

			
 
				+		F32x4 v1 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.v1, vertexWeights);

			
 
				+		return shaderMethods::sample_F32<Interpolation::BL, DISABLE_MIPMAP, false>(((RgbaMultiply_data*)data)->diffuseMap, u1, v1);

			
 
				+	} else if (HAS_LIGHT_MAP && !HAS_DIFFUSE_MAP && COLORLESS) {

			
 
				+		// Optimized for light only

			
 
				+		F32x4 u2 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.u2, vertexWeights);

			
 
				+		F32x4 v2 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.v2, vertexWeights);

			
 
				+		return shaderMethods::sample_F32<Interpolation::BL, true, false>(((RgbaMultiply_data*)data)->lightMap, u2, v2);

			
 
				+	} else {

			
 
				+		// Interpolate the vertex color

			
 
				+		Rgba_F32 color = HAS_VERTEX_FADING ?

			
 
				+		  shaderMethods::interpolateVertexColor(((RgbaMultiply_data*)data)->colors.red, ((RgbaMultiply_data*)data)->colors.green, ((RgbaMultiply_data*)data)->colors.blue, ((RgbaMultiply_data*)data)->colors.alpha, vertexWeights) :

			
 
				+		  Rgba_F32(F32x4(((RgbaMultiply_data*)data)->colors.red.x), F32x4(((RgbaMultiply_data*)data)->colors.green.x), F32x4(((RgbaMultiply_data*)data)->colors.blue.x), F32x4(((RgbaMultiply_data*)data)->colors.alpha.x));

			
 
				+		// Sample diffuse

			
 
				+		if (HAS_DIFFUSE_MAP) {

			
 
				+			F32x4 u1 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.u1, vertexWeights);

			
 
				+			F32x4 v1 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.v1, vertexWeights);

			
 
				+			color = color * shaderMethods::sample_F32<Interpolation::BL, DISABLE_MIPMAP, false>(((RgbaMultiply_data*)data)->diffuseMap, u1, v1);

			
 
				+		}

			
 
				+		// Sample lightmap

			
 
				+		if (HAS_LIGHT_MAP) {

			
 
				+			F32x4 u2 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.u2, vertexWeights);

			
 
				+			F32x4 v2 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.v2, vertexWeights);

			
 
				+			color = color * shaderMethods::sample_F32<Interpolation::BL, true, false>(((RgbaMultiply_data*)data)->lightMap, u2, v2);

			
 
				+		}

			
 
				+		return color;

			
 
				 	}

			
 
				-	Rgba_F32 getPixels_2x2(const F32x4x3 &vertexWeights) const override {

			
 
				-		if (HAS_DIFFUSE_MAP && !HAS_LIGHT_MAP && COLORLESS) {

			
 
				-			// Optimized for diffuse only

			
 
				-			F32x4 u1(shaderMethods::interpolate(this->texCoords.u1, vertexWeights));

			
 
				-			F32x4 v1(shaderMethods::interpolate(this->texCoords.v1, vertexWeights));

			
 
				-			return shaderMethods::sample_F32<Interpolation::BL, DISABLE_MIPMAP, false>(this->diffuseMap, u1, v1);

			
 
				-		} else if (HAS_LIGHT_MAP && !HAS_DIFFUSE_MAP && COLORLESS) {

			
 
				-			// Optimized for light only

			
 
				-			F32x4 u2(shaderMethods::interpolate(this->texCoords.u2, vertexWeights));

			
 
				-			F32x4 v2(shaderMethods::interpolate(this->texCoords.v2, vertexWeights));

			
 
				-			return shaderMethods::sample_F32<Interpolation::BL, true, false>(this->lightMap, u2, v2);

			
 
				+}

			
 
				+

			
 
				+// The process method to take a function pointer to.

			
 
				+//    Must have the same signature as drawCallbackTemplate in Shader.h.

			
 
				+static void processTriangle_RgbaMultiply(const TriangleInput &triangleInput, ImageRgbaU8Impl *colorBuffer, ImageF32Impl *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape, Filter filter) {

			
 
				+	RgbaMultiply_data data = RgbaMultiply_data(triangleInput);

			
 
				+	bool hasVertexFade = !(almostSame(triangleInput.colors.red) && almostSame(triangleInput.colors.green) && almostSame(triangleInput.colors.blue) && almostSame(triangleInput.colors.alpha)); // Classify the unscaled input colors, because data.colors is pre-multiplied by getVertexScale() and would distort the fixed tolerances.

			
 
				+	bool colorless = almostOne(triangleInput.colors.red) && almostOne(triangleInput.colors.green) && almostOne(triangleInput.colors.blue) && almostOne(triangleInput.colors.alpha); // Compared against 1.0 in the same unscaled color space as hasVertexFade.

			
 
				+	if (data.diffuseMap) {

			
 
				+		bool hasDiffusePyramid = data.diffuseMap->hasMipBuffer();

			
 
				+		if (data.lightMap) {

			
 
				+			if (hasVertexFade) { // DiffuseLightVertex

			
 
				+				if (hasDiffusePyramid) { // With mipmap

			
 
				+					fillShape(&data, getPixels_2x2<true, true, true, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);

			
 
				+				} else { // Without mipmap

			
 
				+					fillShape(&data, getPixels_2x2<true, true, true, false, true>, colorBuffer, depthBuffer, triangle, projection, shape, filter);

			
 
				+				}

			
 
				+			} else { // DiffuseLight

			
 
				+				if (hasDiffusePyramid) { // With mipmap

			
 
				+					fillShape(&data, getPixels_2x2<true, true, false, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);

			
 
				+				} else { // Without mipmap

			
 
				+					fillShape(&data, getPixels_2x2<true, true, false, false, true>, colorBuffer, depthBuffer, triangle, projection, shape, filter);

			
 
				+				}

			
 
				+			}

			
 
				 		} else {

			
 
				-			// Interpolate the vertex color

			
 
				-			Rgba_F32 color = HAS_VERTEX_FADING ?

			
 
				-			  shaderMethods::interpolateVertexColor(this->colors.red, this->colors.green, this->colors.blue, this->colors.alpha, vertexWeights) :

			
 
				-			  Rgba_F32(F32x4(this->colors.red.x), F32x4(this->colors.green.x), F32x4(this->colors.blue.x), F32x4(this->colors.alpha.x));

			
 
				-			// Sample diffuse

			
 
				-			if (HAS_DIFFUSE_MAP) {

			
 
				-				F32x4 u1(shaderMethods::interpolate(this->texCoords.u1, vertexWeights));

			
 
				-				F32x4 v1(shaderMethods::interpolate(this->texCoords.v1, vertexWeights));

			
 
				-				color = color * shaderMethods::sample_F32<Interpolation::BL, DISABLE_MIPMAP, false>(this->diffuseMap, u1, v1);

			
 
				+			if (hasVertexFade) { // DiffuseVertex

			
 
				+				if (hasDiffusePyramid) { // With mipmap

			
 
				+					fillShape(&data, getPixels_2x2<true, false, true, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);

			
 
				+				} else { // Without mipmap

			
 
				+					fillShape(&data, getPixels_2x2<true, false, true, false, true>, colorBuffer, depthBuffer, triangle, projection, shape, filter);

			
 
				+				}

			
 
				+			} else {

			
 
				+				if (colorless) { // Diffuse without normalization

			
 
				+					if (hasDiffusePyramid) { // With mipmap

			
 
				+						fillShape(&data, getPixels_2x2<true, false, false, true, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);

			
 
				+					} else { // Without mipmap

			
 
				+					fillShape(&data, getPixels_2x2<true, false, false, true, true>, colorBuffer, depthBuffer, triangle, projection, shape, filter);

			
 
				+					}

			
 
				+				} else { // Diffuse

			
 
				+					if (hasDiffusePyramid) { // With mipmap

			
 
				+						fillShape(&data, getPixels_2x2<true, false, false, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);

			
 
				+					} else { // Without mipmap

			
 
				+						fillShape(&data, getPixels_2x2<true, false, false, false, true>, colorBuffer, depthBuffer, triangle, projection, shape, filter);

			
 
				+					}

			
 
				+				}

			
 
				 			}

			
 
				-			// Sample lightmap

			
 
				-			if (HAS_LIGHT_MAP) {

			
 
				-				F32x4 u2(shaderMethods::interpolate(this->texCoords.u2, vertexWeights));

			
 
				-				F32x4 v2(shaderMethods::interpolate(this->texCoords.v2, vertexWeights));

			
 
				-				color = color * shaderMethods::sample_F32<Interpolation::BL, true, false>(this->lightMap, u2, v2);

			
 
				+		}

			
 
				+	} else {

			
 
				+		if (data.lightMap) {

			
 
				+			if (hasVertexFade) { // LightVertex

			
 
				+				fillShape(&data, getPixels_2x2<false, true, true, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);

			
 
				+			} else {

			
 
				+				if (colorless) { // Light without normalization

			
 
				+					fillShape(&data, getPixels_2x2<false, true, false, true, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);

			
 
				+				} else { // Light

			
 
				+					fillShape(&data, getPixels_2x2<false, true, false, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);

			
 
				+				}

			
 
				+			}

			
 
				+		} else {

			
 
				+			if (hasVertexFade) { // Vertex

			
 
				+				fillShape(&data, getPixels_2x2<false, false, true, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);

			
 
				+			} else { // Single color

			
 
				+				fillShape(&data, getPixels_2x2<false, false, false, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);

			
 
				 			}

			
 
				-			return color;

			
 
				 		}

			
 
				 	}

			
 
				-};

			
 
				+}

			
 
				 

			
 
				 }

			
 
				 

			
--- a/Source/DFPSR/render/shader/Shader.h
+++ b/Source/DFPSR/render/shader/Shader.h
@@ -1,6 +1,6 @@
 
				 // zlib open source license

			
 
				 //

			
 
				-// Copyright (c) 2017 to 2019 David Forsgren Piuva

			
 
				+// Copyright (c) 2017 to 2023 David Forsgren Piuva

			
 
				 // 

			
 
				 // This software is provided 'as-is', without any express or implied

			
 
				 // warranty. In no event will the authors be held liable for any damages

			
@@ -66,15 +66,7 @@ struct TriangleInput {
 
				 

			
 
				 // The template for function pointers doing the work

			
 
				 inline void drawCallbackTemplate(const TriangleInput &triangleInput, ImageRgbaU8Impl *colorBuffer, ImageF32Impl *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape, Filter filter) {}

			
 
				-#define DRAW_CALLBACK_TYPE decltype(&drawCallbackTemplate)

			
 
				-

			
 
				-// Inherit this class for pixel shaders

			
 
				-class Shader {

			
 
				-public:

			
 
				-	void fillShape(ImageRgbaU8Impl *colorBuffer, ImageF32Impl *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape, Filter filter);

			
 
				-	// The main call that defines the pixel shader

			
 
				-	virtual Rgba_F32 getPixels_2x2(const F32x4x3 &vertexWeights) const = 0;

			
 
				-};

			
 
				+using DRAW_CALLBACK_TYPE = decltype(&drawCallbackTemplate);

			
 
				 

			
 
				 }

			
 
				 

			
--- a/Source/DFPSR/render/shader/fillerTemplates.h
+++ b/Source/DFPSR/render/shader/fillerTemplates.h
@@ -1,434 +1,463 @@
 
				-// zlib open source license

			
 
				-//

			
 
				-// Copyright (c) 2017 to 2019 David Forsgren Piuva

			
 
				-// 

			
 
				-// This software is provided 'as-is', without any express or implied

			
 
				-// warranty. In no event will the authors be held liable for any damages

			
 
				-// arising from the use of this software.

			
 
				-// 

			
 
				-// Permission is granted to anyone to use this software for any purpose,

			
 
				-// including commercial applications, and to alter it and redistribute it

			
 
				-// freely, subject to the following restrictions:

			
 
				-// 

			
 
				-//    1. The origin of this software must not be misrepresented; you must not

			
 
				-//    claim that you wrote the original software. If you use this software

			
 
				-//    in a product, an acknowledgment in the product documentation would be

			
 
				-//    appreciated but is not required.

			
 
				-// 

			
 
				-//    2. Altered source versions must be plainly marked as such, and must not be

			
 
				-//    misrepresented as being the original software.

			
 
				-// 

			
 
				-//    3. This notice may not be removed or altered from any source

			
 
				-//    distribution.

			
 
				-

			
 
				-#include "Shader.h"

			
 
				-#include <stdio.h>

			
 
				-#include <algorithm>

			
 
				-#include "../../image/internal/imageInternal.h"

			
 
				-#include "../../image/ImageRgbaU8.h"

			
 
				-#include "../../image/ImageF32.h"

			
 
				-

			
 
				-using namespace dsr;

			
 
				-

			
 
				-inline static const uint32_t roundUpEven(uint32_t x) {

			
 
				-	return (x + 1u) & ~1u;

			
 
				-}

			
 
				-

			
 
				-inline static const uint32_t roundDownEven(uint32_t x) {

			
 
				-	return x & ~1u;

			
 
				-}

			
 
				-

			
 
				-template<bool CLIP_SIDES>

			
 
				-static inline U32x4 clippedRead(SafePointer<uint32_t> upperLeft, SafePointer<uint32_t> lowerLeft, bool vis0, bool vis1, bool vis2, bool vis3) {

			
 
				-	if (CLIP_SIDES) {

			
 
				-		return U32x4(vis0 ? upperLeft[0] : 0, vis1 ? upperLeft[1] : 0, vis2 ? lowerLeft[0] : 0, vis3 ? lowerLeft[1] : 0);

			
 
				-	} else {

			
 
				-		return U32x4(upperLeft[0], upperLeft[1], lowerLeft[0], lowerLeft[1]);

			
 
				-	}

			
 
				-}

			
 
				-

			
 
				-static inline void clippedWrite(SafePointer<uint32_t> upperLeft, SafePointer<uint32_t> lowerLeft, bool vis0, bool vis1, bool vis2, bool vis3, U32x4 vColor) {

			
 
				-	// Read back SIMD vector to scalar type

			
 
				-	UVector4D color = vColor.get();

			
 
				-	// Write colors for visible pixels

			
 
				-	if (vis0) { upperLeft[0] = color.x; }

			
 
				-	if (vis1) { upperLeft[1] = color.y; }

			
 
				-	if (vis2) { lowerLeft[0] = color.z; }

			
 
				-	if (vis3) { lowerLeft[1] = color.w; }

			
 
				-}

			
 
				-

			
 
				-static inline void clippedWrite(SafePointer<float> upperLeft, SafePointer<float> lowerLeft, bool vis0, bool vis1, bool vis2, bool vis3, FVector4D depth) {

			
 
				-	// Write colors for visible pixels

			
 
				-	if (vis0) { upperLeft[0] = depth.x; }

			
 
				-	if (vis1) { upperLeft[1] = depth.y; }

			
 
				-	if (vis2) { lowerLeft[0] = depth.z; }

			
 
				-	if (vis3) { lowerLeft[1] = depth.w; }

			
 
				-}

			
 
				-

			
 
				-template<bool CLIP_SIDES>

			
 
				-static inline void clipPixels(int x, const RowInterval &upperRow, const RowInterval &lowerRow, bool &clip0, bool &clip1, bool &clip2, bool &clip3) {

			
 
				-	if (CLIP_SIDES) {

			
 
				-		int x2 = x + 1;

			
 
				-		clip0 = x >= upperRow.left && x < upperRow.right;

			
 
				-		clip1 = x2 >= upperRow.left && x2 < upperRow.right;

			
 
				-		clip2 = x >= lowerRow.left && x < lowerRow.right;

			
 
				-		clip3 = x2 >= lowerRow.left && x2 < lowerRow.right;

			
 
				-	} else {

			
 
				-		clip0 = true;

			
 
				-		clip1 = true;

			
 
				-		clip2 = true;

			
 
				-		clip3 = true;

			
 
				-	}

			
 
				-}

			
 
				-

			
 
				-template<bool CLIP_SIDES, bool DEPTH_READ, bool AFFINE>

			
 
				-static inline void getVisibility(int x, const RowInterval &upperRow, const RowInterval &lowerRow, const FVector4D &depth, const SafePointer<float> depthDataUpper, const SafePointer<float> depthDataLower, bool &vis0, bool &vis1, bool &vis2, bool &vis3) {

			
 
				-	// Clip pixels

			
 
				-	bool clip0, clip1, clip2, clip3;

			
 
				-	clipPixels<CLIP_SIDES>(x, upperRow, lowerRow, clip0, clip1, clip2, clip3);

			
 
				-	// Compare to depth buffer

			
 
				-	bool front0, front1, front2, front3;

			
 
				-	if (DEPTH_READ) {

			
 
				-		if (AFFINE) {

			
 
				-			if (CLIP_SIDES) {

			
 
				-				front0 = clip0 ? depth.x < depthDataUpper[0] : false;

			
 
				-				front1 = clip1 ? depth.y < depthDataUpper[1] : false;

			
 
				-				front2 = clip2 ? depth.z < depthDataLower[0] : false;

			
 
				-				front3 = clip3 ? depth.w < depthDataLower[1] : false;

			
 
				-			} else {

			
 
				-				front0 = depth.x < depthDataUpper[0];

			
 
				-				front1 = depth.y < depthDataUpper[1];

			
 
				-				front2 = depth.z < depthDataLower[0];

			
 
				-				front3 = depth.w < depthDataLower[1];

			
 
				-			}

			
 
				-		} else {

			
 
				-			if (CLIP_SIDES) {

			
 
				-				front0 = clip0 ? depth.x > depthDataUpper[0] : false;

			
 
				-				front1 = clip1 ? depth.y > depthDataUpper[1] : false;

			
 
				-				front2 = clip2 ? depth.z > depthDataLower[0] : false;

			
 
				-				front3 = clip3 ? depth.w > depthDataLower[1] : false;

			
 
				-			} else {

			
 
				-				front0 = depth.x > depthDataUpper[0];

			
 
				-				front1 = depth.y > depthDataUpper[1];

			
 
				-				front2 = depth.z > depthDataLower[0];

			
 
				-				front3 = depth.w > depthDataLower[1];

			
 
				-			}

			
 
				-		}

			
 
				-	} else {

			
 
				-		front0 = true;

			
 
				-		front1 = true;

			
 
				-		front2 = true;

			
 
				-		front3 = true;

			
 
				-	}

			
 
				-	// Decide visibility

			
 
				-	vis0 = clip0 && front0;

			
 
				-	vis1 = clip1 && front1;

			
 
				-	vis2 = clip2 && front2;

			
 
				-	vis3 = clip3 && front3;

			
 
				-}

			
 
				-

			
 
				-template<bool CLIP_SIDES, bool COLOR_WRITE, bool DEPTH_READ, bool DEPTH_WRITE, Filter FILTER, bool AFFINE>

			
 
				-inline static void fillQuadSuper(const Shader& shader, int x, SafePointer<uint32_t> pixelDataUpper, SafePointer<uint32_t> pixelDataLower, SafePointer<float> depthDataUpper, SafePointer<float> depthDataLower, const RowInterval &upperRow, const RowInterval &lowerRow, const PackOrder &targetPackingOrder, const FVector4D &depth, const F32x4x3 &weights) {

			
 
				-	// Get visibility

			
 
				-	bool vis0, vis1, vis2, vis3;

			
 
				-	getVisibility<CLIP_SIDES, DEPTH_READ, AFFINE>(x, upperRow, lowerRow, depth, depthDataUpper, depthDataLower, vis0, vis1, vis2, vis3);

			
 
				-	// Draw if something is visible

			
 
				-	if (vis0 || vis1 || vis2 || vis3) {

			
 
				-		if (COLOR_WRITE) {

			
 
				-			// Get the color

			
 
				-			U32x4 packedColor(0u); // Allow uninitialized memory?

			
 
				-			// Execute the shader

			
 
				-			Rgba_F32 planarSourceColor = shader.getPixels_2x2(weights);

			
 
				-			// Apply alpha filtering

			
 
				-			if (FILTER == Filter::Alpha) {

			
 
				-				// Get opacity from the source color

			
 
				-				F32x4 opacity = planarSourceColor.alpha * (1.0f / 255.0f);

			
 
				-				// Read the packed colors for alpha blending

			
 
				-				U32x4 packedTargetColor = clippedRead<CLIP_SIDES>(pixelDataUpper, pixelDataLower, vis0, vis1, vis2, vis3);

			
 
				-				// Unpack the target color into planar RGBA format so that it can be mixed with the source color

			
 
				-				Rgba_F32 planarTargetColor(packedTargetColor, targetPackingOrder);

			
 
				-				// Blend linearly using floats

			
 
				-				planarSourceColor = (planarSourceColor * opacity) + (planarTargetColor * (1.0f - opacity));

			
 
				-			}

			
 
				-			// Apply channel swapping while packing to bytes

			
 
				-			packedColor = planarSourceColor.toSaturatedByte(targetPackingOrder);

			
 
				-			// Write colors

			
 
				-			clippedWrite(pixelDataUpper, pixelDataLower, vis0, vis1, vis2, vis3, packedColor);

			
 
				-		}

			
 
				-		// Write depth for visible pixels

			
 
				-		if (DEPTH_WRITE) {

			
 
				-			clippedWrite(depthDataUpper, depthDataLower, vis0, vis1, vis2, vis3, depth);

			
 
				-		}

			
 
				-	}

			
 
				-}

			
 
				-

			
 
				-// CLIP_SIDES will use upperRow and lowerRow to clip pixels based on the x value. Only x values inside the ranges can be drawn.

			
 
				-//   This is used along the triangle edges.

			
 
				-// COLOR_WRITE can be disabled to skip writing to the color buffer. Usually when none is given.

			
 
				-// DEPTH_READ can be disabled to draw without caring if there is something already closer in the depth buffer.

			
 
				-// DEPTH_WRITE can be disabled to skip writing to the depth buffer so that it does not occlude following draw calls.

			
 
				-// FILTER can be set to Filter::Alpha to use the output alpha as the opacity.

			
 
				-template<bool CLIP_SIDES, bool COLOR_WRITE, bool DEPTH_READ, bool DEPTH_WRITE, Filter FILTER, bool AFFINE>

			
 
				-static inline void fillRowSuper(const Shader& shader, SafePointer<uint32_t> pixelDataUpper, SafePointer<uint32_t> pixelDataLower, SafePointer<float> depthDataUpper, SafePointer<float> depthDataLower, FVector3D pWeightUpper, FVector3D pWeightLower, const FVector3D &pWeightDx, int startX, int endX, const RowInterval &upperRow, const RowInterval &lowerRow, const PackOrder &targetPackingOrder) {

			
 
				-	if (AFFINE) {

			
 
				-		FVector3D dx2 = pWeightDx * 2.0f;

			
 
				-		F32x4 vLinearDepth(pWeightUpper.x, pWeightUpper.x + pWeightDx.x, pWeightLower.x, pWeightLower.x + pWeightDx.x);

			
 
				-		F32x4 weightB(pWeightUpper.y, pWeightUpper.y + pWeightDx.y, pWeightLower.y, pWeightLower.y + pWeightDx.y);

			
 
				-		F32x4 weightC(pWeightUpper.z, pWeightUpper.z + pWeightDx.z, pWeightLower.z, pWeightLower.z + pWeightDx.z);

			
 
				-		for (int x = startX; x < endX; x += 2) {

			
 
				-			// Get the linear depth

			
 
				-			FVector4D depth = vLinearDepth.get();

			
 
				-			// Calculate the weight of the first vertex from the other two

			
 
				-			F32x4 weightA = 1.0f - (weightB + weightC);

			
 
				-			F32x4x3 weights(weightA, weightB, weightC);

			
 
				-			fillQuadSuper<CLIP_SIDES, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>(shader, x, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, upperRow, lowerRow, targetPackingOrder, depth, weights);

			
 
				-			// Iterate projection

			
 
				-			vLinearDepth = vLinearDepth + dx2.x;

			
 
				-			weightB = weightB + dx2.y;

			
 
				-			weightC = weightC + dx2.z;

			
 
				-			// Iterate buffer pointers

			
 
				-			pixelDataUpper += 2; pixelDataLower += 2;

			
 
				-			depthDataUpper += 2; depthDataLower += 2;

			
 
				-		}

			
 
				-	} else {

			
 
				-		FVector3D dx2 = pWeightDx * 2.0f;

			
 
				-		F32x4 vRecDepth(pWeightUpper.x, pWeightUpper.x + pWeightDx.x, pWeightLower.x, pWeightLower.x + pWeightDx.x);

			
 
				-		F32x4 vRecU(pWeightUpper.y, pWeightUpper.y + pWeightDx.y, pWeightLower.y, pWeightLower.y + pWeightDx.y);

			
 
				-		F32x4 vRecV(pWeightUpper.z, pWeightUpper.z + pWeightDx.z, pWeightLower.z, pWeightLower.z + pWeightDx.z);

			
 
				-		for (int x = startX; x < endX; x += 2) {

			
 
				-			// Get the reciprocal depth

			
 
				-			FVector4D depth = vRecDepth.get();

			
 
				-			// After linearly interpolating (1 / W, U / W, V / W) based on the affine weights...

			
 
				-			// Divide 1 by 1 / W to get the linear depth W

			
 
				-			F32x4 vLinearDepth = vRecDepth.reciprocal();

			
 
				-			// Multiply the vertex weights to the second and third edges with the depth to compensate for that we divided them by depth before interpolating.

			
 
				-			F32x4 weightB = vRecU * vLinearDepth;

			
 
				-			F32x4 weightC = vRecV * vLinearDepth;

			
 
				-			// Calculate the weight of the first vertex from the other two

			
 
				-			F32x4 weightA = 1.0f - (weightB + weightC);

			
 
				-			F32x4x3 weights(weightA, weightB, weightC);

			
 
				-			fillQuadSuper<CLIP_SIDES, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>(shader, x, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, upperRow, lowerRow, targetPackingOrder, depth, weights);

			
 
				-			// Iterate projection

			
 
				-			vRecDepth = vRecDepth + dx2.x;

			
 
				-			vRecU = vRecU + dx2.y;

			
 
				-			vRecV = vRecV + dx2.z;

			
 
				-			// Iterate buffer pointers

			
 
				-			pixelDataUpper += 2; pixelDataLower += 2;

			
 
				-			depthDataUpper += 2; depthDataLower += 2;

			
 
				-		}

			
 
				-	}

			
 
				-}

			
 
				-

			
 
				-template<bool COLOR_WRITE, bool DEPTH_READ, bool DEPTH_WRITE, Filter FILTER, bool AFFINE>

			
 
				-inline static void fillShapeSuper(const Shader& shader, ImageRgbaU8Impl *colorBuffer, ImageF32Impl *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape) {

			
 
				-	// Prepare constants

			
 
				-	const int targetStride = imageInternal::getStride(colorBuffer);

			
 
				-	const int depthBufferStride = imageInternal::getStride(depthBuffer);

			
 
				-	const FVector3D doublePWeightDx = projection.pWeightDx * 2.0f;

			
 
				-	const int colorRowSize = imageInternal::getRowSize(colorBuffer);

			
 
				-	const int depthRowSize = imageInternal::getRowSize(depthBuffer);

			
 
				-	const PackOrder& targetPackingOrder = imageInternal::getPackOrder(colorBuffer);

			
 
				-	const int colorHeight = imageInternal::getHeight(colorBuffer);

			
 
				-	const int depthHeight = imageInternal::getHeight(depthBuffer);

			
 
				-	const int maxHeight = colorHeight > depthHeight ? colorHeight : depthHeight;

			
 
				-

			
 
				-	// Initialize row pointers for color buffer

			
 
				-	SafePointer<uint32_t> pixelDataUpper, pixelDataLower, pixelDataUpperRow, pixelDataLowerRow;

			
 
				-	if (COLOR_WRITE) {

			
 
				-		SafePointer<uint32_t> targetData = imageInternal::getSafeData<uint32_t>(colorBuffer);

			
 
				-		pixelDataUpperRow = targetData;

			
 
				-		pixelDataUpperRow.increaseBytes(shape.startRow * targetStride);

			
 
				-		pixelDataLowerRow = targetData;

			
 
				-		pixelDataLowerRow.increaseBytes((shape.startRow + 1) * targetStride);

			
 
				-	} else {

			
 
				-		pixelDataUpperRow = SafePointer<uint32_t>();

			
 
				-		pixelDataLowerRow = SafePointer<uint32_t>();

			
 
				-	}

			
 
				-

			
 
				-	// Initialize row pointers for depth buffer

			
 
				-	SafePointer<float> depthDataUpper, depthDataLower, depthDataUpperRow, depthDataLowerRow;

			
 
				-	if (DEPTH_READ || DEPTH_WRITE) {

			
 
				-		SafePointer<float> depthBufferData = imageInternal::getSafeData<float>(depthBuffer);

			
 
				-		depthDataUpperRow = depthBufferData;

			
 
				-		depthDataUpperRow.increaseBytes(shape.startRow * depthBufferStride);

			
 
				-		depthDataLowerRow = depthBufferData;

			
 
				-		depthDataLowerRow.increaseBytes((shape.startRow + 1) * depthBufferStride);

			
 
				-	} else {

			
 
				-		depthDataUpperRow = SafePointer<float>();

			
 
				-		depthDataLowerRow = SafePointer<float>();

			
 
				-	}

			
 
				-	for (int32_t y1 = shape.startRow; y1 < shape.startRow + shape.rowCount; y1 += 2) {

			
 
				-		int y2 = y1 + 1;

			
 
				-		RowInterval upperRow = shape.rows[y1 - shape.startRow];

			
 
				-		RowInterval lowerRow = shape.rows[y2 - shape.startRow];

			
 
				-		int outerStart = min(upperRow.left, lowerRow.left);

			
 
				-		int outerEnd = max(upperRow.right, lowerRow.right);

			
 
				-		int innerStart = max(upperRow.left, lowerRow.left);

			
 
				-		int innerEnd = min(upperRow.right, lowerRow.right);

			
 
				-		// Round exclusive intervals to multiples of two pixels

			
 
				-		int outerBlockStart = roundDownEven(outerStart);

			
 
				-		int outerBlockEnd = roundUpEven(outerEnd);

			
 
				-		int innerBlockStart = roundUpEven(innerStart);

			
 
				-		int innerBlockEnd = roundDownEven(innerEnd);

			
 
				-		// Clip last row if outside on odd height

			
 
				-		if (y2 >= maxHeight) {

			
 
				-			lowerRow.right = lowerRow.left;

			
 
				-		}

			
 
				-		// Avoid reading outside of the given bound

			
 
				-		bool hasTop = upperRow.right > upperRow.left;

			
 
				-		bool hasBottom = lowerRow.right > lowerRow.left;

			
 
				-		if (hasTop || hasBottom) {

			
 
				-			// Initialize pointers

			
 
				-			if (COLOR_WRITE) {

			
 
				-				if (hasTop) {

			
 
				-					pixelDataUpper = pixelDataUpperRow.slice("pixelDataUpper", 0, colorRowSize);

			
 
				-				} else {

			
 
				-					// Repeat the lower row to avoid reading outside

			
 
				-					pixelDataUpper = pixelDataLowerRow.slice("pixelDataUpper (from lower)", 0, colorRowSize);

			
 
				-				}

			
 
				-				if (hasBottom) {

			
 
				-					pixelDataLower = pixelDataLowerRow.slice("pixelDataLower", 0, colorRowSize);

			
 
				-				} else {

			
 
				-					// Repeat the upper row to avoid reading outside

			
 
				-					pixelDataLower = pixelDataUpperRow.slice("pixelDataLower (from upper)", 0, colorRowSize);

			
 
				-				}

			
 
				-				int startColorOffset = outerBlockStart * sizeof(uint32_t);

			
 
				-				pixelDataUpper.increaseBytes(startColorOffset);

			
 
				-				pixelDataLower.increaseBytes(startColorOffset);

			
 
				-			}

			
 
				-			if (DEPTH_READ || DEPTH_WRITE) {

			
 
				-				if (hasTop) {

			
 
				-					depthDataUpper = depthDataUpperRow.slice("depthDataUpper", 0, depthRowSize);

			
 
				-				} else {

			
 
				-					// Repeat the upper row to avoid reading outside

			
 
				-					depthDataUpper = depthDataLowerRow.slice("depthDataUpper (from lower)", 0, depthRowSize);

			
 
				-				}

			
 
				-				if (hasBottom) {

			
 
				-					depthDataLower = depthDataLowerRow.slice("depthDataLower", 0, depthRowSize);

			
 
				-				} else {

			
 
				-					// Repeat the upper row to avoid reading outside

			
 
				-					depthDataLower = depthDataUpperRow.slice("depthDataLower (from upper)", 0, depthRowSize);

			
 
				-				}

			
 
				-				depthDataUpper += outerBlockStart;

			
 
				-				depthDataLower += outerBlockStart;

			
 
				-			} else {

			
 
				-				depthDataUpper = SafePointer<float>();

			
 
				-				depthDataLower = SafePointer<float>();

			
 
				-			}

			
 
				-			// Initialize projection

			
 
				-			FVector3D pWeightUpperRow;

			
 
				-			if (AFFINE) {

			
 
				-				pWeightUpperRow = projection.getWeight_affine(IVector2D(outerBlockStart, y1));

			
 
				-			} else {

			
 
				-				pWeightUpperRow = projection.getDepthDividedWeight_perspective(IVector2D(outerBlockStart, y1));

			
 
				-			}

			
 
				-			FVector3D pWeightUpper = pWeightUpperRow;

			
 
				-			FVector3D pWeightLowerRow = pWeightUpperRow + projection.pWeightDy;

			
 
				-			FVector3D pWeightLower = pWeightLowerRow;

			
 
				-			// Render the pixels

			
 
				-			if (innerBlockEnd <= innerBlockStart) {

			
 
				-				// Clipped from left and right

			
 
				-				for (int32_t x = outerBlockStart; x < outerBlockEnd; x += 2) {

			
 
				-					fillRowSuper<true, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>

			
 
				-					  (shader, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, pWeightUpper, pWeightLower, projection.pWeightDx, x, x + 2, upperRow, lowerRow, targetPackingOrder);

			
 
				-					if (COLOR_WRITE) { pixelDataUpper += 2; pixelDataLower += 2; }

			
 
				-					if (DEPTH_READ || DEPTH_WRITE) { depthDataUpper += 2; depthDataLower += 2; }

			
 
				-					pWeightUpper = pWeightUpper + doublePWeightDx; pWeightLower = pWeightLower + doublePWeightDx;

			
 
				-				}

			
 
				-			} else {

			
 
				-				// Left edge

			
 
				-				for (int32_t x = outerBlockStart; x < innerBlockStart; x += 2) {

			
 
				-					fillRowSuper<true, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>

			
 
				-					  (shader, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, pWeightUpper, pWeightLower, projection.pWeightDx, x, x + 2, upperRow, lowerRow, targetPackingOrder);

			
 
				-					if (COLOR_WRITE) { pixelDataUpper += 2; pixelDataLower += 2; }

			
 
				-					if (DEPTH_READ || DEPTH_WRITE) { depthDataUpper += 2; depthDataLower += 2; }

			
 
				-					pWeightUpper = pWeightUpper + doublePWeightDx; pWeightLower = pWeightLower + doublePWeightDx;

			
 
				-				}

			
 
				-				// Full quads

			
 
				-				int width = innerBlockEnd - innerBlockStart;

			
 
				-				int quadCount = width / 2;

			
 
				-				fillRowSuper<false, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>

			
 
				-				  (shader, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, pWeightUpper, pWeightLower, projection.pWeightDx, innerBlockStart, innerBlockEnd, RowInterval(), RowInterval(), targetPackingOrder);

			
 
				-				if (COLOR_WRITE) { pixelDataUpper += 2 * quadCount; pixelDataLower += 2 * quadCount; }

			
 
				-				if (DEPTH_READ || DEPTH_WRITE) { depthDataUpper += 2 * quadCount; depthDataLower += 2 * quadCount; }

			
 
				-				pWeightUpper = pWeightUpper + (doublePWeightDx * quadCount); pWeightLower = pWeightLower + (doublePWeightDx * quadCount);

			
 
				-				// Right edge

			
 
				-				for (int32_t x = innerBlockEnd; x < outerBlockEnd; x += 2) {

			
 
				-					fillRowSuper<true, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>

			
 
				-					  (shader, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, pWeightUpper, pWeightLower, projection.pWeightDx, x, x + 2, upperRow, lowerRow, targetPackingOrder);

			
 
				-					if (COLOR_WRITE) { pixelDataUpper += 2; pixelDataLower += 2; }

			
 
				-					if (DEPTH_READ || DEPTH_WRITE) { depthDataUpper += 2; depthDataLower += 2; }

			
 
				-					pWeightUpper = pWeightUpper + doublePWeightDx; pWeightLower = pWeightLower + doublePWeightDx;

			
 
				-				}

			
 
				-			}

			
 
				-		}

			
 
				-		// Iterate to the next row

			
 
				-		if (COLOR_WRITE) {

			
 
				-			pixelDataUpperRow.increaseBytes(targetStride * 2);

			
 
				-			pixelDataLowerRow.increaseBytes(targetStride * 2);

			
 
				-		}

			
 
				-		if (DEPTH_READ || DEPTH_WRITE) {

			
 
				-			depthDataUpperRow.increaseBytes(depthBufferStride * 2);

			
 
				-			depthDataLowerRow.increaseBytes(depthBufferStride * 2);

			
 
				-		}

			
 
				-	}

			
 
				-}

			
 
				-

			
 
				-void Shader::fillShape(ImageRgbaU8Impl *colorBuffer, ImageF32Impl *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape, Filter filter) {

			
 
				-	bool hasColorBuffer = colorBuffer != nullptr;

			
 
				-	bool hasDepthBuffer = depthBuffer != nullptr;

			
 
				-	if (projection.affine) {

			
 
				-		if (hasDepthBuffer) {

			
 
				-			if (hasColorBuffer) {

			
 
				-				if (filter != Filter::Solid) {

			
 
				-					// Alpha filtering with read only depth buffer

			
 
				-					fillShapeSuper<true, true, false, Filter::Alpha, true>(*this, colorBuffer, depthBuffer, triangle, projection, shape);

			
 
				-				} else {

			
 
				-					// Solid with depth buffer

			
 
				-					fillShapeSuper<true, true, true, Filter::Solid, true>(*this, colorBuffer, depthBuffer, triangle, projection, shape);

			
 
				-				}

			
 
				-			} else {

			
 
				-				// Solid depth

			
 
				-				// TODO: Use for orthogonal depth based shadows

			
 
				-				fillShapeSuper<false, true, true, Filter::Solid, true>(*this, nullptr, depthBuffer, triangle, projection, shape);

			
 
				-			}

			
 
				-		} else {

			
 
				-			if (hasColorBuffer) {

			
 
				-				if (filter != Filter::Solid) {

			
 
				-					// Alpha filtering without depth buffer

			
 
				-					fillShapeSuper<true, false, false, Filter::Alpha, true>(*this, colorBuffer, nullptr, triangle, projection, shape);

			
 
				-				} else {

			
 
				-					// Solid without depth buffer

			
 
				-					fillShapeSuper<true, false, false, Filter::Solid, true>(*this, colorBuffer, nullptr, triangle, projection, shape);

			
 
				-				}

			
 
				-			}

			
 
				-		}

			
 
				-	} else {

			
 
				-		if (hasDepthBuffer) {

			
 
				-			if (hasColorBuffer) {

			
 
				-				if (filter != Filter::Solid) {

			
 
				-					// Alpha filtering with read only depth buffer

			
 
				-					fillShapeSuper<true, true, false, Filter::Alpha, false>(*this, colorBuffer, depthBuffer, triangle, projection, shape);

			
 
				-				} else {

			
 
				-					// Solid with depth buffer

			
 
				-					fillShapeSuper<true, true, true, Filter::Solid, false>(*this, colorBuffer, depthBuffer, triangle, projection, shape);

			
 
				-				}

			
 
				-			} else {

			
 
				-				// Solid depth

			
 
				-				// TODO: Use for depth based shadows with perspective projection

			
 
				-				fillShapeSuper<false, true, true, Filter::Solid, false>(*this, nullptr, depthBuffer, triangle, projection, shape);

			
 
				-			}

			
 
				-		} else {

			
 
				-			if (hasColorBuffer) {

			
 
				-				if (filter != Filter::Solid) {

			
 
				-					// Alpha filtering without depth buffer

			
 
				-					fillShapeSuper<true, false, false, Filter::Alpha, false>(*this, colorBuffer, nullptr, triangle, projection, shape);

			
 
				-				} else {

			
 
				-					// Solid without depth buffer

			
 
				-					fillShapeSuper<true, false, false, Filter::Solid, false>(*this, colorBuffer, nullptr, triangle, projection, shape);

			
 
				-				}

			
 
				-			}

			
 
				-		}

			
 
				-	}

			
 
				-}

			
 
				-

			
 
				+// zlib open source license
			
 
				+//
			
 
				+// Copyright (c) 2017 to 2023 David Forsgren Piuva
			
 
				+// 
			
 
				+// This software is provided 'as-is', without any express or implied
			
 
				+// warranty. In no event will the authors be held liable for any damages
			
 
				+// arising from the use of this software.
			
 
				+// 
			
 
				+// Permission is granted to anyone to use this software for any purpose,
			
 
				+// including commercial applications, and to alter it and redistribute it
			
 
				+// freely, subject to the following restrictions:
			
 
				+// 
			
 
				+//    1. The origin of this software must not be misrepresented; you must not
			
 
				+//    claim that you wrote the original software. If you use this software
			
 
				+//    in a product, an acknowledgment in the product documentation would be
			
 
				+//    appreciated but is not required.
			
 
				+// 
			
 
				+//    2. Altered source versions must be plainly marked as such, and must not be
			
 
				+//    misrepresented as being the original software.
			
 
				+// 
			
 
				+//    3. This notice may not be removed or altered from any source
			
 
				+//    distribution.
			
 
				+
			
 
				+#ifndef DFPSR_RENDER_FILLER_TEMPLATES
			
 
				+#define DFPSR_RENDER_FILLER_TEMPLATES
			
 
				+
			
 
				#include <stdint.h>
				#include <functional>
				#include "../../image/PackOrder.h"
				#include "../../image/ImageRgbaU8.h"
				#include "../../image/ImageF32.h"
				#include "../ITriangle2D.h"
				#include "shaderTypes.h"
			
 
				+
			
 
				+namespace dsr {
			
 
				+
			
 
				+// Function for filling pixels
			
 
				+using PixelShadingCallback = std::function<Rgba_F32(void *data, const F32x4x3 &vertexWeights)>;
			
 
				+
			
 
				// Returns true when value lies strictly between -0.001 and 0.001.
				// NaN fails both comparisons and therefore gives false.
				inline bool almostZero(float value) {
					constexpr float tolerance = 0.001f;
					return -tolerance < value && value < tolerance;
				}
			
 
				+
			
 
				+inline bool almostZero(const FVector3D &channel) {
			
 
				+	return almostZero(channel.x) && almostZero(channel.y) && almostZero(channel.z);
			
 
				+}
			
 
				+
			
 
				// Returns true when value lies strictly between 0.999 and 1.001.
				// NaN fails both comparisons and therefore gives false.
				inline bool almostOne(float value) {
					constexpr float lowerBound = 0.999f;
					constexpr float upperBound = 1.001f;
					return lowerBound < value && value < upperBound;
				}
			
 
				+
			
 
				+inline bool almostOne(const FVector3D &channel) {
			
 
				+	return almostOne(channel.x) && almostOne(channel.y) && almostOne(channel.z);
			
 
				+}
			
 
				+
			
 
				+inline bool almostSame(const FVector3D &channel) {
			
 
				+	return almostZero(channel.x - channel.y) && almostZero(channel.x - channel.z) && almostZero(channel.y - channel.z);
			
 
				+}
			
 
				+
			
 
				// Rounds x up to the nearest even number. Even input is returned unchanged.
				// The arithmetic is unsigned and wraps around, so 0xFFFFFFFF gives 0.
				// The result is returned as a plain value, because a const qualifier on a scalar returned by value
				// is ignored by the language and only produces compiler warnings.
				inline uint32_t roundUpEven(uint32_t x) {
					// Adding one moves odd numbers to the next even number, then clearing the lowest bit undoes the step for even numbers
					return (x + 1u) & ~1u;
				}
			
 
				+
			
 
				// Rounds x down to the nearest even number by clearing the lowest bit. Even input is returned unchanged.
				// The result is returned as a plain value, because a const qualifier on a scalar returned by value
				// is ignored by the language and only produces compiler warnings.
				inline uint32_t roundDownEven(uint32_t x) {
					return x & ~1u;
				}
			
 
				+
			
 
				+template<bool CLIP_SIDES>
			
 
				+inline U32x4 clippedRead(SafePointer<uint32_t> upperLeft, SafePointer<uint32_t> lowerLeft, bool vis0, bool vis1, bool vis2, bool vis3) {
			
 
				+	if (CLIP_SIDES) {
			
 
				+		return U32x4(vis0 ? upperLeft[0] : 0, vis1 ? upperLeft[1] : 0, vis2 ? lowerLeft[0] : 0, vis3 ? lowerLeft[1] : 0);
			
 
				+	} else {
			
 
				+		return U32x4(upperLeft[0], upperLeft[1], lowerLeft[0], lowerLeft[1]);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+inline void clippedWrite(SafePointer<uint32_t> upperLeft, SafePointer<uint32_t> lowerLeft, bool vis0, bool vis1, bool vis2, bool vis3, U32x4 vColor) {
			
 
				+	// Read back SIMD vector to scalar type
			
 
				+	UVector4D color = vColor.get();
			
 
				+	// Write colors for visible pixels
			
 
				+	if (vis0) { upperLeft[0] = color.x; }
			
 
				+	if (vis1) { upperLeft[1] = color.y; }
			
 
				+	if (vis2) { lowerLeft[0] = color.z; }
			
 
				+	if (vis3) { lowerLeft[1] = color.w; }
			
 
				+}
			
 
				+
			
 
				+inline void clippedWrite(SafePointer<float> upperLeft, SafePointer<float> lowerLeft, bool vis0, bool vis1, bool vis2, bool vis3, FVector4D depth) {
			
 
				+	// Write colors for visible pixels
			
 
				+	if (vis0) { upperLeft[0] = depth.x; }
			
 
				+	if (vis1) { upperLeft[1] = depth.y; }
			
 
				+	if (vis2) { lowerLeft[0] = depth.z; }
			
 
				+	if (vis3) { lowerLeft[1] = depth.w; }
			
 
				+}
			
 
				+
			
 
				+template<bool CLIP_SIDES>
			
 
				+inline void clipPixels(int x, const RowInterval &upperRow, const RowInterval &lowerRow, bool &clip0, bool &clip1, bool &clip2, bool &clip3) {
			
 
				+	if (CLIP_SIDES) {
			
 
				+		int x2 = x + 1;
			
 
				+		clip0 = x >= upperRow.left && x < upperRow.right;
			
 
				+		clip1 = x2 >= upperRow.left && x2 < upperRow.right;
			
 
				+		clip2 = x >= lowerRow.left && x < lowerRow.right;
			
 
				+		clip3 = x2 >= lowerRow.left && x2 < lowerRow.right;
			
 
				+	} else {
			
 
				+		clip0 = true;
			
 
				+		clip1 = true;
			
 
				+		clip2 = true;
			
 
				+		clip3 = true;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+template<bool CLIP_SIDES, bool DEPTH_READ, bool AFFINE>
			
 
				+inline void getVisibility(int x, const RowInterval &upperRow, const RowInterval &lowerRow, const FVector4D &depth, const SafePointer<float> depthDataUpper, const SafePointer<float> depthDataLower, bool &vis0, bool &vis1, bool &vis2, bool &vis3) {
			
 
				+	// Clip pixels
			
 
				+	bool clip0, clip1, clip2, clip3;
			
 
				+	clipPixels<CLIP_SIDES>(x, upperRow, lowerRow, clip0, clip1, clip2, clip3);
			
 
				+	// Compare to depth buffer
			
 
				+	bool front0, front1, front2, front3;
			
 
				+	if (DEPTH_READ) {
			
 
				+		if (AFFINE) {
			
 
				+			if (CLIP_SIDES) {
			
 
				+				front0 = clip0 ? depth.x < depthDataUpper[0] : false;
			
 
				+				front1 = clip1 ? depth.y < depthDataUpper[1] : false;
			
 
				+				front2 = clip2 ? depth.z < depthDataLower[0] : false;
			
 
				+				front3 = clip3 ? depth.w < depthDataLower[1] : false;
			
 
				+			} else {
			
 
				+				front0 = depth.x < depthDataUpper[0];
			
 
				+				front1 = depth.y < depthDataUpper[1];
			
 
				+				front2 = depth.z < depthDataLower[0];
			
 
				+				front3 = depth.w < depthDataLower[1];
			
 
				+			}
			
 
				+		} else {
			
 
				+			if (CLIP_SIDES) {
			
 
				+				front0 = clip0 ? depth.x > depthDataUpper[0] : false;
			
 
				+				front1 = clip1 ? depth.y > depthDataUpper[1] : false;
			
 
				+				front2 = clip2 ? depth.z > depthDataLower[0] : false;
			
 
				+				front3 = clip3 ? depth.w > depthDataLower[1] : false;
			
 
				+			} else {
			
 
				+				front0 = depth.x > depthDataUpper[0];
			
 
				+				front1 = depth.y > depthDataUpper[1];
			
 
				+				front2 = depth.z > depthDataLower[0];
			
 
				+				front3 = depth.w > depthDataLower[1];
			
 
				+			}
			
 
				+		}
			
 
				+	} else {
			
 
				+		front0 = true;
			
 
				+		front1 = true;
			
 
				+		front2 = true;
			
 
				+		front3 = true;
			
 
				+	}
			
 
				+	// Decide visibility
			
 
				+	vis0 = clip0 && front0;
			
 
				+	vis1 = clip1 && front1;
			
 
				+	vis2 = clip2 && front2;
			
 
				+	vis3 = clip3 && front3;
			
 
				+}
			
 
				+
			
 
				+template<bool CLIP_SIDES, bool COLOR_WRITE, bool DEPTH_READ, bool DEPTH_WRITE, Filter FILTER, bool AFFINE>
			
 
				+inline void fillQuadSuper(void *data, PixelShadingCallback pixelShaderFunction, int x, SafePointer<uint32_t> pixelDataUpper, SafePointer<uint32_t> pixelDataLower, SafePointer<float> depthDataUpper, SafePointer<float> depthDataLower, const RowInterval &upperRow, const RowInterval &lowerRow, const PackOrder &targetPackingOrder, const FVector4D &depth, const F32x4x3 &weights) {
			
 
				+	// Get visibility
			
 
				+	bool vis0, vis1, vis2, vis3;
			
 
				+	getVisibility<CLIP_SIDES, DEPTH_READ, AFFINE>(x, upperRow, lowerRow, depth, depthDataUpper, depthDataLower, vis0, vis1, vis2, vis3);
			
 
				+	// Draw if something is visible
			
 
				+	if (vis0 || vis1 || vis2 || vis3) {
			
 
				+		if (COLOR_WRITE) {
			
 
				+			// Get the color
			
 
				+			U32x4 packedColor(0u); // Allow uninitialized memory?
			
 
				+			// Execute the shader
			
 
				+			Rgba_F32 planarSourceColor = pixelShaderFunction(data, weights);
			
 
				+			// Apply alpha filtering
			
 
				+			if (FILTER == Filter::Alpha) {
			
 
				+				// Get opacity from the source color
			
 
				+				F32x4 opacity = planarSourceColor.alpha * (1.0f / 255.0f);
			
 
				+				// Read the packed colors for alpha blending
			
 
				+				U32x4 packedTargetColor = clippedRead<CLIP_SIDES>(pixelDataUpper, pixelDataLower, vis0, vis1, vis2, vis3);
			
 
				+				// Unpack the target color into planar RGBA format so that it can be mixed with the source color
			
 
				+				Rgba_F32 planarTargetColor(packedTargetColor, targetPackingOrder);
			
 
				+				// Blend linearly using floats
			
 
				+				planarSourceColor = (planarSourceColor * opacity) + (planarTargetColor * (1.0f - opacity));
			
 
				+			}
			
 
				+			// Apply channel swapping while packing to bytes
			
 
				+			packedColor = planarSourceColor.toSaturatedByte(targetPackingOrder);
			
 
				+			// Write colors
			
 
				+			clippedWrite(pixelDataUpper, pixelDataLower, vis0, vis1, vis2, vis3, packedColor);
			
 
				+		}
			
 
				+		// Write depth for visible pixels
			
 
				+		if (DEPTH_WRITE) {
			
 
				+			clippedWrite(depthDataUpper, depthDataLower, vis0, vis1, vis2, vis3, depth);
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+// CLIP_SIDES will use upperRow and lowerRow to clip pixels based on the x value. Only x values inside the ranges can be drawn.
			
 
				+//   This is used along the triangle edges.
			
 
				+// COLOR_WRITE can be disabled to skip writing to the color buffer. Usually when none is given.
			
 
				+// DEPTH_READ can be disabled to draw without caring if there is something already closer in the depth buffer.
			
 
				+// DEPTH_WRITE can be disabled to skip writing to the depth buffer so that it does not occlude following draw calls.
			
 
				+// FILTER can be set to Filter::Alpha to use the output alpha as the opacity.
			
 
				+template<bool CLIP_SIDES, bool COLOR_WRITE, bool DEPTH_READ, bool DEPTH_WRITE, Filter FILTER, bool AFFINE>
			
 
				+inline void fillRowSuper(void *data, PixelShadingCallback pixelShaderFunction, SafePointer<uint32_t> pixelDataUpper, SafePointer<uint32_t> pixelDataLower, SafePointer<float> depthDataUpper, SafePointer<float> depthDataLower, FVector3D pWeightUpper, FVector3D pWeightLower, const FVector3D &pWeightDx, int startX, int endX, const RowInterval &upperRow, const RowInterval &lowerRow, const PackOrder &targetPackingOrder) {
			
 
				+	if (AFFINE) {
			
 
				+		FVector3D dx2 = pWeightDx * 2.0f;
			
 
				+		F32x4 vLinearDepth(pWeightUpper.x, pWeightUpper.x + pWeightDx.x, pWeightLower.x, pWeightLower.x + pWeightDx.x);
			
 
				+		F32x4 weightB(pWeightUpper.y, pWeightUpper.y + pWeightDx.y, pWeightLower.y, pWeightLower.y + pWeightDx.y);
			
 
				+		F32x4 weightC(pWeightUpper.z, pWeightUpper.z + pWeightDx.z, pWeightLower.z, pWeightLower.z + pWeightDx.z);
			
 
				+		for (int x = startX; x < endX; x += 2) {
			
 
				+			// Get the linear depth
			
 
				+			FVector4D depth = vLinearDepth.get();
			
 
				+			// Calculate the weight of the first vertex from the other two
			
 
				+			F32x4 weightA = 1.0f - (weightB + weightC);
			
 
				+			F32x4x3 weights(weightA, weightB, weightC);
			
 
				+			fillQuadSuper<CLIP_SIDES, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>(data, pixelShaderFunction, x, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, upperRow, lowerRow, targetPackingOrder, depth, weights);
			
 
				+			// Iterate projection
			
 
				+			vLinearDepth = vLinearDepth + dx2.x;
			
 
				+			weightB = weightB + dx2.y;
			
 
				+			weightC = weightC + dx2.z;
			
 
				+			// Iterate buffer pointers
			
 
				+			pixelDataUpper += 2; pixelDataLower += 2;
			
 
				+			depthDataUpper += 2; depthDataLower += 2;
			
 
				+		}
			
 
				+	} else {
			
 
				+		FVector3D dx2 = pWeightDx * 2.0f;
			
 
				+		F32x4 vRecDepth(pWeightUpper.x, pWeightUpper.x + pWeightDx.x, pWeightLower.x, pWeightLower.x + pWeightDx.x);
			
 
				+		F32x4 vRecU(pWeightUpper.y, pWeightUpper.y + pWeightDx.y, pWeightLower.y, pWeightLower.y + pWeightDx.y);
			
 
				+		F32x4 vRecV(pWeightUpper.z, pWeightUpper.z + pWeightDx.z, pWeightLower.z, pWeightLower.z + pWeightDx.z);
			
 
				+		for (int x = startX; x < endX; x += 2) {
			
 
				+			// Get the reciprocal depth
			
 
				+			FVector4D depth = vRecDepth.get();
			
 
				+			// After linearly interpolating (1 / W, U / W, V / W) based on the affine weights...
			
 
				+			// Divide 1 by 1 / W to get the linear depth W
			
 
				+			F32x4 vLinearDepth = vRecDepth.reciprocal();
			
 
				+			// Multiply the vertex weights to the second and third edges with the depth to compensate for that we divided them by depth before interpolating.
			
 
				+			F32x4 weightB = vRecU * vLinearDepth;
			
 
				+			F32x4 weightC = vRecV * vLinearDepth;
			
 
				+			// Calculate the weight of the first vertex from the other two
			
 
				+			F32x4 weightA = 1.0f - (weightB + weightC);
			
 
				+			F32x4x3 weights(weightA, weightB, weightC);
			
 
				+			fillQuadSuper<CLIP_SIDES, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>(data, pixelShaderFunction, x, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, upperRow, lowerRow, targetPackingOrder, depth, weights);
			
 
				+			// Iterate projection
			
 
				+			vRecDepth = vRecDepth + dx2.x;
			
 
				+			vRecU = vRecU + dx2.y;
			
 
				+			vRecV = vRecV + dx2.z;
			
 
				+			// Iterate buffer pointers
			
 
				+			pixelDataUpper += 2; pixelDataLower += 2;
			
 
				+			depthDataUpper += 2; depthDataLower += 2;
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+template<bool COLOR_WRITE, bool DEPTH_READ, bool DEPTH_WRITE, Filter FILTER, bool AFFINE>
			
 
				+inline void fillShapeSuper(void *data, PixelShadingCallback pixelShaderFunction, ImageRgbaU8Impl *colorBuffer, ImageF32Impl *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape) {
			
 
				+	// Prepare constants
			
 
				+	const int targetStride = imageInternal::getStride(colorBuffer);
			
 
				+	const int depthBufferStride = imageInternal::getStride(depthBuffer);
			
 
				+	const FVector3D doublePWeightDx = projection.pWeightDx * 2.0f;
			
 
				+	const int colorRowSize = imageInternal::getRowSize(colorBuffer);
			
 
				+	const int depthRowSize = imageInternal::getRowSize(depthBuffer);
			
 
				+	const PackOrder& targetPackingOrder = imageInternal::getPackOrder(colorBuffer);
			
 
				+	const int colorHeight = imageInternal::getHeight(colorBuffer);
			
 
				+	const int depthHeight = imageInternal::getHeight(depthBuffer);
			
 
				+	const int maxHeight = colorHeight > depthHeight ? colorHeight : depthHeight;
			
 
				+
			
 
				+	// Initialize row pointers for color buffer
			
 
				+	SafePointer<uint32_t> pixelDataUpper, pixelDataLower, pixelDataUpperRow, pixelDataLowerRow;
			
 
				+	if (COLOR_WRITE) {
			
 
				+		SafePointer<uint32_t> targetData = imageInternal::getSafeData<uint32_t>(colorBuffer);
			
 
				+		pixelDataUpperRow = targetData;
			
 
				+		pixelDataUpperRow.increaseBytes(shape.startRow * targetStride);
			
 
				+		pixelDataLowerRow = targetData;
			
 
				+		pixelDataLowerRow.increaseBytes((shape.startRow + 1) * targetStride);
			
 
				+	} else {
			
 
				+		pixelDataUpperRow = SafePointer<uint32_t>();
			
 
				+		pixelDataLowerRow = SafePointer<uint32_t>();
			
 
				+	}
			
 
				+
			
 
				+	// Initialize row pointers for depth buffer
			
 
				+	SafePointer<float> depthDataUpper, depthDataLower, depthDataUpperRow, depthDataLowerRow;
			
 
				+	if (DEPTH_READ || DEPTH_WRITE) {
			
 
				+		SafePointer<float> depthBufferData = imageInternal::getSafeData<float>(depthBuffer);
			
 
				+		depthDataUpperRow = depthBufferData;
			
 
				+		depthDataUpperRow.increaseBytes(shape.startRow * depthBufferStride);
			
 
				+		depthDataLowerRow = depthBufferData;
			
 
				+		depthDataLowerRow.increaseBytes((shape.startRow + 1) * depthBufferStride);
			
 
				+	} else {
			
 
				+		depthDataUpperRow = SafePointer<float>();
			
 
				+		depthDataLowerRow = SafePointer<float>();
			
 
				+	}
			
 
				+	for (int32_t y1 = shape.startRow; y1 < shape.startRow + shape.rowCount; y1 += 2) {
			
 
				+		int y2 = y1 + 1;
			
 
				+		RowInterval upperRow = shape.rows[y1 - shape.startRow];
			
 
				+		RowInterval lowerRow = shape.rows[y2 - shape.startRow];
			
 
				+		int outerStart = min(upperRow.left, lowerRow.left);
			
 
				+		int outerEnd = max(upperRow.right, lowerRow.right);
			
 
				+		int innerStart = max(upperRow.left, lowerRow.left);
			
 
				+		int innerEnd = min(upperRow.right, lowerRow.right);
			
 
				+		// Round exclusive intervals to multiples of two pixels
			
 
				+		int outerBlockStart = roundDownEven(outerStart);
			
 
				+		int outerBlockEnd = roundUpEven(outerEnd);
			
 
				+		int innerBlockStart = roundUpEven(innerStart);
			
 
				+		int innerBlockEnd = roundDownEven(innerEnd);
			
 
				+		// Clip last row if outside on odd height
			
 
				+		if (y2 >= maxHeight) {
			
 
				+			lowerRow.right = lowerRow.left;
			
 
				+		}
			
 
				+		// Avoid reading outside of the given bound
			
 
				+		bool hasTop = upperRow.right > upperRow.left;
			
 
				+		bool hasBottom = lowerRow.right > lowerRow.left;
			
 
				+		if (hasTop || hasBottom) {
			
 
				+			// Initialize pointers
			
 
				+			if (COLOR_WRITE) {
			
 
				+				if (hasTop) {
			
 
				+					pixelDataUpper = pixelDataUpperRow.slice("pixelDataUpper", 0, colorRowSize);
			
 
				+				} else {
			
 
				+					// Repeat the lower row to avoid reading outside
			
 
				+					pixelDataUpper = pixelDataLowerRow.slice("pixelDataUpper (from lower)", 0, colorRowSize);
			
 
				+				}
			
 
				+				if (hasBottom) {
			
 
				+					pixelDataLower = pixelDataLowerRow.slice("pixelDataLower", 0, colorRowSize);
			
 
				+				} else {
			
 
				+					// Repeat the upper row to avoid reading outside
			
 
				+					pixelDataLower = pixelDataUpperRow.slice("pixelDataLower (from upper)", 0, colorRowSize);
			
 
				+				}
			
 
				+				int startColorOffset = outerBlockStart * sizeof(uint32_t);
			
 
				+				pixelDataUpper.increaseBytes(startColorOffset);
			
 
				+				pixelDataLower.increaseBytes(startColorOffset);
			
 
				+			}
			
 
				+			if (DEPTH_READ || DEPTH_WRITE) {
			
 
				+				if (hasTop) {
			
 
				+					depthDataUpper = depthDataUpperRow.slice("depthDataUpper", 0, depthRowSize);
			
 
				+				} else {
			
 
				+					// Repeat the lower row to avoid reading outside
			
 
				+					depthDataUpper = depthDataLowerRow.slice("depthDataUpper (from lower)", 0, depthRowSize);
			
 
				+				}
			
 
				+				if (hasBottom) {
			
 
				+					depthDataLower = depthDataLowerRow.slice("depthDataLower", 0, depthRowSize);
			
 
				+				} else {
			
 
				+					// Repeat the upper row to avoid reading outside
			
 
				+					depthDataLower = depthDataUpperRow.slice("depthDataLower (from upper)", 0, depthRowSize);
			
 
				+				}
			
 
				+				depthDataUpper += outerBlockStart;
			
 
				+				depthDataLower += outerBlockStart;
			
 
				+			} else {
			
 
				+				depthDataUpper = SafePointer<float>();
			
 
				+				depthDataLower = SafePointer<float>();
			
 
				+			}
			
 
				+			// Initialize projection
			
 
				+			FVector3D pWeightUpperRow;
			
 
				+			if (AFFINE) {
			
 
				+				pWeightUpperRow = projection.getWeight_affine(IVector2D(outerBlockStart, y1));
			
 
				+			} else {
			
 
				+				pWeightUpperRow = projection.getDepthDividedWeight_perspective(IVector2D(outerBlockStart, y1));
			
 
				+			}
			
 
				+			FVector3D pWeightUpper = pWeightUpperRow;
			
 
				+			FVector3D pWeightLowerRow = pWeightUpperRow + projection.pWeightDy;
			
 
				+			FVector3D pWeightLower = pWeightLowerRow;
			
 
				+			// Render the pixels
			
 
				+			if (innerBlockEnd <= innerBlockStart) {
			
 
				+				// Clipped from left and right
			
 
				+				for (int32_t x = outerBlockStart; x < outerBlockEnd; x += 2) {
			
 
				+					fillRowSuper<true, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>
			
 
				+					  (data, pixelShaderFunction, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, pWeightUpper, pWeightLower, projection.pWeightDx, x, x + 2, upperRow, lowerRow, targetPackingOrder);
			
 
				+					if (COLOR_WRITE) { pixelDataUpper += 2; pixelDataLower += 2; }
			
 
				+					if (DEPTH_READ || DEPTH_WRITE) { depthDataUpper += 2; depthDataLower += 2; }
			
 
				+					pWeightUpper = pWeightUpper + doublePWeightDx; pWeightLower = pWeightLower + doublePWeightDx;
			
 
				+				}
			
 
				+			} else {
			
 
				+				// Left edge
			
 
				+				for (int32_t x = outerBlockStart; x < innerBlockStart; x += 2) {
			
 
				+					fillRowSuper<true, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>
			
 
				+					  (data, pixelShaderFunction, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, pWeightUpper, pWeightLower, projection.pWeightDx, x, x + 2, upperRow, lowerRow, targetPackingOrder);
			
 
				+					if (COLOR_WRITE) { pixelDataUpper += 2; pixelDataLower += 2; }
			
 
				+					if (DEPTH_READ || DEPTH_WRITE) { depthDataUpper += 2; depthDataLower += 2; }
			
 
				+					pWeightUpper = pWeightUpper + doublePWeightDx; pWeightLower = pWeightLower + doublePWeightDx;
			
 
				+				}
			
 
				+				// Full quads
			
 
				+				int width = innerBlockEnd - innerBlockStart;
			
 
				+				int quadCount = width / 2;
			
 
				+				fillRowSuper<false, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>
			
 
				+				  (data, pixelShaderFunction, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, pWeightUpper, pWeightLower, projection.pWeightDx, innerBlockStart, innerBlockEnd, RowInterval(), RowInterval(), targetPackingOrder);
			
 
				+				if (COLOR_WRITE) { pixelDataUpper += 2 * quadCount; pixelDataLower += 2 * quadCount; }
			
 
				+				if (DEPTH_READ || DEPTH_WRITE) { depthDataUpper += 2 * quadCount; depthDataLower += 2 * quadCount; }
			
 
				+				pWeightUpper = pWeightUpper + (doublePWeightDx * quadCount); pWeightLower = pWeightLower + (doublePWeightDx * quadCount);
			
 
				+				// Right edge
			
 
				+				for (int32_t x = innerBlockEnd; x < outerBlockEnd; x += 2) {
			
 
				+					fillRowSuper<true, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>
			
 
				+					  (data, pixelShaderFunction, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, pWeightUpper, pWeightLower, projection.pWeightDx, x, x + 2, upperRow, lowerRow, targetPackingOrder);
			
 
				+					if (COLOR_WRITE) { pixelDataUpper += 2; pixelDataLower += 2; }
			
 
				+					if (DEPTH_READ || DEPTH_WRITE) { depthDataUpper += 2; depthDataLower += 2; }
			
 
				+					pWeightUpper = pWeightUpper + doublePWeightDx; pWeightLower = pWeightLower + doublePWeightDx;
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+		// Iterate to the next row
			
 
				+		if (COLOR_WRITE) {
			
 
				+			pixelDataUpperRow.increaseBytes(targetStride * 2);
			
 
				+			pixelDataLowerRow.increaseBytes(targetStride * 2);
			
 
				+		}
			
 
				+		if (DEPTH_READ || DEPTH_WRITE) {
			
 
				+			depthDataUpperRow.increaseBytes(depthBufferStride * 2);
			
 
				+			depthDataLowerRow.increaseBytes(depthBufferStride * 2);
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+// Runtime-to-compile-time dispatcher for shape filling.
				+//   Picks the fillShapeSuper instantiation that matches which target buffers were given,
				+//   whether the filter is solid, and whether the projection is affine.
				+// data and pixelShaderFunction are forwarded untouched to fillShapeSuper.
				+// colorBuffer may be nullptr, in which case only depth is drawn and the filter argument is ignored.
				+// depthBuffer may be nullptr, in which case the instantiations without depth access are used.
				+// Any filter other than Filter::Solid is drawn using the Filter::Alpha instantiation.
				+// Nothing is drawn when both colorBuffer and depthBuffer are nullptr.
				+inline void fillShape(void *data, PixelShadingCallback pixelShaderFunction, ImageRgbaU8Impl *colorBuffer, ImageF32Impl *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape, Filter filter) {
				+	const bool writesColor = colorBuffer != nullptr;
				+	const bool usesDepth = depthBuffer != nullptr;
				+	const bool blended = filter != Filter::Solid;
				+	// Without any target buffer there is nothing to fill
				+	if (!writesColor && !usesDepth) {
				+		return;
				+	}
				+	// From here on, a missing depth buffer implies that a color buffer exists
				+	if (projection.affine) {
				+		if (writesColor && usesDepth && blended) {
				+			// Color and depth, alpha filter: the depth buffer is only read
				+			fillShapeSuper<true, true, false, Filter::Alpha, true>(data, pixelShaderFunction, colorBuffer, depthBuffer, triangle, projection, shape);
				+		} else if (writesColor && usesDepth) {
				+			// Color and depth, solid: the depth buffer is both read and written
				+			fillShapeSuper<true, true, true, Filter::Solid, true>(data, pixelShaderFunction, colorBuffer, depthBuffer, triangle, projection, shape);
				+		} else if (usesDepth) {
				+			// Depth only, always solid
				+			// TODO: Use for orthogonal depth based shadows
				+			fillShapeSuper<false, true, true, Filter::Solid, true>(data, pixelShaderFunction, nullptr, depthBuffer, triangle, projection, shape);
				+		} else if (blended) {
				+			// Color only, alpha filter
				+			fillShapeSuper<true, false, false, Filter::Alpha, true>(data, pixelShaderFunction, colorBuffer, nullptr, triangle, projection, shape);
				+		} else {
				+			// Color only, solid
				+			fillShapeSuper<true, false, false, Filter::Solid, true>(data, pixelShaderFunction, colorBuffer, nullptr, triangle, projection, shape);
				+		}
				+	} else {
				+		if (writesColor && usesDepth && blended) {
				+			// Color and depth, alpha filter: the depth buffer is only read
				+			fillShapeSuper<true, true, false, Filter::Alpha, false>(data, pixelShaderFunction, colorBuffer, depthBuffer, triangle, projection, shape);
				+		} else if (writesColor && usesDepth) {
				+			// Color and depth, solid: the depth buffer is both read and written
				+			fillShapeSuper<true, true, true, Filter::Solid, false>(data, pixelShaderFunction, colorBuffer, depthBuffer, triangle, projection, shape);
				+		} else if (usesDepth) {
				+			// Depth only, always solid
				+			// TODO: Use for depth based shadows with perspective projection
				+			fillShapeSuper<false, true, true, Filter::Solid, false>(data, pixelShaderFunction, nullptr, depthBuffer, triangle, projection, shape);
				+		} else if (blended) {
				+			// Color only, alpha filter
				+			fillShapeSuper<true, false, false, Filter::Alpha, false>(data, pixelShaderFunction, colorBuffer, nullptr, triangle, projection, shape);
				+		} else {
				+			// Color only, solid
				+			fillShapeSuper<true, false, false, Filter::Solid, false>(data, pixelShaderFunction, colorBuffer, nullptr, triangle, projection, shape);
				+		}
				+	}
				+}
			
 
				+
			
 
				+}
			
 
				+
			
 
				+#endif