2 年之前 · d901a326eb
--- a/Source/DFPSR/render/renderCore.cpp
+++ b/Source/DFPSR/render/renderCore.cpp
@@ -30,10 +30,6 @@
 
															 using namespace dsr;

														
 
															-//#define DISABLE_VERTEX_COLOR

														
 
															-//#define DISABLE_DIFFUSE_MAP

														
 
															-//#define DISABLE_LIGHT_MAP

														
 
															-

														
 
															 class SubVertex {

														
 
															 public:

														
 
															 	FVector3D cs; // Camera space position based on the weights

														
@@ -201,26 +197,6 @@ Visibility dsr::getTriangleVisibility(const ITriangle2D &triangle, const Camera
 
															 	return Visibility::Full;

														
 
															 }

														
 
															-static bool almostZero(float value) {

														
 
															-	return value > -0.001f && value < 0.001f;

														
 
															-}

														
 
															-

														
 
															-static bool almostZero(const FVector3D &channel) {

														
 
															-	return almostZero(channel.x) && almostZero(channel.y) && almostZero(channel.z);

														
 
															-}

														
 
															-

														
 
															-static bool almostOne(float value) {

														
 
															-	return value > 0.999f && value < 1.001f;

														
 
															-}

														
 
															-

														
 
															-static bool almostOne(const FVector3D &channel) {

														
 
															-	return almostOne(channel.x) && almostOne(channel.y) && almostOne(channel.z);

														
 
															-}

														
 
															-

														
 
															-static bool almostSame(const FVector3D &channel) {

														
 
															-	return almostZero(channel.x - channel.y) && almostZero(channel.x - channel.z) && almostZero(channel.y - channel.z);

														
 
															-}

														
 
															-

														
 
															 static const int alignX = 2;

														
 
															 static const int alignY = 2;

														
@@ -338,81 +314,9 @@ void dsr::renderTriangleFromData(
 
															 	// Only draw visible triangles

														
 
															 	Visibility visibility = getTriangleVisibility(triangle, camera, false);

														
 
															 	if (visibility != Visibility::Hidden) {

														
 
															-		// Disable features when debugging

														
 
															-		#ifdef DISABLE_VERTEX_COLOR

														
 
															-			colors = TriangleColors(1.0f);

														
 
															-		#endif

														
 
															-		#ifdef DISABLE_DIFFUSE_MAP

														
 
															-			diffuse = nullptr;

														
 
															-		#endif

														
 
															-		#ifdef DISABLE_LIGHT_MAP

														
 
															-			light = nullptr;

														
 
															-		#endif

														
 
															 		// Select an instance of the default shader

														
 
															 		if (!(filter == Filter::Alpha && almostZero(colors.alpha))) {

														
 
															-			bool hasVertexFade = !(almostSame(colors.red) && almostSame(colors.green) && almostSame(colors.blue) && almostSame(colors.alpha));

														
 
															-			bool colorless = almostOne(colors.red) && almostOne(colors.green) && almostOne(colors.blue) && almostOne(colors.alpha);

														
 
															-			// Get the function pointer to the correct shader

														
 
															-			DRAW_CALLBACK_TYPE drawTask = &drawCallbackTemplate;

														
 
															-			if (diffuse) {

														
 
															-				bool hasDiffusePyramid = diffuse->texture.hasMipBuffer();

														
 
															-				if (light) {

														
 
															-					if (hasVertexFade) { // DiffuseLightVertex

														
 
															-						if (hasDiffusePyramid) { // With mipmap

														
 
															-							drawTask = &(Shader_RgbaMultiply<true, true, true, false, false>::processTriangle);

														
 
															-						} else { // Without mipmap

														
 
															-							drawTask = &(Shader_RgbaMultiply<true, true, true, false, true>::processTriangle);

														
 
															-						}

														
 
															-					} else { // DiffuseLight

														
 
															-						if (hasDiffusePyramid) { // With mipmap

														
 
															-							drawTask = &(Shader_RgbaMultiply<true, true, false, false, false>::processTriangle);

														
 
															-						} else { // Without mipmap

														
 
															-							drawTask = &(Shader_RgbaMultiply<true, true, false, false, true>::processTriangle);

														
 
															-						}

														
 
															-					}

														
 
															-				} else {

														
 
															-					if (hasVertexFade) { // DiffuseVertex

														
 
															-						if (hasDiffusePyramid) { // With mipmap

														
 
															-							drawTask = &(Shader_RgbaMultiply<true, false, true, false, false>::processTriangle);

														
 
															-						} else { // Without mipmap

														
 
															-							drawTask = &(Shader_RgbaMultiply<true, false, true, false, true>::processTriangle);

														
 
															-						}

														
 
															-					} else {

														
 
															-						if (colorless) { // Diffuse without normalization

														
 
															-							if (hasDiffusePyramid) { // With mipmap

														
 
															-								drawTask = &(Shader_RgbaMultiply<true, false, false, true, false>::processTriangle);

														
 
															-							} else { // Without mipmap

														
 
															-								drawTask = &(Shader_RgbaMultiply<true, false, false, true, true>::processTriangle);

														
 
															-							}

														
 
															-						} else { // Diffuse

														
 
															-							if (hasDiffusePyramid) { // With mipmap

														
 
															-								drawTask = &(Shader_RgbaMultiply<true, false, false, false, false>::processTriangle);

														
 
															-							} else { // Without mipmap

														
 
															-								drawTask = &(Shader_RgbaMultiply<true, false, false, false, true>::processTriangle);

														
 
															-							}

														
 
															-						}

														
 
															-					}

														
 
															-				}

														
 
															-			} else {

														
 
															-				if (light) {

														
 
															-					if (hasVertexFade) { // LightVertex

														
 
															-						drawTask = &(Shader_RgbaMultiply<false, true, true, false, false>::processTriangle);

														
 
															-					} else {

														
 
															-						if (colorless) { // Light without normalization

														
 
															-							drawTask = &(Shader_RgbaMultiply<false, true, false, true, false>::processTriangle);

														
 
															-						} else { // Light

														
 
															-							drawTask = &(Shader_RgbaMultiply<false, true, false, false, false>::processTriangle);

														
 
															-						}

														
 
															-					}

														
 
															-				} else {

														
 
															-					if (hasVertexFade) { // Vertex

														
 
															-						drawTask = &(Shader_RgbaMultiply<false, false, true, false, false>::processTriangle);

														
 
															-					} else { // Single color

														
 
															-						drawTask = &(Shader_RgbaMultiply<false, false, false, false, false>::processTriangle);

														
 
															-					}

														
 
															-				}

														
 
															-			}

														
 
															-			renderTriangleWithShader(commandQueue, TriangleDrawData(targetImage, depthBuffer, camera.perspective, filter, TriangleInput(diffuse, light, texCoords, colors), drawTask), camera, triangle, clipBound);

														
 
															+			renderTriangleWithShader(commandQueue, TriangleDrawData(targetImage, depthBuffer, camera.perspective, filter, TriangleInput(diffuse, light, texCoords, colors), &processTriangle_RgbaMultiply), camera, triangle, clipBound);

														
 
															 		}

														
 
															 	}

														
 
															 }

														
--- a/Source/DFPSR/render/shader/RgbaMultiply.h
+++ b/Source/DFPSR/render/shader/RgbaMultiply.h
@@ -29,13 +29,12 @@
 
															 #include <cassert>

														
 
															 #include <algorithm>

														
 
															 #include "Shader.h"

														
 
															+#include "fillerTemplates.h"

														
 
															 #include "../../image/ImageRgbaU8.h"

														
 
															 namespace dsr {

														
 
															-template <bool HAS_DIFFUSE_MAP, bool HAS_LIGHT_MAP, bool HAS_VERTEX_FADING, bool COLORLESS, bool DISABLE_MIPMAP>

														
 
															-class Shader_RgbaMultiply : public Shader {

														
 
															-private:

														
 
															+struct RgbaMultiply_data {

														
 
															 	const TextureRgba *diffuseMap; // Mip-mapping is allowed for diffuse textures.

														
 
															 	const TextureRgba *lightMap; // Mip-mapping is not allowed for lightmaps, because it would increase the number of shaders to compile and still look worse.

														
 
															 	// Planar format with each vector representing the three triangle corners

														
@@ -44,67 +43,128 @@ private:
 
															 	// Normalize the color product by pre-multiplying the vertex colors

														
 
															 	float getVertexScale() {

														
 
															 		float result = 255.0f; // Scale from normalized to byte for the output

														
 
															-		if (HAS_DIFFUSE_MAP) {

														
 
															+		if (this->diffuseMap) {

														
 
															 			result *= 1.0f / 255.0f; // Normalize the diffuse map from 0..255 to 0..1 by dividing the vertex color

														
 
															 		}

														
 
															-		if (HAS_LIGHT_MAP) {

														
 
															+		if (this->lightMap) {

														
 
															 			result *= 1.0f / 255.0f; // Normalize the light map from 0..255 to 0..1 by dividing the vertex color

														
 
															 		}

														
 
															 		return result;

														
 
															 	}

														
 
															-	explicit Shader_RgbaMultiply(const TriangleInput &triangleInput) :

														
 
															+	explicit RgbaMultiply_data(const TriangleInput &triangleInput) :

														
 
															 	  diffuseMap(triangleInput.diffuseImage ? &(triangleInput.diffuseImage->texture) : nullptr),

														
 
															 	  lightMap(triangleInput.lightImage ? &(triangleInput.lightImage->texture) : nullptr),

														
 
															 	  texCoords(triangleInput.texCoords), colors(triangleInput.colors.getScaled(getVertexScale())) {

														
 
															 		// Texture coordinates must be on the positive side to allow using truncation as a floor function

														
 
															-		if (HAS_DIFFUSE_MAP) {

														
 
															+		if (this->diffuseMap) {

														
 
															 			assert(this->diffuseMap != nullptr); // Cannot sample null

														
 
															 			assert(this->diffuseMap->exists()); // Cannot sample regular images

														
 
															 		}

														
 
															-		if (HAS_LIGHT_MAP) {

														
 
															+		if (this->lightMap) {

														
 
															 			assert(this->lightMap != nullptr); // Cannot sample null

														
 
															 			assert(this->lightMap->exists()); // Cannot sample regular images

														
 
															 		}

														
 
															 	}

														
 
															-public:

														
 
															-	// The process method to take a function pointer to.

														
 
															-	//    Must have the same signature as drawCallbackTemplate in Shader.h.

														
 
															-	static void processTriangle(const TriangleInput &triangleInput, ImageRgbaU8Impl *colorBuffer, ImageF32Impl *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape, Filter filter) {

														
 
															-		Shader_RgbaMultiply tempShader(triangleInput);

														
 
															-		tempShader.fillShape(colorBuffer, depthBuffer, triangle, projection, shape, filter);

														
 
															+};

														
 
															+

														
 
															+template <bool HAS_DIFFUSE_MAP, bool HAS_LIGHT_MAP, bool HAS_VERTEX_FADING, bool COLORLESS, bool DISABLE_MIPMAP>

														
 
															+static Rgba_F32 getPixels_2x2(void *data, const F32x4x3 &vertexWeights) {

														
 
															+	if (HAS_DIFFUSE_MAP && !HAS_LIGHT_MAP && COLORLESS) {

														
 
															+		// Optimized for diffuse only

														
 
															+		F32x4 u1 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.u1, vertexWeights);

														
 
															+		F32x4 v1 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.v1, vertexWeights);

														
 
															+		return shaderMethods::sample_F32<Interpolation::BL, DISABLE_MIPMAP, false>(((RgbaMultiply_data*)data)->diffuseMap, u1, v1);

														
 
															+	} else if (HAS_LIGHT_MAP && !HAS_DIFFUSE_MAP && COLORLESS) {

														
 
															+		// Optimized for light only

														
 
															+		F32x4 u2 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.u2, vertexWeights);

														
 
															+		F32x4 v2 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.v2, vertexWeights);

														
 
															+		return shaderMethods::sample_F32<Interpolation::BL, true, false>(((RgbaMultiply_data*)data)->lightMap, u2, v2);

														
 
															+	} else {

														
 
															+		// Interpolate the vertex color

														
 
															+		Rgba_F32 color = HAS_VERTEX_FADING ?

														
 
															+		  shaderMethods::interpolateVertexColor(((RgbaMultiply_data*)data)->colors.red, ((RgbaMultiply_data*)data)->colors.green, ((RgbaMultiply_data*)data)->colors.blue, ((RgbaMultiply_data*)data)->colors.alpha, vertexWeights) :

														
 
															+		  Rgba_F32(F32x4(((RgbaMultiply_data*)data)->colors.red.x), F32x4(((RgbaMultiply_data*)data)->colors.green.x), F32x4(((RgbaMultiply_data*)data)->colors.blue.x), F32x4(((RgbaMultiply_data*)data)->colors.alpha.x));

														
 
															+		// Sample diffuse

														
 
															+		if (HAS_DIFFUSE_MAP) {

														
 
															+			F32x4 u1 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.u1, vertexWeights);

														
 
															+			F32x4 v1 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.v1, vertexWeights);

														
 
															+			color = color * shaderMethods::sample_F32<Interpolation::BL, DISABLE_MIPMAP, false>(((RgbaMultiply_data*)data)->diffuseMap, u1, v1);

														
 
															+		}

														
 
															+		// Sample lightmap

														
 
															+		if (HAS_LIGHT_MAP) {

														
 
															+			F32x4 u2 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.u2, vertexWeights);

														
 
															+			F32x4 v2 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.v2, vertexWeights);

														
 
															+			color = color * shaderMethods::sample_F32<Interpolation::BL, true, false>(((RgbaMultiply_data*)data)->lightMap, u2, v2);

														
 
															+		}

														
 
															+		return color;

														
 
															 	}

														
 
															-	Rgba_F32 getPixels_2x2(const F32x4x3 &vertexWeights) const override {

														
 
															-		if (HAS_DIFFUSE_MAP && !HAS_LIGHT_MAP && COLORLESS) {

														
 
															-			// Optimized for diffuse only

														
 
															-			F32x4 u1(shaderMethods::interpolate(this->texCoords.u1, vertexWeights));

														
 
															-			F32x4 v1(shaderMethods::interpolate(this->texCoords.v1, vertexWeights));

														
 
															-			return shaderMethods::sample_F32<Interpolation::BL, DISABLE_MIPMAP, false>(this->diffuseMap, u1, v1);

														
 
															-		} else if (HAS_LIGHT_MAP && !HAS_DIFFUSE_MAP && COLORLESS) {

														
 
															-			// Optimized for light only

														
 
															-			F32x4 u2(shaderMethods::interpolate(this->texCoords.u2, vertexWeights));

														
 
															-			F32x4 v2(shaderMethods::interpolate(this->texCoords.v2, vertexWeights));

														
 
															-			return shaderMethods::sample_F32<Interpolation::BL, true, false>(this->lightMap, u2, v2);

														
 
															+}

														
 
															+

														
 
															+// The process method to take a function pointer to.

														
 
															+//    Must have the same signature as drawCallbackTemplate in Shader.h.

														
 
															+static void processTriangle_RgbaMultiply(const TriangleInput &triangleInput, ImageRgbaU8Impl *colorBuffer, ImageF32Impl *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape, Filter filter) {

														
 
															+	RgbaMultiply_data data = RgbaMultiply_data(triangleInput);

														
 
															+	bool hasVertexFade = !(almostSame(data.colors.red) && almostSame(data.colors.green) && almostSame(data.colors.blue) && almostSame(data.colors.alpha));

														
 
															+	bool colorless = almostOne(data.colors.red) && almostOne(data.colors.green) && almostOne(data.colors.blue) && almostOne(data.colors.alpha);

														
 
															+	if (data.diffuseMap) {

														
 
															+		bool hasDiffusePyramid = data.diffuseMap->hasMipBuffer();

														
 
															+		if (data.lightMap) {

														
 
															+			if (hasVertexFade) { // DiffuseLightVertex

														
 
															+				if (hasDiffusePyramid) { // With mipmap

														
 
															+					fillShape(&data, getPixels_2x2<true, true, true, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);

														
 
															+				} else { // Without mipmap

														
 
															+					fillShape(&data, getPixels_2x2<true, true, true, false, true>, colorBuffer, depthBuffer, triangle, projection, shape, filter);

														
 
															+				}

														
 
															+			} else { // DiffuseLight

														
 
															+				if (hasDiffusePyramid) { // With mipmap

														
 
															+					fillShape(&data, getPixels_2x2<true, true, false, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);

														
 
															+				} else { // Without mipmap

														
 
															+					fillShape(&data, getPixels_2x2<true, true, false, false, true>, colorBuffer, depthBuffer, triangle, projection, shape, filter);

														
 
															+				}

														
 
															+			}

														
 
															 		} else {

														
 
															-			// Interpolate the vertex color

														
 
															-			Rgba_F32 color = HAS_VERTEX_FADING ?

														
 
															-			  shaderMethods::interpolateVertexColor(this->colors.red, this->colors.green, this->colors.blue, this->colors.alpha, vertexWeights) :

														
 
															-			  Rgba_F32(F32x4(this->colors.red.x), F32x4(this->colors.green.x), F32x4(this->colors.blue.x), F32x4(this->colors.alpha.x));

														
 
															-			// Sample diffuse

														
 
															-			if (HAS_DIFFUSE_MAP) {

														
 
															-				F32x4 u1(shaderMethods::interpolate(this->texCoords.u1, vertexWeights));

														
 
															-				F32x4 v1(shaderMethods::interpolate(this->texCoords.v1, vertexWeights));

														
 
															-				color = color * shaderMethods::sample_F32<Interpolation::BL, DISABLE_MIPMAP, false>(this->diffuseMap, u1, v1);

														
 
															+			if (hasVertexFade) { // DiffuseVertex

														
 
															+				if (hasDiffusePyramid) { // With mipmap

														
 
															+					fillShape(&data, getPixels_2x2<true, false, true, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);

														
 
															+				} else { // Without mipmap

														
 
															+					fillShape(&data, getPixels_2x2<true, false, true, false, true>, colorBuffer, depthBuffer, triangle, projection, shape, filter);

														
 
															+				}

														
 
															+			} else {

														
 
															+				if (colorless) { // Diffuse without normalization

														
 
															+					if (hasDiffusePyramid) { // With mipmap

														
 
															+						fillShape(&data, getPixels_2x2<true, false, false, true, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);

														
 
															+					} else { // Without mipmap

														
 
															+					fillShape(&data, getPixels_2x2<true, false, false, true, true>, colorBuffer, depthBuffer, triangle, projection, shape, filter);

														
 
															+					}

														
 
															+				} else { // Diffuse

														
 
															+					if (hasDiffusePyramid) { // With mipmap

														
 
															+						fillShape(&data, getPixels_2x2<true, false, false, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);

														
 
															+					} else { // Without mipmap

														
 
															+						fillShape(&data, getPixels_2x2<true, false, false, false, true>, colorBuffer, depthBuffer, triangle, projection, shape, filter);

														
 
															+					}

														
 
															+				}

														
 
															 			}

														
 
															-			// Sample lightmap

														
 
															-			if (HAS_LIGHT_MAP) {

														
 
															-				F32x4 u2(shaderMethods::interpolate(this->texCoords.u2, vertexWeights));

														
 
															-				F32x4 v2(shaderMethods::interpolate(this->texCoords.v2, vertexWeights));

														
 
															-				color = color * shaderMethods::sample_F32<Interpolation::BL, true, false>(this->lightMap, u2, v2);

														
 
															+		}

														
 
															+	} else {

														
 
															+		if (data.lightMap) {

														
 
															+			if (hasVertexFade) { // LightVertex

														
 
															+				fillShape(&data, getPixels_2x2<false, true, true, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);

														
 
															+			} else {

														
 
															+				if (colorless) { // Light without normalization

														
 
															+					fillShape(&data, getPixels_2x2<false, true, false, true, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);

														
 
															+				} else { // Light

														
 
															+					fillShape(&data, getPixels_2x2<false, true, false, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);

														
 
															+				}

														
 
															+			}

														
 
															+		} else {

														
 
															+			if (hasVertexFade) { // Vertex

														
 
															+				fillShape(&data, getPixels_2x2<false, false, true, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);

														
 
															+			} else { // Single color

														
 
															+				fillShape(&data, getPixels_2x2<false, false, false, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);

														
 
															 			}

														
 
															-			return color;

														
 
															 		}

														
 
															 	}

														
 
															-};

														
 
															+}

														
 
															 }

														
--- a/Source/DFPSR/render/shader/Shader.h
+++ b/Source/DFPSR/render/shader/Shader.h
@@ -1,6 +1,6 @@
 
															 // zlib open source license

														
 
															 //

														
 
															-// Copyright (c) 2017 to 2019 David Forsgren Piuva

														
 
															+// Copyright (c) 2017 to 2023 David Forsgren Piuva

														
 
															 // 

														
 
															 // This software is provided 'as-is', without any express or implied

														
 
															 // warranty. In no event will the authors be held liable for any damages

														
@@ -66,15 +66,7 @@ struct TriangleInput {
 
															 // The template for function pointers doing the work

														
 
															 inline void drawCallbackTemplate(const TriangleInput &triangleInput, ImageRgbaU8Impl *colorBuffer, ImageF32Impl *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape, Filter filter) {}

														
 
															-#define DRAW_CALLBACK_TYPE decltype(&drawCallbackTemplate)

														
 
															-

														
 
															-// Inherit this class for pixel shaders

														
 
															-class Shader {

														
 
															-public:

														
 
															-	void fillShape(ImageRgbaU8Impl *colorBuffer, ImageF32Impl *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape, Filter filter);

														
 
															-	// The main call that defines the pixel shader

														
 
															-	virtual Rgba_F32 getPixels_2x2(const F32x4x3 &vertexWeights) const = 0;

														
 
															-};

														
 
															+using DRAW_CALLBACK_TYPE = decltype(&drawCallbackTemplate);

														
 
															 }

														
--- a/Source/DFPSR/render/shader/fillerTemplates.h
+++ b/Source/DFPSR/render/shader/fillerTemplates.h
@@ -1,434 +1,463 @@
 
															-// zlib open source license

														
 
															-//

														
 
															-// Copyright (c) 2017 to 2019 David Forsgren Piuva

														
 
															-// 

														
 
															-// This software is provided 'as-is', without any express or implied

														
 
															-// warranty. In no event will the authors be held liable for any damages

														
 
															-// arising from the use of this software.

														
 
															-// 

														
 
															-// Permission is granted to anyone to use this software for any purpose,

														
 
															-// including commercial applications, and to alter it and redistribute it

														
 
															-// freely, subject to the following restrictions:

														
 
															-// 

														
 
															-//    1. The origin of this software must not be misrepresented; you must not

														
 
															-//    claim that you wrote the original software. If you use this software

														
 
															-//    in a product, an acknowledgment in the product documentation would be

														
 
															-//    appreciated but is not required.

														
 
															-// 

														
 
															-//    2. Altered source versions must be plainly marked as such, and must not be

														
 
															-//    misrepresented as being the original software.

														
 
															-// 

														
 
															-//    3. This notice may not be removed or altered from any source

														
 
															-//    distribution.

														
 
															-

														
 
															-#include "Shader.h"

														
 
															-#include <stdio.h>

														
 
															-#include <algorithm>

														
 
															-#include "../../image/internal/imageInternal.h"

														
 
															-#include "../../image/ImageRgbaU8.h"

														
 
															-#include "../../image/ImageF32.h"

														
 
															-

														
 
															-using namespace dsr;

														
 
															-

														
 
															-inline static const uint32_t roundUpEven(uint32_t x) {

														
 
															-	return (x + 1u) & ~1u;

														
 
															-}

														
 
															-

														
 
															-inline static const uint32_t roundDownEven(uint32_t x) {

														
 
															-	return x & ~1u;

														
 
															-}

														
 
															-

														
 
															-template<bool CLIP_SIDES>

														
 
															-static inline U32x4 clippedRead(SafePointer<uint32_t> upperLeft, SafePointer<uint32_t> lowerLeft, bool vis0, bool vis1, bool vis2, bool vis3) {

														
 
															-	if (CLIP_SIDES) {

														
 
															-		return U32x4(vis0 ? upperLeft[0] : 0, vis1 ? upperLeft[1] : 0, vis2 ? lowerLeft[0] : 0, vis3 ? lowerLeft[1] : 0);

														
 
															-	} else {

														
 
															-		return U32x4(upperLeft[0], upperLeft[1], lowerLeft[0], lowerLeft[1]);

														
 
															-	}

														
 
															-}

														
 
															-

														
 
															-static inline void clippedWrite(SafePointer<uint32_t> upperLeft, SafePointer<uint32_t> lowerLeft, bool vis0, bool vis1, bool vis2, bool vis3, U32x4 vColor) {

														
 
															-	// Read back SIMD vector to scalar type

														
 
															-	UVector4D color = vColor.get();

														
 
															-	// Write colors for visible pixels

														
 
															-	if (vis0) { upperLeft[0] = color.x; }

														
 
															-	if (vis1) { upperLeft[1] = color.y; }

														
 
															-	if (vis2) { lowerLeft[0] = color.z; }

														
 
															-	if (vis3) { lowerLeft[1] = color.w; }

														
 
															-}

														
 
															-

														
 
															-static inline void clippedWrite(SafePointer<float> upperLeft, SafePointer<float> lowerLeft, bool vis0, bool vis1, bool vis2, bool vis3, FVector4D depth) {

														
 
															-	// Write colors for visible pixels

														
 
															-	if (vis0) { upperLeft[0] = depth.x; }

														
 
															-	if (vis1) { upperLeft[1] = depth.y; }

														
 
															-	if (vis2) { lowerLeft[0] = depth.z; }

														
 
															-	if (vis3) { lowerLeft[1] = depth.w; }

														
 
															-}

														
 
															-

														
 
															-template<bool CLIP_SIDES>

														
 
															-static inline void clipPixels(int x, const RowInterval &upperRow, const RowInterval &lowerRow, bool &clip0, bool &clip1, bool &clip2, bool &clip3) {

														
 
															-	if (CLIP_SIDES) {

														
 
															-		int x2 = x + 1;

														
 
															-		clip0 = x >= upperRow.left && x < upperRow.right;

														
 
															-		clip1 = x2 >= upperRow.left && x2 < upperRow.right;

														
 
															-		clip2 = x >= lowerRow.left && x < lowerRow.right;

														
 
															-		clip3 = x2 >= lowerRow.left && x2 < lowerRow.right;

														
 
															-	} else {

														
 
															-		clip0 = true;

														
 
															-		clip1 = true;

														
 
															-		clip2 = true;

														
 
															-		clip3 = true;

														
 
															-	}

														
 
															-}

														
 
															-

														
 
															-template<bool CLIP_SIDES, bool DEPTH_READ, bool AFFINE>

														
 
															-static inline void getVisibility(int x, const RowInterval &upperRow, const RowInterval &lowerRow, const FVector4D &depth, const SafePointer<float> depthDataUpper, const SafePointer<float> depthDataLower, bool &vis0, bool &vis1, bool &vis2, bool &vis3) {

														
 
															-	// Clip pixels

														
 
															-	bool clip0, clip1, clip2, clip3;

														
 
															-	clipPixels<CLIP_SIDES>(x, upperRow, lowerRow, clip0, clip1, clip2, clip3);

														
 
															-	// Compare to depth buffer

														
 
															-	bool front0, front1, front2, front3;

														
 
															-	if (DEPTH_READ) {

														
 
															-		if (AFFINE) {

														
 
															-			if (CLIP_SIDES) {

														
 
															-				front0 = clip0 ? depth.x < depthDataUpper[0] : false;

														
 
															-				front1 = clip1 ? depth.y < depthDataUpper[1] : false;

														
 
															-				front2 = clip2 ? depth.z < depthDataLower[0] : false;

														
 
															-				front3 = clip3 ? depth.w < depthDataLower[1] : false;

														
 
															-			} else {

														
 
															-				front0 = depth.x < depthDataUpper[0];

														
 
															-				front1 = depth.y < depthDataUpper[1];

														
 
															-				front2 = depth.z < depthDataLower[0];

														
 
															-				front3 = depth.w < depthDataLower[1];

														
 
															-			}

														
 
															-		} else {

														
 
															-			if (CLIP_SIDES) {

														
 
															-				front0 = clip0 ? depth.x > depthDataUpper[0] : false;

														
 
															-				front1 = clip1 ? depth.y > depthDataUpper[1] : false;

														
 
															-				front2 = clip2 ? depth.z > depthDataLower[0] : false;

														
 
															-				front3 = clip3 ? depth.w > depthDataLower[1] : false;

														
 
															-			} else {

														
 
															-				front0 = depth.x > depthDataUpper[0];

														
 
															-				front1 = depth.y > depthDataUpper[1];

														
 
															-				front2 = depth.z > depthDataLower[0];

														
 
															-				front3 = depth.w > depthDataLower[1];

														
 
															-			}

														
 
															-		}

														
 
															-	} else {

														
 
															-		front0 = true;

														
 
															-		front1 = true;

														
 
															-		front2 = true;

														
 
															-		front3 = true;

														
 
															-	}

														
 
															-	// Decide visibility

														
 
															-	vis0 = clip0 && front0;

														
 
															-	vis1 = clip1 && front1;

														
 
															-	vis2 = clip2 && front2;

														
 
															-	vis3 = clip3 && front3;

														
 
															-}

														
 
															-

														
 
															-template<bool CLIP_SIDES, bool COLOR_WRITE, bool DEPTH_READ, bool DEPTH_WRITE, Filter FILTER, bool AFFINE>

														
 
															-inline static void fillQuadSuper(const Shader& shader, int x, SafePointer<uint32_t> pixelDataUpper, SafePointer<uint32_t> pixelDataLower, SafePointer<float> depthDataUpper, SafePointer<float> depthDataLower, const RowInterval &upperRow, const RowInterval &lowerRow, const PackOrder &targetPackingOrder, const FVector4D &depth, const F32x4x3 &weights) {

														
 
															-	// Get visibility

														
 
															-	bool vis0, vis1, vis2, vis3;

														
 
															-	getVisibility<CLIP_SIDES, DEPTH_READ, AFFINE>(x, upperRow, lowerRow, depth, depthDataUpper, depthDataLower, vis0, vis1, vis2, vis3);

														
 
															-	// Draw if something is visible

														
 
															-	if (vis0 || vis1 || vis2 || vis3) {

														
 
															-		if (COLOR_WRITE) {

														
 
															-			// Get the color

														
 
															-			U32x4 packedColor(0u); // Allow uninitialized memory?

														
 
															-			// Execute the shader

														
 
															-			Rgba_F32 planarSourceColor = shader.getPixels_2x2(weights);

														
 
															-			// Apply alpha filtering

														
 
															-			if (FILTER == Filter::Alpha) {

														
 
															-				// Get opacity from the source color

														
 
															-				F32x4 opacity = planarSourceColor.alpha * (1.0f / 255.0f);

														
 
															-				// Read the packed colors for alpha blending

														
 
															-				U32x4 packedTargetColor = clippedRead<CLIP_SIDES>(pixelDataUpper, pixelDataLower, vis0, vis1, vis2, vis3);

														
 
															-				// Unpack the target color into planar RGBA format so that it can be mixed with the source color

														
 
															-				Rgba_F32 planarTargetColor(packedTargetColor, targetPackingOrder);

														
 
															-				// Blend linearly using floats

														
 
															-				planarSourceColor = (planarSourceColor * opacity) + (planarTargetColor * (1.0f - opacity));

														
 
															-			}

														
 
															-			// Apply channel swapping while packing to bytes

														
 
															-			packedColor = planarSourceColor.toSaturatedByte(targetPackingOrder);

														
 
															-			// Write colors

														
 
															-			clippedWrite(pixelDataUpper, pixelDataLower, vis0, vis1, vis2, vis3, packedColor);

														
 
															-		}

														
 
															-		// Write depth for visible pixels

														
 
															-		if (DEPTH_WRITE) {

														
 
															-			clippedWrite(depthDataUpper, depthDataLower, vis0, vis1, vis2, vis3, depth);

														
 
															-		}

														
 
															-	}

														
 
															-}

														
 
															-

														
 
															-// CLIP_SIDES will use upperRow and lowerRow to clip pixels based on the x value. Only x values inside the ranges can be drawn.

														
 
															-//   This is used along the triangle edges.

														
 
															-// COLOR_WRITE can be disabled to skip writing to the color buffer. Usually when none is given.

														
 
															-// DEPTH_READ can be disabled to draw without caring if there is something already closer in the depth buffer.

														
 
															-// DEPTH_WRITE can be disabled to skip writing to the depth buffer so that it does not occlude following draw calls.

														
 
															-// FILTER can be set to Filter::Alpha to use the output alpha as the opacity.

														
 
															-template<bool CLIP_SIDES, bool COLOR_WRITE, bool DEPTH_READ, bool DEPTH_WRITE, Filter FILTER, bool AFFINE>

														
 
															-static inline void fillRowSuper(const Shader& shader, SafePointer<uint32_t> pixelDataUpper, SafePointer<uint32_t> pixelDataLower, SafePointer<float> depthDataUpper, SafePointer<float> depthDataLower, FVector3D pWeightUpper, FVector3D pWeightLower, const FVector3D &pWeightDx, int startX, int endX, const RowInterval &upperRow, const RowInterval &lowerRow, const PackOrder &targetPackingOrder) {

														
 
															-	if (AFFINE) {

														
 
															-		FVector3D dx2 = pWeightDx * 2.0f;

														
 
															-		F32x4 vLinearDepth(pWeightUpper.x, pWeightUpper.x + pWeightDx.x, pWeightLower.x, pWeightLower.x + pWeightDx.x);

														
 
															-		F32x4 weightB(pWeightUpper.y, pWeightUpper.y + pWeightDx.y, pWeightLower.y, pWeightLower.y + pWeightDx.y);

														
 
															-		F32x4 weightC(pWeightUpper.z, pWeightUpper.z + pWeightDx.z, pWeightLower.z, pWeightLower.z + pWeightDx.z);

														
 
															-		for (int x = startX; x < endX; x += 2) {

														
 
															-			// Get the linear depth

														
 
															-			FVector4D depth = vLinearDepth.get();

														
 
															-			// Calculate the weight of the first vertex from the other two

														
 
															-			F32x4 weightA = 1.0f - (weightB + weightC);

														
 
															-			F32x4x3 weights(weightA, weightB, weightC);

														
 
															-			fillQuadSuper<CLIP_SIDES, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>(shader, x, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, upperRow, lowerRow, targetPackingOrder, depth, weights);

														
 
															-			// Iterate projection

														
 
															-			vLinearDepth = vLinearDepth + dx2.x;

														
 
															-			weightB = weightB + dx2.y;

														
 
															-			weightC = weightC + dx2.z;

														
 
															-			// Iterate buffer pointers

														
 
															-			pixelDataUpper += 2; pixelDataLower += 2;

														
 
															-			depthDataUpper += 2; depthDataLower += 2;

														
 
															-		}

														
 
															-	} else {

														
 
															-		FVector3D dx2 = pWeightDx * 2.0f;

														
 
															-		F32x4 vRecDepth(pWeightUpper.x, pWeightUpper.x + pWeightDx.x, pWeightLower.x, pWeightLower.x + pWeightDx.x);

														
 
															-		F32x4 vRecU(pWeightUpper.y, pWeightUpper.y + pWeightDx.y, pWeightLower.y, pWeightLower.y + pWeightDx.y);

														
 
															-		F32x4 vRecV(pWeightUpper.z, pWeightUpper.z + pWeightDx.z, pWeightLower.z, pWeightLower.z + pWeightDx.z);

														
 
															-		for (int x = startX; x < endX; x += 2) {

														
 
															-			// Get the reciprocal depth

														
 
															-			FVector4D depth = vRecDepth.get();

														
 
															-			// After linearly interpolating (1 / W, U / W, V / W) based on the affine weights...

														
 
															-			// Divide 1 by 1 / W to get the linear depth W

														
 
															-			F32x4 vLinearDepth = vRecDepth.reciprocal();

														
 
															-			// Multiply the vertex weights to the second and third edges with the depth to compensate for that we divided them by depth before interpolating.

														
 
															-			F32x4 weightB = vRecU * vLinearDepth;

														
 
															-			F32x4 weightC = vRecV * vLinearDepth;

														
 
															-			// Calculate the weight of the first vertex from the other two

														
 
															-			F32x4 weightA = 1.0f - (weightB + weightC);

														
 
															-			F32x4x3 weights(weightA, weightB, weightC);

														
 
															-			fillQuadSuper<CLIP_SIDES, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>(shader, x, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, upperRow, lowerRow, targetPackingOrder, depth, weights);

														
 
															-			// Iterate projection

														
 
															-			vRecDepth = vRecDepth + dx2.x;

														
 
															-			vRecU = vRecU + dx2.y;

														
 
															-			vRecV = vRecV + dx2.z;

														
 
															-			// Iterate buffer pointers

														
 
															-			pixelDataUpper += 2; pixelDataLower += 2;

														
 
															-			depthDataUpper += 2; depthDataLower += 2;

														
 
															-		}

														
 
															-	}

														
 
															-}

														
 
															-

														
 
															-template<bool COLOR_WRITE, bool DEPTH_READ, bool DEPTH_WRITE, Filter FILTER, bool AFFINE>

														
 
															-inline static void fillShapeSuper(const Shader& shader, ImageRgbaU8Impl *colorBuffer, ImageF32Impl *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape) {

														
 
															-	// Prepare constants

														
 
															-	const int targetStride = imageInternal::getStride(colorBuffer);

														
 
															-	const int depthBufferStride = imageInternal::getStride(depthBuffer);

														
 
															-	const FVector3D doublePWeightDx = projection.pWeightDx * 2.0f;

														
 
															-	const int colorRowSize = imageInternal::getRowSize(colorBuffer);

														
 
															-	const int depthRowSize = imageInternal::getRowSize(depthBuffer);

														
 
															-	const PackOrder& targetPackingOrder = imageInternal::getPackOrder(colorBuffer);

														
 
															-	const int colorHeight = imageInternal::getHeight(colorBuffer);

														
 
															-	const int depthHeight = imageInternal::getHeight(depthBuffer);

														
 
															-	const int maxHeight = colorHeight > depthHeight ? colorHeight : depthHeight;

														
 
															-

														
 
															-	// Initialize row pointers for color buffer

														
 
															-	SafePointer<uint32_t> pixelDataUpper, pixelDataLower, pixelDataUpperRow, pixelDataLowerRow;

														
 
															-	if (COLOR_WRITE) {

														
 
															-		SafePointer<uint32_t> targetData = imageInternal::getSafeData<uint32_t>(colorBuffer);

														
 
															-		pixelDataUpperRow = targetData;

														
 
															-		pixelDataUpperRow.increaseBytes(shape.startRow * targetStride);

														
 
															-		pixelDataLowerRow = targetData;

														
 
															-		pixelDataLowerRow.increaseBytes((shape.startRow + 1) * targetStride);

														
 
															-	} else {

														
 
															-		pixelDataUpperRow = SafePointer<uint32_t>();

														
 
															-		pixelDataLowerRow = SafePointer<uint32_t>();

														
 
															-	}

														
 
															-

														
 
															-	// Initialize row pointers for depth buffer

														
 
															-	SafePointer<float> depthDataUpper, depthDataLower, depthDataUpperRow, depthDataLowerRow;

														
 
															-	if (DEPTH_READ || DEPTH_WRITE) {

														
 
															-		SafePointer<float> depthBufferData = imageInternal::getSafeData<float>(depthBuffer);

														
 
															-		depthDataUpperRow = depthBufferData;

														
 
															-		depthDataUpperRow.increaseBytes(shape.startRow * depthBufferStride);

														
 
															-		depthDataLowerRow = depthBufferData;

														
 
															-		depthDataLowerRow.increaseBytes((shape.startRow + 1) * depthBufferStride);

														
 
															-	} else {

														
 
															-		depthDataUpperRow = SafePointer<float>();

														
 
															-		depthDataLowerRow = SafePointer<float>();

														
 
															-	}

														
 
															-	for (int32_t y1 = shape.startRow; y1 < shape.startRow + shape.rowCount; y1 += 2) {

														
 
															-		int y2 = y1 + 1;

														
 
															-		RowInterval upperRow = shape.rows[y1 - shape.startRow];

														
 
															-		RowInterval lowerRow = shape.rows[y2 - shape.startRow];

														
 
															-		int outerStart = min(upperRow.left, lowerRow.left);

														
 
															-		int outerEnd = max(upperRow.right, lowerRow.right);

														
 
															-		int innerStart = max(upperRow.left, lowerRow.left);

														
 
															-		int innerEnd = min(upperRow.right, lowerRow.right);

														
 
															-		// Round exclusive intervals to multiples of two pixels

														
 
															-		int outerBlockStart = roundDownEven(outerStart);

														
 
															-		int outerBlockEnd = roundUpEven(outerEnd);

														
 
															-		int innerBlockStart = roundUpEven(innerStart);

														
 
															-		int innerBlockEnd = roundDownEven(innerEnd);

														
 
															-		// Clip last row if outside on odd height

														
 
															-		if (y2 >= maxHeight) {

														
 
															-			lowerRow.right = lowerRow.left;

														
 
															-		}

														
 
															-		// Avoid reading outside of the given bound

														
 
															-		bool hasTop = upperRow.right > upperRow.left;

														
 
															-		bool hasBottom = lowerRow.right > lowerRow.left;

														
 
															-		if (hasTop || hasBottom) {

														
 
															-			// Initialize pointers

														
 
															-			if (COLOR_WRITE) {

														
 
															-				if (hasTop) {

														
 
															-					pixelDataUpper = pixelDataUpperRow.slice("pixelDataUpper", 0, colorRowSize);

														
 
															-				} else {

														
 
															-					// Repeat the lower row to avoid reading outside

														
 
															-					pixelDataUpper = pixelDataLowerRow.slice("pixelDataUpper (from lower)", 0, colorRowSize);

														
 
															-				}

														
 
															-				if (hasBottom) {

														
 
															-					pixelDataLower = pixelDataLowerRow.slice("pixelDataLower", 0, colorRowSize);

														
 
															-				} else {

														
 
															-					// Repeat the upper row to avoid reading outside

														
 
															-					pixelDataLower = pixelDataUpperRow.slice("pixelDataLower (from upper)", 0, colorRowSize);

														
 
															-				}

														
 
															-				int startColorOffset = outerBlockStart * sizeof(uint32_t);

														
 
															-				pixelDataUpper.increaseBytes(startColorOffset);

														
 
															-				pixelDataLower.increaseBytes(startColorOffset);

														
 
															-			}

														
 
															-			if (DEPTH_READ || DEPTH_WRITE) {

														
 
															-				if (hasTop) {

														
 
															-					depthDataUpper = depthDataUpperRow.slice("depthDataUpper", 0, depthRowSize);

														
 
															-				} else {

														
 
															-					// Repeat the upper row to avoid reading outside

														
 
															-					depthDataUpper = depthDataLowerRow.slice("depthDataUpper (from lower)", 0, depthRowSize);

														
 
															-				}

														
 
															-				if (hasBottom) {

														
 
															-					depthDataLower = depthDataLowerRow.slice("depthDataLower", 0, depthRowSize);

														
 
															-				} else {

														
 
															-					// Repeat the upper row to avoid reading outside

														
 
															-					depthDataLower = depthDataUpperRow.slice("depthDataLower (from upper)", 0, depthRowSize);

														
 
															-				}

														
 
															-				depthDataUpper += outerBlockStart;

														
 
															-				depthDataLower += outerBlockStart;

														
 
															-			} else {

														
 
															-				depthDataUpper = SafePointer<float>();

														
 
															-				depthDataLower = SafePointer<float>();

														
 
															-			}

														
 
															-			// Initialize projection

														
 
															-			FVector3D pWeightUpperRow;

														
 
															-			if (AFFINE) {

														
 
															-				pWeightUpperRow = projection.getWeight_affine(IVector2D(outerBlockStart, y1));

														
 
															-			} else {

														
 
															-				pWeightUpperRow = projection.getDepthDividedWeight_perspective(IVector2D(outerBlockStart, y1));

														
 
															-			}

														
 
															-			FVector3D pWeightUpper = pWeightUpperRow;

														
 
															-			FVector3D pWeightLowerRow = pWeightUpperRow + projection.pWeightDy;

														
 
															-			FVector3D pWeightLower = pWeightLowerRow;

														
 
															-			// Render the pixels

														
 
															-			if (innerBlockEnd <= innerBlockStart) {

														
 
															-				// Clipped from left and right

														
 
															-				for (int32_t x = outerBlockStart; x < outerBlockEnd; x += 2) {

														
 
															-					fillRowSuper<true, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>

														
 
															-					  (shader, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, pWeightUpper, pWeightLower, projection.pWeightDx, x, x + 2, upperRow, lowerRow, targetPackingOrder);

														
 
															-					if (COLOR_WRITE) { pixelDataUpper += 2; pixelDataLower += 2; }

														
 
															-					if (DEPTH_READ || DEPTH_WRITE) { depthDataUpper += 2; depthDataLower += 2; }

														
 
															-					pWeightUpper = pWeightUpper + doublePWeightDx; pWeightLower = pWeightLower + doublePWeightDx;

														
 
															-				}

														
 
															-			} else {

														
 
															-				// Left edge

														
 
															-				for (int32_t x = outerBlockStart; x < innerBlockStart; x += 2) {

														
 
															-					fillRowSuper<true, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>

														
 
															-					  (shader, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, pWeightUpper, pWeightLower, projection.pWeightDx, x, x + 2, upperRow, lowerRow, targetPackingOrder);

														
 
															-					if (COLOR_WRITE) { pixelDataUpper += 2; pixelDataLower += 2; }

														
 
															-					if (DEPTH_READ || DEPTH_WRITE) { depthDataUpper += 2; depthDataLower += 2; }

														
 
															-					pWeightUpper = pWeightUpper + doublePWeightDx; pWeightLower = pWeightLower + doublePWeightDx;

														
 
															-				}

														
 
															-				// Full quads

														
 
															-				int width = innerBlockEnd - innerBlockStart;

														
 
															-				int quadCount = width / 2;

														
 
															-				fillRowSuper<false, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>

														
 
															-				  (shader, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, pWeightUpper, pWeightLower, projection.pWeightDx, innerBlockStart, innerBlockEnd, RowInterval(), RowInterval(), targetPackingOrder);

														
 
															-				if (COLOR_WRITE) { pixelDataUpper += 2 * quadCount; pixelDataLower += 2 * quadCount; }

														
 
															-				if (DEPTH_READ || DEPTH_WRITE) { depthDataUpper += 2 * quadCount; depthDataLower += 2 * quadCount; }

														
 
															-				pWeightUpper = pWeightUpper + (doublePWeightDx * quadCount); pWeightLower = pWeightLower + (doublePWeightDx * quadCount);

														
 
															-				// Right edge

														
 
															-				for (int32_t x = innerBlockEnd; x < outerBlockEnd; x += 2) {

														
 
															-					fillRowSuper<true, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>

														
 
															-					  (shader, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, pWeightUpper, pWeightLower, projection.pWeightDx, x, x + 2, upperRow, lowerRow, targetPackingOrder);

														
 
															-					if (COLOR_WRITE) { pixelDataUpper += 2; pixelDataLower += 2; }

														
 
															-					if (DEPTH_READ || DEPTH_WRITE) { depthDataUpper += 2; depthDataLower += 2; }

														
 
															-					pWeightUpper = pWeightUpper + doublePWeightDx; pWeightLower = pWeightLower + doublePWeightDx;

														
 
															-				}

														
 
															-			}

														
 
															-		}

														
 
															-		// Iterate to the next row

														
 
															-		if (COLOR_WRITE) {

														
 
															-			pixelDataUpperRow.increaseBytes(targetStride * 2);

														
 
															-			pixelDataLowerRow.increaseBytes(targetStride * 2);

														
 
															-		}

														
 
															-		if (DEPTH_READ || DEPTH_WRITE) {

														
 
															-			depthDataUpperRow.increaseBytes(depthBufferStride * 2);

														
 
															-			depthDataLowerRow.increaseBytes(depthBufferStride * 2);

														
 
															-		}

														
 
															-	}

														
 
															-}

														
 
															-

														
 
															-void Shader::fillShape(ImageRgbaU8Impl *colorBuffer, ImageF32Impl *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape, Filter filter) {

														
 
															-	bool hasColorBuffer = colorBuffer != nullptr;

														
 
															-	bool hasDepthBuffer = depthBuffer != nullptr;

														
 
															-	if (projection.affine) {

														
 
															-		if (hasDepthBuffer) {

														
 
															-			if (hasColorBuffer) {

														
 
															-				if (filter != Filter::Solid) {

														
 
															-					// Alpha filtering with read only depth buffer

														
 
															-					fillShapeSuper<true, true, false, Filter::Alpha, true>(*this, colorBuffer, depthBuffer, triangle, projection, shape);

														
 
															-				} else {

														
 
															-					// Solid with depth buffer

														
 
															-					fillShapeSuper<true, true, true, Filter::Solid, true>(*this, colorBuffer, depthBuffer, triangle, projection, shape);

														
 
															-				}

														
 
															-			} else {

														
 
															-				// Solid depth

														
 
															-				// TODO: Use for orthogonal depth based shadows

														
 
															-				fillShapeSuper<false, true, true, Filter::Solid, true>(*this, nullptr, depthBuffer, triangle, projection, shape);

														
 
															-			}

														
 
															-		} else {

														
 
															-			if (hasColorBuffer) {

														
 
															-				if (filter != Filter::Solid) {

														
 
															-					// Alpha filtering without depth buffer

														
 
															-					fillShapeSuper<true, false, false, Filter::Alpha, true>(*this, colorBuffer, nullptr, triangle, projection, shape);

														
 
															-				} else {

														
 
															-					// Solid without depth buffer

														
 
															-					fillShapeSuper<true, false, false, Filter::Solid, true>(*this, colorBuffer, nullptr, triangle, projection, shape);

														
 
															-				}

														
 
															-			}

														
 
															-		}

														
 
															-	} else {

														
 
															-		if (hasDepthBuffer) {

														
 
															-			if (hasColorBuffer) {

														
 
															-				if (filter != Filter::Solid) {

														
 
															-					// Alpha filtering with read only depth buffer

														
 
															-					fillShapeSuper<true, true, false, Filter::Alpha, false>(*this, colorBuffer, depthBuffer, triangle, projection, shape);

														
 
															-				} else {

														
 
															-					// Solid with depth buffer

														
 
															-					fillShapeSuper<true, true, true, Filter::Solid, false>(*this, colorBuffer, depthBuffer, triangle, projection, shape);

														
 
															-				}

														
 
															-			} else {

														
 
															-				// Solid depth

														
 
															-				// TODO: Use for depth based shadows with perspective projection

														
 
															-				fillShapeSuper<false, true, true, Filter::Solid, false>(*this, nullptr, depthBuffer, triangle, projection, shape);

														
 
															-			}

														
 
															-		} else {

														
 
															-			if (hasColorBuffer) {

														
 
															-				if (filter != Filter::Solid) {

														
 
															-					// Alpha filtering without depth buffer

														
 
															-					fillShapeSuper<true, false, false, Filter::Alpha, false>(*this, colorBuffer, nullptr, triangle, projection, shape);

														
 
															-				} else {

														
 
															-					// Solid without depth buffer

														
 
															-					fillShapeSuper<true, false, false, Filter::Solid, false>(*this, colorBuffer, nullptr, triangle, projection, shape);

														
 
															-				}

														
 
															-			}

														
 
															-		}

														
 
															-	}

														
 
															-}

														
 
															-

														
 
															+// zlib open source license
														
 
															+//
														
 
															+// Copyright (c) 2017 to 2023 David Forsgren Piuva
														
 
															+// 
														
 
															+// This software is provided 'as-is', without any express or implied
														
 
															+// warranty. In no event will the authors be held liable for any damages
														
 
															+// arising from the use of this software.
														
 
															+// 
														
 
															+// Permission is granted to anyone to use this software for any purpose,
														
 
															+// including commercial applications, and to alter it and redistribute it
														
 
															+// freely, subject to the following restrictions:
														
 
															+// 
														
 
															+//    1. The origin of this software must not be misrepresented; you must not
														
 
															+//    claim that you wrote the original software. If you use this software
														
 
															+//    in a product, an acknowledgment in the product documentation would be
														
 
															+//    appreciated but is not required.
														
 
															+// 
														
 
															+//    2. Altered source versions must be plainly marked as such, and must not be
														
 
															+//    misrepresented as being the original software.
														
 
															+// 
														
 
															+//    3. This notice may not be removed or altered from any source
														
 
															+//    distribution.
														
 
															+
														
 
															+#ifndef DFPSR_RENDER_FILLER_TEMPLATES
														
 
															+#define DFPSR_RENDER_FILLER_TEMPLATES
														
 
															+
														
 
															#include <stdint.h>
															#include <functional>
															#include "../../image/PackOrder.h"
															#include "../../image/ImageRgbaU8.h"
															#include "../../image/ImageF32.h"
															#include "../ITriangle2D.h"
															#include "shaderTypes.h"
														
 
															+
														
 
															+namespace dsr {
														
 
															+
														
 
															+// Function for filling pixels
														
 
															+using PixelShadingCallback = std::function<Rgba_F32(void *data, const F32x4x3 &vertexWeights)>;
														
 
															+
														
 
															// Returns true when value lies strictly between -0.001 and 0.001.
															inline bool almostZero(float value) {
																if (value <= -0.001f) {
																	// Too far below zero
																	return false;
																}
																return value < 0.001f;
															}
														
 
															+
														
 
															+inline bool almostZero(const FVector3D &channel) {
														
 
															+	return almostZero(channel.x) && almostZero(channel.y) && almostZero(channel.z);
														
 
															+}
														
 
															+
														
 
															// Returns true when value lies strictly between 0.999 and 1.001.
															inline bool almostOne(float value) {
																if (value <= 0.999f) {
																	// Too far below one
																	return false;
																}
																return value < 1.001f;
															}
														
 
															+
														
 
															+inline bool almostOne(const FVector3D &channel) {
														
 
															+	return almostOne(channel.x) && almostOne(channel.y) && almostOne(channel.z);
														
 
															+}
														
 
															+
														
 
															+inline bool almostSame(const FVector3D &channel) {
														
 
															+	return almostZero(channel.x - channel.y) && almostZero(channel.x - channel.z) && almostZero(channel.y - channel.z);
														
 
															+}
														
 
															+
														
 
															// Rounds x up to the closest even number, leaving even numbers unchanged.
															//   The result is returned as a plain value, because a const qualifier on a scalar returned by value is ignored by the compiler.
															//   The addition wraps around for 0xFFFFFFFF, which then gives zero.
															//   NOTE(review): When called with signed coordinates, negative values depend on the conversion to and from uint32_t preserving the bits.
															constexpr uint32_t roundUpEven(uint32_t x) {
																// Step past odd numbers and then clear the lowest bit
																return (x + 1u) & ~1u;
															}
														
 
															+
														
 
															// Rounds x down to the closest even number, leaving even numbers unchanged.
															//   The result is returned as a plain value, because a const qualifier on a scalar returned by value is ignored by the compiler.
															//   NOTE(review): When called with signed coordinates, negative values depend on the conversion to and from uint32_t preserving the bits.
															constexpr uint32_t roundDownEven(uint32_t x) {
																// Clear the lowest bit
																return x & ~1u;
															}
														
 
															+
														
 
															+template<bool CLIP_SIDES>
														
 
															+inline U32x4 clippedRead(SafePointer<uint32_t> upperLeft, SafePointer<uint32_t> lowerLeft, bool vis0, bool vis1, bool vis2, bool vis3) {
														
 
															+	if (CLIP_SIDES) {
														
 
															+		return U32x4(vis0 ? upperLeft[0] : 0, vis1 ? upperLeft[1] : 0, vis2 ? lowerLeft[0] : 0, vis3 ? lowerLeft[1] : 0);
														
 
															+	} else {
														
 
															+		return U32x4(upperLeft[0], upperLeft[1], lowerLeft[0], lowerLeft[1]);
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+inline void clippedWrite(SafePointer<uint32_t> upperLeft, SafePointer<uint32_t> lowerLeft, bool vis0, bool vis1, bool vis2, bool vis3, U32x4 vColor) {
														
 
															+	// Read back SIMD vector to scalar type
														
 
															+	UVector4D color = vColor.get();
														
 
															+	// Write colors for visible pixels
														
 
															+	if (vis0) { upperLeft[0] = color.x; }
														
 
															+	if (vis1) { upperLeft[1] = color.y; }
														
 
															+	if (vis2) { lowerLeft[0] = color.z; }
														
 
															+	if (vis3) { lowerLeft[1] = color.w; }
														
 
															+}
														
 
															+
														
 
															+inline void clippedWrite(SafePointer<float> upperLeft, SafePointer<float> lowerLeft, bool vis0, bool vis1, bool vis2, bool vis3, FVector4D depth) {
														
 
															+	// Write colors for visible pixels
														
 
															+	if (vis0) { upperLeft[0] = depth.x; }
														
 
															+	if (vis1) { upperLeft[1] = depth.y; }
														
 
															+	if (vis2) { lowerLeft[0] = depth.z; }
														
 
															+	if (vis3) { lowerLeft[1] = depth.w; }
														
 
															+}
														
 
															+
														
 
															+template<bool CLIP_SIDES>
														
 
															+inline void clipPixels(int x, const RowInterval &upperRow, const RowInterval &lowerRow, bool &clip0, bool &clip1, bool &clip2, bool &clip3) {
														
 
															+	if (CLIP_SIDES) {
														
 
															+		int x2 = x + 1;
														
 
															+		clip0 = x >= upperRow.left && x < upperRow.right;
														
 
															+		clip1 = x2 >= upperRow.left && x2 < upperRow.right;
														
 
															+		clip2 = x >= lowerRow.left && x < lowerRow.right;
														
 
															+		clip3 = x2 >= lowerRow.left && x2 < lowerRow.right;
														
 
															+	} else {
														
 
															+		clip0 = true;
														
 
															+		clip1 = true;
														
 
															+		clip2 = true;
														
 
															+		clip3 = true;
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+template<bool CLIP_SIDES, bool DEPTH_READ, bool AFFINE>
														
 
															+inline void getVisibility(int x, const RowInterval &upperRow, const RowInterval &lowerRow, const FVector4D &depth, const SafePointer<float> depthDataUpper, const SafePointer<float> depthDataLower, bool &vis0, bool &vis1, bool &vis2, bool &vis3) {
														
 
															+	// Clip pixels
														
 
															+	bool clip0, clip1, clip2, clip3;
														
 
															+	clipPixels<CLIP_SIDES>(x, upperRow, lowerRow, clip0, clip1, clip2, clip3);
														
 
															+	// Compare to depth buffer
														
 
															+	bool front0, front1, front2, front3;
														
 
															+	if (DEPTH_READ) {
														
 
															+		if (AFFINE) {
														
 
															+			if (CLIP_SIDES) {
														
 
															+				front0 = clip0 ? depth.x < depthDataUpper[0] : false;
														
 
															+				front1 = clip1 ? depth.y < depthDataUpper[1] : false;
														
 
															+				front2 = clip2 ? depth.z < depthDataLower[0] : false;
														
 
															+				front3 = clip3 ? depth.w < depthDataLower[1] : false;
														
 
															+			} else {
														
 
															+				front0 = depth.x < depthDataUpper[0];
														
 
															+				front1 = depth.y < depthDataUpper[1];
														
 
															+				front2 = depth.z < depthDataLower[0];
														
 
															+				front3 = depth.w < depthDataLower[1];
														
 
															+			}
														
 
															+		} else {
														
 
															+			if (CLIP_SIDES) {
														
 
															+				front0 = clip0 ? depth.x > depthDataUpper[0] : false;
														
 
															+				front1 = clip1 ? depth.y > depthDataUpper[1] : false;
														
 
															+				front2 = clip2 ? depth.z > depthDataLower[0] : false;
														
 
															+				front3 = clip3 ? depth.w > depthDataLower[1] : false;
														
 
															+			} else {
														
 
															+				front0 = depth.x > depthDataUpper[0];
														
 
															+				front1 = depth.y > depthDataUpper[1];
														
 
															+				front2 = depth.z > depthDataLower[0];
														
 
															+				front3 = depth.w > depthDataLower[1];
														
 
															+			}
														
 
															+		}
														
 
															+	} else {
														
 
															+		front0 = true;
														
 
															+		front1 = true;
														
 
															+		front2 = true;
														
 
															+		front3 = true;
														
 
															+	}
														
 
															+	// Decide visibility
														
 
															+	vis0 = clip0 && front0;
														
 
															+	vis1 = clip1 && front1;
														
 
															+	vis2 = clip2 && front2;
														
 
															+	vis3 = clip3 && front3;
														
 
															+}
														
 
															+
														
 
															+template<bool CLIP_SIDES, bool COLOR_WRITE, bool DEPTH_READ, bool DEPTH_WRITE, Filter FILTER, bool AFFINE>
														
 
															+inline void fillQuadSuper(void *data, PixelShadingCallback pixelShaderFunction, int x, SafePointer<uint32_t> pixelDataUpper, SafePointer<uint32_t> pixelDataLower, SafePointer<float> depthDataUpper, SafePointer<float> depthDataLower, const RowInterval &upperRow, const RowInterval &lowerRow, const PackOrder &targetPackingOrder, const FVector4D &depth, const F32x4x3 &weights) {
														
 
															+	// Get visibility
														
 
															+	bool vis0, vis1, vis2, vis3;
														
 
															+	getVisibility<CLIP_SIDES, DEPTH_READ, AFFINE>(x, upperRow, lowerRow, depth, depthDataUpper, depthDataLower, vis0, vis1, vis2, vis3);
														
 
															+	// Draw if something is visible
														
 
															+	if (vis0 || vis1 || vis2 || vis3) {
														
 
															+		if (COLOR_WRITE) {
														
 
															+			// Get the color
														
 
															+			U32x4 packedColor(0u); // Allow uninitialized memory?
														
 
															+			// Execute the shader
														
 
															+			Rgba_F32 planarSourceColor = pixelShaderFunction(data, weights);
														
 
															+			// Apply alpha filtering
														
 
															+			if (FILTER == Filter::Alpha) {
														
 
															+				// Get opacity from the source color
														
 
															+				F32x4 opacity = planarSourceColor.alpha * (1.0f / 255.0f);
														
 
															+				// Read the packed colors for alpha blending
														
 
															+				U32x4 packedTargetColor = clippedRead<CLIP_SIDES>(pixelDataUpper, pixelDataLower, vis0, vis1, vis2, vis3);
														
 
															+				// Unpack the target color into planar RGBA format so that it can be mixed with the source color
														
 
															+				Rgba_F32 planarTargetColor(packedTargetColor, targetPackingOrder);
														
 
															+				// Blend linearly using floats
														
 
															+				planarSourceColor = (planarSourceColor * opacity) + (planarTargetColor * (1.0f - opacity));
														
 
															+			}
														
 
															+			// Apply channel swapping while packing to bytes
														
 
															+			packedColor = planarSourceColor.toSaturatedByte(targetPackingOrder);
														
 
															+			// Write colors
														
 
															+			clippedWrite(pixelDataUpper, pixelDataLower, vis0, vis1, vis2, vis3, packedColor);
														
 
															+		}
														
 
															+		// Write depth for visible pixels
														
 
															+		if (DEPTH_WRITE) {
														
 
															+			clippedWrite(depthDataUpper, depthDataLower, vis0, vis1, vis2, vis3, depth);
														
 
															+		}
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+// CLIP_SIDES will use upperRow and lowerRow to clip pixels based on the x value. Only x values inside the ranges can be drawn.
														
 
															+//   This is used along the triangle edges.
														
 
															+// COLOR_WRITE can be disabled to skip writing to the color buffer. Usually when none is given.
														
 
															+// DEPTH_READ can be disabled to draw without caring if there is something already closer in the depth buffer.
														
 
															+// DEPTH_WRITE can be disabled to skip writing to the depth buffer so that it does not occlude following draw calls.
														
 
															+// FILTER can be set to Filter::Alpha to use the output alpha as the opacity.
														
 
															+template<bool CLIP_SIDES, bool COLOR_WRITE, bool DEPTH_READ, bool DEPTH_WRITE, Filter FILTER, bool AFFINE>
														
 
															+inline void fillRowSuper(void *data, PixelShadingCallback pixelShaderFunction, SafePointer<uint32_t> pixelDataUpper, SafePointer<uint32_t> pixelDataLower, SafePointer<float> depthDataUpper, SafePointer<float> depthDataLower, FVector3D pWeightUpper, FVector3D pWeightLower, const FVector3D &pWeightDx, int startX, int endX, const RowInterval &upperRow, const RowInterval &lowerRow, const PackOrder &targetPackingOrder) {
														
 
															+	if (AFFINE) {
														
 
															+		FVector3D dx2 = pWeightDx * 2.0f;
														
 
															+		F32x4 vLinearDepth(pWeightUpper.x, pWeightUpper.x + pWeightDx.x, pWeightLower.x, pWeightLower.x + pWeightDx.x);
														
 
															+		F32x4 weightB(pWeightUpper.y, pWeightUpper.y + pWeightDx.y, pWeightLower.y, pWeightLower.y + pWeightDx.y);
														
 
															+		F32x4 weightC(pWeightUpper.z, pWeightUpper.z + pWeightDx.z, pWeightLower.z, pWeightLower.z + pWeightDx.z);
														
 
															+		for (int x = startX; x < endX; x += 2) {
														
 
															+			// Get the linear depth
														
 
															+			FVector4D depth = vLinearDepth.get();
														
 
															+			// Calculate the weight of the first vertex from the other two
														
 
															+			F32x4 weightA = 1.0f - (weightB + weightC);
														
 
															+			F32x4x3 weights(weightA, weightB, weightC);
														
 
															+			fillQuadSuper<CLIP_SIDES, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>(data, pixelShaderFunction, x, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, upperRow, lowerRow, targetPackingOrder, depth, weights);
														
 
															+			// Iterate projection
														
 
															+			vLinearDepth = vLinearDepth + dx2.x;
														
 
															+			weightB = weightB + dx2.y;
														
 
															+			weightC = weightC + dx2.z;
														
 
															+			// Iterate buffer pointers
														
 
															+			pixelDataUpper += 2; pixelDataLower += 2;
														
 
															+			depthDataUpper += 2; depthDataLower += 2;
														
 
															+		}
														
 
															+	} else {
														
 
															+		FVector3D dx2 = pWeightDx * 2.0f;
														
 
															+		F32x4 vRecDepth(pWeightUpper.x, pWeightUpper.x + pWeightDx.x, pWeightLower.x, pWeightLower.x + pWeightDx.x);
														
 
															+		F32x4 vRecU(pWeightUpper.y, pWeightUpper.y + pWeightDx.y, pWeightLower.y, pWeightLower.y + pWeightDx.y);
														
 
															+		F32x4 vRecV(pWeightUpper.z, pWeightUpper.z + pWeightDx.z, pWeightLower.z, pWeightLower.z + pWeightDx.z);
														
 
															+		for (int x = startX; x < endX; x += 2) {
														
 
															+			// Get the reciprocal depth
														
 
															+			FVector4D depth = vRecDepth.get();
														
 
															+			// After linearly interpolating (1 / W, U / W, V / W) based on the affine weights...
														
 
															+			// Divide 1 by 1 / W to get the linear depth W
														
 
															+			F32x4 vLinearDepth = vRecDepth.reciprocal();
														
 
															+			// Multiply the vertex weights to the second and third edges with the depth to compensate for that we divided them by depth before interpolating.
														
 
															+			F32x4 weightB = vRecU * vLinearDepth;
														
 
															+			F32x4 weightC = vRecV * vLinearDepth;
														
 
															+			// Calculate the weight of the first vertex from the other two
														
 
															+			F32x4 weightA = 1.0f - (weightB + weightC);
														
 
															+			F32x4x3 weights(weightA, weightB, weightC);
														
 
															+			fillQuadSuper<CLIP_SIDES, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>(data, pixelShaderFunction, x, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, upperRow, lowerRow, targetPackingOrder, depth, weights);
														
 
															+			// Iterate projection
														
 
															+			vRecDepth = vRecDepth + dx2.x;
														
 
															+			vRecU = vRecU + dx2.y;
														
 
															+			vRecV = vRecV + dx2.z;
														
 
															+			// Iterate buffer pointers
														
 
															+			pixelDataUpper += 2; pixelDataLower += 2;
														
 
															+			depthDataUpper += 2; depthDataLower += 2;
														
 
															+		}
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+template<bool COLOR_WRITE, bool DEPTH_READ, bool DEPTH_WRITE, Filter FILTER, bool AFFINE>
														
 
															+inline void fillShapeSuper(void *data, PixelShadingCallback pixelShaderFunction, ImageRgbaU8Impl *colorBuffer, ImageF32Impl *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape) {
														
 
															+	// Prepare constants
														
 
															+	const int targetStride = imageInternal::getStride(colorBuffer);
														
 
															+	const int depthBufferStride = imageInternal::getStride(depthBuffer);
														
 
															+	const FVector3D doublePWeightDx = projection.pWeightDx * 2.0f;
														
 
															+	const int colorRowSize = imageInternal::getRowSize(colorBuffer);
														
 
															+	const int depthRowSize = imageInternal::getRowSize(depthBuffer);
														
 
															+	const PackOrder& targetPackingOrder = imageInternal::getPackOrder(colorBuffer);
														
 
															+	const int colorHeight = imageInternal::getHeight(colorBuffer);
														
 
															+	const int depthHeight = imageInternal::getHeight(depthBuffer);
														
 
															+	const int maxHeight = colorHeight > depthHeight ? colorHeight : depthHeight;
														
 
															+
														
 
															+	// Initialize row pointers for color buffer
														
 
															+	SafePointer<uint32_t> pixelDataUpper, pixelDataLower, pixelDataUpperRow, pixelDataLowerRow;
														
 
															+	if (COLOR_WRITE) {
														
 
															+		SafePointer<uint32_t> targetData = imageInternal::getSafeData<uint32_t>(colorBuffer);
														
 
															+		pixelDataUpperRow = targetData;
														
 
															+		pixelDataUpperRow.increaseBytes(shape.startRow * targetStride);
														
 
															+		pixelDataLowerRow = targetData;
														
 
															+		pixelDataLowerRow.increaseBytes((shape.startRow + 1) * targetStride);
														
 
															+	} else {
														
 
															+		pixelDataUpperRow = SafePointer<uint32_t>();
														
 
															+		pixelDataLowerRow = SafePointer<uint32_t>();
														
 
															+	}
														
 
															+
														
 
															+	// Initialize row pointers for depth buffer
														
 
															+	SafePointer<float> depthDataUpper, depthDataLower, depthDataUpperRow, depthDataLowerRow;
														
 
															+	if (DEPTH_READ || DEPTH_WRITE) {
														
 
															+		SafePointer<float> depthBufferData = imageInternal::getSafeData<float>(depthBuffer);
														
 
															+		depthDataUpperRow = depthBufferData;
														
 
															+		depthDataUpperRow.increaseBytes(shape.startRow * depthBufferStride);
														
 
															+		depthDataLowerRow = depthBufferData;
														
 
															+		depthDataLowerRow.increaseBytes((shape.startRow + 1) * depthBufferStride);
														
 
															+	} else {
														
 
															+		depthDataUpperRow = SafePointer<float>();
														
 
															+		depthDataLowerRow = SafePointer<float>();
														
 
															+	}
														
 
															+	for (int32_t y1 = shape.startRow; y1 < shape.startRow + shape.rowCount; y1 += 2) {
														
 
															+		int y2 = y1 + 1;
														
 
															+		RowInterval upperRow = shape.rows[y1 - shape.startRow];
														
 
															+		RowInterval lowerRow = shape.rows[y2 - shape.startRow];
														
 
															+		int outerStart = min(upperRow.left, lowerRow.left);
														
 
															+		int outerEnd = max(upperRow.right, lowerRow.right);
														
 
															+		int innerStart = max(upperRow.left, lowerRow.left);
														
 
															+		int innerEnd = min(upperRow.right, lowerRow.right);
														
 
															+		// Round exclusive intervals to multiples of two pixels
														
 
															+		int outerBlockStart = roundDownEven(outerStart);
														
 
															+		int outerBlockEnd = roundUpEven(outerEnd);
														
 
															+		int innerBlockStart = roundUpEven(innerStart);
														
 
															+		int innerBlockEnd = roundDownEven(innerEnd);
														
 
															+		// Clip last row if outside on odd height
														
 
															+		if (y2 >= maxHeight) {
														
 
															+			lowerRow.right = lowerRow.left;
														
 
															+		}
														
 
															+		// Avoid reading outside of the given bound
														
 
															+		bool hasTop = upperRow.right > upperRow.left;
														
 
															+		bool hasBottom = lowerRow.right > lowerRow.left;
														
 
															+		if (hasTop || hasBottom) {
														
 
															+			// Initialize pointers
														
 
															+			if (COLOR_WRITE) {
														
 
															+				if (hasTop) {
														
 
															+					pixelDataUpper = pixelDataUpperRow.slice("pixelDataUpper", 0, colorRowSize);
														
 
															+				} else {
														
 
															+					// Repeat the lower row to avoid reading outside
														
 
															+					pixelDataUpper = pixelDataLowerRow.slice("pixelDataUpper (from lower)", 0, colorRowSize);
														
 
															+				}
														
 
															+				if (hasBottom) {
														
 
															+					pixelDataLower = pixelDataLowerRow.slice("pixelDataLower", 0, colorRowSize);
														
 
															+				} else {
														
 
															+					// Repeat the upper row to avoid reading outside
														
 
															+					pixelDataLower = pixelDataUpperRow.slice("pixelDataLower (from upper)", 0, colorRowSize);
														
 
															+				}
														
 
															+				int startColorOffset = outerBlockStart * sizeof(uint32_t);
														
 
															+				pixelDataUpper.increaseBytes(startColorOffset);
														
 
															+				pixelDataLower.increaseBytes(startColorOffset);
														
 
															+			}
														
 
															+			if (DEPTH_READ || DEPTH_WRITE) {
														
 
															+				if (hasTop) {
														
 
															+					depthDataUpper = depthDataUpperRow.slice("depthDataUpper", 0, depthRowSize);
														
 
															+				} else {
														
 
															+					// Repeat the upper row to avoid reading outside
														
 
															+					depthDataUpper = depthDataLowerRow.slice("depthDataUpper (from lower)", 0, depthRowSize);
														
 
															+				}
														
 
															+				if (hasBottom) {
														
 
															+					depthDataLower = depthDataLowerRow.slice("depthDataLower", 0, depthRowSize);
														
 
															+				} else {
														
 
															+					// Repeat the upper row to avoid reading outside
														
 
															+					depthDataLower = depthDataUpperRow.slice("depthDataLower (from upper)", 0, depthRowSize);
														
 
															+				}
														
 
															+				depthDataUpper += outerBlockStart;
														
 
															+				depthDataLower += outerBlockStart;
														
 
															+			} else {
														
 
															+				depthDataUpper = SafePointer<float>();
														
 
															+				depthDataLower = SafePointer<float>();
														
 
															+			}
														
 
															+			// Initialize projection
														
 
															+			FVector3D pWeightUpperRow;
														
 
															+			if (AFFINE) {
														
 
															+				pWeightUpperRow = projection.getWeight_affine(IVector2D(outerBlockStart, y1));
														
 
															+			} else {
														
 
															+				pWeightUpperRow = projection.getDepthDividedWeight_perspective(IVector2D(outerBlockStart, y1));
														
 
															+			}
														
 
															+			FVector3D pWeightUpper = pWeightUpperRow;
														
 
															+			FVector3D pWeightLowerRow = pWeightUpperRow + projection.pWeightDy;
														
 
															+			FVector3D pWeightLower = pWeightLowerRow;
														
 
															+			// Render the pixels
														
 
															+			if (innerBlockEnd <= innerBlockStart) {
														
 
															+				// Clipped from left and right
														
 
															+				for (int32_t x = outerBlockStart; x < outerBlockEnd; x += 2) {
														
 
															+					fillRowSuper<true, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>
														
 
															+					  (data, pixelShaderFunction, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, pWeightUpper, pWeightLower, projection.pWeightDx, x, x + 2, upperRow, lowerRow, targetPackingOrder);
														
 
															+					if (COLOR_WRITE) { pixelDataUpper += 2; pixelDataLower += 2; }
														
 
															+					if (DEPTH_READ || DEPTH_WRITE) { depthDataUpper += 2; depthDataLower += 2; }
														
 
															+					pWeightUpper = pWeightUpper + doublePWeightDx; pWeightLower = pWeightLower + doublePWeightDx;
														
 
															+				}
														
 
															+			} else {
														
 
															+				// Left edge
														
 
															+				for (int32_t x = outerBlockStart; x < innerBlockStart; x += 2) {
														
 
															+					fillRowSuper<true, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>
														
 
															+					  (data, pixelShaderFunction, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, pWeightUpper, pWeightLower, projection.pWeightDx, x, x + 2, upperRow, lowerRow, targetPackingOrder);
														
 
															+					if (COLOR_WRITE) { pixelDataUpper += 2; pixelDataLower += 2; }
														
 
															+					if (DEPTH_READ || DEPTH_WRITE) { depthDataUpper += 2; depthDataLower += 2; }
														
 
															+					pWeightUpper = pWeightUpper + doublePWeightDx; pWeightLower = pWeightLower + doublePWeightDx;
														
 
															+				}
														
 
															+				// Full quads
														
 
															+				int width = innerBlockEnd - innerBlockStart;
														
 
															+				int quadCount = width / 2;
														
 
															+				fillRowSuper<false, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>
														
 
															+				  (data, pixelShaderFunction, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, pWeightUpper, pWeightLower, projection.pWeightDx, innerBlockStart, innerBlockEnd, RowInterval(), RowInterval(), targetPackingOrder);
														
 
															+				if (COLOR_WRITE) { pixelDataUpper += 2 * quadCount; pixelDataLower += 2 * quadCount; }
														
 
															+				if (DEPTH_READ || DEPTH_WRITE) { depthDataUpper += 2 * quadCount; depthDataLower += 2 * quadCount; }
														
 
															+				pWeightUpper = pWeightUpper + (doublePWeightDx * quadCount); pWeightLower = pWeightLower + (doublePWeightDx * quadCount);
														
 
															+				// Right edge
														
 
															+				for (int32_t x = innerBlockEnd; x < outerBlockEnd; x += 2) {
														
 
															+					fillRowSuper<true, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>
														
 
															+					  (data, pixelShaderFunction, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, pWeightUpper, pWeightLower, projection.pWeightDx, x, x + 2, upperRow, lowerRow, targetPackingOrder);
														
 
															+					if (COLOR_WRITE) { pixelDataUpper += 2; pixelDataLower += 2; }
														
 
															+					if (DEPTH_READ || DEPTH_WRITE) { depthDataUpper += 2; depthDataLower += 2; }
														
 
															+					pWeightUpper = pWeightUpper + doublePWeightDx; pWeightLower = pWeightLower + doublePWeightDx;
														
 
															+				}
														
 
															+			}
														
 
															+		}
														
 
															+		// Iterate to the next row
														
 
															+		if (COLOR_WRITE) {
														
 
															+			pixelDataUpperRow.increaseBytes(targetStride * 2);
														
 
															+			pixelDataLowerRow.increaseBytes(targetStride * 2);
														
 
															+		}
														
 
															+		if (DEPTH_READ || DEPTH_WRITE) {
														
 
															+			depthDataUpperRow.increaseBytes(depthBufferStride * 2);
														
 
															+			depthDataLowerRow.increaseBytes(depthBufferStride * 2);
														
 
															+		}
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+inline void fillShape(void *data, PixelShadingCallback pixelShaderFunction, ImageRgbaU8Impl *colorBuffer, ImageF32Impl *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape, Filter filter) {
														
 
															+	bool hasColorBuffer = colorBuffer != nullptr;
														
 
															+	bool hasDepthBuffer = depthBuffer != nullptr;
														
 
															+	if (projection.affine) {
														
 
															+		if (hasDepthBuffer) {
														
 
															+			if (hasColorBuffer) {
														
 
															+				if (filter != Filter::Solid) {
														
 
															+					// Alpha filtering with read only depth buffer
														
 
															+					fillShapeSuper<true, true, false, Filter::Alpha, true>(data, pixelShaderFunction, colorBuffer, depthBuffer, triangle, projection, shape);
														
 
															+				} else {
														
 
															+					// Solid with depth buffer
														
 
															+					fillShapeSuper<true, true, true, Filter::Solid, true>(data, pixelShaderFunction, colorBuffer, depthBuffer, triangle, projection, shape);
														
 
															+				}
														
 
															+			} else {
														
 
															+				// Solid depth
														
 
															+				// TODO: Use for orthogonal depth based shadows
														
 
															+				fillShapeSuper<false, true, true, Filter::Solid, true>(data, pixelShaderFunction, nullptr, depthBuffer, triangle, projection, shape);
														
 
															+			}
														
 
															+		} else {
														
 
															+			if (hasColorBuffer) {
														
 
															+				if (filter != Filter::Solid) {
														
 
															+					// Alpha filtering without depth buffer
														
 
															+					fillShapeSuper<true, false, false, Filter::Alpha, true>(data, pixelShaderFunction, colorBuffer, nullptr, triangle, projection, shape);
														
 
															+				} else {
														
 
															+					// Solid without depth buffer
														
 
															+					fillShapeSuper<true, false, false, Filter::Solid, true>(data, pixelShaderFunction, colorBuffer, nullptr, triangle, projection, shape);
														
 
															+				}
														
 
															+			}
														
 
															+		}
														
 
															+	} else {
														
 
															+		if (hasDepthBuffer) {
														
 
															+			if (hasColorBuffer) {
														
 
															+				if (filter != Filter::Solid) {
														
 
															+					// Alpha filtering with read only depth buffer
														
 
															+					fillShapeSuper<true, true, false, Filter::Alpha, false>(data, pixelShaderFunction, colorBuffer, depthBuffer, triangle, projection, shape);
														
 
															+				} else {
														
 
															+					// Solid with depth buffer
														
 
															+					fillShapeSuper<true, true, true, Filter::Solid, false>(data, pixelShaderFunction, colorBuffer, depthBuffer, triangle, projection, shape);
														
 
															+				}
														
 
															+			} else {
														
 
															+				// Solid depth
														
 
															+				// TODO: Use for depth based shadows with perspective projection
														
 
															+				fillShapeSuper<false, true, true, Filter::Solid, false>(data, pixelShaderFunction, nullptr, depthBuffer, triangle, projection, shape);
														
 
															+			}
														
 
															+		} else {
														
 
															+			if (hasColorBuffer) {
														
 
															+				if (filter != Filter::Solid) {
														
 
															+					// Alpha filtering without depth buffer
														
 
															+					fillShapeSuper<true, false, false, Filter::Alpha, false>(data, pixelShaderFunction, colorBuffer, nullptr, triangle, projection, shape);
														
 
															+				} else {
														
 
															+					// Solid without depth buffer
														
 
															+					fillShapeSuper<true, false, false, Filter::Solid, false>(data, pixelShaderFunction, colorBuffer, nullptr, triangle, projection, shape);
														
 
															+				}
														
 
															+			}
														
 
															+		}
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+}
														
 
															+
														
 
															+#endif