2 years ago · c6dd251bff
--- a/Source/DFPSR/image/ImageRgbaU8.cpp
+++ b/Source/DFPSR/image/ImageRgbaU8.cpp
@@ -243,9 +243,7 @@ TextureRgbaLayer::TextureRgbaLayer(SafePointer<uint32_t> data, int32_t width, in
 
				   width(width),

			
 
				   height(height),

			
 
				   subWidth(width * 256),

			
 
				-  subHeight(height * 256),

			
 
				-  halfPixelOffsetU(1.0f - (0.5f / width)),

			
 
				-  halfPixelOffsetV(1.0f - (0.5f / height)) {}

			
 
				+  subHeight(height * 256) {}

			
 
				 

			
 
				 void ImageRgbaU8Impl::generatePyramidStructure(int32_t layerCount) {

			
 
				 	int32_t currentWidth = this->width;

			
--- a/Source/DFPSR/image/ImageRgbaU8.h
+++ b/Source/DFPSR/image/ImageRgbaU8.h
@@ -67,12 +67,11 @@ namespace dsr {
 
				 

			
 
				 // Pointing to the parent image using raw pointers for fast rendering. May not exceed the lifetime of the parent image!

			
 
				 struct TextureRgbaLayer {

			
 
				-	SafePointer<uint32_t> data;

			
 
				-	int32_t strideShift = 0;

			
 
				-	uint32_t widthMask = 0, heightMask = 0;

			
 
				-	int32_t width = 0, height = 0;

			
 
				-	float subWidth = 0.0f, subHeight = 0.0f; // TODO: Better names?

			
 
				-	float halfPixelOffsetU = 0.0f, halfPixelOffsetV = 0.0f;

			
 
				+	SafePointer<uint32_t> data;                                         // Replace with the start offset

			
 
				+	int32_t strideShift = 0;                                            // Subtract one per layer 

			
 
				+	uint32_t widthMask = 0, heightMask = 0;                             // Shift one bit right per layer

			
 
				+	int32_t width = 0, height = 0;                                      // Shift one bit right per layer

			
 
				+	float subWidth = 0.0f, subHeight = 0.0f;                            // Try to use integers, so that these can be shifted

			
 
				 	TextureRgbaLayer();

			
 
				 	TextureRgbaLayer(SafePointer<uint32_t> data, int32_t width, int32_t height);

			
 
				 	// Can it be sampled as a texture

			
@@ -84,6 +83,7 @@ struct TextureRgbaLayer {
 
				 

			
 
				 // Pointing to the parent image using raw pointers for fast rendering. Do not separate from the image!

			
 
				 struct TextureRgba {

			
 
				+	// TODO: Remove the array, so that any number of layers can be contained by calculating the masks and offsets.

			
 
				 	TextureRgbaLayer mips[MIP_BIN_COUNT]; // Pointing to all mip levels including the original image

			
 
				 	int32_t layerCount = 0; // 0 Means that there are no pointers, 1 means that you have a pyramid but only one layer.

			
 
				 	// Can it be sampled as a texture

			
--- a/Source/DFPSR/render/shader/RgbaMultiply.h
+++ b/Source/DFPSR/render/shader/RgbaMultiply.h
@@ -57,7 +57,7 @@ private:
 
				 	  diffuseMap(triangleInput.diffuseImage ? &(triangleInput.diffuseImage->texture) : nullptr),

			
 
				 	  diffuseLayer(triangleInput.diffuseImage ? &(triangleInput.diffuseImage->texture.mips[0]) : nullptr),

			
 
				 	  lightLayer(triangleInput.lightImage ? &(triangleInput.lightImage->texture.mips[0]) : nullptr),

			
 
				-	  texCoords(triangleInput.texCoords.getPositive()), colors(triangleInput.colors.getScaled(getVertexScale())) {

			
 
				+	  texCoords(triangleInput.texCoords), colors(triangleInput.colors.getScaled(getVertexScale())) {

			
 
				 		// Texture coordinates must be on the positive side to allow using truncation as a floor function

			
 
				 		if (HAS_DIFFUSE_MAP) {

			
 
				 			// Incorrect tests?

			
--- a/Source/DFPSR/render/shader/Shader.h
+++ b/Source/DFPSR/render/shader/Shader.h
@@ -34,13 +34,6 @@
 
				 

			
 
				 namespace dsr {

			
 
				 

			
 
				-inline float getMinimum(const FVector3D &coordinates) {

			
 
				-	float result = coordinates.x;

			
 
				-	if (coordinates.y < result) { result = coordinates.y; }

			
 
				-	if (coordinates.z < result) { result = coordinates.z; }

			
 
				-	return result;

			
 
				-}

			
 
				-

			
 
				 struct TriangleTexCoords {

			
 
				 	FVector3D u1, v1, u2, v2;

			
 
				 	TriangleTexCoords() {}

			
@@ -48,14 +41,6 @@ struct TriangleTexCoords {
 
				 	  u1(u1), v1(v1), u2(u2), v2(v2) {}

			
 
				 	TriangleTexCoords(const FVector4D &a, const FVector4D &b, const FVector4D &c) :

			
 
				 	  u1(FVector3D(a.x, b.x, c.x)), v1(FVector3D(a.y, b.y, c.y)), u2(FVector3D(a.z, b.z, c.z)), v2(FVector3D(a.w, b.w, c.w)) {}

			
 
				-	TriangleTexCoords getPositive() const {

			
 
				-		return TriangleTexCoords(

			
 
				-		  this->u1 + FVector3D(1 - (int)getMinimum(this->u1)),

			
 
				-		  this->v1 + FVector3D(1 - (int)getMinimum(this->v1)),

			
 
				-		  this->u2 + FVector3D(1 - (int)getMinimum(this->u2)),

			
 
				-		  this->v2 + FVector3D(1 - (int)getMinimum(this->v2))

			
 
				-		);

			
 
				-	}

			
 
				 };

			
 
				 

			
 
				 struct TriangleColors {

			
--- a/Source/DFPSR/render/shader/shaderMethods.h
+++ b/Source/DFPSR/render/shader/shaderMethods.h
@@ -1,6 +1,6 @@
 
				 // zlib open source license

			
 
				 //

			
 
				-// Copyright (c) 2017 to 2019 David Forsgren Piuva

			
 
				+// Copyright (c) 2017 to 2023 David Forsgren Piuva

			
 
				 // 

			
 
				 // This software is provided 'as-is', without any express or implied

			
 
				 // warranty. In no event will the authors be held liable for any damages

			
@@ -129,16 +129,12 @@ namespace shaderMethods {
 
				 	}

			
 
				 

			
 
				 	// Single layer sampling method

			
 
				-	// Preconditions:

			
 
				-	//   u >= -halfPixelOffsetU

			
 
				-	//   v >= -halfPixelOffsetV

			
 
				 	template<Interpolation INTERPOLATION>

			
 
				 	inline U32x4 sample_U32(const TextureRgbaLayer *source, const F32x4 &u, const F32x4 &v) {

			
 
				 		if (INTERPOLATION == Interpolation::BL) {

			
 
				-			F32x4 uLow(u + source->halfPixelOffsetU);

			
 
				-			F32x4 vLow(v + source->halfPixelOffsetV);

			
 
				-			U32x4 subPixLowX(truncateToU32(uLow * source->subWidth)); // SubPixelLowX = ULow * (Width * 256)

			
 
				-			U32x4 subPixLowY(truncateToU32(vLow * source->subHeight)); // SubPixelLowY = VLow * (Height * 256)

			
 
				+			U32x4 subPixelOffset = U32x4(1073741952); // 2 to the power of 30 + 128, adjusting to a safe part of the unsigned integer and adding half a pixel for the bi-linear interpolation.

			
 
				+			U32x4 subPixLowX(truncateToU32(u * source->subWidth) + subPixelOffset); // SubPixelLowX = u * (Width * 256) + 128

			
 
				+			U32x4 subPixLowY(truncateToU32(v * source->subHeight) + subPixelOffset); // SubPixelLowY = v * (Height * 256) + 128

			
 
				 			U32x4 weightX = subPixLowX & 255; // WeightX = SubPixelLowX % 256

			
 
				 			U32x4 weightY = subPixLowY & 255; // WeightY = SubPixelLowY % 256

			
 
				 			U32x4 pixLowX(subPixLowX >> 8); // PixelLowX = SubPixelLowX / 256

			
@@ -157,25 +153,24 @@ namespace shaderMethods {
 
				 			// Take a weighted average

			
 
				 			return shaderMethods::mix_BL(colorA, colorB, colorC, colorD, weightX, weightY);

			
 
				 		} else { // Interpolation::NN or unhandled

			
 
				-			U32x4 pixX(truncateToU32(u * source->width)); // PixelX = U * Width

			
 
				-			U32x4 pixY(truncateToU32(v * source->height)); // PixelY = V * Height

			
 
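				+			// TODO: Test nearest neighbor sampling. NOTE(review): 32-bit floats step by 128 at 2^30, so adding the offset below in float likely quantizes the pixel coordinate; confirm and consider adding the offset after truncation, as the bi-linear U32 path does.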

			
 
				+			F32x4 subPixelOffset = F32x4(1073741824.0f);

			
 
				+			// TODO: Use multiply and add instructions.

			
 
				+			U32x4 pixX(truncateToU32(u * source->width + subPixelOffset));  // PixelX = U * Width

			
 
				+			U32x4 pixY(truncateToU32(v * source->height + subPixelOffset)); // PixelY = V * Height

			
 
				 			U32x4 col(pixX & source->widthMask); // Column = PixelX % Width

			
 
				 			U32x4 row(pixY & source->heightMask); // Row = PixelY % Height

			
 
				 			return sample_U32(source, col, row);

			
 
				 		}

			
 
				 	}

			
 
				 

			
 
				-	// Preconditions:

			
 
				-	//   u >= -halfPixelOffsetU

			
 
				-	//   v >= -halfPixelOffsetV

			
 
				 	template<Interpolation INTERPOLATION, bool HIGH_QUALITY>

			
 
				 	inline Rgba_F32 sample_F32(const TextureRgbaLayer *source, const F32x4 &u, const F32x4 &v) {

			
 
				 		if (INTERPOLATION == Interpolation::BL) {

			
 
				 			if (HIGH_QUALITY) { // High quality interpolation

			
 
				-				F32x4 uLow(u + source->halfPixelOffsetU);

			
 
				-				F32x4 vLow(v + source->halfPixelOffsetV);

			
 
				-				F32x4 pixX = uLow * source->width; // PixelX = ULow * Width

			
 
				-				F32x4 pixY = vLow * source->height; // PixelY = VLow * Height

			
 
				+				F32x4 subPixelOffset = F32x4(4194304.5f); // A large power of two and half a pixel's offset for bi-linear interpolation.

			
 
				+				F32x4 pixX = u * source->width + subPixelOffset; // PixelX = U * Width + SubPixelOffset

			
 
				+				F32x4 pixY = v * source->height + subPixelOffset; // PixelY = V * Height + SubPixelOffset

			
 
				 				// Truncation can be used as floor for positive input

			
 
				 				U32x4 pixLowX(truncateToU32(pixX)); // PixelLowX = floor(PixelX)

			
 
				 				U32x4 pixLowY(truncateToU32(pixY)); // PixelLowY = floor(PixelY)

			
@@ -204,9 +199,6 @@ namespace shaderMethods {
 
				 	}

			
 
				 

			
 
				 	// Multi layer sampling method

			
 
				-	// Preconditions:

			
 
				-	//   u >= -halfPixelOffsetU

			
 
				-	//   v >= -halfPixelOffsetV

			
 
				 	template<Interpolation INTERPOLATION>

			
 
				 	inline U32x4 sample_U32(const TextureRgba *source, const F32x4 &u, const F32x4 &v) {

			
 
				 		int mipLevel = getMipLevel(source, u, v);