7 months ago · d5aaf36e57
--- a/Source/DFPSR/api/textureAPI.h
+++ b/Source/DFPSR/api/textureAPI.h
@@ -219,8 +219,8 @@ namespace dsr {
 
				 	// TODO: Can EXISTS be an argument to disable when non-existing images should be replaced with U(255u) for fast prototyping?

			
 
				 	// Sample the nearest pixel in a normalized UV scale where one unit equals one lap around the image.

			
 
				 	// Pre-condition:

			
 
				-	//   0.0f <= u, 0.0f <= v

			
 
				-	//   Negative texture coordinates are not allowed, because they are converted to unsigned integers for bitwise operations.

			
 
				+	//   -256.0f <= u, -256.0f <= v

			
 
				+	//   Negative texture coordinates must not go below -256, or else they will be stretched out on ARM NEON, which clamps out-of-bounds values when converting to unsigned integers.

			
 
				 	template<

			
 
				 	  bool SQUARE = false,

			
 
				 	  bool SINGLE_LAYER = false,

			
@@ -236,8 +236,10 @@ namespace dsr {
 
				 			scaleU = scaleU >> mipLevel;

			
 
				 			scaleV = scaleV >> mipLevel;

			
 
				 		}

			
 
				-		auto xPixel = truncateToU32(u * floatFromU32(scaleU));

			
 
				-		auto yPixel = truncateToU32(v * floatFromU32(scaleV));

			
 
				+		// A constant offset applied to texture coordinates to allow using negative coordinates.

			
 
				+		static const float wrapOffset = 256.0f;

			
 
				+		auto xPixel = truncateToU32((u + wrapOffset) * floatFromU32(scaleU));

			
 
				+		auto yPixel = truncateToU32((v + wrapOffset) * floatFromU32(scaleV));

			
 
				 		return texture_readPixel<SQUARE, SINGLE_LAYER, false, MIP_INSIDE, HIGHEST_RESOLUTION>(texture, xPixel, yPixel, mipLevel);

			
 
				 	}

			
 
				 

			
@@ -323,6 +325,9 @@ namespace dsr {
 
				 		return weightColors(weightColors(colorA, weightXL, colorB, weightXR), weightYT, weightColors(colorC, weightXL, colorD, weightXR), weightYB);

			
 
				 	}

			
 
				 

			
 
				+	// Pre-condition:

			
 
				+	//   -256.0f <= u, -256.0f <= v

			
 
				+	//   Negative texture coordinates must not go below -256, or else they will be stretched out on ARM NEON, which clamps out-of-bounds values when converting to unsigned integers.

			
 
				 	template<

			
 
				 	  bool SQUARE = false,

			
 
				 	  bool SINGLE_LAYER = false,

			
@@ -341,10 +346,12 @@ namespace dsr {
 
				 			scaleU = scaleU >> mipLevel;

			
 
				 			scaleV = scaleV >> mipLevel;

			
 
				 		}

			
 
				+		// A constant offset applied to texture coordinates to allow using negative coordinates.

			
 
				+		static const float wrapOffset = 256.0f;

			
 
				 		// Convert from the normalized 0..1 scale to a 0..size*256 scale for 8 bits of sub-pixel precision.

			
 
				 		//   Half a pixel is subtracted so that the seam between bi-linear patches end up at the center of texels.

			
 
				-		auto subCenterX = truncateToU32(u * floatFromU32(scaleU)) - 128u;

			
 
				-		auto subCenterY = truncateToU32(v * floatFromU32(scaleV)) - 128u;

			
 
				+		auto subCenterX = truncateToU32((u + wrapOffset) * floatFromU32(scaleU)) - 128u;

			
 
				+		auto subCenterY = truncateToU32((v + wrapOffset) * floatFromU32(scaleV)) - 128u;

			
 
				 		// Get the remainders as interpolation weights.

			
 
				 		auto weightX = subCenterX & 0xFF;

			
 
				 		auto weightY = subCenterY & 0xFF;

			
--- a/Source/test/tests/SimdTest.cpp
+++ b/Source/test/tests/SimdTest.cpp
@@ -3,9 +3,7 @@
 
				 #include "../../DFPSR/base/simd.h"

			
 
				 #include "../../DFPSR/base/endian.h"

			
 
				 

			
 
				-// TODO: Test: allLanesNotEqual, allLanesLesser, allLanesGreater, allLanesLesserOrEqual, allLanesGreaterOrEqual, operand ~, smaller bit shifts.

			
 
				-// TODO: Test that truncateToU32 saturates to minimum and maximum values.

			
 
				-// TODO: Test that truncateToI32 saturates to minimum and maximum values.

			
 
				+// TODO: Test: allLanesNotEqual, allLanesLesser, allLanesGreater, allLanesLesserOrEqual, allLanesGreaterOrEqual, operand ~.

			
 
				 // TODO: Set up a test where SIMD is disabled to force using the reference implementation.

			
 
				 // TODO: Keep the reference implementation alongside the SIMD types during brute-force testing with millions of random inputs.

			
 
				 

			
@@ -902,6 +900,16 @@ START_TEST(Simd)
 
				 	ASSERT_EQUAL_SIMD(clampUpper(F32x8(-35.1f, 1.0f, 2.0f, 45.7f, 0.0f, -1.0f, 2.1f, -1.9f), F32x8(1.5f)), F32x8(-35.1f, 1.0f, 1.5f, 1.5f, 0.0f, -1.0f, 1.5f, -1.9f));

			
 
				 	ASSERT_EQUAL_SIMD(clampLower(F32x8(-1.5f), F32x8(-35.1f, 1.0f, 2.0f, 45.7f, 0.0f, -1.0f, 2.1f, -1.9f)), F32x8(-1.5f, 1.0f, 2.0f, 45.7f, 0.0f, -1.0f, 2.1f, -1.5f));

			
 
				 

			
 
				+	// Float to integer conversions

			
 
				+	// Underflow and overflow are undefined behavior, because NEON will clamp out-of-bounds values while SSE will truncate away the higher bits.

			
 
				+	ASSERT_EQUAL_SIMD(truncateToU32(F32x4(0.01f, 0.99f, 1.01f, 1.99f)),U32x4(0, 0, 1, 1));

			
 
				+	ASSERT_EQUAL_SIMD(truncateToI32(F32x4(0.01f, 0.99f, 1.01f, 1.99f)),I32x4(0, 0, 1, 1));

			
 
				+	ASSERT_EQUAL_SIMD(truncateToI32(F32x4(-0.01f, -0.99f, -1.01f, -1.99f)),I32x4(0, 0, -1, -1));

			
 
				+	ASSERT_EQUAL_SIMD(truncateToU32(F32x4(0.1f, 5.4f, 2.6f, 4.9f)),U32x4(0, 5, 2, 4));

			
 
				+	ASSERT_EQUAL_SIMD(truncateToI32(F32x4(0.1f, 5.4f, 2.6f, 4.9f)),I32x4(0, 5, 2, 4));

			
 
				+	ASSERT_EQUAL_SIMD(truncateToI32(F32x4(-1.1f, -0.9f, -0.1f, 0.1f)),I32x4(-1, 0, 0, 0));

			
 
				+	ASSERT_EQUAL_SIMD(truncateToI32(F32x4(-1000.9f, -23.4f, 123456.7f, 846.999f)),I32x4(-1000, -23, 123456, 846));

			
 
				+

			
 
				 	// F32x4 operations

			
 
				 	ASSERT_EQUAL_SIMD(F32x4(1.1f, -2.2f, 3.3f, 4.0f) + F32x4(2.2f, -4.4f, 6.6f, 8.0f), F32x4(3.3f, -6.6f, 9.9f, 12.0f));

			
 
				 	ASSERT_EQUAL_SIMD(F32x4(-1.5f, -0.5f, 0.5f, 1.5f) + 1.0f, F32x4(-0.5f, 0.5f, 1.5f, 2.5f));

			
--- a/Source/test/tests/TextureTest.cpp
+++ b/Source/test/tests/TextureTest.cpp
@@ -455,6 +455,38 @@ START_TEST(Texture)
 
				 		ASSERT_EQUAL(texture_sample_nearest(texture, 0.75f, 0.25f, 1u), 1101u);

			
 
				 		ASSERT_EQUAL(texture_sample_nearest(texture, 0.25f, 0.75f, 1u), 1011u);

			
 
				 		ASSERT_EQUAL(texture_sample_nearest(texture, 0.75f, 0.75f, 1u), 1111u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.01f, 0.01f, 1u), 1001u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.01f, 0.49f, 1u), 1001u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.49f, 0.01f, 1u), 1001u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.49f, 0.49f, 1u), 1001u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.51f, 0.01f, 1u), 1101u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.51f, 0.49f, 1u), 1101u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.99f, 0.01f, 1u), 1101u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.99f, 0.49f, 1u), 1101u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.01f, 0.51f, 1u), 1011u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.01f, 0.99f, 1u), 1011u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.49f, 0.51f, 1u), 1011u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.49f, 0.99f, 1u), 1011u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.51f, 0.51f, 1u), 1111u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.51f, 0.99f, 1u), 1111u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.99f, 0.51f, 1u), 1111u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.99f, 0.99f, 1u), 1111u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.01f - 256.0f, 0.01f - 256.0f, 1u), 1001u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.01f - 256.0f, 0.49f - 256.0f, 1u), 1001u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.49f - 256.0f, 0.01f - 256.0f, 1u), 1001u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.49f - 256.0f, 0.49f - 256.0f, 1u), 1001u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.51f - 256.0f, 0.01f - 256.0f, 1u), 1101u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.51f - 256.0f, 0.49f - 256.0f, 1u), 1101u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.99f - 256.0f, 0.01f - 256.0f, 1u), 1101u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.99f - 256.0f, 0.49f - 256.0f, 1u), 1101u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.01f - 256.0f, 0.51f - 256.0f, 1u), 1011u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.01f - 256.0f, 0.99f - 256.0f, 1u), 1011u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.49f - 256.0f, 0.51f - 256.0f, 1u), 1011u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.49f - 256.0f, 0.99f - 256.0f, 1u), 1011u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.51f - 256.0f, 0.51f - 256.0f, 1u), 1111u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.51f - 256.0f, 0.99f - 256.0f, 1u), 1111u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.99f - 256.0f, 0.51f - 256.0f, 1u), 1111u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.99f - 256.0f, 0.99f - 256.0f, 1u), 1111u);

			
 
				 		ASSERT_EQUAL(texture_sample_nearest(texture, 0.5f / 4.0f, 0.5f / 4.0f, 0u), 1002u);

			
 
				 		ASSERT_EQUAL(texture_sample_nearest(texture, 1.5f / 4.0f, 0.5f / 4.0f, 0u), 1102u);

			
 
				 		ASSERT_EQUAL(texture_sample_nearest(texture, 2.5f / 4.0f, 0.5f / 4.0f, 0u), 1202u);

			
@@ -474,6 +506,9 @@ START_TEST(Texture)
 
				 		ASSERT_EQUAL(texture_sample_nearest(texture, -53.0f, -17.0f,  2u), 1000u);

			
 
				 		ASSERT_EQUAL(texture_sample_nearest(texture, -53.0f, -17.0f,  3u), 1000u);

			
 
				 		ASSERT_EQUAL(texture_sample_nearest(texture, -53.0f, -17.0f, 15u), 1000u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, -255.7f, -255.7f, 0u), 1112u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, -100.7f, -64.7f, 0u), 1112u);

			
 
				+		ASSERT_EQUAL(texture_sample_nearest(texture, -84.7f, 0.3f, 0u), 1112u);

			
 
				 		// TODO: Test the optimization template flags.

			
 
				 	}

			
 
				 		// TODO: Test reading pixels from SafePointer with and without a specified row index.