Browse Source

Letting pyramid images store start offsets instead of pointers.

David Piuva 2 years ago
parent
commit
66502cb2ac

+ 10 - 7
Source/DFPSR/image/ImageRgbaU8.cpp

@@ -228,16 +228,16 @@ static void updatePyramid(TextureRgba &texture, int32_t layerCount) {
 		int32_t sourceIndex = targetIndex - 1;
 		int32_t targetWidth = texture.mips[targetIndex].width;
 		int32_t targetHeight = texture.mips[targetIndex].height;
-		downScaleByTwo(texture.mips[targetIndex].data, texture.mips[sourceIndex].data, targetWidth, targetHeight, targetWidth * pixelSize);
+		downScaleByTwo(texture.data + texture.mips[targetIndex].startOffset, texture.data + texture.mips[sourceIndex].startOffset, targetWidth, targetHeight, targetWidth * pixelSize);
 	}
 	texture.layerCount = layerCount;
 }
 
 TextureRgbaLayer::TextureRgbaLayer() {}
 
-TextureRgbaLayer::TextureRgbaLayer(SafePointer<uint32_t> data, int32_t width, int32_t height) :
-  data(data),
-  strideShift(getSizeGroup(width) + 2),
+TextureRgbaLayer::TextureRgbaLayer(uint32_t startOffset, int32_t width, int32_t height) :
+  startOffset(startOffset),
+  widthShift(getSizeGroup(width)),
   widthMask(width - 1),
   heightMask(height - 1),
   width(width),
@@ -249,7 +249,7 @@ void ImageRgbaU8Impl::generatePyramidStructure(int32_t layerCount) {
 	int32_t currentWidth = this->width;
 	int32_t currentHeight = this->height;
 	// Allocate smaller pyramid images within the buffer
-	SafePointer<uint32_t> currentStart = buffer_getSafeData<uint32_t>(this->buffer, "Pyramid generation target");
+	uint32_t currentStart = 0;
 	for (int32_t m = 0; m < layerCount; m++) {
 		this->texture.mips[m] = TextureRgbaLayer(currentStart, currentWidth, currentHeight);
 		currentStart += currentWidth * currentHeight;
@@ -262,14 +262,17 @@ void ImageRgbaU8Impl::generatePyramidStructure(int32_t layerCount) {
 		this->texture.mips[m] = this->texture.mips[m - 1];
 	}
 	this->texture.layerCount = layerCount;
+	this->texture.data = imageInternal::getSafeData<uint32_t>(*this);
 }
 
 void ImageRgbaU8Impl::removePyramidStructure() {
+	// The mip layers have offsets relative to the texture's data pointer, which is already compensating for any offset from any parent image.
 	for (int32_t m = 0; m < MIP_BIN_COUNT; m++) {
-		this->texture.mips[m] = TextureRgbaLayer(imageInternal::getSafeData<uint32_t>(*this), this->width, this->height);
+		this->texture.mips[m] = TextureRgbaLayer(0, this->width, this->height);
 	}
 	// Declare the old pyramid invalid so that it will not be displayed while rendering, but keep the extra memory for next time it is generated.
 	this->texture.layerCount = 1;
+	this->texture.data = imageInternal::getSafeData<uint32_t>(*this);
 }
 
 void ImageRgbaU8Impl::makeIntoTexture() {
@@ -309,7 +312,7 @@ void ImageRgbaU8Impl::generatePyramid() {
 		int32_t currentHeight = this->height;
 		this->generatePyramidStructure(layerCount);
 		// Copy the image's old content while assuming that there is no padding.
-		safeMemoryCopy(this->texture.mips[0].data, oldData, this->width * this->height * pixelSize);
+		safeMemoryCopy(this->texture.data + this->texture.mips[0].startOffset, oldData, this->width * this->height * pixelSize);
 		// Generate smaller images.
 		updatePyramid(this->texture, layerCount);
 		// Once an image has had a pyramid generated, the new buffer will remain for as long as the image exists.

+ 16 - 19
Source/DFPSR/image/ImageRgbaU8.h

@@ -30,17 +30,10 @@
 
 namespace dsr {
 
-// TODO: Figure out how to handle very small textures where the full resolution is smaller than the smallest allowed MIP level.
-//       Larger SIMD vectors will change the smallest allowed textures if the bit pattern is used, which may be inconsistent if it keeps growing.
-//       Having 32x32 pixels as the minimum size would allow using up to 1024-bit SIMD without a problem.
-// TODO: Reallocate the image's buffer, so that the pyramid images are placed into the same allocation.
-//       This allow reading a texture with multiple mip levels using different 32-bit offsets in the same SIMD vector holding multiple groups of 2x2 pixels.
-// TODO: Adapt how far down to go in mip resolutions based on DSR_DEFAULT_ALIGNMENT, so that no mip level is padded in memory.
-//       This is needed so that the stride can be calculated using bit shifting from the mip level.
-//       The visual appearance may differ between SIMD lengths for low resolution textures, but not between computers running the same executable.
-// TODO: Begin by replacing the lookup table for pyramid layers with template inline functions, because figuring out how to get start offset and stride consistently may take time.
-//       Pixel loops will later look up the masks once and store it in SIMD vectors to avoid fetching it from memory multiple times from potential memory aliasing.
-// IDEA: Keep the same order of mip layers, but mask out offset bits from the right side.
+// TODO: Check that the start offsets in mip layers are based on the image's own start offset.
+// TODO: Replace the lookup table for pyramid layers with template inline functions, so that it can be vectorized per pixel or 2x2 group using bitwise operations.
+// TODO: Calculate start offset dynamically for textures.
+//       Keep the same order of mip layers, but mask out offset bits from the right side.
 //       When the most significant bit is masked out, it jumps to the full resolution image at offset zero.
 //       Offsets
 //         00000000000000000000000000000000 Full resolution of 64x64
@@ -67,15 +60,17 @@ namespace dsr {
 
 // Pointing to the parent image using raw pointers for fast rendering. May not exceed the lifetime of the parent image!
 struct TextureRgbaLayer {
-	SafePointer<uint32_t> data;                                         // Replace with the start offset
-	int32_t strideShift = 0;                                            // Subtract one per layer 
+	// Offset from the main texture's data pointer in whole texels.
+	uint32_t startOffset = 0;                                           // Generate by and-masking the smallest image's start offset with a double bit shift
+	// How much should we shift one to the left to get the stride in whole texels.
+	int32_t widthShift = 0;                                             // Subtract one per layer 
 	uint32_t widthMask = 0, heightMask = 0;                             // Shift one bit right per layer
+	// TODO: These dimensions are integers added against floats, which is very expensive.
+	//       Try to apply their multiplication against UV coordinates in an integer scale after getting enough bits for both high resolution and many laps around the texture.
 	int32_t width = 0, height = 0;                                      // Shift one bit right per layer
 	float subWidth = 0.0f, subHeight = 0.0f;                            // Try to use integers, so that these can be shifted
 	TextureRgbaLayer();
-	TextureRgbaLayer(SafePointer<uint32_t> data, int32_t width, int32_t height);
-	// Can it be sampled as a texture
-	bool exists() const { return this->data.getUnsafe() != nullptr; }
+	TextureRgbaLayer(uint32_t startOffset, int32_t width, int32_t height);
 };
 
 // TODO: Try to replace with generated bit masks from inline functions.
@@ -83,14 +78,16 @@ struct TextureRgbaLayer {
 
 // Pointing to the parent image using raw pointers for fast rendering. Do not separate from the image!
 struct TextureRgba {
+	SafePointer<uint32_t> data; // Direct access to the shared buffer's content for faster sampling.
 	// TODO: Remove the array, so that any number of layers can be contained by calculating the masks and offsets.
+	// TODO: Store bit masks and offsets needed to quickly generate the memory offsets for a pixel coordinate at a specified mip layer.
 	TextureRgbaLayer mips[MIP_BIN_COUNT]; // Pointing to all mip levels including the original image
 	int32_t layerCount = 0; // 0 Means that there are no pointers, 1 means that you have a pyramid but only one layer.
 	// Can it be sampled as a texture
-	bool exists() const { return this->mips[0].exists(); }
+	bool exists() const { return this->layerCount > 0; }
 	// Does it have a mip pyramid generated for smoother sampling
-	// TODO: Rename once there is no separate MIP buffer, just a single pyramid buffer.
-	bool hasMipBuffer() const { return this->layerCount != 0; }
+	// TODO: Rename.
+	bool hasMipBuffer() const { return this->layerCount > 1; }
 };
 
 class ImageRgbaU8Impl : public ImageImpl {

+ 14 - 30
Source/DFPSR/render/shader/RgbaMultiply.h

@@ -1,6 +1,6 @@
 // zlib open source license
 //
-// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// Copyright (c) 2017 to 2023 David Forsgren Piuva
 // 
 // This software is provided 'as-is', without any express or implied
 // warranty. In no event will the authors be held liable for any damages
@@ -36,9 +36,8 @@ namespace dsr {
 template <bool HAS_DIFFUSE_MAP, bool HAS_LIGHT_MAP, bool HAS_VERTEX_FADING, bool COLORLESS, bool DISABLE_MIPMAP>
 class Shader_RgbaMultiply : public Shader {
 private:
-	const TextureRgba *diffuseMap; // The full diffuseMap mipmap pyramid to use without DISABLE_MIPMAP
-	const TextureRgbaLayer *diffuseLayer; // Layer 0 of diffuseMap to use with DISABLE_MIPMAP
-	const TextureRgbaLayer *lightLayer;
+	const TextureRgba *diffuseMap; // Mip-mapping is allowed for diffuse textures.
+	const TextureRgba *lightMap; // Mip-mapping is not allowed for lightmaps, because it would increase the number of shaders to compile and still look worse.
 	// Planar format with each vector representing the three triangle corners
 	const TriangleTexCoords texCoords;
 	const TriangleColors colors;
@@ -46,32 +45,25 @@ private:
 	float getVertexScale() {
 		float result = 255.0f; // Scale from normalized to byte for the output
 		if (HAS_DIFFUSE_MAP) {
-			result /= 255.0f; // Normalize the diffuse map from 0..255 to 0..1 by dividing the vertex color
+			result *= 1.0f / 255.0f; // Normalize the diffuse map from 0..255 to 0..1 by dividing the vertex color
 		}
 		if (HAS_LIGHT_MAP) {
-			result /= 255.0f; // Normalize the light map from 0..255 to 0..1 by dividing the vertex color
+			result *= 1.0f / 255.0f; // Normalize the light map from 0..255 to 0..1 by dividing the vertex color
 		}
 		return result;
 	}
 	explicit Shader_RgbaMultiply(const TriangleInput &triangleInput) :
 	  diffuseMap(triangleInput.diffuseImage ? &(triangleInput.diffuseImage->texture) : nullptr),
-	  diffuseLayer(triangleInput.diffuseImage ? &(triangleInput.diffuseImage->texture.mips[0]) : nullptr),
-	  lightLayer(triangleInput.lightImage ? &(triangleInput.lightImage->texture.mips[0]) : nullptr),
+	  lightMap(triangleInput.lightImage ? &(triangleInput.lightImage->texture) : nullptr),
 	  texCoords(triangleInput.texCoords), colors(triangleInput.colors.getScaled(getVertexScale())) {
 		// Texture coordinates must be on the positive side to allow using truncation as a floor function
 		if (HAS_DIFFUSE_MAP) {
-			// Incorrect tests?
-			if (DISABLE_MIPMAP) {
-				assert(this->diffuseLayer != nullptr); // Cannot sample null
-				assert(this->diffuseLayer->exists()); // Cannot sample regular images
-			} else {
-				assert(this->diffuseMap != nullptr); // Cannot sample null
-				assert(this->diffuseMap->exists()); // Cannot sample regular images
-			}
+			assert(this->diffuseMap != nullptr); // Cannot sample null
+			assert(this->diffuseMap->exists()); // Cannot sample regular images
 		}
 		if (HAS_LIGHT_MAP) {
-			assert(this->lightLayer != nullptr); // Cannot sample null
-			assert(this->lightLayer->exists()); // Cannot sample regular images
+			assert(this->lightMap != nullptr); // Cannot sample null
+			assert(this->lightMap->exists()); // Cannot sample regular images
 		}
 	}
 public:
@@ -86,16 +78,12 @@ public:
 			// Optimized for diffuse only
 			F32x4 u1(shaderMethods::interpolate(this->texCoords.u1, vertexWeights));
 			F32x4 v1(shaderMethods::interpolate(this->texCoords.v1, vertexWeights));
-			if (DISABLE_MIPMAP) {
-				return shaderMethods::sample_F32<Interpolation::BL, false>(this->diffuseLayer, u1, v1);
-			} else {
-				return shaderMethods::sample_F32<Interpolation::BL, false>(this->diffuseMap, u1, v1);
-			}
+			return shaderMethods::sample_F32<Interpolation::BL, DISABLE_MIPMAP, false>(this->diffuseMap, u1, v1);
 		} else if (HAS_LIGHT_MAP && !HAS_DIFFUSE_MAP && COLORLESS) {
 			// Optimized for light only
 			F32x4 u2(shaderMethods::interpolate(this->texCoords.u2, vertexWeights));
 			F32x4 v2(shaderMethods::interpolate(this->texCoords.v2, vertexWeights));
-			return shaderMethods::sample_F32<Interpolation::BL, false>(this->lightLayer, u2, v2);
+			return shaderMethods::sample_F32<Interpolation::BL, true, false>(this->lightMap, u2, v2);
 		} else {
 			// Interpolate the vertex color
 			Rgba_F32 color = HAS_VERTEX_FADING ?
@@ -105,17 +93,13 @@ public:
 			if (HAS_DIFFUSE_MAP) {
 				F32x4 u1(shaderMethods::interpolate(this->texCoords.u1, vertexWeights));
 				F32x4 v1(shaderMethods::interpolate(this->texCoords.v1, vertexWeights));
-				if (DISABLE_MIPMAP) {
-					color = color * shaderMethods::sample_F32<Interpolation::BL, false>(this->diffuseLayer, u1, v1);
-				} else {
-					color = color * shaderMethods::sample_F32<Interpolation::BL, false>(this->diffuseMap, u1, v1);
-				}
+				color = color * shaderMethods::sample_F32<Interpolation::BL, DISABLE_MIPMAP, false>(this->diffuseMap, u1, v1);
 			}
 			// Sample lightmap
 			if (HAS_LIGHT_MAP) {
 				F32x4 u2(shaderMethods::interpolate(this->texCoords.u2, vertexWeights));
 				F32x4 v2(shaderMethods::interpolate(this->texCoords.v2, vertexWeights));
-				color = color * shaderMethods::sample_F32<Interpolation::BL, false>(this->lightLayer, u2, v2);
+				color = color * shaderMethods::sample_F32<Interpolation::BL, true, false>(this->lightMap, u2, v2);
 			}
 			return color;
 		}

+ 30 - 25
Source/DFPSR/render/shader/shaderMethods.h

@@ -96,9 +96,9 @@ namespace shaderMethods {
 	}
 
 	// Single layer sampling methods
-	inline U32x4 sample_U32(const TextureRgbaLayer *source, const U32x4 &col, const U32x4 &row) {
-		U32x4 pixelOffset((col + (row << (source->strideShift - 2)))); // PixelOffset = Column + Row * PixelStride
-		return gather(source->data, pixelOffset);
+	inline U32x4 sample_U32(SafePointer<uint32_t> data, const TextureRgbaLayer *source, const U32x4 &col, const U32x4 &row) {
+		U32x4 pixelOffset((source->startOffset + col + (row << source->widthShift))); // PixelOffset = Start + Column + Row * Width
+		return gather(data, pixelOffset);
 	}
 
 	// How many mip levels down from here should be sampled for the given texture coordinates
@@ -130,7 +130,7 @@ namespace shaderMethods {
 
 	// Single layer sampling method
 	template<Interpolation INTERPOLATION>
-	inline U32x4 sample_U32(const TextureRgbaLayer *source, const F32x4 &u, const F32x4 &v) {
+	inline U32x4 sample_U32(SafePointer<uint32_t> data, const TextureRgbaLayer *source, const F32x4 &u, const F32x4 &v) {
 		if (INTERPOLATION == Interpolation::BL) {
 			U32x4 subPixelOffset = U32x4(1073741952); // 2 to the power of 30 + 128, adjusting to a safe part of the unsigned integer and adding half a pixel for the bi-linear interpolation.
 			U32x4 subPixLowX(truncateToU32(u * source->subWidth) + subPixelOffset); // SubPixelLowX = u * (Width * 256) + 128
@@ -146,10 +146,10 @@ namespace shaderMethods {
 			U32x4 colHigh(((colLow + 1) & wMask)); // ColumnHigh = (ColumnLow + 1) % Width
 			U32x4 rowHigh(((rowLow + 1) & hMask)); // RowHigh = (RowLow + 1) % Height
 			// Sample colors in the 4 closest pixels
-			U32x4 colorA(sample_U32(source, colLow, rowLow));
-			U32x4 colorB(sample_U32(source, colHigh, rowLow));
-			U32x4 colorC(sample_U32(source, colLow, rowHigh));
-			U32x4 colorD(sample_U32(source, colHigh, rowHigh));
+			U32x4 colorA(sample_U32(data, source, colLow, rowLow));
+			U32x4 colorB(sample_U32(data, source, colHigh, rowLow));
+			U32x4 colorC(sample_U32(data, source, colLow, rowHigh));
+			U32x4 colorD(sample_U32(data, source, colHigh, rowHigh));
 			// Take a weighted average
 			return shaderMethods::mix_BL(colorA, colorB, colorC, colorD, weightX, weightY);
 		} else { // Interpolation::NN or unhandled
@@ -160,12 +160,12 @@ namespace shaderMethods {
 			U32x4 pixY(truncateToU32(v * source->height + subPixelOffset)); // PixelY = V * Height
 			U32x4 col(pixX & source->widthMask); // Column = PixelX % Width
 			U32x4 row(pixY & source->heightMask); // Row = PixelY % Height
-			return sample_U32(source, col, row);
+			return sample_U32(data, source, col, row);
 		}
 	}
 
 	template<Interpolation INTERPOLATION, bool HIGH_QUALITY>
-	inline Rgba_F32 sample_F32(const TextureRgbaLayer *source, const F32x4 &u, const F32x4 &v) {
+	inline Rgba_F32 sample_F32(SafePointer<uint32_t> data, const TextureRgbaLayer *source, const F32x4 &u, const F32x4 &v) {
 		if (INTERPOLATION == Interpolation::BL) {
 			if (HIGH_QUALITY) { // High quality interpolation
 				F32x4 subPixelOffset = F32x4(4194304.5f); // A large power of two and half a pixel's offset for bi-linear interpolation.
@@ -181,37 +181,42 @@ namespace shaderMethods {
 				U32x4 colHigh(((colLow + 1) & wMask)); // ColumnHigh = (ColumnLow + 1) % Width
 				U32x4 rowHigh(((rowLow + 1) & hMask)); // RowHigh = (RowLow + 1) % Height
 				// Sample colors in the 4 closest pixels
-				Rgba_F32 colorA(Rgba_F32(sample_U32(source, colLow, rowLow)));
-				Rgba_F32 colorB(Rgba_F32(sample_U32(source, colHigh, rowLow)));
-				Rgba_F32 colorC(Rgba_F32(sample_U32(source, colLow, rowHigh)));
-				Rgba_F32 colorD(Rgba_F32(sample_U32(source, colHigh, rowHigh)));
+				Rgba_F32 colorA(Rgba_F32(sample_U32(data, source, colLow, rowLow)));
+				Rgba_F32 colorB(Rgba_F32(sample_U32(data, source, colHigh, rowLow)));
+				Rgba_F32 colorC(Rgba_F32(sample_U32(data, source, colLow, rowHigh)));
+				Rgba_F32 colorD(Rgba_F32(sample_U32(data, source, colHigh, rowHigh)));
 				F32x4 weightX = pixX - floatFromU32(pixLowX);
 				F32x4 weightY = pixY - floatFromU32(pixLowY);
 				F32x4 invWeightX = 1.0f - weightX;
 				F32x4 invWeightY = 1.0f - weightY;
 				return (colorA * invWeightX + colorB * weightX) * invWeightY + (colorC * invWeightX + colorD * weightX) * weightY;
 			} else { // Fast interpolation
-				return Rgba_F32(sample_U32<Interpolation::BL>(source, u, v));
+				return Rgba_F32(sample_U32<Interpolation::BL>(data, source, u, v));
 			}
 		} else { // Interpolation::NN or unhandled
-			return Rgba_F32(sample_U32<Interpolation::NN>(source, u, v));
+			return Rgba_F32(sample_U32<Interpolation::NN>(data, source, u, v));
 		}
 	}
 
 	// Multi layer sampling method
-	template<Interpolation INTERPOLATION>
+	template<Interpolation INTERPOLATION, bool DISABLE_MIPMAP>
 	inline U32x4 sample_U32(const TextureRgba *source, const F32x4 &u, const F32x4 &v) {
-		int mipLevel = getMipLevel(source, u, v);
-		return sample_U32<INTERPOLATION>(&(source->mips[mipLevel]), u, v);
+		if (DISABLE_MIPMAP) {
+			return sample_U32<INTERPOLATION>(source->data, &(source->mips[0]), u, v);
+		} else {
+			int mipLevel = getMipLevel(source, u, v);
+			return sample_U32<INTERPOLATION>(source->data, &(source->mips[mipLevel]), u, v);
+		}
 	}
 
-	// Preconditions:
-	//   u >= -halfPixelOffsetU
-	//   v >= -halfPixelOffsetV
-	template<Interpolation INTERPOLATION, bool HIGH_QUALITY>
+	template<Interpolation INTERPOLATION, bool DISABLE_MIPMAP, bool HIGH_QUALITY>
 	inline Rgba_F32 sample_F32(const TextureRgba *source, const F32x4 &u, const F32x4 &v) {
-		int mipLevel = getMipLevel(source, u, v);
-		return sample_F32<INTERPOLATION, HIGH_QUALITY>(&(source->mips[mipLevel]), u, v);
+		if (DISABLE_MIPMAP) {
+			return sample_F32<INTERPOLATION, HIGH_QUALITY>(source->data, &(source->mips[0]), u, v);
+		} else {
+			int mipLevel = getMipLevel(source, u, v);
+			return sample_F32<INTERPOLATION, HIGH_QUALITY>(source->data, &(source->mips[mipLevel]), u, v);
+		}
 	}
 }