Browse Source

Minimum texture resolution is now 32x32 pixels for future 1024-bit SIMD support, but textures are automatically rescaled before generating texture pyramids.
Reallocating the existing image buffer for storing the pyramid, so that texture sampling only needs one pointer and offsets.
Started to vectorize image downscaling, but needed more SIMD functions.
Using the portable gather function in texture sampling.

David Piuva 2 years ago
parent
commit
4b28d8f085

+ 5 - 0
Source/DFPSR/api/imageAPI.cpp

@@ -185,6 +185,11 @@ PackOrderIndex dsr::image_getPackOrderIndex(const ImageRgbaU8& image) {
 }
 
 // Texture
+void dsr::image_makeIntoTexture(ImageRgbaU8& image) {
+	if (image) {
+		image->makeIntoTexture();
+	}
+}
 void dsr::image_generatePyramid(ImageRgbaU8& image) {
 	if (image) {
 		image->generatePyramid();

+ 16 - 8
Source/DFPSR/api/imageAPI.h

@@ -80,13 +80,21 @@ namespace dsr {
 	PackOrderIndex image_getPackOrderIndex(const ImageRgbaU8& image);
 
 // Texture
-	// Pre-condition: image must exist and qualify as a texture according to image_isTexture
-	// Side-effect: Creates a mip-map pyramid of lower resolution images from the current content
-	// If successful, image_hasPyramid should return true from the image
+	// Pre-condition: image must exist for something to happen
+	// Side-effect: If image is not a valid texture, it will be resized into a suitable power-of-two dimension.
+	// Applied automatically when calling image_generatePyramid for the first time on the image.
+	// Warning! May invalidate all SafePointers and raw pointers to the image's data.
+	void image_makeIntoTexture(ImageRgbaU8& image);
+	// Pre-condition: image must exist for something to happen
+	// Side-effects:
+	//  If the image does not have valid texture dimensions, it will be resized using image_makeIntoTexture before generating the pyramid.
+	//  Reallocates the image's buffer and uses the new memory to write smaller versions of the image.
+	// Afterwards, image_hasPyramid should return true for the image
+	// Warning! May invalidate all SafePointers and raw pointers to the image's data.
 	void image_generatePyramid(ImageRgbaU8& image);
 	// Pre-condition: image must exist
 	// Side-effect: Removes image's mip-map pyramid, including its buffer to save memory
-	// If successful, image_hasPyramid should return false from the image
+	// Afterwards, image_hasPyramid should return false for the image
 	void image_removePyramid(ImageRgbaU8& image);
 	// Post-condition: Returns true iff image contains a mip-map pyramid generated by image_generatePyramid
 	// Returns false without a warning if the image handle is empty
@@ -95,10 +103,10 @@ namespace dsr {
 	//   Returns true iff image fulfills the criterias for being a texture
 	//   Returns false without a warning if the image handle is empty
 	// Texture criterias:
-	//  * Each dimension of width and height should be a power-of-two from 4 to 16384
-	//    width = 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192 or 16384
-	//    height = 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192 or 16384
-	//    Large enough to allow padding-free SIMD vectorization of 128-bit vectors (4 x 32 = 128)
+	//  * Each dimension of width and height should be a power-of-two from 32 to 16384
+	//    width = 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192 or 16384
+	//    height = 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192 or 16384
+	//    Large enough to be padding-free with 1024-bit memory alignment.
 	//    Small enough to allow expressing the total size in bytes using a signed 32-bit integer
 	//  * If it's a sub-image, it must also consume the whole with of the original image so that width times pixel size equals the stride
 	//    Textures may not contain padding in the rows, but it's okay to use sub-images from a vertical atlas where the whole width is consumed

+ 2 - 2
Source/DFPSR/image/Image.h

@@ -38,9 +38,9 @@ namespace dsr {
 // See imageInternal.h for protected methods
 class ImageImpl {
 public:
-	const int32_t width, height, stride, pixelSize;
+	int32_t width, height, stride, pixelSize;
 	Buffer buffer; // Content
-	const intptr_t startOffset; // Byte offset of the first pixel
+	intptr_t startOffset; // Byte offset of the first pixel
 	bool isSubImage = false;
 private:
 	void validate() {

+ 159 - 85
Source/DFPSR/image/ImageRgbaU8.cpp

@@ -1,6 +1,6 @@
 // zlib open source license
 //
-// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// Copyright (c) 2017 to 2023 David Forsgren Piuva
 // 
 // This software is provided 'as-is', without any express or implied
 // warranty. In no event will the authors be held liable for any damages
@@ -24,12 +24,15 @@
 #include "ImageRgbaU8.h"
 #include "internal/imageInternal.h"
 #include "internal/imageTemplate.h"
+#include "draw.h"
 #include <algorithm>
 #include "../base/simd.h"
 
 using namespace dsr;
 
-IMAGE_DEFINITION(ImageRgbaU8Impl, 4, Color4xU8, uint8_t);
+static const int pixelSize = 4;
+
+IMAGE_DEFINITION(ImageRgbaU8Impl, pixelSize, Color4xU8, uint8_t);
 
 ImageRgbaU8Impl::ImageRgbaU8Impl(int32_t newWidth, int32_t newHeight, int32_t newStride, Buffer buffer, intptr_t startOffset, const PackOrder &packOrder) :
   ImageImpl(newWidth, newHeight, newStride, sizeof(Color4xU8), buffer, startOffset), packOrder(packOrder) {
@@ -58,7 +61,7 @@ bool ImageRgbaU8Impl::isTexture(const ImageRgbaU8Impl* image) {
 }
 
 ImageRgbaU8Impl ImageRgbaU8Impl::getWithoutPadding() const {
-	if (this->stride == this->width * this->pixelSize) {
+	if (this->stride == this->width * pixelSize) {
 		// No padding
 		return *this;
 	} else {
@@ -104,20 +107,22 @@ ImageU8Impl ImageRgbaU8Impl::getChannel(int32_t channelIndex) const {
 	return result;
 }
 
+static const int32_t smallestSizeGroup = 5;
+static const int32_t largestSizeGroup = 14;
 static int32_t getSizeGroup(int32_t size) {
 	int32_t group = -1;
 	if (size == 1) {
 		group = 0; // Too small for 16-byte alignment!
 	} else if (size == 2) {
-		group = 1; // Too small for 16-byte alignment!
+		group = 1; // Too small for 16-byte alignment! (SSE2)
 	} else if (size == 4) {
-		group = 2; // Smallest allowed texture dimension
+		group = 2; // Too small for 32-byte alignment! (AVX2)
 	} else if (size == 8) {
-		group = 3;
+		group = 3; // Too small for 64-byte alignment! (AVX3)
 	} else if (size == 16) {
-		group = 4;
+		group = 4; // Too small for 128-byte alignment!
 	} else if (size == 32) {
-		group = 5;
+		group = 5; // Smallest allowed texture dimension, allowing 1024-bit SIMD.
 	} else if (size == 64) {
 		group = 6;
 	} else if (size == 128) {
@@ -140,7 +145,22 @@ static int32_t getSizeGroup(int32_t size) {
 	return group;
 }
 
-static int32_t getPyramidSize(int32_t width, int32_t height, int32_t pixelSize, int32_t levels) {
+inline int32_t sizeFromGroup(int32_t group) {
+	return 1 << group;
+}
+
+// Round the size down, unless it is already too small.
+static int32_t roundSize(int32_t size) {
+	for (int groupIndex = smallestSizeGroup; groupIndex < largestSizeGroup; groupIndex++) {
+		int currentSize = sizeFromGroup(groupIndex);
+		if (size < currentSize) {
+			return currentSize;
+		}
+	}
+	return sizeFromGroup(largestSizeGroup);
+}
+
+static int32_t getPyramidSize(int32_t width, int32_t height, int32_t levels) {
 	uint32_t result = 0;
 	uint32_t byteCount = width * height * pixelSize;
 	for (int32_t l = 0; l < levels; l++) {
@@ -150,36 +170,72 @@ static int32_t getPyramidSize(int32_t width, int32_t height, int32_t pixelSize,
 	return (int32_t)result;
 }
 
-static void downScaleByTwo(SafePointer<uint8_t> targetData, const SafePointer<uint8_t> sourceData, int32_t targetWidth, int32_t targetHeight, int32_t pixelSize, int32_t targetStride) {
+inline U32xX averageColor(const U32xX &colorA, const U32xX &colorB) {
+	// TODO: Expand to 16 bits or use built in average intrinsics for full bit depth.
+	// 7-bit precision for speed.
+	return reinterpret_U32FromU8(reinterpret_U8FromU32((colorA >> 1) & U32xX(0b01111111011111110111111101111111)) + reinterpret_U8FromU32((colorB >> 1) & U32xX(0b01111111011111110111111101111111)));
+}
+
+inline U32xX pairwiseAverageColor(const U32xX &colorA, const U32xX &colorB) {
+	// TODO: Vectorize with 32-bit unzipping of pixels and 8-bit average of channels.
+	// Reference implementation
+	ALIGN_BYTES(DSR_DEFAULT_ALIGNMENT) uint8_t elementsA[laneCountX_8Bit];
+	ALIGN_BYTES(DSR_DEFAULT_ALIGNMENT) uint8_t elementsB[laneCountX_8Bit];
+	ALIGN_BYTES(DSR_DEFAULT_ALIGNMENT) uint8_t elementsR[laneCountX_8Bit];
+	colorA.writeAlignedUnsafe((uint32_t*)elementsA);
+	colorB.writeAlignedUnsafe((uint32_t*)elementsB);
+	int32_t halfPixels = laneCountX_32Bit / 2;
+	for (int p = 0; p < halfPixels; p++) {
+		for (int c = 0; c < 4; c++) {
+			elementsR[p * 4 + c] = uint8_t((uint16_t(elementsA[p * 8 + c]) + uint16_t(elementsA[p * 8 + 4 + c])) >> 1);
+			elementsR[(p + halfPixels) * 4 + c] = uint8_t((uint16_t(elementsB[p * 8 + c]) + uint16_t(elementsB[p * 8 + 4 + c])) >> 1);
+		}
+	}
+	return U32xX::readAlignedUnsafe((uint32_t*)elementsR);
+}
+
+// Downscale the source image to half width and half height by averaging each 2x2 pixel group into one target pixel.
+// Strides are given in bytes; rows are assumed to be tightly packed and aligned for whole-vector reads.
+static void downScaleByTwo(SafePointer<uint32_t> targetData, const SafePointer<uint32_t> sourceData, int32_t targetWidth, int32_t targetHeight, int32_t targetStride) {
 	int32_t sourceStride = targetStride * 2;
 	int32_t doubleSourceStride = sourceStride * 2;
-	SafePointer<uint8_t> targetRow = targetData;
-	const SafePointer<uint8_t> sourceRow = sourceData;
+	SafePointer<uint32_t> targetRow = targetData;
+	const SafePointer<uint32_t> sourceRow = sourceData;
 	for (int32_t y = 0; y < targetHeight; y++) {
-		const SafePointer<uint8_t> sourcePixel = sourceRow;
-		SafePointer<uint8_t> targetPixel = targetRow;
-		for (int32_t x = 0; x < targetWidth; x++) {
-			// TODO: Use pariwise and vector average functions for fixed channel counts (SSE has _mm_avg_epu8 for vector average)
-			for (int32_t c = 0; c < pixelSize; c++) {
-				uint8_t value = (uint8_t)((
-				    (uint16_t)(*sourcePixel)
-				  + (uint16_t)(*(sourcePixel + pixelSize))
-				  + (uint16_t)(*(sourcePixel + sourceStride))
-				  + (uint16_t)(*(sourcePixel + sourceStride + pixelSize))) / 4);
-				*targetPixel = value;
-				targetPixel += 1;
-				sourcePixel += 1;
-			}
-			sourcePixel += pixelSize;
+		const SafePointer<uint32_t> upperSourcePixel = sourceRow;
+		const SafePointer<uint32_t> lowerSourcePixel = sourceRow;
+		lowerSourcePixel.increaseBytes(sourceStride);
+		SafePointer<uint32_t> targetPixel = targetRow;
+		for (int32_t x = 0; x < targetWidth; x += laneCountX_32Bit) {
+			// Read two vectors from the upper source row and two from the lower source row.
+			U32xX upperLeft = U32xX::readAligned(upperSourcePixel, "upperLeftSource in downScaleByTwo");
+			U32xX upperRight = U32xX::readAligned(upperSourcePixel + laneCountX_32Bit, "upperRightSource in downScaleByTwo");
+			U32xX lowerLeft = U32xX::readAligned(lowerSourcePixel, "lowerLeftSource in downScaleByTwo");
+			U32xX lowerRight = U32xX::readAligned(lowerSourcePixel + laneCountX_32Bit, "lowerRightSource in downScaleByTwo");
+			// Average horizontally within each row, then vertically between the two row averages.
+			U32xX upperAverage = pairwiseAverageColor(upperLeft, upperRight);
+			U32xX lowerAverage = pairwiseAverageColor(lowerLeft, lowerRight);
+			U32xX finalAverage = averageColor(upperAverage, lowerAverage);
+			finalAverage.writeAligned(targetPixel, "average result in downScaleByTwo");
+			targetPixel += laneCountX_32Bit;
+			upperSourcePixel += laneCountX_32Bit * 2;
+			lowerSourcePixel += laneCountX_32Bit * 2;
 		}
-		targetRow += targetStride;
-		sourceRow += doubleSourceStride;
+		targetRow.increaseBytes(targetStride);
+		sourceRow.increaseBytes(doubleSourceStride);
 	}
 }
 
+static void updatePyramid(TextureRgba &texture, int32_t layerCount) {
+	// Downscale each following layer from the previous.
+	for (int32_t targetIndex = 1; targetIndex < layerCount; targetIndex++) {
+		int32_t sourceIndex = targetIndex - 1;
+		int32_t targetWidth = texture.mips[targetIndex].width;
+		int32_t targetHeight = texture.mips[targetIndex].height;
+		downScaleByTwo(texture.mips[targetIndex].data, texture.mips[sourceIndex].data, targetWidth, targetHeight, targetWidth * pixelSize);
+	}
+	texture.layerCount = layerCount;
+}
+
 TextureRgbaLayer::TextureRgbaLayer() {}
 
-TextureRgbaLayer::TextureRgbaLayer(const uint8_t *data, int32_t width, int32_t height) :
+TextureRgbaLayer::TextureRgbaLayer(SafePointer<uint32_t> data, int32_t width, int32_t height) :
   data(data),
   strideShift(getSizeGroup(width) + 2),
   widthMask(width - 1),
@@ -191,74 +247,92 @@ TextureRgbaLayer::TextureRgbaLayer(const uint8_t *data, int32_t width, int32_t h
   halfPixelOffsetU(1.0f - (0.5f / width)),
   halfPixelOffsetV(1.0f - (0.5f / height)) {}
 
-void ImageRgbaU8Impl::generatePyramid() {
+void ImageRgbaU8Impl::generatePyramidStructure(int32_t layerCount) {
+	int32_t currentWidth = this->width;
+	int32_t currentHeight = this->height;
+	// Allocate smaller pyramid images within the buffer
+	SafePointer<uint32_t> currentStart = buffer_getSafeData<uint32_t>(this->buffer, "Pyramid generation target");
+	for (int32_t m = 0; m < layerCount; m++) {
+		this->texture.mips[m] = TextureRgbaLayer(currentStart, currentWidth, currentHeight);
+		currentStart += currentWidth * currentHeight;
+		currentWidth /= 2;
+		currentHeight /= 2;
+	}
+	// Fill unused mip levels with duplicates of the last mip level
+	for (int32_t m = layerCount; m < MIP_BIN_COUNT; m++) {
+		// m - 1 is never negative, because layerCount is clamped to at least 1 and nobody would choose zero for MIP_BIN_COUNT.
+		this->texture.mips[m] = this->texture.mips[m - 1];
+	}
+	this->texture.layerCount = layerCount;
+}
+
+void ImageRgbaU8Impl::removePyramidStructure() {
+	for (int32_t m = 0; m < MIP_BIN_COUNT; m++) {
+		this->texture.mips[m] = TextureRgbaLayer(imageInternal::getSafeData<uint32_t>(*this), this->width, this->height);
+	}
+	// Declare the old pyramid invalid so that it will not be displayed while rendering, but keep the extra memory for next time it is generated.
+	this->texture.layerCount = 1;
+}
+
+void ImageRgbaU8Impl::makeIntoTexture() {
+	// Check if the image is a valid texture.
 	if (!this->isTexture()) {
-		if (this->width < 4 || this->height < 4) {
-			printText("Cannot generate a pyramid from an image smaller than 4x4 pixels.\n");
-		} else if (this->width > 16384 || this->height > 16384) {
-			printText("Cannot generate a pyramid from an image larger than 16384x16384 pixels.\n");
-		} else if (getSizeGroup(this->width) == -1 || getSizeGroup(this->height) == -1) {
-			printText("Cannot generate a pyramid from image dimensions that are not powers of two.\n");
-		} else if (this->stride > this->width * pixelSize) {
-			printText("Cannot generate a pyramid from an image that contains padding.\n");
-		} else if (this->stride < this->width * pixelSize) {
-			printText("Cannot generate a pyramid from an image with corrupted stride.\n");
-		} else {
-			printText("Cannot generate a pyramid from an image that has not been initialized correctly.\n");
-		}
+		// Get valid dimensions.
+		int newWidth = roundSize(this->width);
+		int newHeight = roundSize(this->height);
+		// Create a new image with the correct dimensions.
+		ImageRgbaU8Impl result = ImageRgbaU8Impl(newWidth, newHeight, DSR_DEFAULT_ALIGNMENT);
+		// Resize the image content with bi-linear interpolation.
+		imageImpl_resizeToTarget(result, *this, true);
+		// Take over the new image's content.
+		this->buffer = result.buffer;
+		this->width = result.width;
+		this->height = result.height;
+		this->stride = result.stride;
+		this->startOffset = 0; // Starts from the beginning.
+		this->isSubImage = false; // No longer sharing buffer with any parent image.
+	}
+}
+
+void ImageRgbaU8Impl::generatePyramid() {
+	int32_t fullSizeGroup = getSizeGroup(std::min(this->width, this->height));
+	int32_t layerCount = std::min(std::max(fullSizeGroup - smallestSizeGroup, 1), MIP_BIN_COUNT);
+	if (this->texture.layerCount > 1) {
+		// Regenerate smaller images without wasting time with any redundant checks,
+		//   because the image has already been approved the first time it had the pyramid allocated.
+		updatePyramid(this->texture, layerCount);
 	} else {
-		int32_t pixelSize = this->pixelSize;
-		int32_t mipmaps = std::min(std::max(getSizeGroup(std::min(this->width, this->height)) - 1, 1), MIP_BIN_COUNT);
-		if (!this->texture.hasMipBuffer()) {
-			this->texture.pyramidBuffer = buffer_create(getPyramidSize(this->width / 2, this->height / 2, pixelSize, mipmaps - 1));
-		}
-		// Point to the image's original buffer in mip level 0
-		SafePointer<uint8_t> currentStart = imageInternal::getSafeData<uint8_t>(*this);
+		// In the event of having to correct a bad image into a valid texture, there will be two reallocations.
+		this->makeIntoTexture();
+		Buffer oldBuffer = this->buffer;
+		SafePointer<uint32_t> oldData = buffer_getSafeData<uint32_t>(oldBuffer, "Pyramid generation source") + this->startOffset;
+		this->buffer = buffer_create(getPyramidSize(this->width, this->height, layerCount));
 		int32_t currentWidth = this->width;
 		int32_t currentHeight = this->height;
-		this->texture.mips[0] = TextureRgbaLayer(currentStart.getUnsafe(), currentWidth, currentHeight);
-		// Create smaller pyramid images in the extra buffer
-		SafePointer<uint8_t> previousStart = currentStart;
-		currentStart = buffer_getSafeData<uint8_t>(this->texture.pyramidBuffer, "Pyramid generation target");
-		for (int32_t m = 1; m < mipmaps; m++) {
-			currentWidth /= 2;
-			currentHeight /= 2;
-			this->texture.mips[m] = TextureRgbaLayer(currentStart.getUnsafe(), currentWidth, currentHeight);
-			int32_t size = currentWidth * currentHeight * pixelSize;
-			// In-place downscaling by two.
-			downScaleByTwo(currentStart, previousStart, currentWidth, currentHeight, pixelSize, currentWidth * pixelSize);
-			previousStart = currentStart;
-			currentStart.increaseBytes(size);
-		}
-		// Fill unused mip levels with duplicates of the last mip level
-		for (int32_t m = mipmaps; m < MIP_BIN_COUNT; m++) {
-			// m - 1 is never negative, because mipmaps is clamped to at least 1 and nobody would choose zero for MIP_BIN_COUNT.
-			this->texture.mips[m] = this->texture.mips[m - 1];
-		}
+		this->generatePyramidStructure(layerCount);
+		// Copy the image's old content while assuming that there is no padding.
+		safeMemoryCopy(this->texture.mips[0].data, oldData, this->width * this->height * pixelSize);
+		// Generate smaller images.
+		updatePyramid(this->texture, layerCount);
+		// Once an image had a pyramid generated, the new buffer will remain for as long as the image exists.
+		this->texture.layerCount = layerCount;
+		// Remove start offset because the old data has been cloned to create the new pyramid image.
+		this->startOffset = 0;
 	}
 }
 
 void ImageRgbaU8Impl::removePyramid() {
-	// Only try to remove if it has a pyramid
-	if (buffer_exists(this->texture.pyramidBuffer)) {
-		// Remove the pyramid's buffer
-		this->texture.pyramidBuffer = Buffer();
-		// Re-initialize
-		for (int32_t m = 0; m < MIP_BIN_COUNT; m++) {
-			this->texture.mips[m] = TextureRgbaLayer(imageInternal::getSafeData<uint8_t>(*this).getUnsafe(), this->width, this->height);
-		}
-	}
+	// Duplicate the original image when no longer showing the pyramid.
+	this->removePyramidStructure();
 }
 
 void ImageRgbaU8Impl::initializeRgbaImage() {
 	// If the image fills the criterias of a texture
-	if (getSizeGroup(this->width) >= 2
-	 && getSizeGroup(this->height) >= 2
-	 && this->stride == this->width * this->pixelSize) {
+	if (getSizeGroup(this->width) >= smallestSizeGroup
+	 && getSizeGroup(this->height) >= smallestSizeGroup
+	 && this->stride == this->width * pixelSize) {
 		// Initialize each mip bin to show the original image
-		for (int32_t m = 0; m < MIP_BIN_COUNT; m++) {
-			this->texture.mips[m] = TextureRgbaLayer(imageInternal::getSafeData<uint8_t>(*this).getUnsafe(), this->width, this->height);
-		}
+		this->removePyramidStructure();
 	}
 };
 

+ 47 - 46
Source/DFPSR/image/ImageRgbaU8.h

@@ -1,6 +1,6 @@
 // zlib open source license
 //
-// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// Copyright (c) 2017 to 2023 David Forsgren Piuva
 // 
 // This software is provided 'as-is', without any express or implied
 // warranty. In no event will the authors be held liable for any damages
@@ -30,72 +30,67 @@
 
 namespace dsr {
 
+// TODO: Figure out how to handle very small textures where the full resolution is smaller than the smallest allowed MIP level.
+//       Larger SIMD vectors will change the smallest allowed textures if the bit pattern is used, which may be inconsistent if it keeps growing.
+//       Having 32x32 pixels as the minimum size would allow using up to 1024-bit SIMD without a problem.
 // TODO: Reallocate the image's buffer, so that the pyramid images are placed into the same allocation.
 //       This allow reading a texture with multiple mip levels using different 32-bit offsets in the same SIMD vector holding multiple groups of 2x2 pixels.
 // TODO: Adapt how far down to go in mip resolutions based on DSR_DEFAULT_ALIGNMENT, so that no mip level is padded in memory.
 //       This is needed so that the stride can be calculated using bit shifting from the mip level.
 //       The visual appearance may differ between SIMD lengths for low resolution textures, but not between computers running the same executable.
-// TODO: Store the smallest layer first in memory, so that the offset is a multiple of the smallest size following a pre-determined bit pattern.
-//       If one s is the number of pixels in the smallest mip layer, then the size of each layer n equals s * 2^n.
-//       The offset in s units is then the sum of all previous unpadded dimensions.
-//         0000000000000000 0
-//         0000000000000001 1
-//         0000000000000101 5
-//         0000000000010101 21
-//         0000000001010101 85
-//         0000000101010101 341
-//         0000010101010101 1365
-//         0001010101010101 5461
-//         0101010101010101 21845
-//       Then one can start with the offset to the largest mip layer in pixels or bytes as the initial mask and then mask out initial ones using a mask directly from the MIP calculation.
-//         0000000000000000 4x2 pixels at offset 0
-//         0000000000001000 8x4 pixels at offset 8
-//         0000000000101000 16x32 pixels at offset 40
-//         0000000010101000 32x64 pixels at offset 168
-//         0000001010101000 64x128 pixels at offset 680
-//         0000101010101000 128x256 pixels at offset 2728 (Full unmasked offset leading to the highest mip level)
-//       Masks for different visibility.
-//         0000000000000011 Very far away or seen from the side
-//         0000000000111111 Far away or seen from the side
-//         0000001111111111 Normal viewing
-//         0011111111111111 Close with many screen pixels per texels.
-//       The difficult part is how to generate a good mip level offset mask from the pixel coordinate derivation from groups of 2x2 pixels.
-//         The offset is not exactly exponential, so there will be visual tradeoffs between artifacts in this approximation.
-//       One could take the texture coordinate offset per pixel as the initial value and
-//         then repeat shifting and or masking at power of two offsets to only get ones after the initial one, but this would require many cycles.
-//           Pixels per texel in full resolution times full resolution offset:
-//             00000000000010101000110110011001
-//           Mip offset mask:
-//             00000000000011111111111111111111
-//           Full resolution mip offset:
-//             00000000001010101010101010000000
-//           Final mip offset containing half width and height:
-//             00000000000010101010101010000000
+// TODO: Begin by replacing the lookup table for pyramid layers with template inline functions, because figuring out how to get start offset and stride consistently may take time.
+//       Pixel loops will later look up the masks once and store it in SIMD vectors to avoid fetching it from memory multiple times from potential memory aliasing.
+// IDEA: Keep the same order of mip layers, but mask out offset bits from the right side.
+//       When the most significant bit is masked out, it jumps to the full resolution image at offset zero.
+//       Offsets
+//         00000000000000000000000000000000 Full resolution of 64x64
+//         00000000000000000000010000000000 Half resolution of 32x32
+//         00000000000000000000010100000000 Quarter resolution of 16x16
+//         00000000000000000000010101000000 Low resolution of 8x8
+//         00000000000000000000010101010000 Lowest resolution of 4x4
+//       Power of 4 offset masks
+//         11111111111111111100000000000000 Show at most 16384 pixels (clamped to full resolution because no more bits are masked out)
+//         11111111111111111111000000000000 Show at most 4096 pixels (full resolution for the image)
+//         11111111111111111111110000000000 Show at most 1024 pixels
+//         11111111111111111111111100000000 Show at most 256 pixels
+//         11111111111111111111111111000000 Show at most 64 pixels
+//         11111111111111111111111111110000 Show at most 16 pixels
+// PROBLEMS:
+//   * How can stride be calculated in the same way as the start offset?
+//     - Consistently in base two, not using the base 4 mask.
+//     - Limited to the range of available resolutions, not going to stride 512 when the full resolution stride is 256.
+//   * What about the width and height masks, can they reuse the same bit masking to avoid looking up data with scalar operations?
+//   * What should be done about very small textures?
+//     Automatically scale them up to the minimum resolution and leave the original image in the middle of the buffer?
+//     Change minimum size requirements?
+//       This would be the simplest approach and nobody would want their textures up-scaled anyway if one can easily redraw images in a higher resolution.
 
 // Pointing to the parent image using raw pointers for fast rendering. May not exceed the lifetime of the parent image!
 struct TextureRgbaLayer {
-	const uint8_t *data = 0;
+	SafePointer<uint32_t> data;
 	int32_t strideShift = 0;
 	uint32_t widthMask = 0, heightMask = 0;
 	int32_t width = 0, height = 0;
 	float subWidth = 0.0f, subHeight = 0.0f; // TODO: Better names?
 	float halfPixelOffsetU = 0.0f, halfPixelOffsetV = 0.0f;
 	TextureRgbaLayer();
-	TextureRgbaLayer(const uint8_t *data, int32_t width, int32_t height);
+	TextureRgbaLayer(SafePointer<uint32_t> data, int32_t width, int32_t height);
 	// Can it be sampled as a texture
-	bool exists() const { return this->data != nullptr; }
+	bool exists() const { return this->data.getUnsafe() != nullptr; }
 };
 
+// TODO: Try to replace with generated bit masks from inline functions.
 #define MIP_BIN_COUNT 5
 
-// Pointing to the parent image using raw pointers for fast rendering. Not not separate from the image!
+// Pointing to the parent image using raw pointers for fast rendering. Do not separate from the image!
 struct TextureRgba {
-	Buffer pyramidBuffer; // Storing the smaller mip levels
 	TextureRgbaLayer mips[MIP_BIN_COUNT]; // Pointing to all mip levels including the original image
+	int32_t layerCount = 0; // 0 Means that there are no pointers, 1 means that you have a pyramid but only one layer.
 	// Can it be sampled as a texture
 	bool exists() const { return this->mips[0].exists(); }
 	// Does it have a mip pyramid generated for smoother sampling
-	bool hasMipBuffer() const { return this->pyramidBuffer.get() != nullptr; }
+	// TODO: Rename once there is no separate MIP buffer, just a single pyramid buffer.
+	bool hasMipBuffer() const { return this->layerCount != 0; }
 };
 
 class ImageRgbaU8Impl : public ImageImpl {
@@ -110,13 +105,19 @@ public:
 	ImageRgbaU8Impl(int32_t newWidth, int32_t newHeight, int32_t alignment);
 	// Native canvas constructor
 	ImageRgbaU8Impl(int32_t newWidth, int32_t newHeight, PackOrderIndex packOrderIndex, int32_t alignment);
-	// Fast reading
-	TextureRgba texture; // The texture view
-	void initializeRgbaImage(); // Points to level 0 from all bins to allow rendering
+	// The texture view for fast reading
+	TextureRgba texture;
+	// Points to level 0 from all bins to allow rendering
+	void initializeRgbaImage();
+	// Resizes the image to valid texture dimensions
+	void makeIntoTexture();
 	void generatePyramid(); // Fills the following bins with smaller images
 	void removePyramid();
 	bool isTexture() const;
 	static bool isTexture(const ImageRgbaU8Impl* image); // Null cannot be sampled as a texture
+private:
+	void generatePyramidStructure(int32_t layerCount);
+	void removePyramidStructure();
 public:
 	// Conversion to monochrome by extracting a channel
 	ImageU8Impl getChannel(int32_t channelIndex) const;

+ 9 - 9
Source/DFPSR/render/shader/RgbaMultiply.h

@@ -84,8 +84,8 @@ public:
 	Rgba_F32 getPixels_2x2(const F32x4x3 &vertexWeights) const override {
 		if (HAS_DIFFUSE_MAP && !HAS_LIGHT_MAP && COLORLESS) {
 			// Optimized for diffuse only
-			ALIGN16 F32x4 u1(shaderMethods::interpolate(this->texCoords.u1, vertexWeights));
-			ALIGN16 F32x4 v1(shaderMethods::interpolate(this->texCoords.v1, vertexWeights));
+			F32x4 u1(shaderMethods::interpolate(this->texCoords.u1, vertexWeights));
+			F32x4 v1(shaderMethods::interpolate(this->texCoords.v1, vertexWeights));
 			if (DISABLE_MIPMAP) {
 				return shaderMethods::sample_F32<Interpolation::BL, false>(this->diffuseLayer, u1, v1);
 			} else {
@@ -93,18 +93,18 @@ public:
 			}
 		} else if (HAS_LIGHT_MAP && !HAS_DIFFUSE_MAP && COLORLESS) {
 			// Optimized for light only
-			ALIGN16 F32x4 u2(shaderMethods::interpolate(this->texCoords.u2, vertexWeights));
-			ALIGN16 F32x4 v2(shaderMethods::interpolate(this->texCoords.v2, vertexWeights));
+			F32x4 u2(shaderMethods::interpolate(this->texCoords.u2, vertexWeights));
+			F32x4 v2(shaderMethods::interpolate(this->texCoords.v2, vertexWeights));
 			return shaderMethods::sample_F32<Interpolation::BL, false>(this->lightLayer, u2, v2);
 		} else {
 			// Interpolate the vertex color
-			ALIGN16 Rgba_F32 color = HAS_VERTEX_FADING ?
+			Rgba_F32 color = HAS_VERTEX_FADING ?
 			  shaderMethods::interpolateVertexColor(this->colors.red, this->colors.green, this->colors.blue, this->colors.alpha, vertexWeights) :
 			  Rgba_F32(F32x4(this->colors.red.x), F32x4(this->colors.green.x), F32x4(this->colors.blue.x), F32x4(this->colors.alpha.x));
 			// Sample diffuse
 			if (HAS_DIFFUSE_MAP) {
-				ALIGN16 F32x4 u1(shaderMethods::interpolate(this->texCoords.u1, vertexWeights));
-				ALIGN16 F32x4 v1(shaderMethods::interpolate(this->texCoords.v1, vertexWeights));
+				F32x4 u1(shaderMethods::interpolate(this->texCoords.u1, vertexWeights));
+				F32x4 v1(shaderMethods::interpolate(this->texCoords.v1, vertexWeights));
 				if (DISABLE_MIPMAP) {
 					color = color * shaderMethods::sample_F32<Interpolation::BL, false>(this->diffuseLayer, u1, v1);
 				} else {
@@ -113,8 +113,8 @@ public:
 			}
 			// Sample lightmap
 			if (HAS_LIGHT_MAP) {
-				ALIGN16 F32x4 u2(shaderMethods::interpolate(this->texCoords.u2, vertexWeights));
-				ALIGN16 F32x4 v2(shaderMethods::interpolate(this->texCoords.v2, vertexWeights));
+				F32x4 u2(shaderMethods::interpolate(this->texCoords.u2, vertexWeights));
+				F32x4 v2(shaderMethods::interpolate(this->texCoords.v2, vertexWeights));
 				color = color * shaderMethods::sample_F32<Interpolation::BL, false>(this->lightLayer, u2, v2);
 			}
 			return color;

+ 27 - 30
Source/DFPSR/render/shader/shaderMethods.h

@@ -37,9 +37,9 @@ namespace dsr {
 namespace shaderMethods {
 	// Returns the linear interpolation of the values using corresponding weight ratios for A, B and C in 4 pixels at the same time.
 	inline F32x4 interpolate(const FVector3D &vertexData, const F32x4x3 &vertexWeights) {
-		ALIGN16 F32x4 vMA = vertexData.x * vertexWeights.v1;
-		ALIGN16 F32x4 vMB = vertexData.y * vertexWeights.v2;
-		ALIGN16 F32x4 vMC = vertexData.z * vertexWeights.v3;
+		F32x4 vMA = vertexData.x * vertexWeights.v1;
+		F32x4 vMB = vertexData.y * vertexWeights.v2;
+		F32x4 vMC = vertexData.z * vertexWeights.v3;
 		return vMA + vMB + vMC;
 	}
 
@@ -55,14 +55,14 @@ namespace shaderMethods {
 	// Returns (colorA * weightA + colorB * weightB) / 256 as bytes
 	// weightA and weightB should contain pairs of the same 16-bit weights for each of the 4 pixels in the corresponding A and B colors
 	inline U32x4 weightColors(const U32x4 &colorA, const U16x8 &weightA, const U32x4 &colorB, const U16x8 &weightB) {
-		ALIGN16 U32x4 lowMask(0x00FF00FFu);
-		ALIGN16 U16x8 lowColorA = U16x8(colorA & lowMask);
-		ALIGN16 U16x8 lowColorB = U16x8(colorB & lowMask);
-		ALIGN16 U32x4 highMask(0xFF00FF00u);
-		ALIGN16 U16x8 highColorA = U16x8((colorA & highMask) >> 8);
-		ALIGN16 U16x8 highColorB = U16x8((colorB & highMask) >> 8);
-		ALIGN16 U32x4 lowColor = (((lowColorA * weightA) + (lowColorB * weightB))).get_U32();
-		ALIGN16 U32x4 highColor = (((highColorA * weightA) + (highColorB * weightB))).get_U32();
+		U32x4 lowMask(0x00FF00FFu);
+		U16x8 lowColorA = U16x8(colorA & lowMask);
+		U16x8 lowColorB = U16x8(colorB & lowMask);
+		U32x4 highMask(0xFF00FF00u);
+		U16x8 highColorA = U16x8((colorA & highMask) >> 8);
+		U16x8 highColorB = U16x8((colorB & highMask) >> 8);
+		U32x4 lowColor = (((lowColorA * weightA) + (lowColorB * weightB))).get_U32();
+		U32x4 highColor = (((highColorA * weightA) + (highColorB * weightB))).get_U32();
 		return (((lowColor >> 8) & lowMask) | (highColor & highMask));
 	}
 
@@ -79,8 +79,8 @@ namespace shaderMethods {
 
 	inline U32x4 mix_L(const U32x4 &colorA, const U32x4 &colorB, const U32x4 &weight) {
 		// Get inverse weights
-		ALIGN16 U16x8 weightB = repeatAs16Bits(weight);
-		ALIGN16 U16x8 weightA = invertWeight(weightB);
+		U16x8 weightB = repeatAs16Bits(weight);
+		U16x8 weightA = invertWeight(weightB);
 		// Multiply
 		return weightColors(colorA, weightA, colorB, weightB);
 	}
@@ -97,19 +97,8 @@ namespace shaderMethods {
 
 	// Single layer sampling methods
 	inline U32x4 sample_U32(const TextureRgbaLayer *source, const U32x4 &col, const U32x4 &row) {
-		#ifdef USE_AVX2
-			U32x4 pixelOffset((col + (row << (source->strideShift - 2)))); // PixelOffset = Column + Row * PixelStride
-			return U32x4(GATHER_U32x4_AVX2(source->data, pixelOffset.v, 4));
-			// return gather(source->data, pixelOffset.v); TODO: Needs SafePointer, so that this function can use the gather function with automatic emulation instead of hardcoding for AVX2
-		#else
-			UVector4D byteOffset = ((col << 2) + (row << source->strideShift)).get(); // ByteOffset = Column * 4 + Row * ByteStride
-			return U32x4(
-			  *((uint32_t*)(source->data + byteOffset.x)),
-			  *((uint32_t*)(source->data + byteOffset.y)),
-			  *((uint32_t*)(source->data + byteOffset.z)),
-			  *((uint32_t*)(source->data + byteOffset.w))
-			);
-		#endif
+		U32x4 pixelOffset((col + (row << (source->strideShift - 2)))); // PixelOffset = Column + Row * PixelStride
+		return gather(source->data, pixelOffset);
 	}
 
 	// How many mip levels down from here should be sampled for the given texture coordinates
@@ -140,7 +129,9 @@ namespace shaderMethods {
 	}
 
 	// Single layer sampling method
-	// Precondition: u, v > -0.875f = 1 - (0.5 / minimumMipSize)
+	// Preconditions:
+	//   u >= -halfPixelOffsetU
+	//   v >= -halfPixelOffsetV
 	template<Interpolation INTERPOLATION>
 	inline U32x4 sample_U32(const TextureRgbaLayer *source, const F32x4 &u, const F32x4 &v) {
 		if (INTERPOLATION == Interpolation::BL) {
@@ -174,7 +165,9 @@ namespace shaderMethods {
 		}
 	}
 
-	// Precondition: u, v > -0.875f = 1 - (0.5 / minimumMipSize)
+	// Preconditions:
+	//   u >= -halfPixelOffsetU
+	//   v >= -halfPixelOffsetV
 	template<Interpolation INTERPOLATION, bool HIGH_QUALITY>
 	inline Rgba_F32 sample_F32(const TextureRgbaLayer *source, const F32x4 &u, const F32x4 &v) {
 		if (INTERPOLATION == Interpolation::BL) {
@@ -211,14 +204,18 @@ namespace shaderMethods {
 	}
 
 	// Multi layer sampling method
-	// Precondition: u, v > -0.875f = 1 - (0.5 / minimumMipSize)
+	// Preconditions:
+	//   u >= -halfPixelOffsetU
+	//   v >= -halfPixelOffsetV
 	template<Interpolation INTERPOLATION>
 	inline U32x4 sample_U32(const TextureRgba *source, const F32x4 &u, const F32x4 &v) {
 		int mipLevel = getMipLevel(source, u, v);
 		return sample_U32<INTERPOLATION>(&(source->mips[mipLevel]), u, v);
 	}
 
-	// Precondition: u, v > -0.875f = 1 - (0.5 / minimumMipSize)
+	// Preconditions:
+	//   u >= -halfPixelOffsetU
+	//   v >= -halfPixelOffsetV
 	template<Interpolation INTERPOLATION, bool HIGH_QUALITY>
 	inline Rgba_F32 sample_F32(const TextureRgba *source, const F32x4 &u, const F32x4 &v) {
 		int mipLevel = getMipLevel(source, u, v);