Browse Source

Automatically aligning float images to the F vector size to allow processing float images using only AVX.

David Piuva 2 years ago
parent
commit
4d27bde0d4

+ 0 - 1
Source/DFPSR/api/bufferAPI.cpp

@@ -61,7 +61,6 @@ static int getFinalAlignment(int requestedAlignment) {
 	// Find any power of two alignment divisible by both requestedAlignment and DSR_DEFAULT_ALIGNMENT
 	// Find any power of two alignment divisible by both requestedAlignment and DSR_DEFAULT_ALIGNMENT
 	int largestAlignment = max(requestedAlignment, DSR_DEFAULT_ALIGNMENT);
 	int largestAlignment = max(requestedAlignment, DSR_DEFAULT_ALIGNMENT);
 	for (uint32_t e = 0; e < 32; e++) {
 	for (uint32_t e = 0; e < 32; e++) {
-		uint32_t requestedAlignment = 1 << e;
 		if (1 << e == largestAlignment) return largestAlignment;
 		if (1 << e == largestAlignment) return largestAlignment;
 	}
 	}
 	return -1;
 	return -1;

+ 1 - 1
Source/DFPSR/api/imageAPI.cpp

@@ -45,7 +45,7 @@ AlignedImageU16 dsr::image_create_U16(int32_t width, int32_t height) {
 	return AlignedImageU16(std::make_shared<ImageU16Impl>(width, height, DSR_DEFAULT_ALIGNMENT));
 	return AlignedImageU16(std::make_shared<ImageU16Impl>(width, height, DSR_DEFAULT_ALIGNMENT));
 }
 }
 AlignedImageF32 dsr::image_create_F32(int32_t width, int32_t height) {
 AlignedImageF32 dsr::image_create_F32(int32_t width, int32_t height) {
-	return AlignedImageF32(std::make_shared<ImageF32Impl>(width, height, DSR_DEFAULT_ALIGNMENT));
+	return AlignedImageF32(std::make_shared<ImageF32Impl>(width, height, DSR_FLOAT_ALIGNMENT));
 }
 }
 OrderedImageRgbaU8 dsr::image_create_RgbaU8(int32_t width, int32_t height) {
 OrderedImageRgbaU8 dsr::image_create_RgbaU8(int32_t width, int32_t height) {
 	return OrderedImageRgbaU8(std::make_shared<ImageRgbaU8Impl>(width, height, DSR_DEFAULT_ALIGNMENT));
 	return OrderedImageRgbaU8(std::make_shared<ImageRgbaU8Impl>(width, height, DSR_DEFAULT_ALIGNMENT));

+ 3 - 2
Source/DFPSR/base/simd.h

@@ -56,8 +56,9 @@
 //   Pros and cons:
 //   Pros and cons:
 //     - Have to manually set the alignment of buffers to DSR_FLOAT_ALIGNMENT to prevent crashing.
 //     - Have to manually set the alignment of buffers to DSR_FLOAT_ALIGNMENT to prevent crashing.
 //       If the default alignment for buffers changed based on the size of F vectors, the more commonly used X vector would get slowed down from cache misses from padding larger than X vectors.
 //       If the default alignment for buffers changed based on the size of F vectors, the more commonly used X vector would get slowed down from cache misses from padding larger than X vectors.
-//     - It can be difficult to detect incorrect memory alignment, because a pointer can be aligned to more than requested by accident.
-//       If accidentally aligning to 128 bits instead of 256 bits, there is a 50% risk of failing to detect it at runtime.
+//       AlignedImageF32 and sound backends are already aligned with the F vector size, because they are not generic like Buffer.
+//     - It can be difficult to detect incorrect memory alignment, because a pointer can accidentally be aligned to more than what was requested.
+//       If accidentally aligning to 128 bits instead of 256 bits, there is a 50% risk of failing to detect it at runtime and then failing later on another computer.
 //       If sticking with 128-bit or X vectors, all buffers will be correctly aligned automatically.
 //       If sticking with 128-bit or X vectors, all buffers will be correctly aligned automatically.
 //     + For heavy calculations where memory access is not the bottleneck, using larger SIMD vectors when enabled allows saving energy and increasing performance.
 //     + For heavy calculations where memory access is not the bottleneck, using larger SIMD vectors when enabled allows saving energy and increasing performance.
 //     - If you forget to test with longer vector lengths (compiling with -mavx2 or -mEMULATE_256BIT_SIMD) then you might find bugs from not iterating or aligning memory correctly.
 //     - If you forget to test with longer vector lengths (compiling with -mavx2 or -mEMULATE_256BIT_SIMD) then you might find bugs from not iterating or aligning memory correctly.

+ 2 - 2
Source/DFPSR/image/Image.cpp

@@ -30,7 +30,7 @@ ImageImpl::ImageImpl(int32_t width, int32_t height, int32_t stride, int32_t pixe
 	this->validate();
 	this->validate();
 }
 }
 
 
-ImageImpl::ImageImpl(int32_t width, int32_t height, int32_t stride, int32_t pixelSize) :
-  width(width), height(height), stride(stride), pixelSize(pixelSize), buffer(buffer_create(stride * height)), startOffset(0), isSubImage(false) {
+ImageImpl::ImageImpl(int32_t width, int32_t height, int32_t stride, int32_t pixelSize, int alignment) :
+  width(width), height(height), stride(stride), pixelSize(pixelSize), buffer(buffer_create(stride * height, alignment)), startOffset(0), isSubImage(false) {
 	this->validate();
 	this->validate();
 }
 }

+ 1 - 1
Source/DFPSR/image/Image.h

@@ -54,7 +54,7 @@ public:
 	// Sub-images
 	// Sub-images
 	ImageImpl(int32_t width, int32_t height, int32_t stride, int32_t pixelSize, Buffer buffer, intptr_t startOffset);
 	ImageImpl(int32_t width, int32_t height, int32_t stride, int32_t pixelSize, Buffer buffer, intptr_t startOffset);
 	// New images
 	// New images
-	ImageImpl(int32_t width, int32_t height, int32_t stride, int32_t pixelSize);
+	ImageImpl(int32_t width, int32_t height, int32_t stride, int32_t pixelSize, int alignment);
 };
 };
 
 
 #define IMAGE_DECLARATION(IMAGE_TYPE,CHANNELS,COLOR_TYPE,ELEMENT_TYPE) \
 #define IMAGE_DECLARATION(IMAGE_TYPE,CHANNELS,COLOR_TYPE,ELEMENT_TYPE) \

+ 1 - 1
Source/DFPSR/image/ImageF32.cpp

@@ -33,7 +33,7 @@ ImageF32Impl::ImageF32Impl(int32_t newWidth, int32_t newHeight, int32_t newStrid
 }
 }
 
 
 ImageF32Impl::ImageF32Impl(int32_t newWidth, int32_t newHeight, int32_t alignment) :
 ImageF32Impl::ImageF32Impl(int32_t newWidth, int32_t newHeight, int32_t alignment) :
-  ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(float), alignment), sizeof(float)) {
+  ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(float), alignment), sizeof(float), alignment) {
 }
 }
 
 
 IMAGE_DEFINITION(ImageF32Impl, 1, float, float);
 IMAGE_DEFINITION(ImageF32Impl, 1, float, float);

+ 2 - 4
Source/DFPSR/image/ImageRgbaU8.cpp

@@ -41,13 +41,13 @@ ImageRgbaU8Impl::ImageRgbaU8Impl(int32_t newWidth, int32_t newHeight, int32_t ne
 }
 }
 
 
 ImageRgbaU8Impl::ImageRgbaU8Impl(int32_t newWidth, int32_t newHeight, int32_t alignment) :
 ImageRgbaU8Impl::ImageRgbaU8Impl(int32_t newWidth, int32_t newHeight, int32_t alignment) :
-  ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(Color4xU8), alignment), sizeof(Color4xU8)) {
+  ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(Color4xU8), alignment), sizeof(Color4xU8), alignment) {
 	this->initializeRgbaImage();
 	this->initializeRgbaImage();
 }
 }
 
 
 // Native canvas constructor
 // Native canvas constructor
 ImageRgbaU8Impl::ImageRgbaU8Impl(int32_t newWidth, int32_t newHeight, PackOrderIndex packOrderIndex, int32_t alignment) :
 ImageRgbaU8Impl::ImageRgbaU8Impl(int32_t newWidth, int32_t newHeight, PackOrderIndex packOrderIndex, int32_t alignment) :
-  ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(Color4xU8), 16), sizeof(Color4xU8)) {
+  ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(Color4xU8), 16), sizeof(Color4xU8), alignment) {
 	this->packOrder = PackOrder::getPackOrder(packOrderIndex);
 	this->packOrder = PackOrder::getPackOrder(packOrderIndex);
 	this->initializeRgbaImage();
 	this->initializeRgbaImage();
 }
 }
@@ -308,8 +308,6 @@ void ImageRgbaU8Impl::generatePyramid() {
 		Buffer oldBuffer = this->buffer;
 		Buffer oldBuffer = this->buffer;
 		SafePointer<uint32_t> oldData = buffer_getSafeData<uint32_t>(oldBuffer, "Pyramid generation source") + this->startOffset;
 		SafePointer<uint32_t> oldData = buffer_getSafeData<uint32_t>(oldBuffer, "Pyramid generation source") + this->startOffset;
 		this->buffer = buffer_create(getPyramidSize(this->width, this->height, layerCount));
 		this->buffer = buffer_create(getPyramidSize(this->width, this->height, layerCount));
-		int32_t currentWidth = this->width;
-		int32_t currentHeight = this->height;
 		this->generatePyramidStructure(layerCount);
 		this->generatePyramidStructure(layerCount);
 		// Copy the image's old content while assuming that there is no padding.
 		// Copy the image's old content while assuming that there is no padding.
 		safeMemoryCopy(this->texture.data + this->texture.mips[0].startOffset, oldData, this->width * this->height * pixelSize);
 		safeMemoryCopy(this->texture.data + this->texture.mips[0].startOffset, oldData, this->width * this->height * pixelSize);

+ 1 - 1
Source/DFPSR/image/ImageU16.cpp

@@ -33,7 +33,7 @@ ImageU16Impl::ImageU16Impl(int32_t newWidth, int32_t newHeight, int32_t newStrid
 }
 }
 
 
 ImageU16Impl::ImageU16Impl(int32_t newWidth, int32_t newHeight, int32_t alignment) :
 ImageU16Impl::ImageU16Impl(int32_t newWidth, int32_t newHeight, int32_t alignment) :
-  ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(uint16_t), alignment), sizeof(uint16_t)) {
+  ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(uint16_t), alignment), sizeof(uint16_t), alignment) {
 }
 }
 
 
 IMAGE_DEFINITION(ImageU16Impl, 1, uint16_t, uint16_t);
 IMAGE_DEFINITION(ImageU16Impl, 1, uint16_t, uint16_t);

+ 1 - 1
Source/DFPSR/image/ImageU8.cpp

@@ -33,7 +33,7 @@ ImageU8Impl::ImageU8Impl(int32_t newWidth, int32_t newHeight, int32_t newStride,
 }
 }
 
 
 ImageU8Impl::ImageU8Impl(int32_t newWidth, int32_t newHeight, int32_t alignment) :
 ImageU8Impl::ImageU8Impl(int32_t newWidth, int32_t newHeight, int32_t alignment) :
-  ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(uint8_t), alignment), sizeof(uint8_t)) {
+  ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(uint8_t), alignment), sizeof(uint8_t), alignment) {
 }
 }
 
 
 IMAGE_DEFINITION(ImageU8Impl, 1, uint8_t, uint8_t);
 IMAGE_DEFINITION(ImageU8Impl, 1, uint8_t, uint8_t);

+ 3 - 0
Source/test/tests/SimdTest.cpp

@@ -10,6 +10,9 @@ START_TEST(Simd)
 	#ifdef USE_SSSE3
 	#ifdef USE_SSSE3
 		printText("	* SSSE3\n");
 		printText("	* SSSE3\n");
 	#endif
 	#endif
+	#ifdef USE_AVX
+		printText("	* AVX\n");
+	#endif
 	#ifdef USE_AVX2
 	#ifdef USE_AVX2
 		printText("	* AVX2\n");
 		printText("	* AVX2\n");
 	#endif
 	#endif