2 years ago · 4d27bde0d4
--- a/Source/DFPSR/api/bufferAPI.cpp
+++ b/Source/DFPSR/api/bufferAPI.cpp
@@ -61,7 +61,6 @@ static int getFinalAlignment(int requestedAlignment) {
 
															 	// Find any power of two alignment divisible by both requestedAlignment and DSR_DEFAULT_ALIGNMENT

														
 
															 	int largestAlignment = max(requestedAlignment, DSR_DEFAULT_ALIGNMENT);

														
 
															 	for (uint32_t e = 0; e < 32; e++) {

														
 
															-		uint32_t requestedAlignment = 1 << e;

														
 
															 		if (1 << e == largestAlignment) return largestAlignment;

														
 
															 	}

														
 
															 	return -1;

														
--- a/Source/DFPSR/api/imageAPI.cpp
+++ b/Source/DFPSR/api/imageAPI.cpp
@@ -45,7 +45,7 @@ AlignedImageU16 dsr::image_create_U16(int32_t width, int32_t height) {
 
															 	return AlignedImageU16(std::make_shared<ImageU16Impl>(width, height, DSR_DEFAULT_ALIGNMENT));

														
 
															 }

														
 
															 AlignedImageF32 dsr::image_create_F32(int32_t width, int32_t height) {

														
 
															-	return AlignedImageF32(std::make_shared<ImageF32Impl>(width, height, DSR_DEFAULT_ALIGNMENT));

														
 
															+	return AlignedImageF32(std::make_shared<ImageF32Impl>(width, height, DSR_FLOAT_ALIGNMENT));

														
 
															 }

														
 
															 OrderedImageRgbaU8 dsr::image_create_RgbaU8(int32_t width, int32_t height) {

														
 
															 	return OrderedImageRgbaU8(std::make_shared<ImageRgbaU8Impl>(width, height, DSR_DEFAULT_ALIGNMENT));

														
--- a/Source/DFPSR/base/simd.h
+++ b/Source/DFPSR/base/simd.h
@@ -56,8 +56,9 @@
 
															 //   Pros and cons:

														
 
															 //     - Have to manually set the alignment of buffers to DSR_FLOAT_ALIGNMENT to prevent crashing.

														
 
															 //       If the default alignment for buffers changed based on the size of F vectors, the more commonly used X vector would get slowed down from cache misses from padding larger than X vectors.

														
 
															-//     - It can be difficult to detect incorrect memory alignment, because a pointer can be aligned to more than requested by accident.

														
 
															-//       If accidentally aligning to 128 bits instead of 256 bits, there is a 50% risk of failing to detect it at runtime.

														
 
															+//       AlignedImageF32 and sound backends are already aligned with the F vector size, because they are not generic like Buffer.

														
 
															+//     - It can be difficult to detect incorrect memory alignment, because a pointer can accidentally be aligned to more than what was requested.

														
 
															+//       If accidentally aligning to 128 bits instead of 256 bits, there is a 50% risk of failing to detect it at runtime and having it fail later on another computer.

														
 
															 //       If sticking with 128-bit or X vectors, all buffers will be correctly aligned automatically.

														
 
															 //     + For heavy calculations where memory access is not the bottleneck, using larger SIMD vectors when enabled allows saving energy and increasing performance.

														
 
															 //     - If you forget to test with longer vector lengths (compiling with -mavx2 or -mEMULATE_256BIT_SIMD) then you might overlook bugs caused by not iterating or aligning memory correctly.

														
--- a/Source/DFPSR/image/Image.cpp
+++ b/Source/DFPSR/image/Image.cpp
@@ -30,7 +30,7 @@ ImageImpl::ImageImpl(int32_t width, int32_t height, int32_t stride, int32_t pixe
 
															 	this->validate();

														
 
															 }

														
 
															-ImageImpl::ImageImpl(int32_t width, int32_t height, int32_t stride, int32_t pixelSize) :

														
 
															-  width(width), height(height), stride(stride), pixelSize(pixelSize), buffer(buffer_create(stride * height)), startOffset(0), isSubImage(false) {

														
 
															+ImageImpl::ImageImpl(int32_t width, int32_t height, int32_t stride, int32_t pixelSize, int alignment) :

														
 
															+  width(width), height(height), stride(stride), pixelSize(pixelSize), buffer(buffer_create(stride * height, alignment)), startOffset(0), isSubImage(false) {

														
 
															 	this->validate();

														
 
															 }

														
--- a/Source/DFPSR/image/Image.h
+++ b/Source/DFPSR/image/Image.h
@@ -54,7 +54,7 @@ public:
 
															 	// Sub-images

														
 
															 	ImageImpl(int32_t width, int32_t height, int32_t stride, int32_t pixelSize, Buffer buffer, intptr_t startOffset);

														
 
															 	// New images

														
 
															-	ImageImpl(int32_t width, int32_t height, int32_t stride, int32_t pixelSize);

														
 
															+	ImageImpl(int32_t width, int32_t height, int32_t stride, int32_t pixelSize, int alignment);

														
 
															 };

														
 
															 #define IMAGE_DECLARATION(IMAGE_TYPE,CHANNELS,COLOR_TYPE,ELEMENT_TYPE) \

														
--- a/Source/DFPSR/image/ImageF32.cpp
+++ b/Source/DFPSR/image/ImageF32.cpp
@@ -33,7 +33,7 @@ ImageF32Impl::ImageF32Impl(int32_t newWidth, int32_t newHeight, int32_t newStrid
 
															 }

														
 
															 ImageF32Impl::ImageF32Impl(int32_t newWidth, int32_t newHeight, int32_t alignment) :

														
 
															-  ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(float), alignment), sizeof(float)) {

														
 
															+  ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(float), alignment), sizeof(float), alignment) {

														
 
															 }

														
 
															 IMAGE_DEFINITION(ImageF32Impl, 1, float, float);

														
--- a/Source/DFPSR/image/ImageRgbaU8.cpp
+++ b/Source/DFPSR/image/ImageRgbaU8.cpp
@@ -41,13 +41,13 @@ ImageRgbaU8Impl::ImageRgbaU8Impl(int32_t newWidth, int32_t newHeight, int32_t ne
 
															 }

														
 
															 ImageRgbaU8Impl::ImageRgbaU8Impl(int32_t newWidth, int32_t newHeight, int32_t alignment) :

														
 
															-  ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(Color4xU8), alignment), sizeof(Color4xU8)) {

														
 
															+  ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(Color4xU8), alignment), sizeof(Color4xU8), alignment) {

														
 
															 	this->initializeRgbaImage();

														
 
															 }

														
 
															 // Native canvas constructor

														
 
															 ImageRgbaU8Impl::ImageRgbaU8Impl(int32_t newWidth, int32_t newHeight, PackOrderIndex packOrderIndex, int32_t alignment) :

														
 
															-  ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(Color4xU8), 16), sizeof(Color4xU8)) {

														
 
															+  ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(Color4xU8), 16), sizeof(Color4xU8), alignment) {

														
 
															 	this->packOrder = PackOrder::getPackOrder(packOrderIndex);

														
 
															 	this->initializeRgbaImage();

														
 
															 }

														
@@ -308,8 +308,6 @@ void ImageRgbaU8Impl::generatePyramid() {
 
															 		Buffer oldBuffer = this->buffer;

														
 
															 		SafePointer<uint32_t> oldData = buffer_getSafeData<uint32_t>(oldBuffer, "Pyramid generation source") + this->startOffset;

														
 
															 		this->buffer = buffer_create(getPyramidSize(this->width, this->height, layerCount));

														
 
															-		int32_t currentWidth = this->width;

														
 
															-		int32_t currentHeight = this->height;

														
 
															 		this->generatePyramidStructure(layerCount);

														
 
															 		// Copy the image's old content while assuming that there is no padding.

														
 
															 		safeMemoryCopy(this->texture.data + this->texture.mips[0].startOffset, oldData, this->width * this->height * pixelSize);

														
--- a/Source/DFPSR/image/ImageU16.cpp
+++ b/Source/DFPSR/image/ImageU16.cpp
@@ -33,7 +33,7 @@ ImageU16Impl::ImageU16Impl(int32_t newWidth, int32_t newHeight, int32_t newStrid
 
															 }

														
 
															 ImageU16Impl::ImageU16Impl(int32_t newWidth, int32_t newHeight, int32_t alignment) :

														
 
															-  ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(uint16_t), alignment), sizeof(uint16_t)) {

														
 
															+  ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(uint16_t), alignment), sizeof(uint16_t), alignment) {

														
 
															 }

														
 
															 IMAGE_DEFINITION(ImageU16Impl, 1, uint16_t, uint16_t);

														
--- a/Source/DFPSR/image/ImageU8.cpp
+++ b/Source/DFPSR/image/ImageU8.cpp
@@ -33,7 +33,7 @@ ImageU8Impl::ImageU8Impl(int32_t newWidth, int32_t newHeight, int32_t newStride,
 
															 }

														
 
															 ImageU8Impl::ImageU8Impl(int32_t newWidth, int32_t newHeight, int32_t alignment) :

														
 
															-  ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(uint8_t), alignment), sizeof(uint8_t)) {

														
 
															+  ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(uint8_t), alignment), sizeof(uint8_t), alignment) {

														
 
															 }

														
 
															 IMAGE_DEFINITION(ImageU8Impl, 1, uint8_t, uint8_t);

														
--- a/Source/test/tests/SimdTest.cpp
+++ b/Source/test/tests/SimdTest.cpp
@@ -10,6 +10,9 @@ START_TEST(Simd)
 
															 	#ifdef USE_SSSE3

														
 
															 		printText("	* SSSE3\n");

														
 
															 	#endif

														
 
															+	#ifdef USE_AVX

														
 
															+		printText("	* AVX\n");

														
 
															+	#endif

														
 
															 	#ifdef USE_AVX2

														
 
															 		printText("	* AVX2\n");

														
 
															 	#endif