2 年之前 · ec5b143c57
--- a/Source/DFPSR/History.txt
+++ b/Source/DFPSR/History.txt
@@ -0,0 +1,11 @@
 
				+It is not fun when things break backward compatibility, but it would be even less fun if the whole library became a bloated mess that nobody uses after a few decades.

			
 
				+While every new feature may create naming conflicts from using the dsr namespace implicitly and every bugfix can make workarounds relying on incorrect behavior stop working, some things require extra attention to make porting to a new version easier.

			
 
				+

			
 
				+There are plans to build a refactoring tool into the Builder build system that could potentially do this automatically for you, but one must be very careful with overwriting people's code in case someone does not use version control.

			
 
				+

			
 
				+Changes since version 0.1.0

			
 
				+	* simdExtra.h was removed, because such a low level of abstraction would risk making the code slower by not fitting well with future SIMD extensions.

			
 
				+		On missing header when including simdExtra.h:

			
 
				+			Replace 'simdExtra.h' with 'simd.h' to make it compile.

			
 
				+			Remove any duplicate includes of simd.h to clean up your code.

			
 
				+			Remove all code within '#ifdef USE_SIMD_EXTRA' or '#if defined USE_SIMD_EXTRA' to clean up your code.

			
--- a/Source/DFPSR/base/simdExtra.h
+++ b/Source/DFPSR/base/simdExtra.h
@@ -1,66 +0,0 @@
 
				-// zlib open source license

			
 
				-//

			
 
				-// Copyright (c) 2019 David Forsgren Piuva

			
 
				-// 

			
 
				-// This software is provided 'as-is', without any express or implied

			
 
				-// warranty. In no event will the authors be held liable for any damages

			
 
				-// arising from the use of this software.

			
 
				-// 

			
 
				-// Permission is granted to anyone to use this software for any purpose,

			
 
				-// including commercial applications, and to alter it and redistribute it

			
 
				-// freely, subject to the following restrictions:

			
 
				-// 

			
 
				-//    1. The origin of this software must not be misrepresented; you must not

			
 
				-//    claim that you wrote the original software. If you use this software

			
 
				-//    in a product, an acknowledgment in the product documentation would be

			
 
				-//    appreciated but is not required.

			
 
				-// 

			
 
				-//    2. Altered source versions must be plainly marked as such, and must not be

			
 
				-//    misrepresented as being the original software.

			
 
				-// 

			
 
				-//    3. This notice may not be removed or altered from any source

			
 
				-//    distribution.

			
 
				-

			
 
				-// An advanced high performance extension to the simpler simd.h

			
 
				-//    The caller is expected to write the reference implementation separately for unhandled target machines.

			
 
				-//        Because the code is not as clean as when using infix math operations from simd.h,

			
 
				-//        so you will need to write a separate scalar version anyway for documenting the behaviour.

			
 
				-//    This module can only be used when the USE_SIMD_EXTRA macro is defined.

			
 
				-//        This allows USE_SIMD_EXTRA to be more picky about which SIMD instruction sets to use

			
 
				-//        in order to get access to a larger intersection between the platforms.

			
 
				-//        It also keeps simd.h easy to port and emulate.

			
 
				-//    Works directly with simd vectors using aliases, instead of the wrappers.

			
 
				-//        This makes it easier to mix directly with SIMD intrinsics for a specific target.

			
 
				-

			
 
				-#ifndef DFPSR_SIMD_EXTRA

			
 
				-#define DFPSR_SIMD_EXTRA

			
 
				-	#include "simd.h"

			
 
				-

			
 
				-	#if defined USE_SSE2

			
 
				-		#define USE_SIMD_EXTRA

			
 
				-		//struct SIMD_F32x4x2 {

			
 
				-		//	SIMD_F32x4 val[2];

			
 
				-		//};

			
 
				-		//struct SIMD_U16x8x2 {

			
 
				-		//	SIMD_U16x8 val[2];

			
 
				-		//};

			
 
				-		struct SIMD_U32x4x2 {

			
 
				-			SIMD_U32x4 val[2];

			
 
				-		};

			
 
				-		//struct SIMD_I32x4x2 {

			
 
				-		//	SIMD_I32x4 val[2];

			
 
				-		//};

			
 
				-		static inline SIMD_U32x4x2 ZIP_U32_SIMD(SIMD_U32x4 lower, SIMD_U32x4 higher) {

			
 
				-			ALIGN16 SIMD_U32x4x2 result;

			
 
				-			result.val[0] = _mm_unpacklo_epi32(lower, higher);

			
 
				-			result.val[1] = _mm_unpackhi_epi32(lower, higher);

			
 
				-			return result;

			
 
				-		}

			
 
				-		static inline SIMD_U32x4 ZIP_LOW_U32_SIMD(SIMD_U32x4 lower, SIMD_U32x4 higher) {

			
 
				-			return _mm_unpacklo_epi32(lower, higher);

			
 
				-		}

			
 
				-		static inline SIMD_U32x4 ZIP_HIGH_U32_SIMD(SIMD_U32x4 lower, SIMD_U32x4 higher) {

			
 
				-			return _mm_unpackhi_epi32(lower, higher);

			
 
				-		}

			
 
				-	#endif

			
 
				-#endif

			
--- a/Source/DFPSR/image/draw.cpp
+++ b/Source/DFPSR/image/draw.cpp
@@ -1,4 +1,5 @@
 
				 // zlib open source license

			
 
				+// zlib open source license

			
 
				 //

			
 
				 // Copyright (c) 2018 to 2019 David Forsgren Piuva

			
 
				 // 

			
@@ -21,7 +22,7 @@
 
				 //    3. This notice may not be removed or altered from any source

			
 
				 //    distribution.

			
 
				 

			
 
				-#include "../base/simdExtra.h"

			
 
				+#include "../base/simd.h"

			
 
				 #include "draw.h"

			
 
				 #include "internal/imageInternal.h"

			
 
				 #include "../math/scalar.h"

			
@@ -1187,51 +1188,32 @@ static void blockMagnify_reference(
 
				 static void blockMagnify_2x2(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int clipWidth, int clipHeight) {

			
 
				 	#ifdef USE_SIMD_EXTRA

			
 
				 		const SafePointer<uint32_t> sourceRow = imageInternal::getSafeData<uint32_t>(source);

			
 
				-		SafePointer<uint32_t> upperTargetRow = imageInternal::getSafeData<uint32_t>(target, 0);

			
 
				-		SafePointer<uint32_t> lowerTargetRow = imageInternal::getSafeData<uint32_t>(target, 1);

			
 
				-		int doubleTargetStride = target.stride * 2;

			
 
				+		SafePointer<uint32_t> targetRowA = imageInternal::getSafeData<uint32_t>(target, 0);

			
 
				+		SafePointer<uint32_t> targetRowB = imageInternal::getSafeData<uint32_t>(target, 1);

			
 
				+		int blockTargetStride = target.stride * 2;

			
 
				 		for (int upperTargetY = 0; upperTargetY + 2 <= clipHeight; upperTargetY+=2) {

			
 
				 			// Carriage return

			
 
				 			const SafePointer<uint32_t> sourcePixel = sourceRow;

			
 
				-			SafePointer<uint32_t> upperTargetPixel = upperTargetRow;

			
 
				-			SafePointer<uint32_t> lowerTargetPixel = lowerTargetRow;

			
 
				+			SafePointer<uint32_t> targetPixelA = targetRowA;

			
 
				+			SafePointer<uint32_t> targetPixelB = targetRowB;

			
 
				 			// Write to whole multiples of 8 pixels

			
 
				 			int writeLeftX = 0;

			
 
				-			while (writeLeftX + 8 <= clipWidth) {

			
 
				-				// Read pixels

			
 
				-				ALIGN16 SIMD_U32x4 sourcePixels = U32x4::readAligned(sourcePixel, "blockMagnify_2x2 @ whole sourcePixels").v;

			
 
				-				sourcePixel += 4;

			
 
				-				// Double the pixels by zipping with itself

			
 
				-				ALIGN16 SIMD_U32x4x2 doubledPixels = ZIP_U32_SIMD(sourcePixels, sourcePixels);

			
 
				-				// Write lower part

			
 
				-				U32x4(doubledPixels.val[0]).writeAligned(upperTargetPixel, "blockMagnify_2x2 @ write upper left #1");

			
 
				-				upperTargetPixel += 4;

			
 
				-				U32x4(doubledPixels.val[0]).writeAligned(lowerTargetPixel, "blockMagnify_2x2 @ write lower left #1");

			
 
				-				lowerTargetPixel += 4;

			
 
				-				// Write upper part

			
 
				-				U32x4(doubledPixels.val[1]).writeAligned(upperTargetPixel, "blockMagnify_2x2 @ write upper right #1");

			
 
				-				upperTargetPixel += 4;

			
 
				-				U32x4(doubledPixels.val[1]).writeAligned(lowerTargetPixel, "blockMagnify_2x2 @ write lower right #1");

			
 
				-				lowerTargetPixel += 4;

			
 
				-				// Count

			
 
				-				writeLeftX += 8;

			
 
				-			}

			
 
				-			// Fill the last pixels using scalar operations to avoid going out of bound

			
 
				 			while (writeLeftX + 2 <= clipWidth) {

			
 
				-				// Read one pixel

			
 
				-				uint32_t sourceColor = *sourcePixel;

			
 
				-				// Write 2x2 pixels

			
 
				-				*upperTargetPixel = sourceColor; upperTargetPixel += 1;

			
 
				-				*upperTargetPixel = sourceColor; upperTargetPixel += 1;

			
 
				-				*lowerTargetPixel = sourceColor; lowerTargetPixel += 1;

			
 
				-				*lowerTargetPixel = sourceColor; lowerTargetPixel += 1;

			
 
				+				// Read one pixel at a time

			
 
				+				uint32_t scalarValue = *sourcePixel;

			
 
				+				sourcePixel += 1;

			
 
				+				// Write to a whole block of pixels

			
 
				+				targetPixelA[0] = scalarValue; targetPixelA[1] = scalarValue;

			
 
				+				targetPixelB[0] = scalarValue; targetPixelB[1] = scalarValue;

			
 
				+				targetPixelA += 2;

			
 
				+				targetPixelB += 2;

			
 
				 				// Count

			
 
				 				writeLeftX += 2;

			
 
				 			}

			
 
				 			// Line feed

			
 
				 			sourceRow.increaseBytes(source.stride);

			
 
				-			upperTargetRow.increaseBytes(doubleTargetStride);

			
 
				-			lowerTargetRow.increaseBytes(doubleTargetStride);

			
 
				+			targetRowA.increaseBytes(blockTargetStride);

			
 
				+			targetRowB.increaseBytes(blockTargetStride);

			
 
				 		}

			
 
				 	#else

			
 
				 		blockMagnify_reference<false>(target, source, 2, 2, clipWidth, clipHeight);