Browse Source

Enabling all available SIMD extensions when testing.

David Piuva 10 months ago
parent
commit
f8ea6b14ce
4 changed files with 78 additions and 91 deletions
  1. 2 6
      .github/workflows/ci.yml
  2. 2 6
      .github/workflows/ci.yml.tabs
  3. 70 76
      Source/DFPSR/base/simd.h
  4. 4 3
      Source/test.sh

+ 2 - 6
.github/workflows/ci.yml

@@ -8,15 +8,11 @@ jobs:
     strategy:
     strategy:
       matrix:
       matrix:
         os: [ubuntu-latest, macos-latest]
         os: [ubuntu-latest, macos-latest]
-        architecture: [x86_32, x86_64]
+        architecture: [x86_32, x86_64, arm, arm64]
     steps:
     steps:
       - name: Checkout
       - name: Checkout
         uses: actions/checkout@v4
         uses: actions/checkout@v4
       - name: Run tests
       - name: Run tests
         run: |
         run: |
           cd ./Source
           cd ./Source
-          if [[ "${{ matrix.architecture }}" == "x86_32" ]]; then
-            ./test.sh
-          elif [[ "${{ matrix.architecture }}" == "x86_64" ]]; then
-            ./test.sh
-          fi
+          ./test.sh

+ 2 - 6
.github/workflows/ci.yml.tabs

@@ -7,15 +7,11 @@ jobs:
 		strategy:
 		strategy:
 			matrix:
 			matrix:
 				os: [ubuntu-latest, macos-latest]
 				os: [ubuntu-latest, macos-latest]
-				architecture: [x86_32, x86_64]
+				architecture: [x86_32, x86_64, arm, arm64]
 		steps:
 		steps:
 			- name: Checkout
 			- name: Checkout
 				uses: actions/checkout@v4
 				uses: actions/checkout@v4
 			- name: Run tests
 			- name: Run tests
 				run: |
 				run: |
 					cd ./Source
 					cd ./Source
-					if [[ "${{ matrix.architecture }}" == "x86_32" ]]; then
-						./test.sh
-					elif [[ "${{ matrix.architecture }}" == "x86_64" ]]; then
-						./test.sh
-					fi
+					./test.sh

+ 70 - 76
Source/DFPSR/base/simd.h

@@ -3341,87 +3341,81 @@
 	template <uint32_t bitOffset>
 	template <uint32_t bitOffset>
 	inline U8x32 bitShiftLeftImmediate(const U8x32& left) {
 	inline U8x32 bitShiftLeftImmediate(const U8x32& left) {
 		static_assert(bitOffset < 8u, "Immediate left shift of 32-bit values may not shift more than 7 bits!");
 		static_assert(bitOffset < 8u, "Immediate left shift of 32-bit values may not shift more than 7 bits!");
-		#if defined USE_AVX2
-			return U8x32(_mm256_slli_epi8(left.v, bitOffset));
-		#else
-			return U8x32(
-			  left.scalars[ 0] << bitOffset,
-			  left.scalars[ 1] << bitOffset,
-			  left.scalars[ 2] << bitOffset,
-			  left.scalars[ 3] << bitOffset,
-			  left.scalars[ 4] << bitOffset,
-			  left.scalars[ 5] << bitOffset,
-			  left.scalars[ 6] << bitOffset,
-			  left.scalars[ 7] << bitOffset,
-			  left.scalars[ 8] << bitOffset,
-			  left.scalars[ 9] << bitOffset,
-			  left.scalars[10] << bitOffset,
-			  left.scalars[11] << bitOffset,
-			  left.scalars[12] << bitOffset,
-			  left.scalars[13] << bitOffset,
-			  left.scalars[14] << bitOffset,
-			  left.scalars[15] << bitOffset,
-			  left.scalars[16] << bitOffset,
-			  left.scalars[17] << bitOffset,
-			  left.scalars[18] << bitOffset,
-			  left.scalars[19] << bitOffset,
-			  left.scalars[20] << bitOffset,
-			  left.scalars[21] << bitOffset,
-			  left.scalars[22] << bitOffset,
-			  left.scalars[23] << bitOffset,
-			  left.scalars[24] << bitOffset,
-			  left.scalars[25] << bitOffset,
-			  left.scalars[26] << bitOffset,
-			  left.scalars[27] << bitOffset,
-			  left.scalars[28] << bitOffset,
-			  left.scalars[29] << bitOffset,
-			  left.scalars[30] << bitOffset,
-			  left.scalars[31] << bitOffset
-			);
-		#endif
+		// TODO: Use a larger lane and a mask generated in compile time.
+		return U8x32(
+		  left.scalars[ 0] << bitOffset,
+		  left.scalars[ 1] << bitOffset,
+		  left.scalars[ 2] << bitOffset,
+		  left.scalars[ 3] << bitOffset,
+		  left.scalars[ 4] << bitOffset,
+		  left.scalars[ 5] << bitOffset,
+		  left.scalars[ 6] << bitOffset,
+		  left.scalars[ 7] << bitOffset,
+		  left.scalars[ 8] << bitOffset,
+		  left.scalars[ 9] << bitOffset,
+		  left.scalars[10] << bitOffset,
+		  left.scalars[11] << bitOffset,
+		  left.scalars[12] << bitOffset,
+		  left.scalars[13] << bitOffset,
+		  left.scalars[14] << bitOffset,
+		  left.scalars[15] << bitOffset,
+		  left.scalars[16] << bitOffset,
+		  left.scalars[17] << bitOffset,
+		  left.scalars[18] << bitOffset,
+		  left.scalars[19] << bitOffset,
+		  left.scalars[20] << bitOffset,
+		  left.scalars[21] << bitOffset,
+		  left.scalars[22] << bitOffset,
+		  left.scalars[23] << bitOffset,
+		  left.scalars[24] << bitOffset,
+		  left.scalars[25] << bitOffset,
+		  left.scalars[26] << bitOffset,
+		  left.scalars[27] << bitOffset,
+		  left.scalars[28] << bitOffset,
+		  left.scalars[29] << bitOffset,
+		  left.scalars[30] << bitOffset,
+		  left.scalars[31] << bitOffset
+		);
 	}
 	}
 	// bitOffset must be an immediate constant from 0 to 7 (each lane is 8 bits wide), so a template argument is used.
 	// bitOffset must be an immediate constant from 0 to 7 (each lane is 8 bits wide), so a template argument is used.
 	template <uint32_t bitOffset>
 	template <uint32_t bitOffset>
 	inline U8x32 bitShiftRightImmediate(const U8x32& left) {
 	inline U8x32 bitShiftRightImmediate(const U8x32& left) {
 		static_assert(bitOffset < 8u, "Immediate right shift of 32-bit values may not shift more than 7 bits!");
 		static_assert(bitOffset < 8u, "Immediate right shift of 32-bit values may not shift more than 7 bits!");
-		#if defined USE_AVX2
-			return U8x32(_mm256_srli_epi8(left.v, bitOffset));
-		#else
-			return U8x32(
-			  left.scalars[ 0] >> bitOffset,
-			  left.scalars[ 1] >> bitOffset,
-			  left.scalars[ 2] >> bitOffset,
-			  left.scalars[ 3] >> bitOffset,
-			  left.scalars[ 4] >> bitOffset,
-			  left.scalars[ 5] >> bitOffset,
-			  left.scalars[ 6] >> bitOffset,
-			  left.scalars[ 7] >> bitOffset,
-			  left.scalars[ 8] >> bitOffset,
-			  left.scalars[ 9] >> bitOffset,
-			  left.scalars[10] >> bitOffset,
-			  left.scalars[11] >> bitOffset,
-			  left.scalars[12] >> bitOffset,
-			  left.scalars[13] >> bitOffset,
-			  left.scalars[14] >> bitOffset,
-			  left.scalars[15] >> bitOffset,
-			  left.scalars[16] >> bitOffset,
-			  left.scalars[17] >> bitOffset,
-			  left.scalars[18] >> bitOffset,
-			  left.scalars[19] >> bitOffset,
-			  left.scalars[20] >> bitOffset,
-			  left.scalars[21] >> bitOffset,
-			  left.scalars[22] >> bitOffset,
-			  left.scalars[23] >> bitOffset,
-			  left.scalars[24] >> bitOffset,
-			  left.scalars[25] >> bitOffset,
-			  left.scalars[26] >> bitOffset,
-			  left.scalars[27] >> bitOffset,
-			  left.scalars[28] >> bitOffset,
-			  left.scalars[29] >> bitOffset,
-			  left.scalars[30] >> bitOffset,
-			  left.scalars[31] >> bitOffset
-			);
-		#endif
+		// TODO: Use a larger lane and a mask generated in compile time.
+		return U8x32(
+		  left.scalars[ 0] >> bitOffset,
+		  left.scalars[ 1] >> bitOffset,
+		  left.scalars[ 2] >> bitOffset,
+		  left.scalars[ 3] >> bitOffset,
+		  left.scalars[ 4] >> bitOffset,
+		  left.scalars[ 5] >> bitOffset,
+		  left.scalars[ 6] >> bitOffset,
+		  left.scalars[ 7] >> bitOffset,
+		  left.scalars[ 8] >> bitOffset,
+		  left.scalars[ 9] >> bitOffset,
+		  left.scalars[10] >> bitOffset,
+		  left.scalars[11] >> bitOffset,
+		  left.scalars[12] >> bitOffset,
+		  left.scalars[13] >> bitOffset,
+		  left.scalars[14] >> bitOffset,
+		  left.scalars[15] >> bitOffset,
+		  left.scalars[16] >> bitOffset,
+		  left.scalars[17] >> bitOffset,
+		  left.scalars[18] >> bitOffset,
+		  left.scalars[19] >> bitOffset,
+		  left.scalars[20] >> bitOffset,
+		  left.scalars[21] >> bitOffset,
+		  left.scalars[22] >> bitOffset,
+		  left.scalars[23] >> bitOffset,
+		  left.scalars[24] >> bitOffset,
+		  left.scalars[25] >> bitOffset,
+		  left.scalars[26] >> bitOffset,
+		  left.scalars[27] >> bitOffset,
+		  left.scalars[28] >> bitOffset,
+		  left.scalars[29] >> bitOffset,
+		  left.scalars[30] >> bitOffset,
+		  left.scalars[31] >> bitOffset
+		);
 	}
 	}
 
 
 	inline U16x16 operator+(const U16x16& left, const U16x16& right) {
 	inline U16x16 operator+(const U16x16& left, const U16x16& right) {

+ 4 - 3
Source/test.sh

@@ -5,17 +5,18 @@ TEMP_ROOT=${ROOT_PATH}/../../temporary
 CPP_VERSION=-std=c++14
 CPP_VERSION=-std=c++14
 MODE="-DDEBUG"
 MODE="-DDEBUG"
 DEBUGGER="-g"
 DEBUGGER="-g"
+SIMD="-march=native"
 O_LEVEL=-O2
 O_LEVEL=-O2
 
 
 chmod +x ${ROOT_PATH}/tools/build.sh;
 chmod +x ${ROOT_PATH}/tools/build.sh;
-${ROOT_PATH}/tools/buildScripts/build.sh "NONE" "NONE" "${ROOT_PATH}" "${TEMP_ROOT}" "NONE" "${MODE} ${DEBUGGER} ${CPP_VERSION} ${O_LEVEL}";
+${ROOT_PATH}/tools/buildScripts/build.sh "NONE" "NONE" "${ROOT_PATH}" "${TEMP_ROOT}" "NONE" "${MODE} ${DEBUGGER} ${SIMD} ${CPP_VERSION} ${O_LEVEL}";
 if [ $? -ne 0 ]
 if [ $? -ne 0 ]
 then
 then
 	exit 1
 	exit 1
 fi
 fi
 
 
 # Get the specific temporary sub-folder for the compilation settings
 # Get the specific temporary sub-folder for the compilation settings
-TEMP_SUB="${MODE}_${DEBUGGER}_${CPP_VERSION}_${O_LEVEL}"
+TEMP_SUB="${MODE}_${DEBUGGER}_${SIMD}_${CPP_VERSION}_${O_LEVEL}"
 TEMP_SUB=$(echo $TEMP_SUB | tr "+" "p")
 TEMP_SUB=$(echo $TEMP_SUB | tr "+" "p")
 TEMP_SUB=$(echo $TEMP_SUB | tr -d " =-")
 TEMP_SUB=$(echo $TEMP_SUB | tr -d " =-")
 TEMP_DIR=${TEMP_ROOT}/${TEMP_SUB}
 TEMP_DIR=${TEMP_ROOT}/${TEMP_SUB}
@@ -31,7 +32,7 @@ for file in ./test/tests/*.cpp; do
 	rm -f ${TEMP_DIR}/application;
 	rm -f ${TEMP_DIR}/application;
 	# Compile test case that defines main
 	# Compile test case that defines main
 	echo "Compiling ${name}";
 	echo "Compiling ${name}";
-	g++ ${CPP_VERSION} ${MODE} ${DEBUGGER} -c ${file} -o ${TEMP_DIR}/${base}_test.o;
+	g++ ${CPP_VERSION} ${MODE} ${DEBUGGER} ${SIMD} -c ${file} -o ${TEMP_DIR}/${base}_test.o;
 	if [ $? -ne 0 ]
 	if [ $? -ne 0 ]
 	then
 	then
 		exit 1
 		exit 1