7 months ago · 64a5deba77
--- a/.github/workflows/Windows.yml
+++ b/.github/workflows/Windows.yml
@@ -7,8 +7,8 @@ on:
 
				     branches: [ master ]
			
 
				 
			
 
				 jobs:
			
 
				-  Debug_VK:
			
 
				-    name: "Debug_VK"
			
 
				+  Debug_x64_VK:
			
 
				+    name: "Debug_x64_VK"
			
 
				     runs-on: windows-latest
			
 
				 
			
 
				     steps:
			
@@ -21,8 +21,8 @@ jobs:
 
				     - name: Build
			
 
				       run: cmake --build ${{github.workspace}}/build --config Debug
			
 
				 
			
 
				-  Debug_D3D:
			
 
				-    name: "Debug_D3D"
			
 
				+  Debug_x64_D3D:
			
 
				+    name: "Debug_x64_D3D"
			
 
				     runs-on: windows-latest
			
 
				 
			
 
				     steps:
			
@@ -35,8 +35,22 @@ jobs:
 
				     - name: Build
			
 
				       run: cmake --build ${{github.workspace}}/build --config Debug
			
 
				 
			
 
				-  Release_VK:
			
 
				-    name: "Release_VK"
			
 
				+  Debug_arm64_VK:
			
 
				+    name: "Debug_arm64_VK"
			
 
				+    runs-on: windows-11-arm
			
 
				+
			
 
				+    steps:
			
 
				+    - name: Clone
			
 
				+      uses: actions/checkout@v3
			
 
				+
			
 
				+    - name: Configure CMake
			
 
				+      run: cmake -B ${{github.workspace}}/build -DANKI_BUILD_TESTS=ON -DCMAKE_BUILD_TYPE=Debug -DANKI_EXTRA_CHECKS=ON
			
 
				+
			
 
				+    - name: Build
			
 
				+      run: cmake --build ${{github.workspace}}/build --config Debug
			
 
				+
			
 
				+  Release_x64_VK:
			
 
				+    name: "Release_x64_VK"
			
 
				     runs-on: windows-latest
			
 
				 
			
 
				     steps:
			
@@ -49,8 +63,8 @@ jobs:
 
				     - name: Build
			
 
				       run: cmake --build ${{github.workspace}}/build --config Release
			
 
				 
			
 
				-  Release_D3D:
			
 
				-    name: "Release_D3D"
			
 
				+  Release_x64_D3D:
			
 
				+    name: "Release_x64_D3D"
			
 
				     runs-on: windows-latest
			
 
				 
			
 
				     steps:
			
@@ -63,8 +77,8 @@ jobs:
 
				     - name: Build
			
 
				       run: cmake --build ${{github.workspace}}/build --config Release
			
 
				 
			
 
				-  DLSS:
			
 
				-    name: "DLSS"
			
 
				+  DLSS_x64:
			
 
				+    name: "DLSS_x64"
			
 
				     runs-on: windows-latest
			
 
				 
			
 
				     steps:
			
--- a/AnKi/Math/Simd.h
+++ b/AnKi/Math/Simd.h
@@ -109,44 +109,44 @@ inline float32x4_t neonSuffleFloat32x4(float32x4_t inV1, float32x4_t inV2)
 
				 
			
 
				 // Specializations
			
 
				 template<>
			
 
				-inline float32x4_t neonSuffleFloat32x4<0, 1, 2, 2>(float32x4_t inV1, float32x4_t inV2)
			
 
				+inline float32x4_t neonSuffleFloat32x4<0, 1, 2, 2>(float32x4_t inV1, [[maybe_unused]] float32x4_t inV2)
			
 
				 {
			
 
				 	return vcombine_f32(vget_low_f32(inV1), vdup_lane_f32(vget_high_f32(inV1), 0));
			
 
				 }
			
 
				 
			
 
				 template<>
			
 
				-inline float32x4_t neonSuffleFloat32x4<0, 1, 3, 3>(float32x4_t inV1, float32x4_t inV2)
			
 
				+inline float32x4_t neonSuffleFloat32x4<0, 1, 3, 3>(float32x4_t inV1, [[maybe_unused]] float32x4_t inV2)
			
 
				 {
			
 
				 	return vcombine_f32(vget_low_f32(inV1), vdup_lane_f32(vget_high_f32(inV1), 1));
			
 
				 }
			
 
				 
			
 
				 template<>
			
 
				-inline float32x4_t neonSuffleFloat32x4<0, 1, 2, 3>(float32x4_t inV1, float32x4_t inV2)
			
 
				+inline float32x4_t neonSuffleFloat32x4<0, 1, 2, 3>(float32x4_t inV1, [[maybe_unused]] float32x4_t inV2)
			
 
				 {
			
 
				 	return inV1;
			
 
				 }
			
 
				 
			
 
				 template<>
			
 
				-inline float32x4_t neonSuffleFloat32x4<1, 0, 3, 2>(float32x4_t inV1, float32x4_t inV2)
			
 
				+inline float32x4_t neonSuffleFloat32x4<1, 0, 3, 2>(float32x4_t inV1, [[maybe_unused]] float32x4_t inV2)
			
 
				 {
			
 
				 	return vcombine_f32(vrev64_f32(vget_low_f32(inV1)), vrev64_f32(vget_high_f32(inV1)));
			
 
				 }
			
 
				 
			
 
				 template<>
			
 
				-inline float32x4_t neonSuffleFloat32x4<2, 2, 1, 0>(float32x4_t inV1, float32x4_t inV2)
			
 
				+inline float32x4_t neonSuffleFloat32x4<2, 2, 1, 0>(float32x4_t inV1, [[maybe_unused]] float32x4_t inV2)
			
 
				 {
			
 
				 	return vcombine_f32(vdup_lane_f32(vget_high_f32(inV1), 0), vrev64_f32(vget_low_f32(inV1)));
			
 
				 }
			
 
				 
			
 
				 template<>
			
 
				-inline float32x4_t neonSuffleFloat32x4<2, 3, 0, 1>(float32x4_t inV1, float32x4_t inV2)
			
 
				+inline float32x4_t neonSuffleFloat32x4<2, 3, 0, 1>(float32x4_t inV1, [[maybe_unused]] float32x4_t inV2)
			
 
				 {
			
 
				 	return vcombine_f32(vget_high_f32(inV1), vget_low_f32(inV1));
			
 
				 }
			
 
				 
			
 
				 // Used extensively by cross product
			
 
				 template<>
			
 
				-inline float32x4_t neonSuffleFloat32x4<1, 2, 0, 0>(float32x4_t inV1, float32x4_t inV2)
			
 
				+inline float32x4_t neonSuffleFloat32x4<1, 2, 0, 0>(float32x4_t inV1, [[maybe_unused]] float32x4_t inV2)
			
 
				 {
			
 
				 	static uint8x16_t table = ANKI_NEON_UINT8x16(0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03);
			
 
				 	return vreinterpretq_f32_u8(vqtbl1q_u8(vreinterpretq_u8_f32(inV1), table));
			
--- a/AnKi/Math/Vec.h
+++ b/AnKi/Math/Vec.h
@@ -1992,7 +1992,7 @@ public:
 
				 #	if ANKI_SIMD_SSE
			
 
				 		m_simd = _mm_add_ps(m_simd, b.m_simd);
			
 
				 #	else
			
 
				-		m_simd += b.m_simd;
			
 
				+		m_simd = vaddq_f32(m_simd, b.m_simd);
			
 
				 #	endif
			
 
				 		return *this;
			
 
				 	}
			
@@ -2076,7 +2076,7 @@ public:
 
				 #	if ANKI_SIMD_SSE
			
 
				 		m_simd = _mm_mul_ps(m_simd, b.m_simd);
			
 
				 #	else
			
 
				-		m_simd *= b.m_simd;
			
 
				+		m_simd = vmulq_f32(m_simd, b.m_simd);
			
 
				 #	endif
			
 
				 		return *this;
			
 
				 	}
			
@@ -2120,7 +2120,7 @@ public:
 
				 #	if ANKI_SIMD_SSE
			
 
				 		m_simd = _mm_div_ps(m_simd, b.m_simd);
			
 
				 #	else
			
 
				-		m_simd /= b.m_simd;
			
 
				+		m_simd = vdivq_f32(m_simd, b.m_simd);
			
 
				 #	endif
			
 
				 		return *this;
			
 
				 	}