Browse Source

Finalize the WoA build

Panagiotis Christopoulos Charitos 7 months ago
parent
commit
64a5deba77
3 changed files with 34 additions and 20 deletions
  1. +24 -10
      .github/workflows/Windows.yml
  2. +7 -7
      AnKi/Math/Simd.h
  3. +3 -3
      AnKi/Math/Vec.h

+ 24 - 10
.github/workflows/Windows.yml

@@ -7,8 +7,8 @@ on:
     branches: [ master ]
 
 jobs:
-  Debug_VK:
-    name: "Debug_VK"
+  Debug_x64_VK:
+    name: "Debug_x64_VK"
     runs-on: windows-latest
 
     steps:
@@ -21,8 +21,8 @@ jobs:
     - name: Build
       run: cmake --build ${{github.workspace}}/build --config Debug
 
-  Debug_D3D:
-    name: "Debug_D3D"
+  Debug_x64_D3D:
+    name: "Debug_x64_D3D"
     runs-on: windows-latest
 
     steps:
@@ -35,8 +35,22 @@ jobs:
     - name: Build
       run: cmake --build ${{github.workspace}}/build --config Debug
 
-  Release_VK:
-    name: "Release_VK"
+  Debug_arm64_VK:
+    name: "Debug_arm64_VK"
+    runs-on: windows-11-arm
+
+    steps:
+    - name: Clone
+      uses: actions/checkout@v3
+
+    - name: Configure CMake
+      run: cmake -B ${{github.workspace}}/build -DANKI_BUILD_TESTS=ON -DCMAKE_BUILD_TYPE=Debug -DANKI_EXTRA_CHECKS=ON
+
+    - name: Build
+      run: cmake --build ${{github.workspace}}/build --config Debug
+
+  Release_x64_VK:
+    name: "Release_x64_VK"
     runs-on: windows-latest
 
     steps:
@@ -49,8 +63,8 @@ jobs:
     - name: Build
       run: cmake --build ${{github.workspace}}/build --config Release
 
-  Release_D3D:
-    name: "Release_D3D"
+  Release_x64_D3D:
+    name: "Release_x64_D3D"
     runs-on: windows-latest
 
     steps:
@@ -63,8 +77,8 @@ jobs:
     - name: Build
       run: cmake --build ${{github.workspace}}/build --config Release
 
-  DLSS:
-    name: "DLSS"
+  DLSS_x64:
+    name: "DLSS_x64"
     runs-on: windows-latest
 
     steps:

+ 7 - 7
AnKi/Math/Simd.h

@@ -109,44 +109,44 @@ inline float32x4_t neonSuffleFloat32x4(float32x4_t inV1, float32x4_t inV2)
 
 // Specializations
 template<>
-inline float32x4_t neonSuffleFloat32x4<0, 1, 2, 2>(float32x4_t inV1, float32x4_t inV2)
+inline float32x4_t neonSuffleFloat32x4<0, 1, 2, 2>(float32x4_t inV1, [[maybe_unused]] float32x4_t inV2)
 {
 	return vcombine_f32(vget_low_f32(inV1), vdup_lane_f32(vget_high_f32(inV1), 0));
 }
 
 template<>
-inline float32x4_t neonSuffleFloat32x4<0, 1, 3, 3>(float32x4_t inV1, float32x4_t inV2)
+inline float32x4_t neonSuffleFloat32x4<0, 1, 3, 3>(float32x4_t inV1, [[maybe_unused]] float32x4_t inV2)
 {
 	return vcombine_f32(vget_low_f32(inV1), vdup_lane_f32(vget_high_f32(inV1), 1));
 }
 
 template<>
-inline float32x4_t neonSuffleFloat32x4<0, 1, 2, 3>(float32x4_t inV1, float32x4_t inV2)
+inline float32x4_t neonSuffleFloat32x4<0, 1, 2, 3>(float32x4_t inV1, [[maybe_unused]] float32x4_t inV2)
 {
 	return inV1;
 }
 
 template<>
-inline float32x4_t neonSuffleFloat32x4<1, 0, 3, 2>(float32x4_t inV1, float32x4_t inV2)
+inline float32x4_t neonSuffleFloat32x4<1, 0, 3, 2>(float32x4_t inV1, [[maybe_unused]] float32x4_t inV2)
 {
 	return vcombine_f32(vrev64_f32(vget_low_f32(inV1)), vrev64_f32(vget_high_f32(inV1)));
 }
 
 template<>
-inline float32x4_t neonSuffleFloat32x4<2, 2, 1, 0>(float32x4_t inV1, float32x4_t inV2)
+inline float32x4_t neonSuffleFloat32x4<2, 2, 1, 0>(float32x4_t inV1, [[maybe_unused]] float32x4_t inV2)
 {
 	return vcombine_f32(vdup_lane_f32(vget_high_f32(inV1), 0), vrev64_f32(vget_low_f32(inV1)));
 }
 
 template<>
-inline float32x4_t neonSuffleFloat32x4<2, 3, 0, 1>(float32x4_t inV1, float32x4_t inV2)
+inline float32x4_t neonSuffleFloat32x4<2, 3, 0, 1>(float32x4_t inV1, [[maybe_unused]] float32x4_t inV2)
 {
 	return vcombine_f32(vget_high_f32(inV1), vget_low_f32(inV1));
 }
 
 // Used extensively by cross product
 template<>
-inline float32x4_t neonSuffleFloat32x4<1, 2, 0, 0>(float32x4_t inV1, float32x4_t inV2)
+inline float32x4_t neonSuffleFloat32x4<1, 2, 0, 0>(float32x4_t inV1, [[maybe_unused]] float32x4_t inV2)
 {
 	static uint8x16_t table = ANKI_NEON_UINT8x16(0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03);
 	return vreinterpretq_f32_u8(vqtbl1q_u8(vreinterpretq_u8_f32(inV1), table));

+ 3 - 3
AnKi/Math/Vec.h

@@ -1992,7 +1992,7 @@ public:
 #	if ANKI_SIMD_SSE
 		m_simd = _mm_add_ps(m_simd, b.m_simd);
 #	else
-		m_simd += b.m_simd;
+		m_simd = vaddq_f32(m_simd, b.m_simd);
 #	endif
 		return *this;
 	}
@@ -2076,7 +2076,7 @@ public:
 #	if ANKI_SIMD_SSE
 		m_simd = _mm_mul_ps(m_simd, b.m_simd);
 #	else
-		m_simd *= b.m_simd;
+		m_simd = vmulq_f32(m_simd, b.m_simd);
 #	endif
 		return *this;
 	}
@@ -2120,7 +2120,7 @@ public:
 #	if ANKI_SIMD_SSE
 		m_simd = _mm_div_ps(m_simd, b.m_simd);
 #	else
-		m_simd /= b.m_simd;
+		m_simd = vdivq_f32(m_simd, b.m_simd);
 #	endif
 		return *this;
 	}