@@ -731,316 +731,28 @@ float Mat44::GetDeterminant3x3() const
 
 Mat44 Mat44::Adjointed3x3() const
 {
-	// Adapted from Inversed() to remove 4th column and the division by the determinant
-	// Note: This can be optimized.
-
-	JPH_ASSERT(mCol[0][3] == 0.0f);
-	JPH_ASSERT(mCol[1][3] == 0.0f);
-	JPH_ASSERT(mCol[2][3] == 0.0f);
-
-#if defined(JPH_USE_SSE)
-	__m128 tmp1 = _mm_shuffle_ps(mCol[0].mValue, mCol[1].mValue, _MM_SHUFFLE(1, 0, 1, 0));
-	__m128 row1 = _mm_shuffle_ps(mCol[2].mValue, _mm_setzero_ps(), _MM_SHUFFLE(1, 0, 1, 0));
-	__m128 row0 = _mm_shuffle_ps(tmp1, row1, _MM_SHUFFLE(2, 0, 2, 0));
-	row1 = _mm_shuffle_ps(row1, tmp1, _MM_SHUFFLE(3, 1, 3, 1));
-	tmp1 = _mm_shuffle_ps(mCol[0].mValue, mCol[1].mValue, _MM_SHUFFLE(3, 2, 3, 2));
-	__m128 row3 = _mm_shuffle_ps(mCol[2].mValue, _mm_set_ps(1, 0, 0, 0), _MM_SHUFFLE(3, 2, 3, 2));
-	__m128 row2 = _mm_shuffle_ps(tmp1, row3, _MM_SHUFFLE(2, 0, 2, 0));
-	row3 = _mm_shuffle_ps(row3, tmp1, _MM_SHUFFLE(3, 1, 3, 1));
-
-	tmp1 = _mm_mul_ps(row2, row3);
-	tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(2, 3, 0, 1));
-	__m128 minor0 = _mm_mul_ps(row1, tmp1);
-	__m128 minor1 = _mm_mul_ps(row0, tmp1);
-	tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(1, 0, 3, 2));
-	minor0 = _mm_sub_ps(_mm_mul_ps(row1, tmp1), minor0);
-	minor1 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor1);
-	minor1 = _mm_shuffle_ps(minor1, minor1, _MM_SHUFFLE(1, 0, 3, 2));
-
-	tmp1 = _mm_mul_ps(row1, row2);
-	tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(2, 3, 0, 1));
-	minor0 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor0);
-	tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(1, 0, 3, 2));
-	minor0 = _mm_sub_ps(minor0, _mm_mul_ps(row3, tmp1));
-
-	tmp1 = _mm_mul_ps(_mm_shuffle_ps(row1, row1, _MM_SHUFFLE(1, 0, 3, 2)), row3);
-	tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(2, 3, 0, 1));
-	row2 = _mm_shuffle_ps(row2, row2, _MM_SHUFFLE(1, 0, 3, 2));
-	minor0 = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor0);
-	__m128 minor2 = _mm_mul_ps(row0, tmp1);
-	tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(1, 0, 3, 2));
-	minor0 = _mm_sub_ps(minor0, _mm_mul_ps(row2, tmp1));
-	minor2 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor2);
-	minor2 = _mm_shuffle_ps(minor2, minor2, _MM_SHUFFLE(1, 0, 3, 2));
-
-	tmp1 = _mm_mul_ps(row0, row1);
-	tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(2, 3, 0, 1));
-	minor2 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor2);
-	tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(1, 0, 3, 2));
-	minor2 = _mm_sub_ps(_mm_mul_ps(row3, tmp1), minor2);
-
-	tmp1 = _mm_mul_ps(row0, row3);
-	tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(2, 3, 0, 1));
-	minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row2, tmp1));
-	minor2 = _mm_add_ps(_mm_mul_ps(row1, tmp1), minor2);
-	tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(1, 0, 3, 2));
-	minor1 = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor1);
-	minor2 = _mm_sub_ps(minor2, _mm_mul_ps(row1, tmp1));
-
-	tmp1 = _mm_mul_ps(row0, row2);
-	tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(2, 3, 0, 1));
-	minor1 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor1);
-	tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(1, 0, 3, 2));
-	minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row3, tmp1));
-
-	Mat44 result;
-	result.mCol[0].mValue = minor0;
-	result.mCol[1].mValue = minor1;
-	result.mCol[2].mValue = minor2;
-	result.mCol[3] = Vec4(0, 0, 0, 1);
-	return result;
-#elif defined(JPH_USE_NEON)
-	Type v0001 = vsetq_lane_f32(1, vdupq_n_f32(0), 3);
-	Type tmp1 = JPH_NEON_SHUFFLE_F32x4(mCol[0].mValue, mCol[1].mValue, 0, 1, 4, 5);
-	Type row1 = JPH_NEON_SHUFFLE_F32x4(mCol[2].mValue, v0001, 0, 1, 4, 5);
-	Type row0 = JPH_NEON_SHUFFLE_F32x4(tmp1, row1, 0, 2, 4, 6);
-	row1 = JPH_NEON_SHUFFLE_F32x4(row1, tmp1, 1, 3, 5, 7);
-	tmp1 = JPH_NEON_SHUFFLE_F32x4(mCol[0].mValue, mCol[1].mValue, 2, 3, 6, 7);
-	Type row3 = JPH_NEON_SHUFFLE_F32x4(mCol[2].mValue, v0001, 2, 3, 6, 7);
-	Type row2 = JPH_NEON_SHUFFLE_F32x4(tmp1, row3, 0, 2, 4, 6);
-	row3 = JPH_NEON_SHUFFLE_F32x4(row3, tmp1, 1, 3, 5, 7);
-
-	tmp1 = vmulq_f32(row2, row3);
-	tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 1, 0, 3, 2);
-	Type minor0 = vmulq_f32(row1, tmp1);
-	Type minor1 = vmulq_f32(row0, tmp1);
-	tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 2, 3, 0, 1);
-	minor0 = vsubq_f32(vmulq_f32(row1, tmp1), minor0);
-	minor1 = vsubq_f32(vmulq_f32(row0, tmp1), minor1);
-	minor1 = JPH_NEON_SHUFFLE_F32x4(minor1, minor1, 2, 3, 0, 1);
-
-	tmp1 = vmulq_f32(row1, row2);
-	tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 1, 0, 3, 2);
-	minor0 = vaddq_f32(vmulq_f32(row3, tmp1), minor0);
-	tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 2, 3, 0, 1);
-	minor0 = vsubq_f32(minor0, vmulq_f32(row3, tmp1));
-
-	tmp1 = JPH_NEON_SHUFFLE_F32x4(row1, row1, 2, 3, 0, 1);
-	tmp1 = vmulq_f32(tmp1, row3);
-	tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 1, 0, 3, 2);
-	row2 = JPH_NEON_SHUFFLE_F32x4(row2, row2, 2, 3, 0, 1);
-	minor0 = vaddq_f32(vmulq_f32(row2, tmp1), minor0);
-	Type minor2 = vmulq_f32(row0, tmp1);
-	tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 2, 3, 0, 1);
-	minor0 = vsubq_f32(minor0, vmulq_f32(row2, tmp1));
-	minor2 = vsubq_f32(vmulq_f32(row0, tmp1), minor2);
-	minor2 = JPH_NEON_SHUFFLE_F32x4(minor2, minor2, 2, 3, 0, 1);
-
-	tmp1 = vmulq_f32(row0, row1);
-	tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 1, 0, 3, 2);
-	minor2 = vaddq_f32(vmulq_f32(row3, tmp1), minor2);
-	tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 2, 3, 0, 1);
-	minor2 = vsubq_f32(vmulq_f32(row3, tmp1), minor2);
-
-	tmp1 = vmulq_f32(row0, row3);
-	tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 1, 0, 3, 2);
-	minor1 = vsubq_f32(minor1, vmulq_f32(row2, tmp1));
-	minor2 = vaddq_f32(vmulq_f32(row1, tmp1), minor2);
-	tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 2, 3, 0, 1);
-	minor1 = vaddq_f32(vmulq_f32(row2, tmp1), minor1);
-	minor2 = vsubq_f32(minor2, vmulq_f32(row1, tmp1));
-
-	tmp1 = vmulq_f32(row0, row2);
-	tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 1, 0, 3, 2);
-	minor1 = vaddq_f32(vmulq_f32(row3, tmp1), minor1);
-	tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 2, 3, 0, 1);
-	minor1 = vsubq_f32(minor1, vmulq_f32(row3, tmp1));
-
-	Mat44 result;
-	result.mCol[0].mValue = minor0;
-	result.mCol[1].mValue = minor1;
-	result.mCol[2].mValue = minor2;
-	result.mCol[3].mValue = v0001;
-	return result;
-#else
 	return Mat44(
-		Vec4(JPH_EL(1, 1) * JPH_EL(2, 2) - JPH_EL(1, 2) * JPH_EL(2, 1),
-			JPH_EL(1, 2) * JPH_EL(2, 0) - JPH_EL(1, 0) * JPH_EL(2, 2),
-			JPH_EL(1, 0) * JPH_EL(2, 1) - JPH_EL(1, 1) * JPH_EL(2, 0),
-			0),
-		Vec4(JPH_EL(0, 2) * JPH_EL(2, 1) - JPH_EL(0, 1) * JPH_EL(2, 2),
-			JPH_EL(0, 0) * JPH_EL(2, 2) - JPH_EL(0, 2) * JPH_EL(2, 0),
-			JPH_EL(0, 1) * JPH_EL(2, 0) - JPH_EL(0, 0) * JPH_EL(2, 1),
-			0),
-		Vec4(JPH_EL(0, 1) * JPH_EL(1, 2) - JPH_EL(0, 2) * JPH_EL(1, 1),
-			JPH_EL(0, 2) * JPH_EL(1, 0) - JPH_EL(0, 0) * JPH_EL(1, 2),
-			JPH_EL(0, 0) * JPH_EL(1, 1) - JPH_EL(0, 1) * JPH_EL(1, 0),
-			0),
+		Vec4(JPH_EL(1, 1), JPH_EL(1, 2), JPH_EL(1, 0), 0) * Vec4(JPH_EL(2, 2), JPH_EL(2, 0), JPH_EL(2, 1), 0)
+			- Vec4(JPH_EL(1, 2), JPH_EL(1, 0), JPH_EL(1, 1), 0) * Vec4(JPH_EL(2, 1), JPH_EL(2, 2), JPH_EL(2, 0), 0),
+		Vec4(JPH_EL(0, 2), JPH_EL(0, 0), JPH_EL(0, 1), 0) * Vec4(JPH_EL(2, 1), JPH_EL(2, 2), JPH_EL(2, 0), 0)
+			- Vec4(JPH_EL(0, 1), JPH_EL(0, 2), JPH_EL(0, 0), 0) * Vec4(JPH_EL(2, 2), JPH_EL(2, 0), JPH_EL(2, 1), 0),
+		Vec4(JPH_EL(0, 1), JPH_EL(0, 2), JPH_EL(0, 0), 0) * Vec4(JPH_EL(1, 2), JPH_EL(1, 0), JPH_EL(1, 1), 0)
+			- Vec4(JPH_EL(0, 2), JPH_EL(0, 0), JPH_EL(0, 1), 0) * Vec4(JPH_EL(1, 1), JPH_EL(1, 2), JPH_EL(1, 0), 0),
 		Vec4(0, 0, 0, 1));
-#endif
 }
 
 Mat44 Mat44::Inversed3x3() const
 {
-	// Adapted from Inversed() to remove 4th column
-	// Note: This can be optimized.
-
-	JPH_ASSERT(mCol[0][3] == 0.0f);
-	JPH_ASSERT(mCol[1][3] == 0.0f);
-	JPH_ASSERT(mCol[2][3] == 0.0f);
-
-#if defined(JPH_USE_SSE)
-	__m128 tmp1 = _mm_shuffle_ps(mCol[0].mValue, mCol[1].mValue, _MM_SHUFFLE(1, 0, 1, 0));
-	__m128 row1 = _mm_shuffle_ps(mCol[2].mValue, _mm_setzero_ps(), _MM_SHUFFLE(1, 0, 1, 0));
-	__m128 row0 = _mm_shuffle_ps(tmp1, row1, _MM_SHUFFLE(2, 0, 2, 0));
-	row1 = _mm_shuffle_ps(row1, tmp1, _MM_SHUFFLE(3, 1, 3, 1));
-	tmp1 = _mm_shuffle_ps(mCol[0].mValue, mCol[1].mValue, _MM_SHUFFLE(3, 2, 3, 2));
-	__m128 row3 = _mm_shuffle_ps(mCol[2].mValue, _mm_set_ps(1, 0, 0, 0), _MM_SHUFFLE(3, 2, 3, 2));
-	__m128 row2 = _mm_shuffle_ps(tmp1, row3, _MM_SHUFFLE(2, 0, 2, 0));
-	row3 = _mm_shuffle_ps(row3, tmp1, _MM_SHUFFLE(3, 1, 3, 1));
-
-	tmp1 = _mm_mul_ps(row2, row3);
-	tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(2, 3, 0, 1));
-	__m128 minor0 = _mm_mul_ps(row1, tmp1);
-	__m128 minor1 = _mm_mul_ps(row0, tmp1);
-	tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(1, 0, 3, 2));
-	minor0 = _mm_sub_ps(_mm_mul_ps(row1, tmp1), minor0);
-	minor1 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor1);
-	minor1 = _mm_shuffle_ps(minor1, minor1, _MM_SHUFFLE(1, 0, 3, 2));
-
-	tmp1 = _mm_mul_ps(row1, row2);
-	tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(2, 3, 0, 1));
-	minor0 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor0);
-	tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(1, 0, 3, 2));
-	minor0 = _mm_sub_ps(minor0, _mm_mul_ps(row3, tmp1));
-
-	tmp1 = _mm_mul_ps(_mm_shuffle_ps(row1, row1, _MM_SHUFFLE(1, 0, 3, 2)), row3);
-	tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(2, 3, 0, 1));
-	row2 = _mm_shuffle_ps(row2, row2, _MM_SHUFFLE(1, 0, 3, 2));
-	minor0 = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor0);
-	__m128 minor2 = _mm_mul_ps(row0, tmp1);
-	tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(1, 0, 3, 2));
-	minor0 = _mm_sub_ps(minor0, _mm_mul_ps(row2, tmp1));
-	minor2 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor2);
-	minor2 = _mm_shuffle_ps(minor2, minor2, _MM_SHUFFLE(1, 0, 3, 2));
-
-	tmp1 = _mm_mul_ps(row0, row1);
-	tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(2, 3, 0, 1));
-	minor2 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor2);
-	tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(1, 0, 3, 2));
-	minor2 = _mm_sub_ps(_mm_mul_ps(row3, tmp1), minor2);
-
-	tmp1 = _mm_mul_ps(row0, row3);
-	tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(2, 3, 0, 1));
-	minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row2, tmp1));
-	minor2 = _mm_add_ps(_mm_mul_ps(row1, tmp1), minor2);
-	tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(1, 0, 3, 2));
-	minor1 = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor1);
-	minor2 = _mm_sub_ps(minor2, _mm_mul_ps(row1, tmp1));
-
-	tmp1 = _mm_mul_ps(row0, row2);
-	tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(2, 3, 0, 1));
-	minor1 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor1);
-	tmp1 = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(1, 0, 3, 2));
-	minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row3, tmp1));
-
-	__m128 det = _mm_mul_ps(row0, minor0);
-	det = _mm_add_ps(_mm_shuffle_ps(det, det, _MM_SHUFFLE(2, 3, 0, 1)), det); // Original code did (x + z) + (y + w), changed to (x + y) + (z + w) to match the ARM code below and make the result cross platform deterministic
-	det = _mm_add_ss(_mm_shuffle_ps(det, det, _MM_SHUFFLE(1, 0, 3, 2)), det);
-	det = _mm_div_ss(_mm_set_ss(1.0f), det);
-	det = _mm_shuffle_ps(det, det, _MM_SHUFFLE(0, 0, 0, 0));
-
-	Mat44 result;
-	result.mCol[0].mValue = _mm_mul_ps(det, minor0);
-	result.mCol[1].mValue = _mm_mul_ps(det, minor1);
-	result.mCol[2].mValue = _mm_mul_ps(det, minor2);
-	result.mCol[3] = Vec4(0, 0, 0, 1);
-	return result;
-#elif defined(JPH_USE_NEON)
-	Type v0001 = vsetq_lane_f32(1, vdupq_n_f32(0), 3);
-	Type tmp1 = JPH_NEON_SHUFFLE_F32x4(mCol[0].mValue, mCol[1].mValue, 0, 1, 4, 5);
-	Type row1 = JPH_NEON_SHUFFLE_F32x4(mCol[2].mValue, v0001, 0, 1, 4, 5);
-	Type row0 = JPH_NEON_SHUFFLE_F32x4(tmp1, row1, 0, 2, 4, 6);
-	row1 = JPH_NEON_SHUFFLE_F32x4(row1, tmp1, 1, 3, 5, 7);
-	tmp1 = JPH_NEON_SHUFFLE_F32x4(mCol[0].mValue, mCol[1].mValue, 2, 3, 6, 7);
-	Type row3 = JPH_NEON_SHUFFLE_F32x4(mCol[2].mValue, v0001, 2, 3, 6, 7);
-	Type row2 = JPH_NEON_SHUFFLE_F32x4(tmp1, row3, 0, 2, 4, 6);
-	row3 = JPH_NEON_SHUFFLE_F32x4(row3, tmp1, 1, 3, 5, 7);
-
-	tmp1 = vmulq_f32(row2, row3);
-	tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 1, 0, 3, 2);
-	Type minor0 = vmulq_f32(row1, tmp1);
-	Type minor1 = vmulq_f32(row0, tmp1);
-	tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 2, 3, 0, 1);
-	minor0 = vsubq_f32(vmulq_f32(row1, tmp1), minor0);
-	minor1 = vsubq_f32(vmulq_f32(row0, tmp1), minor1);
-	minor1 = JPH_NEON_SHUFFLE_F32x4(minor1, minor1, 2, 3, 0, 1);
-
-	tmp1 = vmulq_f32(row1, row2);
-	tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 1, 0, 3, 2);
-	minor0 = vaddq_f32(vmulq_f32(row3, tmp1), minor0);
-	tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 2, 3, 0, 1);
-	minor0 = vsubq_f32(minor0, vmulq_f32(row3, tmp1));
-
-	tmp1 = JPH_NEON_SHUFFLE_F32x4(row1, row1, 2, 3, 0, 1);
-	tmp1 = vmulq_f32(tmp1, row3);
-	tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 1, 0, 3, 2);
-	row2 = JPH_NEON_SHUFFLE_F32x4(row2, row2, 2, 3, 0, 1);
-	minor0 = vaddq_f32(vmulq_f32(row2, tmp1), minor0);
-	Type minor2 = vmulq_f32(row0, tmp1);
-	tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 2, 3, 0, 1);
-	minor0 = vsubq_f32(minor0, vmulq_f32(row2, tmp1));
-	minor2 = vsubq_f32(vmulq_f32(row0, tmp1), minor2);
-	minor2 = JPH_NEON_SHUFFLE_F32x4(minor2, minor2, 2, 3, 0, 1);
-
-	tmp1 = vmulq_f32(row0, row1);
-	tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 1, 0, 3, 2);
-	minor2 = vaddq_f32(vmulq_f32(row3, tmp1), minor2);
-	tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 2, 3, 0, 1);
-	minor2 = vsubq_f32(vmulq_f32(row3, tmp1), minor2);
-
-	tmp1 = vmulq_f32(row0, row3);
-	tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 1, 0, 3, 2);
-	minor1 = vsubq_f32(minor1, vmulq_f32(row2, tmp1));
-	minor2 = vaddq_f32(vmulq_f32(row1, tmp1), minor2);
-	tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 2, 3, 0, 1);
-	minor1 = vaddq_f32(vmulq_f32(row2, tmp1), minor1);
-	minor2 = vsubq_f32(minor2, vmulq_f32(row1, tmp1));
-
-	tmp1 = vmulq_f32(row0, row2);
-	tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 1, 0, 3, 2);
-	minor1 = vaddq_f32(vmulq_f32(row3, tmp1), minor1);
-	tmp1 = JPH_NEON_SHUFFLE_F32x4(tmp1, tmp1, 2, 3, 0, 1);
-	minor1 = vsubq_f32(minor1, vmulq_f32(row3, tmp1));
-
-	Type det = vmulq_f32(row0, minor0);
-	det = vdupq_n_f32(vaddvq_f32(det));
-	det = vdivq_f32(vdupq_n_f32(1.0f), det);
-
-	Mat44 result;
-	result.mCol[0].mValue = vmulq_f32(det, minor0);
-	result.mCol[1].mValue = vmulq_f32(det, minor1);
-	result.mCol[2].mValue = vmulq_f32(det, minor2);
-	result.mCol[3].mValue = v0001;
-	return result;
-#else
 	float det = GetDeterminant3x3();
 
 	return Mat44(
-		Vec4((JPH_EL(1, 1) * JPH_EL(2, 2) - JPH_EL(1, 2) * JPH_EL(2, 1)) / det,
-			(JPH_EL(1, 2) * JPH_EL(2, 0) - JPH_EL(1, 0) * JPH_EL(2, 2)) / det,
-			(JPH_EL(1, 0) * JPH_EL(2, 1) - JPH_EL(1, 1) * JPH_EL(2, 0)) / det,
-			0),
-		Vec4((JPH_EL(0, 2) * JPH_EL(2, 1) - JPH_EL(0, 1) * JPH_EL(2, 2)) / det,
-			(JPH_EL(0, 0) * JPH_EL(2, 2) - JPH_EL(0, 2) * JPH_EL(2, 0)) / det,
-			(JPH_EL(0, 1) * JPH_EL(2, 0) - JPH_EL(0, 0) * JPH_EL(2, 1)) / det,
-			0),
-		Vec4((JPH_EL(0, 1) * JPH_EL(1, 2) - JPH_EL(0, 2) * JPH_EL(1, 1)) / det,
-			(JPH_EL(0, 2) * JPH_EL(1, 0) - JPH_EL(0, 0) * JPH_EL(1, 2)) / det,
-			(JPH_EL(0, 0) * JPH_EL(1, 1) - JPH_EL(0, 1) * JPH_EL(1, 0)) / det,
-			0),
+		(Vec4(JPH_EL(1, 1), JPH_EL(1, 2), JPH_EL(1, 0), 0) * Vec4(JPH_EL(2, 2), JPH_EL(2, 0), JPH_EL(2, 1), 0)
+			- Vec4(JPH_EL(1, 2), JPH_EL(1, 0), JPH_EL(1, 1), 0) * Vec4(JPH_EL(2, 1), JPH_EL(2, 2), JPH_EL(2, 0), 0)) / det,
+		(Vec4(JPH_EL(0, 2), JPH_EL(0, 0), JPH_EL(0, 1), 0) * Vec4(JPH_EL(2, 1), JPH_EL(2, 2), JPH_EL(2, 0), 0)
+			- Vec4(JPH_EL(0, 1), JPH_EL(0, 2), JPH_EL(0, 0), 0) * Vec4(JPH_EL(2, 2), JPH_EL(2, 0), JPH_EL(2, 1), 0)) / det,
+		(Vec4(JPH_EL(0, 1), JPH_EL(0, 2), JPH_EL(0, 0), 0) * Vec4(JPH_EL(1, 2), JPH_EL(1, 0), JPH_EL(1, 1), 0)
+			- Vec4(JPH_EL(0, 2), JPH_EL(0, 0), JPH_EL(0, 1), 0) * Vec4(JPH_EL(1, 1), JPH_EL(1, 2), JPH_EL(1, 0), 0)) / det,
 		Vec4(0, 0, 0, 1));
-#endif
 }
 
 Quat Mat44::GetQuaternion() const