|
@@ -54,7 +54,7 @@ namespace detail
|
|
|
GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
|
|
GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
|
|
|
{
|
|
{
|
|
|
vec<4, float, Q> Result;
|
|
vec<4, float, Q> Result;
|
|
|
- Result.data = _mm_add_ps((glm_f32vec4)a.data, (glm_f32vec4)b.data);
|
|
|
|
|
|
|
+ Result.data = _mm_add_ps(a.data, b.data);
|
|
|
return Result;
|
|
return Result;
|
|
|
}
|
|
}
|
|
|
};
|
|
};
|
|
@@ -78,7 +78,7 @@ namespace detail
|
|
|
GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
|
|
GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
|
|
|
{
|
|
{
|
|
|
vec<4, float, Q> Result;
|
|
vec<4, float, Q> Result;
|
|
|
- Result.data = _mm_sub_ps((glm_f32vec4)a.data, (glm_f32vec4)b.data);
|
|
|
|
|
|
|
+ Result.data = _mm_sub_ps(a.data, b.data);
|
|
|
return Result;
|
|
return Result;
|
|
|
}
|
|
}
|
|
|
};
|
|
};
|
|
@@ -90,7 +90,7 @@ namespace detail
|
|
|
GLM_FUNC_QUALIFIER static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
|
|
GLM_FUNC_QUALIFIER static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
|
|
|
{
|
|
{
|
|
|
vec<4, double, Q> Result;
|
|
vec<4, double, Q> Result;
|
|
|
- Result.data = _mm256_sub_pd((glm_f64vec4)a.data, (glm_f64vec4)b.data);
|
|
|
|
|
|
|
+ Result.data = _mm256_sub_pd(a.data, b.data);
|
|
|
return Result;
|
|
return Result;
|
|
|
}
|
|
}
|
|
|
};
|
|
};
|
|
@@ -102,7 +102,7 @@ namespace detail
|
|
|
GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
|
|
GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
|
|
|
{
|
|
{
|
|
|
vec<4, float, Q> Result;
|
|
vec<4, float, Q> Result;
|
|
|
- Result.data = _mm_mul_ps((glm_f32vec4)a.data, (glm_f32vec4)b.data);
|
|
|
|
|
|
|
+ Result.data = _mm_mul_ps(a.data, b.data);
|
|
|
return Result;
|
|
return Result;
|
|
|
}
|
|
}
|
|
|
};
|
|
};
|
|
@@ -114,7 +114,7 @@ namespace detail
|
|
|
GLM_FUNC_QUALIFIER static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
|
|
GLM_FUNC_QUALIFIER static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
|
|
|
{
|
|
{
|
|
|
vec<4, double, Q> Result;
|
|
vec<4, double, Q> Result;
|
|
|
- Result.data = _mm256_mul_pd((glm_f64vec4)a.data, (glm_f64vec4)b.data);
|
|
|
|
|
|
|
+ Result.data = _mm256_mul_pd(a.data, b.data);
|
|
|
return Result;
|
|
return Result;
|
|
|
}
|
|
}
|
|
|
};
|
|
};
|
|
@@ -126,19 +126,19 @@ namespace detail
|
|
|
GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
|
|
GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
|
|
|
{
|
|
{
|
|
|
vec<4, float, Q> Result;
|
|
vec<4, float, Q> Result;
|
|
|
- Result.data = _mm_div_ps((glm_f32vec4)a.data, (glm_f32vec4)b.data);
|
|
|
|
|
|
|
+ Result.data = _mm_div_ps(a.data, b.data);
|
|
|
return Result;
|
|
return Result;
|
|
|
}
|
|
}
|
|
|
};
|
|
};
|
|
|
|
|
|
|
|
- # if GLM_ARCH & GLM_ARCH_AVX_BIT
|
|
|
|
|
|
|
+# if GLM_ARCH & GLM_ARCH_AVX_BIT
|
|
|
template<qualifier Q>
|
|
template<qualifier Q>
|
|
|
struct compute_vec4_div<double, Q, true>
|
|
struct compute_vec4_div<double, Q, true>
|
|
|
{
|
|
{
|
|
|
GLM_FUNC_QUALIFIER static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
|
|
GLM_FUNC_QUALIFIER static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
|
|
|
{
|
|
{
|
|
|
vec<4, double, Q> Result;
|
|
vec<4, double, Q> Result;
|
|
|
- Result.data = _mm256_div_pd((glm_f64vec4)a.data, (glm_f64vec4)b.data);
|
|
|
|
|
|
|
+ Result.data = _mm256_div_pd(a.data, b.data);
|
|
|
return Result;
|
|
return Result;
|
|
|
}
|
|
}
|
|
|
};
|
|
};
|
|
@@ -150,150 +150,282 @@ namespace detail
|
|
|
GLM_FUNC_QUALIFIER static vec<4, float, aligned_lowp> call(vec<4, float, aligned_lowp> const& a, vec<4, float, aligned_lowp> const& b)
|
|
GLM_FUNC_QUALIFIER static vec<4, float, aligned_lowp> call(vec<4, float, aligned_lowp> const& a, vec<4, float, aligned_lowp> const& b)
|
|
|
{
|
|
{
|
|
|
vec<4, float, aligned_lowp> Result;
|
|
vec<4, float, aligned_lowp> Result;
|
|
|
- Result.data = _mm_mul_ps((glm_f32vec4)a.data, _mm_rcp_ps(b.data));
|
|
|
|
|
|
|
+ Result.data = _mm_mul_ps(a.data, _mm_rcp_ps(b.data));
|
|
|
|
|
+ return Result;
|
|
|
|
|
+ }
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
|
|
+ template<qualifier Q>
|
|
|
|
|
+ struct compute_vec4_and<int, Q, true, 32, true>
|
|
|
|
|
+ {
|
|
|
|
|
+ GLM_FUNC_QUALIFIER static vec<4, int, Q> call(vec<4, int, Q> const& a, vec<4, int, Q> const& b)
|
|
|
|
|
+ {
|
|
|
|
|
+ vec<4, int, Q> Result;
|
|
|
|
|
+ Result.data = _mm_and_si128(a.data, b.data);
|
|
|
return Result;
|
|
return Result;
|
|
|
}
|
|
}
|
|
|
};
|
|
};
|
|
|
|
|
|
|
|
- template<typename T, qualifier Q>
|
|
|
|
|
- struct compute_vec4_and<T, Q, true, 32, true>
|
|
|
|
|
|
|
+ template<qualifier Q>
|
|
|
|
|
+ struct compute_vec4_and<uint, Q, true, 32, true>
|
|
|
{
|
|
{
|
|
|
- GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
|
|
|
|
|
|
+ GLM_FUNC_QUALIFIER static vec<4, uint, Q> call(vec<4, uint, Q> const& a, vec<4, int, Q> const& b)
|
|
|
{
|
|
{
|
|
|
- vec<4, T, Q> Result;
|
|
|
|
|
- Result.data = _mm_and_si128((glm_f32vec4)a.data, (glm_f32vec4)b.data);
|
|
|
|
|
|
|
+ vec<4, uint, Q> Result;
|
|
|
|
|
+ Result.data = _mm_and_si128(a.data, b.data);
|
|
|
return Result;
|
|
return Result;
|
|
|
}
|
|
}
|
|
|
};
|
|
};
|
|
|
|
|
|
|
|
# if GLM_ARCH & GLM_ARCH_AVX2_BIT
|
|
# if GLM_ARCH & GLM_ARCH_AVX2_BIT
|
|
|
- template<typename T, qualifier Q>
|
|
|
|
|
- struct compute_vec4_and<T, Q, true, 64, true>
|
|
|
|
|
|
|
+ template<qualifier Q>
|
|
|
|
|
+ struct compute_vec4_and<int64, Q, true, 64, true>
|
|
|
|
|
+ {
|
|
|
|
|
+ GLM_FUNC_QUALIFIER static vec<4, int64, Q> call(vec<4, int64, Q> const& a, vec<4, int64, Q> const& b)
|
|
|
|
|
+ {
|
|
|
|
|
+ vec<4, int64, Q> Result;
|
|
|
|
|
+ Result.data = _mm256_and_si256(a.data, b.data);
|
|
|
|
|
+ return Result;
|
|
|
|
|
+ }
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
|
|
+ template<qualifier Q>
|
|
|
|
|
+ struct compute_vec4_and<uint64, Q, true, 64, true>
|
|
|
{
|
|
{
|
|
|
- GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
|
|
|
|
|
|
+ GLM_FUNC_QUALIFIER static vec<4, uint64, Q> call(vec<4, uint64, Q> const& a, vec<4, uint64, Q> const& b)
|
|
|
{
|
|
{
|
|
|
- vec<4, T, Q> Result;
|
|
|
|
|
- Result.data = _mm256_and_si256((glm_f32vec4)a.data, (glm_f32vec4)b.data);
|
|
|
|
|
|
|
+ vec<4, uint64, Q> Result;
|
|
|
|
|
+ Result.data = _mm256_and_si256(a.data, b.data);
|
|
|
return Result;
|
|
return Result;
|
|
|
}
|
|
}
|
|
|
};
|
|
};
|
|
|
# endif
|
|
# endif
|
|
|
|
|
|
|
|
- template<typename T, qualifier Q>
|
|
|
|
|
- struct compute_vec4_or<T, Q, true, 32, true>
|
|
|
|
|
|
|
+ template<qualifier Q>
|
|
|
|
|
+ struct compute_vec4_or<int, Q, true, 32, true>
|
|
|
{
|
|
{
|
|
|
- GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
|
|
|
|
|
|
+ GLM_FUNC_QUALIFIER static vec<4, int, Q> call(vec<4, int, Q> const& a, vec<4, int, Q> const& b)
|
|
|
{
|
|
{
|
|
|
- vec<4, T, Q> Result;
|
|
|
|
|
- Result.data = _mm_or_si128((glm_f32vec4)a.data, (glm_f32vec4)b.data);
|
|
|
|
|
|
|
+ vec<4, int, Q> Result;
|
|
|
|
|
+ Result.data = _mm_or_si128(a.data, b.data);
|
|
|
|
|
+ return Result;
|
|
|
|
|
+ }
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
|
|
+ template<qualifier Q>
|
|
|
|
|
+ struct compute_vec4_or<uint, Q, true, 32, true>
|
|
|
|
|
+ {
|
|
|
|
|
+ GLM_FUNC_QUALIFIER static vec<4, uint, Q> call(vec<4, uint, Q> const& a, vec<4, uint, Q> const& b)
|
|
|
|
|
+ {
|
|
|
|
|
+ vec<4, uint, Q> Result;
|
|
|
|
|
+ Result.data = _mm_or_si128(a.data, b.data);
|
|
|
return Result;
|
|
return Result;
|
|
|
}
|
|
}
|
|
|
};
|
|
};
|
|
|
|
|
|
|
|
# if GLM_ARCH & GLM_ARCH_AVX2_BIT
|
|
# if GLM_ARCH & GLM_ARCH_AVX2_BIT
|
|
|
- template<typename T, qualifier Q>
|
|
|
|
|
- struct compute_vec4_or<T, Q, true, 64, true>
|
|
|
|
|
|
|
+ template<qualifier Q>
|
|
|
|
|
+ struct compute_vec4_or<int64, Q, true, 64, true>
|
|
|
|
|
+ {
|
|
|
|
|
+ GLM_FUNC_QUALIFIER static vec<4, int64, Q> call(vec<4, int64, Q> const& a, vec<4, int64, Q> const& b)
|
|
|
|
|
+ {
|
|
|
|
|
+ vec<4, int64, Q> Result;
|
|
|
|
|
+ Result.data = _mm256_or_si256(a.data, b.data);
|
|
|
|
|
+ return Result;
|
|
|
|
|
+ }
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
|
|
+ template<qualifier Q>
|
|
|
|
|
+ struct compute_vec4_or<uint64, Q, true, 64, true>
|
|
|
{
|
|
{
|
|
|
- GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
|
|
|
|
|
|
+ GLM_FUNC_QUALIFIER static vec<4, uint64, Q> call(vec<4, uint64, Q> const& a, vec<4, uint64, Q> const& b)
|
|
|
{
|
|
{
|
|
|
- vec<4, T, Q> Result;
|
|
|
|
|
- Result.data = _mm256_or_si256((glm_f32vec4)a.data, (glm_f32vec4)b.data);
|
|
|
|
|
|
|
+ vec<4, uint64, Q> Result;
|
|
|
|
|
+ Result.data = _mm256_or_si256(a.data, b.data);
|
|
|
return Result;
|
|
return Result;
|
|
|
}
|
|
}
|
|
|
};
|
|
};
|
|
|
# endif
|
|
# endif
|
|
|
|
|
|
|
|
- template<typename T, qualifier Q>
|
|
|
|
|
- struct compute_vec4_xor<T, Q, true, 32, true>
|
|
|
|
|
|
|
+ template<qualifier Q>
|
|
|
|
|
+ struct compute_vec4_xor<int, Q, true, 32, true>
|
|
|
{
|
|
{
|
|
|
- GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
|
|
|
|
|
|
+ GLM_FUNC_QUALIFIER static vec<4, int, Q> call(vec<4, int, Q> const& a, vec<4, int, Q> const& b)
|
|
|
{
|
|
{
|
|
|
- vec<4, T, Q> Result;
|
|
|
|
|
- Result.data = _mm_xor_si128((glm_f32vec4)a.data, (glm_f32vec4)b.data);
|
|
|
|
|
|
|
+ vec<4, int, Q> Result;
|
|
|
|
|
+ Result.data = _mm_xor_si128(a.data, b.data);
|
|
|
|
|
+ return Result;
|
|
|
|
|
+ }
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
|
|
+ template<qualifier Q>
|
|
|
|
|
+ struct compute_vec4_xor<uint, Q, true, 32, true>
|
|
|
|
|
+ {
|
|
|
|
|
+ GLM_FUNC_QUALIFIER static vec<4, uint, Q> call(vec<4, uint, Q> const& a, vec<4, uint, Q> const& b)
|
|
|
|
|
+ {
|
|
|
|
|
+ vec<4, uint, Q> Result;
|
|
|
|
|
+ Result.data = _mm_xor_si128(a.data, b.data);
|
|
|
return Result;
|
|
return Result;
|
|
|
}
|
|
}
|
|
|
};
|
|
};
|
|
|
|
|
|
|
|
# if GLM_ARCH & GLM_ARCH_AVX2_BIT
|
|
# if GLM_ARCH & GLM_ARCH_AVX2_BIT
|
|
|
- template<typename T, qualifier Q>
|
|
|
|
|
- struct compute_vec4_xor<T, Q, true, 64, true>
|
|
|
|
|
|
|
+ template<qualifier Q>
|
|
|
|
|
+ struct compute_vec4_xor<int64, Q, true, 64, true>
|
|
|
|
|
+ {
|
|
|
|
|
+ GLM_FUNC_QUALIFIER static vec<4, int64, Q> call(vec<4, int64, Q> const& a, vec<4, int64, Q> const& b)
|
|
|
|
|
+ {
|
|
|
|
|
+ vec<4, int64, Q> Result;
|
|
|
|
|
+ Result.data = _mm256_xor_si256(a.data, b.data);
|
|
|
|
|
+ return Result;
|
|
|
|
|
+ }
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
|
|
+ template<qualifier Q>
|
|
|
|
|
+ struct compute_vec4_xor<uint64, Q, true, 64, true>
|
|
|
{
|
|
{
|
|
|
- GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
|
|
|
|
|
|
+ GLM_FUNC_QUALIFIER static vec<4, uint64, Q> call(vec<4, uint64, Q> const& a, vec<4, uint64, Q> const& b)
|
|
|
{
|
|
{
|
|
|
- vec<4, T, Q> Result;
|
|
|
|
|
- Result.data = _mm256_xor_si256((glm_f32vec4)a.data, (glm_f32vec4)b.data);
|
|
|
|
|
|
|
+ vec<4, uint64, Q> Result;
|
|
|
|
|
+ Result.data = _mm256_xor_si256(a.data, b.data);
|
|
|
return Result;
|
|
return Result;
|
|
|
}
|
|
}
|
|
|
};
|
|
};
|
|
|
# endif
|
|
# endif
|
|
|
|
|
|
|
|
- template<typename T, qualifier Q>
|
|
|
|
|
- struct compute_vec4_shift_left<T, Q, true, 32, true>
|
|
|
|
|
|
|
+ template<qualifier Q>
|
|
|
|
|
+ struct compute_vec4_shift_left<int, Q, true, 32, true>
|
|
|
|
|
+ {
|
|
|
|
|
+ GLM_FUNC_QUALIFIER static vec<4, int, Q> call(vec<4, int, Q> const& a, vec<4, int, Q> const& b)
|
|
|
|
|
+ {
|
|
|
|
|
+ vec<4, int, Q> Result;
|
|
|
|
|
+ Result.data = _mm_sll_epi32(a.data, b.data);
|
|
|
|
|
+ return Result;
|
|
|
|
|
+ }
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
|
|
+ template<qualifier Q>
|
|
|
|
|
+ struct compute_vec4_shift_left<uint, Q, true, 32, true>
|
|
|
{
|
|
{
|
|
|
- GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
|
|
|
|
|
|
+ GLM_FUNC_QUALIFIER static vec<4, uint, Q> call(vec<4, uint, Q> const& a, vec<4, uint, Q> const& b)
|
|
|
{
|
|
{
|
|
|
- vec<4, T, Q> Result;
|
|
|
|
|
- Result.data = _mm_sll_epi32((glm_f32vec4)a.data, (glm_f32vec4)b.data);
|
|
|
|
|
|
|
+ vec<4, uint, Q> Result;
|
|
|
|
|
+ Result.data = _mm_sll_epi32(a.data, b.data);
|
|
|
return Result;
|
|
return Result;
|
|
|
}
|
|
}
|
|
|
};
|
|
};
|
|
|
|
|
|
|
|
# if GLM_ARCH & GLM_ARCH_AVX2_BIT
|
|
# if GLM_ARCH & GLM_ARCH_AVX2_BIT
|
|
|
- template<typename T, qualifier Q>
|
|
|
|
|
- struct compute_vec4_shift_left<T, Q, true, 64, true>
|
|
|
|
|
|
|
+ template<qualifier Q>
|
|
|
|
|
+ struct compute_vec4_shift_left<int64, Q, true, 64, true>
|
|
|
{
|
|
{
|
|
|
- GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
|
|
|
|
|
|
+ GLM_FUNC_QUALIFIER static vec<4, int64, Q> call(vec<4, int64, Q> const& a, vec<4, int64, Q> const& b)
|
|
|
{
|
|
{
|
|
|
- vec<4, T, Q> Result;
|
|
|
|
|
- Result.data = _mm256_sll_epi64((glm_f32vec4)a.data, (glm_f32vec4)b.data);
|
|
|
|
|
|
|
+ vec<4, int64, Q> Result;
|
|
|
|
|
+ Result.data = _mm256_sll_epi64(a.data, b.data);
|
|
|
|
|
+ return Result;
|
|
|
|
|
+ }
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
|
|
+ template<qualifier Q>
|
|
|
|
|
+ struct compute_vec4_shift_left<uint64, Q, true, 64, true>
|
|
|
|
|
+ {
|
|
|
|
|
+ GLM_FUNC_QUALIFIER static vec<4, uint64, Q> call(vec<4, uint64, Q> const& a, vec<4, uint64, Q> const& b)
|
|
|
|
|
+ {
|
|
|
|
|
+ vec<4, uint64, Q> Result;
|
|
|
|
|
+ Result.data = _mm256_sll_epi64(a.data, b.data);
|
|
|
return Result;
|
|
return Result;
|
|
|
}
|
|
}
|
|
|
};
|
|
};
|
|
|
# endif
|
|
# endif
|
|
|
|
|
|
|
|
- template<typename T, qualifier Q>
|
|
|
|
|
- struct compute_vec4_shift_right<T, Q, true, 32, true>
|
|
|
|
|
|
|
+ template<qualifier Q>
|
|
|
|
|
+ struct compute_vec4_shift_right<int, Q, true, 32, true>
|
|
|
|
|
+ {
|
|
|
|
|
+ GLM_FUNC_QUALIFIER static vec<4, int, Q> call(vec<4, int, Q> const& a, vec<4, int, Q> const& b)
|
|
|
|
|
+ {
|
|
|
|
|
+ vec<4, int, Q> Result;
|
|
|
|
|
+ Result.data = _mm_srl_epi32(a.data, b.data);
|
|
|
|
|
+ return Result;
|
|
|
|
|
+ }
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
|
|
+ template<qualifier Q>
|
|
|
|
|
+ struct compute_vec4_shift_right<uint, Q, true, 32, true>
|
|
|
{
|
|
{
|
|
|
- GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
|
|
|
|
|
|
+ GLM_FUNC_QUALIFIER static vec<4, uint, Q> call(vec<4, uint, Q> const& a, vec<4, uint, Q> const& b)
|
|
|
{
|
|
{
|
|
|
- vec<4, T, Q> Result;
|
|
|
|
|
- Result.data = _mm_srl_epi32((glm_f32vec4)a.data, (glm_f32vec4)b.data);
|
|
|
|
|
|
|
+ vec<4, uint, Q> Result;
|
|
|
|
|
+ Result.data = _mm_srl_epi32(a.data, b.data);
|
|
|
return Result;
|
|
return Result;
|
|
|
}
|
|
}
|
|
|
};
|
|
};
|
|
|
|
|
|
|
|
# if GLM_ARCH & GLM_ARCH_AVX2_BIT
|
|
# if GLM_ARCH & GLM_ARCH_AVX2_BIT
|
|
|
- template<typename T, qualifier Q>
|
|
|
|
|
- struct compute_vec4_shift_right<T, Q, true, 64, true>
|
|
|
|
|
|
|
+ template<qualifier Q>
|
|
|
|
|
+ struct compute_vec4_shift_right<int64, Q, true, 64, true>
|
|
|
|
|
+ {
|
|
|
|
|
+ GLM_FUNC_QUALIFIER static vec<4, int64, Q> call(vec<4, int64, Q> const& a, vec<4, int64, Q> const& b)
|
|
|
|
|
+ {
|
|
|
|
|
+ vec<4, int64, Q> Result;
|
|
|
|
|
+ Result.data = _mm256_srl_epi64(a.data, b.data);
|
|
|
|
|
+ return Result;
|
|
|
|
|
+ }
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
|
|
+ template<qualifier Q>
|
|
|
|
|
+ struct compute_vec4_shift_right<uint64, Q, true, 64, true>
|
|
|
{
|
|
{
|
|
|
- GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
|
|
|
|
|
|
+ GLM_FUNC_QUALIFIER static vec<4, uint64, Q> call(vec<4, uint64, Q> const& a, vec<4, uint64, Q> const& b)
|
|
|
{
|
|
{
|
|
|
- vec<4, T, Q> Result;
|
|
|
|
|
- Result.data = _mm256_srl_epi64((glm_f32vec4)a.data, (glm_f32vec4)b.data);
|
|
|
|
|
|
|
+ vec<4, uint64, Q> Result;
|
|
|
|
|
+ Result.data = _mm256_srl_epi64(a.data, b.data);
|
|
|
return Result;
|
|
return Result;
|
|
|
}
|
|
}
|
|
|
};
|
|
};
|
|
|
# endif
|
|
# endif
|
|
|
|
|
|
|
|
- template<typename T, qualifier Q>
|
|
|
|
|
- struct compute_vec4_bitwise_not<T, Q, true, 32, true>
|
|
|
|
|
|
|
+ template<qualifier Q>
|
|
|
|
|
+ struct compute_vec4_bitwise_not<int, Q, true, 32, true>
|
|
|
{
|
|
{
|
|
|
- GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& v)
|
|
|
|
|
|
|
+ GLM_FUNC_QUALIFIER static vec<4, int, Q> call(vec<4, int, Q> const& v)
|
|
|
{
|
|
{
|
|
|
- vec<4, T, Q> Result;
|
|
|
|
|
- Result.data = _mm_xor_si128((glm_f32vec4)v.data, _mm_set1_epi32(-1));
|
|
|
|
|
|
|
+ vec<4, int, Q> Result;
|
|
|
|
|
+ Result.data = _mm_xor_si128(v.data, _mm_set1_epi32(-1));
|
|
|
|
|
+ return Result;
|
|
|
|
|
+ }
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
|
|
+ template<qualifier Q>
|
|
|
|
|
+ struct compute_vec4_bitwise_not<uint, Q, true, 32, true>
|
|
|
|
|
+ {
|
|
|
|
|
+ GLM_FUNC_QUALIFIER static vec<4, uint, Q> call(vec<4, uint, Q> const& v)
|
|
|
|
|
+ {
|
|
|
|
|
+ vec<4, uint, Q> Result;
|
|
|
|
|
+ Result.data = _mm_xor_si128(v.data, _mm_set1_epi32(-1));
|
|
|
return Result;
|
|
return Result;
|
|
|
}
|
|
}
|
|
|
};
|
|
};
|
|
|
|
|
|
|
|
# if GLM_ARCH & GLM_ARCH_AVX2_BIT
|
|
# if GLM_ARCH & GLM_ARCH_AVX2_BIT
|
|
|
- template<typename T, qualifier Q>
|
|
|
|
|
- struct compute_vec4_bitwise_not<T, Q, true, 64, true>
|
|
|
|
|
|
|
+ template<qualifier Q>
|
|
|
|
|
+ struct compute_vec4_bitwise_not<int64, Q, true, 64, true>
|
|
|
|
|
+ {
|
|
|
|
|
+ GLM_FUNC_QUALIFIER static vec<4, int64, Q> call(vec<4, int64, Q> const& v)
|
|
|
|
|
+ {
|
|
|
|
|
+ vec<4, int64, Q> Result;
|
|
|
|
|
+ Result.data = _mm256_xor_si256(v.data, _mm_set1_epi32(-1));
|
|
|
|
|
+ return Result;
|
|
|
|
|
+ }
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
|
|
+ template<qualifier Q>
|
|
|
|
|
+ struct compute_vec4_bitwise_not<uint64, Q, true, 64, true>
|
|
|
{
|
|
{
|
|
|
- GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& v)
|
|
|
|
|
|
|
+ GLM_FUNC_QUALIFIER static vec<4, uint64, Q> call(vec<4, uint64, Q> const& v)
|
|
|
{
|
|
{
|
|
|
- vec<4, T, Q> Result;
|
|
|
|
|
- Result.data = _mm256_xor_si256((glm_f32vec4)v.data, _mm_set1_epi32(-1));
|
|
|
|
|
|
|
+ vec<4, uint64, Q> Result;
|
|
|
|
|
+ Result.data = _mm256_xor_si256(v.data, _mm_set1_epi32(-1));
|
|
|
return Result;
|
|
return Result;
|
|
|
}
|
|
}
|
|
|
};
|
|
};
|
|
@@ -304,7 +436,7 @@ namespace detail
|
|
|
{
|
|
{
|
|
|
GLM_FUNC_QUALIFIER static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
|
|
GLM_FUNC_QUALIFIER static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
|
|
|
{
|
|
{
|
|
|
- return _mm_movemask_ps(_mm_cmpneq_ps((glm_f32vec4)v1.data, (glm_f32vec4)v2.data)) == 0;
|
|
|
|
|
|
|
+ return _mm_movemask_ps(_mm_cmpneq_ps(v1.data, v2.data)) == 0;
|
|
|
}
|
|
}
|
|
|
};
|
|
};
|
|
|
|
|
|
|
@@ -315,7 +447,7 @@ namespace detail
|
|
|
GLM_FUNC_QUALIFIER static bool call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2)
|
|
GLM_FUNC_QUALIFIER static bool call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2)
|
|
|
{
|
|
{
|
|
|
//return _mm_movemask_epi8(_mm_cmpeq_epi32(v1.data, v2.data)) != 0;
|
|
//return _mm_movemask_epi8(_mm_cmpeq_epi32(v1.data, v2.data)) != 0;
|
|
|
- __m128i neq = _mm_xor_si128((glm_f32vec4)v1.data, (glm_f32vec4)v2.data);
|
|
|
|
|
|
|
+ __m128i neq = _mm_xor_si128(v1.data, v2.data);
|
|
|
return _mm_test_all_zeros(neq, neq) == 0;
|
|
return _mm_test_all_zeros(neq, neq) == 0;
|
|
|
}
|
|
}
|
|
|
};
|
|
};
|
|
@@ -326,7 +458,7 @@ namespace detail
|
|
|
{
|
|
{
|
|
|
GLM_FUNC_QUALIFIER static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
|
|
GLM_FUNC_QUALIFIER static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
|
|
|
{
|
|
{
|
|
|
- return _mm_movemask_ps(_mm_cmpneq_ps((glm_f32vec4)v1.data, (glm_f32vec4)v2.data)) != 0;
|
|
|
|
|
|
|
+ return _mm_movemask_ps(_mm_cmpneq_ps(v1.data, v2.data)) != 0;
|
|
|
}
|
|
}
|
|
|
};
|
|
};
|
|
|
|
|
|
|
@@ -337,7 +469,7 @@ namespace detail
|
|
|
GLM_FUNC_QUALIFIER static bool call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2)
|
|
GLM_FUNC_QUALIFIER static bool call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2)
|
|
|
{
|
|
{
|
|
|
//return _mm_movemask_epi8(_mm_cmpneq_epi32(v1.data, v2.data)) != 0;
|
|
//return _mm_movemask_epi8(_mm_cmpneq_epi32(v1.data, v2.data)) != 0;
|
|
|
- __m128i neq = _mm_xor_si128((glm_f32vec4)v1.data, (glm_f32vec4)v2.data);
|
|
|
|
|
|
|
+ __m128i neq = _mm_xor_si128(v1.data, v2.data);
|
|
|
return _mm_test_all_zeros(neq, neq) != 0;
|
|
return _mm_test_all_zeros(neq, neq) != 0;
|
|
|
}
|
|
}
|
|
|
};
|
|
};
|