2
0

fix-build-no-ssse3.patch 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081
  1. From 02c22d3df501dc284ba732fa82a6c408c57b3237 Mon Sep 17 00:00:00 2001
  2. From: =?UTF-8?q?R=C3=A9mi=20Verschelde?= <[email protected]>
  3. Date: Thu, 19 Jan 2023 23:30:13 +0100
  4. Subject: [PATCH] mathlib: Remove incomplete support for SSE3 which assumed
  5. SSSE3
  6. `_mm_shuffle_epi8` requires SSSE3, so the check on `ASTCENC_SSE >= 30` is
  7. too lax: the build would fail if `__SSE3__` is supported but `__SSSE3__` is not.
  8. The only supported configurations are SSE2, SSE4.1, and AVX2, so as
  9. discussed in #393 we drop the SSE3 checks and require SSE4.1 instead.
  10. ---
  11. Source/astcenc_mathlib.h | 2 --
  12. Source/astcenc_vecmathlib_sse_4.h | 10 +++++-----
  13. 2 files changed, 5 insertions(+), 7 deletions(-)
  14. diff --git a/Source/astcenc_mathlib.h b/Source/astcenc_mathlib.h
  15. index 67e989e..0540c4f 100644
  16. --- a/Source/astcenc_mathlib.h
  17. +++ b/Source/astcenc_mathlib.h
  18. @@ -48,8 +48,6 @@
  19. #define ASTCENC_SSE 42
  20. #elif defined(__SSE4_1__)
  21. #define ASTCENC_SSE 41
  22. - #elif defined(__SSE3__)
  23. - #define ASTCENC_SSE 30
  24. #elif defined(__SSE2__)
  25. #define ASTCENC_SSE 20
  26. #else
  27. diff --git a/Source/astcenc_vecmathlib_sse_4.h b/Source/astcenc_vecmathlib_sse_4.h
  28. index 76fe577..26dcc4a 100644
  29. --- a/Source/astcenc_vecmathlib_sse_4.h
  30. +++ b/Source/astcenc_vecmathlib_sse_4.h
  31. @@ -1046,7 +1046,7 @@ ASTCENC_SIMD_INLINE void vtable_prepare(vint4 t0, vint4& t0p)
  32. */
  33. ASTCENC_SIMD_INLINE void vtable_prepare(vint4 t0, vint4 t1, vint4& t0p, vint4& t1p)
  34. {
  35. -#if ASTCENC_SSE >= 30
  36. +#if ASTCENC_SSE >= 41
  37. t0p = t0;
  38. t1p = t0 ^ t1;
  39. #else
  40. @@ -1062,7 +1062,7 @@ ASTCENC_SIMD_INLINE void vtable_prepare(
  41. vint4 t0, vint4 t1, vint4 t2, vint4 t3,
  42. vint4& t0p, vint4& t1p, vint4& t2p, vint4& t3p)
  43. {
  44. -#if ASTCENC_SSE >= 30
  45. +#if ASTCENC_SSE >= 41
  46. t0p = t0;
  47. t1p = t0 ^ t1;
  48. t2p = t1 ^ t2;
  49. @@ -1080,7 +1080,7 @@ ASTCENC_SIMD_INLINE void vtable_prepare(
  50. */
  51. ASTCENC_SIMD_INLINE vint4 vtable_8bt_32bi(vint4 t0, vint4 idx)
  52. {
  53. -#if ASTCENC_SSE >= 30
  54. +#if ASTCENC_SSE >= 41
  55. // Set index byte MSB to 1 for unused bytes so shuffle returns zero
  56. __m128i idxx = _mm_or_si128(idx.m, _mm_set1_epi32(static_cast<int>(0xFFFFFF00)));
  57. @@ -1102,7 +1102,7 @@ ASTCENC_SIMD_INLINE vint4 vtable_8bt_32bi(vint4 t0, vint4 idx)
  58. */
  59. ASTCENC_SIMD_INLINE vint4 vtable_8bt_32bi(vint4 t0, vint4 t1, vint4 idx)
  60. {
  61. -#if ASTCENC_SSE >= 30
  62. +#if ASTCENC_SSE >= 41
  63. // Set index byte MSB to 1 for unused bytes so shuffle returns zero
  64. __m128i idxx = _mm_or_si128(idx.m, _mm_set1_epi32(static_cast<int>(0xFFFFFF00)));
  65. @@ -1130,7 +1130,7 @@ ASTCENC_SIMD_INLINE vint4 vtable_8bt_32bi(vint4 t0, vint4 t1, vint4 idx)
  66. */
  67. ASTCENC_SIMD_INLINE vint4 vtable_8bt_32bi(vint4 t0, vint4 t1, vint4 t2, vint4 t3, vint4 idx)
  68. {
  69. -#if ASTCENC_SSE >= 30
  70. +#if ASTCENC_SSE >= 41
  71. // Set index byte MSB to 1 for unused bytes so shuffle returns zero
  72. __m128i idxx = _mm_or_si128(idx.m, _mm_set1_epi32(static_cast<int>(0xFFFFFF00)));
  73. --
  74. 2.39.1