basisu_kernels_sse.cpp 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. // basisu_kernels_sse.cpp
  2. // Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
  3. //
  4. // Licensed under the Apache License, Version 2.0 (the "License");
  5. // you may not use this file except in compliance with the License.
  6. // You may obtain a copy of the License at
  7. //
  8. // http://www.apache.org/licenses/LICENSE-2.0
  9. //
  10. // Unless required by applicable law or agreed to in writing, software
  11. // distributed under the License is distributed on an "AS IS" BASIS,
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. // See the License for the specific language governing permissions and
  14. // limitations under the License.
  15. #include "basisu_enc.h"
  16. #if BASISU_SUPPORT_SSE
  17. #define CPPSPMD_SSE2 0
  18. #ifdef _MSC_VER
  19. #include <intrin.h>
  20. #endif
  21. #include "cppspmd_sse.h"
  22. #include "cppspmd_type_aliases.h"
  23. using namespace basisu;
  24. #include "basisu_kernels_declares.h"
  25. #include "basisu_kernels_imp.h"
  26. namespace basisu
  27. {
  // Set of x86 CPU feature flags. A freshly constructed cpu_info reports
  // every feature as absent; the flags are filled in afterwards from the
  // register values returned by CPUID.
  struct cpu_info
  {
    // Start with all features marked unsupported.
    cpu_info() :
      m_has_fpu(false),
      m_has_mmx(false),
      m_has_sse(false),
      m_has_sse2(false),
      m_has_sse3(false),
      m_has_ssse3(false),
      m_has_sse41(false),
      m_has_sse42(false),
      m_has_avx(false),
      m_has_avx2(false),
      m_has_pclmulqdq(false)
    {
    }

    bool m_has_fpu;
    bool m_has_mmx;
    bool m_has_sse;
    bool m_has_sse2;
    bool m_has_sse3;
    bool m_has_ssse3;
    bool m_has_sse41;
    bool m_has_sse42;
    bool m_has_avx;
    bool m_has_avx2;
    bool m_has_pclmulqdq;
  };
  43. static void extract_x86_flags(cpu_info &info, uint32_t ecx, uint32_t edx)
  44. {
  45. info.m_has_fpu = (edx & (1 << 0)) != 0;
  46. info.m_has_mmx = (edx & (1 << 23)) != 0;
  47. info.m_has_sse = (edx & (1 << 25)) != 0;
  48. info.m_has_sse2 = (edx & (1 << 26)) != 0;
  49. info.m_has_sse3 = (ecx & (1 << 0)) != 0;
  50. info.m_has_ssse3 = (ecx & (1 << 9)) != 0;
  51. info.m_has_sse41 = (ecx & (1 << 19)) != 0;
  52. info.m_has_sse42 = (ecx & (1 << 20)) != 0;
  53. info.m_has_pclmulqdq = (ecx & (1 << 1)) != 0;
  54. info.m_has_avx = (ecx & (1 << 28)) != 0;
  55. }
  56. static void extract_x86_extended_flags(cpu_info &info, uint32_t ebx)
  57. {
  58. info.m_has_avx2 = (ebx & (1 << 5)) != 0;
  59. }
  60. #ifndef _MSC_VER
  // Executes the CPUID instruction (non-MSVC builds only).
  //   eax  - leaf number loaded into EAX before the instruction
  //   ecx  - sub-leaf number loaded into ECX before the instruction
  //   regs - receives four values: regs[0]=EAX, regs[1]=EBX, regs[2]=ECX,
  //          regs[3]=EDX as left by CPUID
  static void do_cpuid(uint32_t eax, uint32_t ecx, uint32_t* regs)
  {
    uint32_t reg_b = 0;
    uint32_t reg_d = 0;

  #if defined(__PIC__) && defined(__i386__)
    // EBX is stashed in EDI before CPUID and swapped back afterwards, so the
    // EBX result is delivered through EDI and EBX itself ends up unchanged.
    // NOTE(review): presumably needed because 32-bit PIC code reserves EBX
    // (GOT pointer) - confirm against the toolchain's ABI.
    __asm__("movl %%ebx, %%edi;"
        "cpuid;"
        "xchgl %%ebx, %%edi;"
        : "=D"(reg_b), "+a"(eax), "+c"(ecx), "=d"(reg_d));
  #else
    __asm__("cpuid;" : "+b"(reg_b), "+a"(eax), "+c"(ecx), "=d"(reg_d));
  #endif

    regs[0] = eax;
    regs[1] = reg_b;
    regs[2] = ecx;
    regs[3] = reg_d;
  }
  74. #endif
  75. static void get_cpuinfo(cpu_info &info)
  76. {
  77. int regs[4];
  78. #ifdef _MSC_VER
  79. __cpuid(regs, 0);
  80. #else
  81. do_cpuid(0, 0, (uint32_t *)regs);
  82. #endif
  83. const uint32_t max_eax = regs[0];
  84. if (max_eax >= 1U)
  85. {
  86. #ifdef _MSC_VER
  87. __cpuid(regs, 1);
  88. #else
  89. do_cpuid(1, 0, (uint32_t*)regs);
  90. #endif
  91. extract_x86_flags(info, regs[2], regs[3]);
  92. }
  93. if (max_eax >= 7U)
  94. {
  95. #ifdef _MSC_VER
  96. __cpuidex(regs, 7, 0);
  97. #else
  98. do_cpuid(7, 0, (uint32_t*)regs);
  99. #endif
  100. extract_x86_extended_flags(info, regs[1]);
  101. }
  102. }
  103. void detect_sse41()
  104. {
  105. cpu_info info;
  106. get_cpuinfo(info);
  107. // Check for everything from SSE to SSE 4.1
  108. g_cpu_supports_sse41 = info.m_has_sse && info.m_has_sse2 && info.m_has_sse3 && info.m_has_ssse3 && info.m_has_sse41;
  109. }
  110. } // namespace basisu
  111. #else // #if BASISU_SUPPORT_SSE
  112. namespace basisu
  113. {
  // Variant used when BASISU_SUPPORT_SSE evaluates to false: no CPU probing
  // is performed and nothing is modified. Intentionally a no-op.
  void detect_sse41()
  {
    // Nothing to detect in this configuration.
  }
  117. } // namespace basisu
  118. #endif // #if BASISU_SUPPORT_SSE