123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161 |
- // basisu_kernels_sse.cpp
- // Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- #include "basisu_enc.h"
- #if BASISU_SUPPORT_SSE
- #define CPPSPMD_SSE2 0
- #ifdef _MSC_VER
- #include <intrin.h>
- #endif
- #if !defined(_MSC_VER)
- #if __AVX__ || __AVX2__ || __AVX512F__
- #error Please check your compiler options
- #endif
-
- #if CPPSPMD_SSE2
- #if __SSE4_1__ || __SSE3__ || __SSE4_2__ || __SSSE3__
- #error SSE4.1/SSE3/SSE4.2/SSSE3 cannot be enabled to use this file
- #endif
- #else
- #if !__SSE4_1__ || !__SSE3__ || !__SSSE3__
- #error Please check your compiler options
- #endif
- #endif
- #endif
- #include "cppspmd_sse.h"
- #include "cppspmd_type_aliases.h"
- using namespace basisu;
- #include "basisu_kernels_declares.h"
- #include "basisu_kernels_imp.h"
- namespace basisu
- {
// x86 feature flags gathered from CPUID. A freshly constructed cpu_info has
// every flag cleared; the extract_x86_* helpers set the ones the CPU reports.
struct cpu_info
{
	// Each flag is initialized explicitly rather than via memset(this, ...):
	// this stays correct if a non-trivial member is ever added, needs no
	// <string.h>, and avoids raw-memory writes over a class object.
	// The initializers are listed in declaration order.
	cpu_info() :
		m_has_fpu(false),
		m_has_mmx(false),
		m_has_sse(false),
		m_has_sse2(false),
		m_has_sse3(false),
		m_has_ssse3(false),
		m_has_sse41(false),
		m_has_sse42(false),
		m_has_avx(false),
		m_has_avx2(false),
		m_has_pclmulqdq(false)
	{
	}

	bool m_has_fpu;
	bool m_has_mmx;
	bool m_has_sse;
	bool m_has_sse2;
	bool m_has_sse3;
	bool m_has_ssse3;
	bool m_has_sse41;
	bool m_has_sse42;
	bool m_has_avx;
	bool m_has_avx2;
	bool m_has_pclmulqdq;
};
- static void extract_x86_flags(cpu_info &info, uint32_t ecx, uint32_t edx)
- {
- info.m_has_fpu = (edx & (1 << 0)) != 0;
- info.m_has_mmx = (edx & (1 << 23)) != 0;
- info.m_has_sse = (edx & (1 << 25)) != 0;
- info.m_has_sse2 = (edx & (1 << 26)) != 0;
- info.m_has_sse3 = (ecx & (1 << 0)) != 0;
- info.m_has_ssse3 = (ecx & (1 << 9)) != 0;
- info.m_has_sse41 = (ecx & (1 << 19)) != 0;
- info.m_has_sse42 = (ecx & (1 << 20)) != 0;
- info.m_has_pclmulqdq = (ecx & (1 << 1)) != 0;
- info.m_has_avx = (ecx & (1 << 28)) != 0;
- }
- static void extract_x86_extended_flags(cpu_info &info, uint32_t ebx)
- {
- info.m_has_avx2 = (ebx & (1 << 5)) != 0;
- }
#ifndef _MSC_VER
// Executes the CPUID instruction via inline assembly (non-MSVC builds only).
//   eax  - value loaded into EAX before CPUID (the leaf)
//   ecx  - value loaded into ECX before CPUID (the sub-leaf)
//   regs - receives four words, in order: EAX, EBX, ECX, EDX
static void do_cpuid(uint32_t eax, uint32_t ecx, uint32_t* regs)
{
	uint32_t reg_b = 0;
	uint32_t reg_d = 0;

#if defined(__PIC__) && defined(__i386__)
	// 32-bit PIC build: EBX is never named as an operand. It is parked in EDI
	// before CPUID and swapped back afterwards, so EBX is left unchanged and
	// CPUID's EBX result is delivered through EDI.
	// NOTE(review): presumably because EBX is reserved as the PIC base
	// register on i386 - confirm for the toolchains in use.
	__asm__("movl %%ebx, %%edi;"
		"cpuid;"
		"xchgl %%ebx, %%edi;"
		: "=D"(reg_b), "+a"(eax), "+c"(ecx), "=d"(reg_d));
#else
	__asm__("cpuid;" : "+b"(reg_b), "+a"(eax), "+c"(ecx), "=d"(reg_d));
#endif

	regs[0] = eax;
	regs[1] = reg_b;
	regs[2] = ecx;
	regs[3] = reg_d;
}
#endif
- static void get_cpuinfo(cpu_info &info)
- {
- int regs[4];
- #ifdef _MSC_VER
- __cpuid(regs, 0);
- #else
- do_cpuid(0, 0, (uint32_t *)regs);
- #endif
- const uint32_t max_eax = regs[0];
- if (max_eax >= 1U)
- {
- #ifdef _MSC_VER
- __cpuid(regs, 1);
- #else
- do_cpuid(1, 0, (uint32_t*)regs);
- #endif
- extract_x86_flags(info, regs[2], regs[3]);
- }
- if (max_eax >= 7U)
- {
- #ifdef _MSC_VER
- __cpuidex(regs, 7, 0);
- #else
- do_cpuid(7, 0, (uint32_t*)regs);
- #endif
- extract_x86_extended_flags(info, regs[1]);
- }
- }
- void detect_sse41()
- {
- cpu_info info;
- get_cpuinfo(info);
- // Check for everything from SSE to SSE 4.1
- g_cpu_supports_sse41 = info.m_has_sse && info.m_has_sse2 && info.m_has_sse3 && info.m_has_ssse3 && info.m_has_sse41;
- }
- } // namespace basisu
- #else // #if BASISU_SUPPORT_SSE
- namespace basisu
- {
// Fallback used when BASISU_SUPPORT_SSE is disabled: intentionally a no-op,
// so callers may invoke detect_sse41() regardless of the build configuration.
void detect_sse41()
{
	// Nothing to detect in a build without SSE support.
}
- } // namespace basisu
- #endif // #if BASISU_SUPPORT_SSE
|