|
@@ -1,5 +1,5 @@
|
|
/* CpuArch.c -- CPU specific code
|
|
/* CpuArch.c -- CPU specific code
|
|
-2024-07-04 : Igor Pavlov : Public domain */
|
|
|
|
|
|
+Igor Pavlov : Public domain */
|
|
|
|
|
|
#include "Precomp.h"
|
|
#include "Precomp.h"
|
|
|
|
|
|
@@ -17,7 +17,7 @@
|
|
/*
|
|
/*
|
|
cpuid instruction supports (subFunction) parameter in ECX,
|
|
cpuid instruction supports (subFunction) parameter in ECX,
|
|
that is used only with some specific (function) parameter values.
|
|
that is used only with some specific (function) parameter values.
|
|
- But we always use only (subFunction==0).
|
|
|
|
|
|
+ most functions use only (subFunction==0).
|
|
*/
|
|
*/
|
|
/*
|
|
/*
|
|
__cpuid(): MSVC and GCC/CLANG use same function/macro name
|
|
__cpuid(): MSVC and GCC/CLANG use same function/macro name
|
|
@@ -49,43 +49,49 @@
|
|
#if defined(MY_CPU_AMD64) && defined(__PIC__) \
|
|
#if defined(MY_CPU_AMD64) && defined(__PIC__) \
|
|
&& ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))
|
|
&& ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))
|
|
|
|
|
|
-#define x86_cpuid_MACRO(p, func) { \
|
|
|
|
|
|
+ /* "=&r" selects free register. It can select even rbx, if that register is free.
|
|
|
|
+ "=&D" for (RDI) also works, but the code can be larger with "=&D"
|
|
|
|
+ "2"(subFun) : 2 is (zero-based) index in the output constraint list "=c" (ECX). */
|
|
|
|
+
|
|
|
|
+#define x86_cpuid_MACRO_2(p, func, subFunc) { \
|
|
__asm__ __volatile__ ( \
|
|
__asm__ __volatile__ ( \
|
|
ASM_LN "mov %%rbx, %q1" \
|
|
ASM_LN "mov %%rbx, %q1" \
|
|
ASM_LN "cpuid" \
|
|
ASM_LN "cpuid" \
|
|
ASM_LN "xchg %%rbx, %q1" \
|
|
ASM_LN "xchg %%rbx, %q1" \
|
|
- : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
|
|
|
|
-
|
|
|
|
- /* "=&r" selects free register. It can select even rbx, if that register is free.
|
|
|
|
- "=&D" for (RDI) also works, but the code can be larger with "=&D"
|
|
|
|
- "2"(0) means (subFunction = 0),
|
|
|
|
- 2 is (zero-based) index in the output constraint list "=c" (ECX). */
|
|
|
|
|
|
+ : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); }
|
|
|
|
|
|
#elif defined(MY_CPU_X86) && defined(__PIC__) \
|
|
#elif defined(MY_CPU_X86) && defined(__PIC__) \
|
|
&& ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))
|
|
&& ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))
|
|
|
|
|
|
-#define x86_cpuid_MACRO(p, func) { \
|
|
|
|
|
|
+#define x86_cpuid_MACRO_2(p, func, subFunc) { \
|
|
__asm__ __volatile__ ( \
|
|
__asm__ __volatile__ ( \
|
|
ASM_LN "mov %%ebx, %k1" \
|
|
ASM_LN "mov %%ebx, %k1" \
|
|
ASM_LN "cpuid" \
|
|
ASM_LN "cpuid" \
|
|
ASM_LN "xchg %%ebx, %k1" \
|
|
ASM_LN "xchg %%ebx, %k1" \
|
|
- : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
|
|
|
|
|
|
+ : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); }
|
|
|
|
|
|
#else
|
|
#else
|
|
|
|
|
|
-#define x86_cpuid_MACRO(p, func) { \
|
|
|
|
|
|
+#define x86_cpuid_MACRO_2(p, func, subFunc) { \
|
|
__asm__ __volatile__ ( \
|
|
__asm__ __volatile__ ( \
|
|
ASM_LN "cpuid" \
|
|
ASM_LN "cpuid" \
|
|
- : "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
|
|
|
|
|
|
+ : "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); }
|
|
|
|
|
|
#endif
|
|
#endif
|
|
|
|
|
|
|
|
+#define x86_cpuid_MACRO(p, func) x86_cpuid_MACRO_2(p, func, 0)
|
|
|
|
|
|
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
|
|
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
|
|
{
|
|
{
|
|
x86_cpuid_MACRO(p, func)
|
|
x86_cpuid_MACRO(p, func)
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+static
|
|
|
|
+void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc)
|
|
|
|
+{
|
|
|
|
+ x86_cpuid_MACRO_2(p, func, subFunc)
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
|
|
Z7_NO_INLINE
|
|
Z7_NO_INLINE
|
|
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
|
|
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
|
|
@@ -205,11 +211,39 @@ void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
|
|
__asm ret 0
|
|
__asm ret 0
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+static
|
|
|
|
+void __declspec(naked) Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc)
|
|
|
|
+{
|
|
|
|
+ UNUSED_VAR(p)
|
|
|
|
+ UNUSED_VAR(func)
|
|
|
|
+ UNUSED_VAR(subFunc)
|
|
|
|
+ __asm push ebx
|
|
|
|
+ __asm push edi
|
|
|
|
+ __asm mov edi, ecx // p
|
|
|
|
+ __asm mov eax, edx // func
|
|
|
|
+ __asm mov ecx, [esp + 12] // subFunc
|
|
|
|
+ __asm cpuid
|
|
|
|
+ __asm mov [edi ], eax
|
|
|
|
+ __asm mov [edi + 4], ebx
|
|
|
|
+ __asm mov [edi + 8], ecx
|
|
|
|
+ __asm mov [edi + 12], edx
|
|
|
|
+ __asm pop edi
|
|
|
|
+ __asm pop ebx
|
|
|
|
+ __asm ret 4
|
|
|
|
+}
|
|
|
|
+
|
|
#else // MY_CPU_AMD64
|
|
#else // MY_CPU_AMD64
|
|
|
|
|
|
#if _MSC_VER >= 1600
|
|
#if _MSC_VER >= 1600
|
|
#include <intrin.h>
|
|
#include <intrin.h>
|
|
#define MY_cpuidex __cpuidex
|
|
#define MY_cpuidex __cpuidex
|
|
|
|
+
|
|
|
|
+static
|
|
|
|
+void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc)
|
|
|
|
+{
|
|
|
|
+ __cpuidex((int *)p, func, subFunc);
|
|
|
|
+}
|
|
|
|
+
|
|
#else
|
|
#else
|
|
/*
|
|
/*
|
|
__cpuid (func == (0 or 7)) requires subfunction number in ECX.
|
|
__cpuid (func == (0 or 7)) requires subfunction number in ECX.
|
|
@@ -219,7 +253,7 @@ void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
|
|
We still can use __cpuid for low (func) values that don't require ECX,
|
|
We still can use __cpuid for low (func) values that don't require ECX,
|
|
but __cpuid() in old MSVC will be incorrect for some func values: (func == 7).
|
|
but __cpuid() in old MSVC will be incorrect for some func values: (func == 7).
|
|
So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction,
|
|
So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction,
|
|
- where ECX value is first parameter for FASTCALL / NO_INLINE func,
|
|
|
|
|
|
+ where ECX value is first parameter for FASTCALL / NO_INLINE func.
|
|
So the caller of MY_cpuidex_HACK() sets ECX as subFunction, and
|
|
So the caller of MY_cpuidex_HACK() sets ECX as subFunction, and
|
|
old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value.
|
|
old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value.
|
|
|
|
|
|
@@ -233,6 +267,11 @@ Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(Int32 subFunction, Int32 func, Int
|
|
}
|
|
}
|
|
#define MY_cpuidex(info, func, func2) MY_cpuidex_HACK(func2, func, info)
|
|
#define MY_cpuidex(info, func, func2) MY_cpuidex_HACK(func2, func, info)
|
|
#pragma message("======== MY_cpuidex_HACK WAS USED ========")
|
|
#pragma message("======== MY_cpuidex_HACK WAS USED ========")
|
|
|
|
+static
|
|
|
|
+void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc)
|
|
|
|
+{
|
|
|
|
+ MY_cpuidex_HACK(subFunc, func, (Int32 *)p);
|
|
|
|
+}
|
|
#endif // _MSC_VER >= 1600
|
|
#endif // _MSC_VER >= 1600
|
|
|
|
|
|
#if !defined(MY_CPU_AMD64)
|
|
#if !defined(MY_CPU_AMD64)
|
|
@@ -445,6 +484,23 @@ BoolInt CPU_IsSupported_SHA(void)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+
|
|
|
|
+BoolInt CPU_IsSupported_SHA512(void)
|
|
|
|
+{
|
|
|
|
+ if (!CPU_IsSupported_AVX2()) return False; // maybe CPU_IsSupported_AVX() is enough here
|
|
|
|
+
|
|
|
|
+ if (z7_x86_cpuid_GetMaxFunc() < 7)
|
|
|
|
+ return False;
|
|
|
|
+ {
|
|
|
|
+ UInt32 d[4];
|
|
|
|
+ z7_x86_cpuid_subFunc(d, 7, 0);
|
|
|
|
+ if (d[0] < 1) // d[0] - is max supported subleaf value
|
|
|
|
+ return False;
|
|
|
|
+ z7_x86_cpuid_subFunc(d, 7, 1);
|
|
|
|
+ return (BoolInt)(d[0]) & 1;
|
|
|
|
+ }
|
|
|
|
+}
|
|
|
|
+
|
|
/*
|
|
/*
|
|
MSVC: _xgetbv() intrinsic is available since VS2010SP1.
|
|
MSVC: _xgetbv() intrinsic is available since VS2010SP1.
|
|
MSVC also defines (_XCR_XFEATURE_ENABLED_MASK) macro in
|
|
MSVC also defines (_XCR_XFEATURE_ENABLED_MASK) macro in
|
|
@@ -776,6 +832,18 @@ BoolInt CPU_IsSupported_NEON(void)
|
|
return z7_sysctlbyname_Get_BoolInt("hw.optional.neon");
|
|
return z7_sysctlbyname_Get_BoolInt("hw.optional.neon");
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+BoolInt CPU_IsSupported_SHA512(void)
|
|
|
|
+{
|
|
|
|
+ return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_2_sha512");
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+/*
|
|
|
|
+BoolInt CPU_IsSupported_SHA3(void)
|
|
|
|
+{
|
|
|
|
+ return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_2_sha3");
|
|
|
|
+}
|
|
|
|
+*/
|
|
|
|
+
|
|
#ifdef MY_CPU_ARM64
|
|
#ifdef MY_CPU_ARM64
|
|
#define APPLE_CRYPTO_SUPPORT_VAL 1
|
|
#define APPLE_CRYPTO_SUPPORT_VAL 1
|
|
#else
|
|
#else
|
|
@@ -860,6 +928,19 @@ MY_HWCAP_CHECK_FUNC (CRC32)
|
|
MY_HWCAP_CHECK_FUNC (SHA1)
|
|
MY_HWCAP_CHECK_FUNC (SHA1)
|
|
MY_HWCAP_CHECK_FUNC (SHA2)
|
|
MY_HWCAP_CHECK_FUNC (SHA2)
|
|
MY_HWCAP_CHECK_FUNC (AES)
|
|
MY_HWCAP_CHECK_FUNC (AES)
|
|
|
|
+#ifdef MY_CPU_ARM64
|
|
|
|
+// <hwcap.h> supports HWCAP_SHA512 and HWCAP_SHA3 since 2017.
|
|
|
|
+// we define them here, if they are not defined
|
|
|
|
+#ifndef HWCAP_SHA3
|
|
|
|
+// #define HWCAP_SHA3 (1 << 17)
|
|
|
|
+#endif
|
|
|
|
+#ifndef HWCAP_SHA512
|
|
|
|
+// #pragma message("=== HWCAP_SHA512 define === ")
|
|
|
|
+#define HWCAP_SHA512 (1 << 21)
|
|
|
|
+#endif
|
|
|
|
+MY_HWCAP_CHECK_FUNC (SHA512)
|
|
|
|
+// MY_HWCAP_CHECK_FUNC (SHA3)
|
|
|
|
+#endif
|
|
|
|
|
|
#endif // __APPLE__
|
|
#endif // __APPLE__
|
|
#endif // _WIN32
|
|
#endif // _WIN32
|