setup_arch.h 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404
  1. /* Copyright (C) 2013 Povilas Kanapickas <[email protected]>
  2. Distributed under the Boost Software License, Version 1.0.
  3. (See accompanying file LICENSE_1_0.txt or copy at
  4. http://www.boost.org/LICENSE_1_0.txt)
  5. */
  6. #ifndef LIBSIMDPP_SIMD_SETUP_ARCH_H
  7. #define LIBSIMDPP_SIMD_SETUP_ARCH_H
  8. #include <simdpp/detail/preprocessor.h>
  9. #include <simdpp/detail/preprocessor/stringize.hpp>
  10. // Set up macros for the current architecture. Note that this file may be included
  11. // multiple times; more information on the caveats is within the file.
  12. #include <simdpp/detail/preprocess_single_arch.h>
  // Main feature macros.
  //
  // Every SIMDPP_USE_<ISA> macro below is always defined and is exactly 0 or 1.
  // It is 1 when the matching SIMDPP_ARCH_PP_USE_<ISA> input evaluates to
  // non-zero in a preprocessor condition and 0 otherwise (an undefined input
  // counts as 0). The inputs are not defined in this section.

  // ---- NULL -----------------------------------------------------------------
  #if SIMDPP_ARCH_PP_USE_NULL
    #define SIMDPP_USE_NULL 1
  #else
    #define SIMDPP_USE_NULL 0
  #endif

  // ---- x86: SSE family ------------------------------------------------------
  #if SIMDPP_ARCH_PP_USE_SSE2
    #define SIMDPP_USE_SSE2 1
  #else
    #define SIMDPP_USE_SSE2 0
  #endif

  #if SIMDPP_ARCH_PP_USE_SSE3
    #define SIMDPP_USE_SSE3 1
  #else
    #define SIMDPP_USE_SSE3 0
  #endif

  #if SIMDPP_ARCH_PP_USE_SSSE3
    #define SIMDPP_USE_SSSE3 1
  #else
    #define SIMDPP_USE_SSSE3 0
  #endif

  #if SIMDPP_ARCH_PP_USE_SSE4_1
    #define SIMDPP_USE_SSE4_1 1
  #else
    #define SIMDPP_USE_SSE4_1 0
  #endif

  #if SIMDPP_ARCH_PP_USE_X86_POPCNT_INSN
    #define SIMDPP_USE_X86_POPCNT_INSN 1
  #else
    #define SIMDPP_USE_X86_POPCNT_INSN 0
  #endif

  // ---- x86: AVX, FMA and XOP ------------------------------------------------
  #if SIMDPP_ARCH_PP_USE_AVX
    #define SIMDPP_USE_AVX 1
  #else
    #define SIMDPP_USE_AVX 0
  #endif

  #if SIMDPP_ARCH_PP_USE_AVX2
    #define SIMDPP_USE_AVX2 1
  #else
    #define SIMDPP_USE_AVX2 0
  #endif

  #if SIMDPP_ARCH_PP_USE_FMA3
    #define SIMDPP_USE_FMA3 1
  #else
    #define SIMDPP_USE_FMA3 0
  #endif

  #if SIMDPP_ARCH_PP_USE_FMA4
    #define SIMDPP_USE_FMA4 1
  #else
    #define SIMDPP_USE_FMA4 0
  #endif

  #if SIMDPP_ARCH_PP_USE_XOP
    #define SIMDPP_USE_XOP 1
  #else
    #define SIMDPP_USE_XOP 0
  #endif

  // ---- x86: AVX-512 subsets -------------------------------------------------
  #if SIMDPP_ARCH_PP_USE_AVX512F
    #define SIMDPP_USE_AVX512F 1
  #else
    #define SIMDPP_USE_AVX512F 0
  #endif

  #if SIMDPP_ARCH_PP_USE_AVX512BW
    #define SIMDPP_USE_AVX512BW 1
  #else
    #define SIMDPP_USE_AVX512BW 0
  #endif

  #if SIMDPP_ARCH_PP_USE_AVX512DQ
    #define SIMDPP_USE_AVX512DQ 1
  #else
    #define SIMDPP_USE_AVX512DQ 0
  #endif

  #if SIMDPP_ARCH_PP_USE_AVX512VL
    #define SIMDPP_USE_AVX512VL 1
  #else
    #define SIMDPP_USE_AVX512VL 0
  #endif

  // ---- ARM: NEON ------------------------------------------------------------
  #if SIMDPP_ARCH_PP_USE_NEON
    #define SIMDPP_USE_NEON 1
  #else
    #define SIMDPP_USE_NEON 0
  #endif

  #if SIMDPP_ARCH_PP_USE_NEON_FLT_SP
    #define SIMDPP_USE_NEON_FLT_SP 1
  #else
    #define SIMDPP_USE_NEON_FLT_SP 0
  #endif

  // ---- POWER: Altivec and VSX -----------------------------------------------
  #if SIMDPP_ARCH_PP_USE_ALTIVEC
    #define SIMDPP_USE_ALTIVEC 1
  #else
    #define SIMDPP_USE_ALTIVEC 0
  #endif

  #if SIMDPP_ARCH_PP_USE_VSX_206
    #define SIMDPP_USE_VSX_206 1
  #else
    #define SIMDPP_USE_VSX_206 0
  #endif

  #if SIMDPP_ARCH_PP_USE_VSX_207
    #define SIMDPP_USE_VSX_207 1
  #else
    #define SIMDPP_USE_VSX_207 0
  #endif

  // ---- MIPS: MSA ------------------------------------------------------------
  #if SIMDPP_ARCH_PP_USE_MSA
    #define SIMDPP_USE_MSA 1
  #else
    #define SIMDPP_USE_MSA 0
  #endif
  // Building blocks for SIMDPP_ARCH_NAMESPACE, a human-readable identifier that
  // depends on the enabled instruction sets.
  //
  // For each instruction set, SIMDPP_NS_ID_<ISA> expands to SIMDPP_INSN_ID_<ISA>
  // (not defined in this section) when SIMDPP_ARCH_PP_NS_USE_<ISA> evaluates to
  // non-zero, and to nothing otherwise.

  // ---- NULL -----------------------------------------------------------------
  #if SIMDPP_ARCH_PP_NS_USE_NULL
    #define SIMDPP_NS_ID_NULL SIMDPP_INSN_ID_NULL
  #else
    #define SIMDPP_NS_ID_NULL
  #endif

  // ---- x86: SSE family ------------------------------------------------------
  #if SIMDPP_ARCH_PP_NS_USE_SSE2
    #define SIMDPP_NS_ID_SSE2 SIMDPP_INSN_ID_SSE2
  #else
    #define SIMDPP_NS_ID_SSE2
  #endif

  #if SIMDPP_ARCH_PP_NS_USE_SSE3
    #define SIMDPP_NS_ID_SSE3 SIMDPP_INSN_ID_SSE3
  #else
    #define SIMDPP_NS_ID_SSE3
  #endif

  #if SIMDPP_ARCH_PP_NS_USE_SSSE3
    #define SIMDPP_NS_ID_SSSE3 SIMDPP_INSN_ID_SSSE3
  #else
    #define SIMDPP_NS_ID_SSSE3
  #endif

  #if SIMDPP_ARCH_PP_NS_USE_SSE4_1
    #define SIMDPP_NS_ID_SSE4_1 SIMDPP_INSN_ID_SSE4_1
  #else
    #define SIMDPP_NS_ID_SSE4_1
  #endif

  #if SIMDPP_ARCH_PP_NS_USE_POPCNT_INSN
    #define SIMDPP_NS_ID_POPCNT_INSN SIMDPP_INSN_ID_POPCNT_INSN
  #else
    #define SIMDPP_NS_ID_POPCNT_INSN
  #endif

  // ---- x86: AVX, FMA and XOP ------------------------------------------------
  #if SIMDPP_ARCH_PP_NS_USE_AVX
    #define SIMDPP_NS_ID_AVX SIMDPP_INSN_ID_AVX
  #else
    #define SIMDPP_NS_ID_AVX
  #endif

  #if SIMDPP_ARCH_PP_NS_USE_AVX2
    #define SIMDPP_NS_ID_AVX2 SIMDPP_INSN_ID_AVX2
  #else
    #define SIMDPP_NS_ID_AVX2
  #endif

  #if SIMDPP_ARCH_PP_NS_USE_FMA3
    #define SIMDPP_NS_ID_FMA3 SIMDPP_INSN_ID_FMA3
  #else
    #define SIMDPP_NS_ID_FMA3
  #endif

  #if SIMDPP_ARCH_PP_NS_USE_FMA4
    #define SIMDPP_NS_ID_FMA4 SIMDPP_INSN_ID_FMA4
  #else
    #define SIMDPP_NS_ID_FMA4
  #endif

  #if SIMDPP_ARCH_PP_NS_USE_XOP
    #define SIMDPP_NS_ID_XOP SIMDPP_INSN_ID_XOP
  #else
    #define SIMDPP_NS_ID_XOP
  #endif

  // ---- x86: AVX-512 subsets -------------------------------------------------
  #if SIMDPP_ARCH_PP_NS_USE_AVX512F
    #define SIMDPP_NS_ID_AVX512F SIMDPP_INSN_ID_AVX512F
  #else
    #define SIMDPP_NS_ID_AVX512F
  #endif

  #if SIMDPP_ARCH_PP_NS_USE_AVX512BW
    #define SIMDPP_NS_ID_AVX512BW SIMDPP_INSN_ID_AVX512BW
  #else
    #define SIMDPP_NS_ID_AVX512BW
  #endif

  #if SIMDPP_ARCH_PP_NS_USE_AVX512DQ
    #define SIMDPP_NS_ID_AVX512DQ SIMDPP_INSN_ID_AVX512DQ
  #else
    #define SIMDPP_NS_ID_AVX512DQ
  #endif

  #if SIMDPP_ARCH_PP_NS_USE_AVX512VL
    #define SIMDPP_NS_ID_AVX512VL SIMDPP_INSN_ID_AVX512VL
  #else
    #define SIMDPP_NS_ID_AVX512VL
  #endif

  // ---- ARM: NEON ------------------------------------------------------------
  #if SIMDPP_ARCH_PP_NS_USE_NEON
    #define SIMDPP_NS_ID_NEON SIMDPP_INSN_ID_NEON
  #else
    #define SIMDPP_NS_ID_NEON
  #endif

  #if SIMDPP_ARCH_PP_NS_USE_NEON_FLT_SP
    #define SIMDPP_NS_ID_NEON_FLT_SP SIMDPP_INSN_ID_NEON_FLT_SP
  #else
    #define SIMDPP_NS_ID_NEON_FLT_SP
  #endif

  // ---- POWER: Altivec and VSX -----------------------------------------------
  #if SIMDPP_ARCH_PP_NS_USE_ALTIVEC
    #define SIMDPP_NS_ID_ALTIVEC SIMDPP_INSN_ID_ALTIVEC
  #else
    #define SIMDPP_NS_ID_ALTIVEC
  #endif

  #if SIMDPP_ARCH_PP_NS_USE_VSX_206
    #define SIMDPP_NS_ID_VSX_206 SIMDPP_INSN_ID_VSX_206
  #else
    #define SIMDPP_NS_ID_VSX_206
  #endif

  #if SIMDPP_ARCH_PP_NS_USE_VSX_207
    #define SIMDPP_NS_ID_VSX_207 SIMDPP_INSN_ID_VSX_207
  #else
    #define SIMDPP_NS_ID_VSX_207
  #endif

  // ---- MIPS: MSA ------------------------------------------------------------
  #if SIMDPP_ARCH_PP_NS_USE_MSA
    #define SIMDPP_NS_ID_MSA SIMDPP_INSN_ID_MSA
  #else
    #define SIMDPP_NS_ID_MSA
  #endif
  // Architecture namespace identifier: the token `arch` followed by all 21
  // SIMDPP_NS_ID_* fragments, handed to SIMDPP_PP_PASTE22 (defined elsewhere;
  // presumably it pastes its 22 arguments into one identifier).
  //
  // The argument order below is part of the resulting name and must be kept.
  #define SIMDPP_ARCH_NAMESPACE                                                \
      SIMDPP_PP_PASTE22(arch,                                                  \
          SIMDPP_NS_ID_NULL,                                                   \
          SIMDPP_NS_ID_SSE2, SIMDPP_NS_ID_SSE3, SIMDPP_NS_ID_SSSE3,            \
          SIMDPP_NS_ID_SSE4_1, SIMDPP_NS_ID_POPCNT_INSN,                       \
          SIMDPP_NS_ID_AVX, SIMDPP_NS_ID_AVX2,                                 \
          SIMDPP_NS_ID_AVX512F, SIMDPP_NS_ID_AVX512BW,                         \
          SIMDPP_NS_ID_AVX512DQ, SIMDPP_NS_ID_AVX512VL,                        \
          SIMDPP_NS_ID_FMA3, SIMDPP_NS_ID_FMA4, SIMDPP_NS_ID_XOP,              \
          SIMDPP_NS_ID_NEON, SIMDPP_NS_ID_NEON_FLT_SP,                         \
          SIMDPP_NS_ID_MSA,                                                    \
          SIMDPP_NS_ID_ALTIVEC, SIMDPP_NS_ID_VSX_206, SIMDPP_NS_ID_VSX_207)
  248. #define SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_NAMESPACE SIMDPP_ARCH_NAMESPACE
  249. #define SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_FOR_DISPATCH 0
  250. #include <simdpp/dispatch/preprocess_single_compile_arch.h>
  251. #undef SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_FOR_DISPATCH
  252. #undef SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_NAMESPACE
  // Intrinsic headers for the enabled instruction sets. Each header is included
  // only when at least one SIMDPP_USE_* switch that needs it is non-zero.

  // ---- x86 ------------------------------------------------------------------
  #if SIMDPP_USE_SSE2
    #include <xmmintrin.h>
    #include <emmintrin.h>
  #endif

  #if SIMDPP_USE_SSE3
    #include <pmmintrin.h>
  #endif

  #if SIMDPP_USE_SSSE3
    #include <tmmintrin.h>
  #endif

  #if SIMDPP_USE_SSE4_1
    #include <smmintrin.h>
  #endif

  // AVX, AVX2 and FMA3 all take their intrinsics from <immintrin.h>.
  #if SIMDPP_USE_AVX || SIMDPP_USE_AVX2 || SIMDPP_USE_FMA3
    #include <immintrin.h>
  #endif

  // FMA4 and XOP both take their intrinsics from <x86intrin.h>.
  #if SIMDPP_USE_FMA4 || SIMDPP_USE_XOP
    #include <x86intrin.h>
  #endif

  // Selecting both fused multiply-add flavours at once is rejected.
  #if SIMDPP_USE_FMA4 && SIMDPP_USE_FMA3
    #error "X86_FMA3 and X86_FMA4 can't be used together"
  #endif

  #if SIMDPP_USE_AVX512F || SIMDPP_USE_AVX512BW
    #include <immintrin.h>
  #endif

  // ---- ARM ------------------------------------------------------------------
  #if SIMDPP_USE_NEON || SIMDPP_USE_NEON_FLT_SP
    #include <arm_neon.h>
  #endif

  // ---- POWER ----------------------------------------------------------------
  #if SIMDPP_USE_ALTIVEC
    #include <altivec.h>
    // Drop the `vector`, `pixel` and `bool` macros again after the include;
    // presumably <altivec.h> defines them and they would otherwise clash with
    // ordinary identifiers.
    #undef vector
    #undef pixel
    #undef bool
  #endif

  // ---- MIPS -----------------------------------------------------------------
  #if SIMDPP_USE_MSA
    #include <msa.h>
  #endif
  // Helper macros.
  //
  // SIMDPP_64_BITS / SIMDPP_32_BITS: exactly one of them is 1. The target is
  // treated as 64-bit when the compiler announces x86-64, AArch64 or 64-bit
  // PowerPC; every other target is treated as 32-bit.
  //
  // Both the GCC/Clang spellings and the MSVC spellings (_M_AMD64 / _M_X64 for
  // x86-64, _M_ARM64 for AArch64) are checked, so an MSVC ARM64 build is not
  // misclassified as 32-bit.
  #if __amd64__ || __x86_64__ || _M_AMD64 || _M_X64 || \
      __aarch64__ || _M_ARM64 || __powerpc64__
    #define SIMDPP_64_BITS 1
    #define SIMDPP_32_BITS 0
  #else
    #define SIMDPP_32_BITS 1
    #define SIMDPP_64_BITS 0
  #endif

  // With NEON on a 64-bit target, SIMDPP_USE_NEON_FLT_SP is forced to 1
  // regardless of how it was configured earlier.
  #if SIMDPP_USE_NEON && SIMDPP_64_BITS
    #undef SIMDPP_USE_NEON_FLT_SP
    #define SIMDPP_USE_NEON_FLT_SP 1
  #endif
  // Byte order, determined only when Altivec is enabled.
  //
  // Exactly one of SIMDPP_LITTLE_ENDIAN / SIMDPP_BIG_ENDIAN is set to 1 based on
  // the compiler-provided __BYTE_ORDER__. Compilation stops with an error when
  // __BYTE_ORDER__ is missing or matches neither known ordering.
  #if SIMDPP_USE_ALTIVEC
    #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
      #define SIMDPP_LITTLE_ENDIAN 1
      #define SIMDPP_BIG_ENDIAN 0
    #elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
      #define SIMDPP_LITTLE_ENDIAN 0
      #define SIMDPP_BIG_ENDIAN 1
    #else
      #error "Could not determine byte order"
    #endif
  #endif
  // Derived NEON switches. Each one is a parenthesised boolean expression over
  // the base macros and is evaluated wherever it is used.
  //
  //   SIMDPP_USE_NEON32         - NEON on a 32-bit target
  //   SIMDPP_USE_NEON64         - NEON on a 64-bit target
  //   SIMDPP_USE_NEON32_FLT_SP  - SIMDPP_USE_NEON_FLT_SP on a 32-bit target
  //   SIMDPP_USE_NEON_NO_FLT_SP - NEON with SIMDPP_USE_NEON_FLT_SP switched off
  #define SIMDPP_USE_NEON32 \
      (SIMDPP_USE_NEON && SIMDPP_32_BITS)
  #define SIMDPP_USE_NEON64 \
      (SIMDPP_USE_NEON && SIMDPP_64_BITS)
  #define SIMDPP_USE_NEON32_FLT_SP \
      (SIMDPP_USE_NEON_FLT_SP && SIMDPP_32_BITS)
  #define SIMDPP_USE_NEON_NO_FLT_SP \
      (SIMDPP_USE_NEON && !SIMDPP_USE_NEON_FLT_SP)
  // Processor family of the target. At most one of SIMDPP_X86, SIMDPP_ARM,
  // SIMDPP_PPC and SIMDPP_MIPS is defined (to 1); the others stay undefined.
  // None is defined when the target is not recognised.
  //
  // The x86-64 check uses __x86_64__ (the previous spelling __x64_64__ was a
  // typo that no compiler defines), and MSVC's _M_ARM64 is accepted alongside
  // _M_ARM.
  #if __i386__ || __i386 || _M_IX86 || \
      __amd64__ || __x86_64__ || _M_AMD64 || _M_X64
    #define SIMDPP_X86 1
  #elif _M_ARM || _M_ARM64 || __arm__ || __aarch64__
    #define SIMDPP_ARM 1
  #elif __powerpc__ || __powerpc64__
    #define SIMDPP_PPC 1
  #elif __mips__
    #define SIMDPP_MIPS 1
  #endif
  /** @def SIMDPP_ARCH_NAME
      SIMDPP_ARCH_NAMESPACE passed through SIMDPP_PP_STRINGIZE, for use in
      contexts where a string is required.
  */
  #define SIMDPP_ARCH_NAME \
      SIMDPP_PP_STRINGIZE(SIMDPP_ARCH_NAMESPACE)
  // Miscellaneous compiler-abstraction macros.

  // SIMDPP_INL: declares a function inline and, where the compiler offers a
  // way to do so, forces inlining. Falls back to plain `inline` elsewhere.
  #if __GNUC__
    #define SIMDPP_INL __attribute__((__always_inline__)) inline
  #elif _MSC_VER
    #define SIMDPP_INL __forceinline
  #else
    #define SIMDPP_INL inline
  #endif

  // SIMDPP_DEPRECATED(msg): marks a declaration as deprecated with the given
  // message. GCC/Clang use the `deprecated` attribute and MSVC uses
  // __declspec(deprecated(...)), matching the MSVC support of the other macros
  // in this section. Unknown compilers get an empty expansion, so the marker
  // is silently dropped there.
  #if defined(__GNUC__) || defined(__clang__)
    #define SIMDPP_DEPRECATED(msg) __attribute__ ((deprecated(msg)))
  #elif defined(_MSC_VER)
    #define SIMDPP_DEPRECATED(msg) __declspec(deprecated(msg))
  #else
    #define SIMDPP_DEPRECATED(msg)
  #endif

  // SIMDPP_ALIGN(X): requests an alignment of X for the entity it is attached
  // to. There is no portable fallback, so unknown compilers are rejected.
  #if __GNUC__
    #define SIMDPP_ALIGN(X) __attribute__((__aligned__(X)))
  #elif _MSC_VER
    #define SIMDPP_ALIGN(X) __declspec(align(X))
  #else
    #error "Unsupported compiler"
  #endif
  // Library flavour switches: the C++98 switch is 0 and the C++11 switch is 1.
  #define SIMDPP_LIBRARY_VERSION_CXX98 0
  #define SIMDPP_LIBRARY_VERSION_CXX11 1
  364. #include <simdpp/detail/workarounds.h>
  365. #include <simdpp/deprecations.h>
  366. // #define SIMDPP_EXPR_DEBUG 1
  367. // FIXME: unused (workarounds for AMD CPUs)
  368. // #define SIMDPP_USE_AMD
  369. #endif