msvc_ia32_common.h 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275
  1. /*
  2. Copyright (c) 2005-2020 Intel Corporation
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. #if !defined(__TBB_machine_H) || defined(__TBB_machine_msvc_ia32_common_H)
  14. #error Do not #include this internal file directly; use public TBB headers instead.
  15. #endif
  16. #define __TBB_machine_msvc_ia32_common_H
  17. #include <intrin.h>
  // TODO: consider moving this macro to tbb_config.h and using it wherever MSVC asm is used.
  //
  // __TBB_X86_MSVC_INLINE_ASM_AVAILABLE is 1 when `__asm` blocks may be used
  // in this translation unit and 0 otherwise.
  #if _M_X64 && !__INTEL_COMPILER
  // MSVC in x64 mode does not accept inline assembler
  #define __TBB_X86_MSVC_INLINE_ASM_AVAILABLE 0
  #define __TBB_NO_X86_MSVC_INLINE_ASM_MSG "The compiler being used is not supported (outdated?)"
  #else
  #define __TBB_X86_MSVC_INLINE_ASM_AVAILABLE 1
  #endif
  // Helpers that let the code below be written once for both target widths:
  //   __TBB_r(ax)   expands to  rax  on x64 and to  eax  otherwise;
  //   __TBB_W(name) expands to  name64  on x64 and to  name  otherwise.
  #if _M_X64
  #define __TBB_r(reg_name) r##reg_name
  #define __TBB_W(name) name##64
  #else
  #define __TBB_r(reg_name) e##reg_name
  #define __TBB_W(name) name
  #endif
  namespace tbb {
  namespace internal {
  namespace msvc_intrinsics {
  // Integer type the word-sized interlocked intrinsics in this header operate on.
  #if _M_X64
      typedef __int64 word;
  #else
      typedef long word;
  #endif
  } // namespace msvc_intrinsics
  } // namespace internal
  } // namespace tbb
  #if __TBB_MSVC_PART_WORD_INTERLOCKED_INTRINSICS_PRESENT
  /* Generates __TBB_machine_cmpswp<S>, __TBB_machine_fetchadd<S> and
     __TBB_machine_fetchstore<S> as thin wrappers over the _Interlocked*
     compiler intrinsics.
       S - operand size in bytes (becomes the suffix of the generated names)
       B - suffix of the intrinsics for that size (empty for the 4-byte ones)
       T - type the untyped pointer is cast to; also the return type
       U - type of the value/comparand/addend parameters */
  #define __TBB_MACHINE_DEFINE_ATOMICS(S,B,T,U) \
      __pragma(intrinsic( _InterlockedCompareExchange##B )) \
      static inline T __TBB_machine_cmpswp##S ( volatile void * ptr, U value, U comparand ) { \
          T* target = (T*)ptr; \
          return _InterlockedCompareExchange##B ( target, value, comparand ); \
      } \
      __pragma(intrinsic( _InterlockedExchangeAdd##B )) \
      static inline T __TBB_machine_fetchadd##S ( volatile void * ptr, U addend ) { \
          T* target = (T*)ptr; \
          return _InterlockedExchangeAdd##B ( target, addend ); \
      } \
      __pragma(intrinsic( _InterlockedExchange##B )) \
      static inline T __TBB_machine_fetchstore##S ( volatile void * ptr, U value ) { \
          T* target = (T*)ptr; \
          return _InterlockedExchange##B ( target, value ); \
      }
  // Atomic intrinsics for 1, 2, and 4 bytes are available for x86 & x64
  __TBB_MACHINE_DEFINE_ATOMICS(1,8,char,__int8)
  __TBB_MACHINE_DEFINE_ATOMICS(2,16,short,__int16)
  __TBB_MACHINE_DEFINE_ATOMICS(4,,long,__int32)
  // The 8-byte variants are generated only when __TBB_WORDSIZE is 8.
  #if __TBB_WORDSIZE==8
  __TBB_MACHINE_DEFINE_ATOMICS(8,64,__int64,__int64)
  #endif
  // The generator macro is local to this header.
  #undef __TBB_MACHINE_DEFINE_ATOMICS
  #endif /* __TBB_MSVC_PART_WORD_INTERLOCKED_INTRINSICS_PRESENT */
  63. #if _MSC_VER>=1300 || __INTEL_COMPILER>=1100
  64. #pragma intrinsic(_ReadWriteBarrier)
  65. #pragma intrinsic(_mm_mfence)
  66. #define __TBB_compiler_fence() _ReadWriteBarrier()
  67. #define __TBB_full_memory_fence() _mm_mfence()
  68. #elif __TBB_X86_MSVC_INLINE_ASM_AVAILABLE
  69. #define __TBB_compiler_fence() __asm { __asm nop }
  70. #define __TBB_full_memory_fence() __asm { __asm mfence }
  71. #else
  72. #error Unsupported compiler; define __TBB_{control,acquire,release}_consistency_helper to support it
  73. #endif
  74. #define __TBB_control_consistency_helper() __TBB_compiler_fence()
  75. #define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
  76. #define __TBB_release_consistency_helper() __TBB_compiler_fence()
  77. #if (_MSC_VER>=1300) || (__INTEL_COMPILER)
  78. #pragma intrinsic(_mm_pause)
  79. namespace tbb { namespace internal { namespace msvc_intrinsics {
  80. static inline void pause (uintptr_t delay ) {
  81. for (;delay>0; --delay )
  82. _mm_pause();
  83. }
  84. }}}
  85. #define __TBB_Pause(V) tbb::internal::msvc_intrinsics::pause(V)
  86. #define __TBB_SINGLE_PAUSE _mm_pause()
  87. #else
  88. #if !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE
  89. #error __TBB_NO_X86_MSVC_INLINE_ASM_MSG
  90. #endif
  91. namespace tbb { namespace internal { namespace msvc_inline_asm
  92. static inline void pause (uintptr_t delay ) {
  93. _asm
  94. {
  95. mov __TBB_r(ax), delay
  96. __TBB_L1:
  97. pause
  98. add __TBB_r(ax), -1
  99. jne __TBB_L1
  100. }
  101. return;
  102. }
  103. }}}
  104. #define __TBB_Pause(V) tbb::internal::msvc_inline_asm::pause(V)
  105. #define __TBB_SINGLE_PAUSE __asm pause
  106. #endif
  107. #if (_MSC_VER>=1400 && !__INTEL_COMPILER) || (__INTEL_COMPILER>=1200)
  108. // MSVC did not have this intrinsic prior to VC8.
  109. // ICL 11.1 fails to compile a TBB example if __TBB_Log2 uses the intrinsic.
  110. #pragma intrinsic(__TBB_W(_BitScanReverse))
  111. namespace tbb { namespace internal { namespace msvc_intrinsics {
  112. static inline uintptr_t lg_bsr( uintptr_t i ){
  113. unsigned long j;
  114. __TBB_W(_BitScanReverse)( &j, i );
  115. return j;
  116. }
  117. }}}
  118. #define __TBB_Log2(V) tbb::internal::msvc_intrinsics::lg_bsr(V)
  119. #else
  120. #if !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE
  121. #error __TBB_NO_X86_MSVC_INLINE_ASM_MSG
  122. #endif
  123. namespace tbb { namespace internal { namespace msvc_inline_asm {
  124. static inline uintptr_t lg_bsr( uintptr_t i ){
  125. uintptr_t j;
  126. __asm
  127. {
  128. bsr __TBB_r(ax), i
  129. mov j, __TBB_r(ax)
  130. }
  131. return j;
  132. }
  133. }}}
  134. #define __TBB_Log2(V) tbb::internal::msvc_inline_asm::lg_bsr(V)
  135. #endif
  136. #if _MSC_VER>=1400
  137. #pragma intrinsic(__TBB_W(_InterlockedOr))
  138. #pragma intrinsic(__TBB_W(_InterlockedAnd))
  139. namespace tbb { namespace internal { namespace msvc_intrinsics {
  140. static inline void lock_or( volatile void *operand, intptr_t addend ){
  141. __TBB_W(_InterlockedOr)((volatile word*)operand, addend);
  142. }
  143. static inline void lock_and( volatile void *operand, intptr_t addend ){
  144. __TBB_W(_InterlockedAnd)((volatile word*)operand, addend);
  145. }
  146. }}}
  147. #define __TBB_AtomicOR(P,V) tbb::internal::msvc_intrinsics::lock_or(P,V)
  148. #define __TBB_AtomicAND(P,V) tbb::internal::msvc_intrinsics::lock_and(P,V)
  149. #else
  150. #if !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE
  151. #error __TBB_NO_X86_MSVC_INLINE_ASM_MSG
  152. #endif
  153. namespace tbb { namespace internal { namespace msvc_inline_asm {
  154. static inline void lock_or( volatile void *operand, __int32 addend ) {
  155. __asm
  156. {
  157. mov eax, addend
  158. mov edx, [operand]
  159. lock or [edx], eax
  160. }
  161. }
  162. static inline void lock_and( volatile void *operand, __int32 addend ) {
  163. __asm
  164. {
  165. mov eax, addend
  166. mov edx, [operand]
  167. lock and [edx], eax
  168. }
  169. }
  170. }}}
  171. #define __TBB_AtomicOR(P,V) tbb::internal::msvc_inline_asm::lock_or(P,V)
  172. #define __TBB_AtomicAND(P,V) tbb::internal::msvc_inline_asm::lock_and(P,V)
  173. #endif
  174. #pragma intrinsic(__rdtsc)
  175. namespace tbb { namespace internal { typedef uint64_t machine_tsc_t; } }
  176. static inline tbb::internal::machine_tsc_t __TBB_machine_time_stamp() {
  177. return __rdtsc();
  178. }
  179. #define __TBB_time_stamp() __TBB_machine_time_stamp()
  180. // API to retrieve/update FPU control setting
  181. #define __TBB_CPU_CTL_ENV_PRESENT 1
  182. namespace tbb { namespace internal { class cpu_ctl_env; } }
  183. #if __TBB_X86_MSVC_INLINE_ASM_AVAILABLE
  184. inline void __TBB_get_cpu_ctl_env ( tbb::internal::cpu_ctl_env* ctl ) {
  185. __asm {
  186. __asm mov __TBB_r(ax), ctl
  187. __asm stmxcsr [__TBB_r(ax)]
  188. __asm fstcw [__TBB_r(ax)+4]
  189. }
  190. }
  191. inline void __TBB_set_cpu_ctl_env ( const tbb::internal::cpu_ctl_env* ctl ) {
  192. __asm {
  193. __asm mov __TBB_r(ax), ctl
  194. __asm ldmxcsr [__TBB_r(ax)]
  195. __asm fldcw [__TBB_r(ax)+4]
  196. }
  197. }
  198. #else
  199. extern "C" {
  200. void __TBB_EXPORTED_FUNC __TBB_get_cpu_ctl_env ( tbb::internal::cpu_ctl_env* );
  201. void __TBB_EXPORTED_FUNC __TBB_set_cpu_ctl_env ( const tbb::internal::cpu_ctl_env* );
  202. }
  203. #endif
  204. namespace tbb {
  205. namespace internal {
  206. class cpu_ctl_env {
  207. private:
  208. int mxcsr;
  209. short x87cw;
  210. static const int MXCSR_CONTROL_MASK = ~0x3f; /* all except last six status bits */
  211. public:
  212. bool operator!=( const cpu_ctl_env& ctl ) const { return mxcsr != ctl.mxcsr || x87cw != ctl.x87cw; }
  213. void get_env() {
  214. __TBB_get_cpu_ctl_env( this );
  215. mxcsr &= MXCSR_CONTROL_MASK;
  216. }
  217. void set_env() const { __TBB_set_cpu_ctl_env( this ); }
  218. };
  219. } // namespace internal
  220. } // namespace tbb
  221. #if !__TBB_WIN8UI_SUPPORT
  222. extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
  223. #define __TBB_Yield() SwitchToThread()
  224. #else
  225. #include<thread>
  226. #define __TBB_Yield() std::this_thread::yield()
  227. #endif
  // Drop the helper macros so they do not leak out of this header.
  // __TBB_word is not defined in the visible part of this file; undefining
  // a name that is not a macro is a no-op, so that line is harmless.
  #undef __TBB_word
  #undef __TBB_W
  #undef __TBB_r
  231. extern "C" {
  232. __int8 __TBB_EXPORTED_FUNC __TBB_machine_try_lock_elided (volatile void* ptr);
  233. void __TBB_EXPORTED_FUNC __TBB_machine_unlock_elided (volatile void* ptr);
  234. // 'pause' instruction aborts HLE/RTM transactions
  235. inline static void __TBB_machine_try_lock_elided_cancel() { __TBB_SINGLE_PAUSE; }
  236. #if __TBB_TSX_INTRINSICS_PRESENT
  237. #define __TBB_machine_is_in_transaction _xtest
  238. #define __TBB_machine_begin_transaction _xbegin
  239. #define __TBB_machine_end_transaction _xend
  240. // The value (0xFF) below comes from the
  241. // Intel(R) 64 and IA-32 Architectures Optimization Reference Manual 12.4.5 lock not free
  242. #define __TBB_machine_transaction_conflict_abort() _xabort(0xFF)
  243. #else
  244. __int8 __TBB_EXPORTED_FUNC __TBB_machine_is_in_transaction();
  245. unsigned __int32 __TBB_EXPORTED_FUNC __TBB_machine_begin_transaction();
  246. void __TBB_EXPORTED_FUNC __TBB_machine_end_transaction();
  247. void __TBB_EXPORTED_FUNC __TBB_machine_transaction_conflict_abort();
  248. #endif /* __TBB_TSX_INTRINSICS_PRESENT */
  249. }