msvc_armv7.h 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167
  1. /*
  2. Copyright (c) 2005-2020 Intel Corporation
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. #if !defined(__TBB_machine_H) || defined(__TBB_msvc_armv7_H)
  14. #error Do not #include this internal file directly; use public TBB headers instead.
  15. #endif
  16. #define __TBB_msvc_armv7_H
  17. #include <intrin.h>
  18. #include <float.h>
  // Basic platform description: 4-byte machine word; endianness is reported
  // to the rest of the library as "unsupported".
  #define __TBB_WORDSIZE 4
  #define __TBB_ENDIANNESS __TBB_ENDIAN_UNSUPPORTED

  // The control-dependency helper is a compiler fence in every configuration.
  #define __TBB_control_consistency_helper() __TBB_compiler_fence()

  #ifdef TBB_WIN32_USE_CL_BUILTINS
      // Test configuration that can be built on _M_IX86: the compiler barrier
      // and the x86 mfence intrinsic stand in for the ARM barriers.
      #pragma intrinsic(_ReadWriteBarrier)
      #pragma intrinsic(_mm_mfence)
      #define __TBB_compiler_fence()             _ReadWriteBarrier()
      #define __TBB_full_memory_fence()          _mm_mfence()
      #define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
      #define __TBB_release_consistency_helper() __TBB_compiler_fence()
  #else
      // For now __dmb(_ARM_BARRIER_SY) serves as both the compiler fence and
      // the memory fence. This might be changed later after testing.
      #define __TBB_compiler_fence()             __dmb(_ARM_BARRIER_SY)
      #define __TBB_full_memory_fence()          __dmb(_ARM_BARRIER_SY)
      #define __TBB_acquire_consistency_helper() __TBB_full_memory_fence()
      #define __TBB_release_consistency_helper() __TBB_full_memory_fence()
  #endif
  39. //--------------------------------------------------
  40. // Compare and swap
  41. //--------------------------------------------------
  /**
   * Generator for a size-specific atomic compare-and-swap function.
   *
   * Expands to `inline T __TBB_machine_cmpswp<S>(volatile void*, T, T)`, which
   * forwards to the `_InterlockedCompareExchange<F>` intrinsic:
   * if *ptr==comparand, then *ptr=value.
   *
   * @param S suffix of the generated function name (operand size in bytes)
   * @param T operand type handed to the intrinsic
   * @param F suffix selecting the intrinsic variant (empty for the `long` form)
   *
   * Generated function:
   * @param ptr pointer to value in memory to be swapped with value if *ptr==comparand
   * @param value value to assign *ptr to if *ptr==comparand
   * @param comparand value to compare with *ptr
   * @return value originally in memory at ptr, regardless of success
   *
   * NOTE: the last line of the macro must not end with a line continuation,
   * otherwise whatever follows the definition is spliced into the macro body.
   */
  #define __TBB_MACHINE_DEFINE_ATOMICS_CMPSWP(S,T,F)                                              \
  inline T __TBB_machine_cmpswp##S( volatile void *ptr, T value, T comparand ) {                  \
      return _InterlockedCompareExchange##F(reinterpret_cast<volatile T *>(ptr),value,comparand); \
  }

  /**
   * Generator for a size-specific atomic fetch-and-add function.
   *
   * Expands to `inline T __TBB_machine_fetchadd<S>(volatile void*, T)`, which
   * forwards to the `_InterlockedExchangeAdd<F>` intrinsic and returns the
   * intrinsic's result.
   *
   * @param S suffix of the generated function name (operand size in bytes)
   * @param T operand type handed to the intrinsic
   * @param F suffix selecting the intrinsic variant (empty for the `long` form)
   */
  #define __TBB_MACHINE_DEFINE_ATOMICS_FETCHADD(S,T,F)                                            \
  inline T __TBB_machine_fetchadd##S( volatile void *ptr, T value ) {                             \
      return _InterlockedExchangeAdd##F(reinterpret_cast<volatile T *>(ptr),value);               \
  }
  57. __TBB_MACHINE_DEFINE_ATOMICS_CMPSWP(1,char,8)
  58. __TBB_MACHINE_DEFINE_ATOMICS_CMPSWP(2,short,16)
  59. __TBB_MACHINE_DEFINE_ATOMICS_CMPSWP(4,long,)
  60. __TBB_MACHINE_DEFINE_ATOMICS_CMPSWP(8,__int64,64)
  61. __TBB_MACHINE_DEFINE_ATOMICS_FETCHADD(4,long,)
  62. #if defined(TBB_WIN32_USE_CL_BUILTINS)
  63. // No _InterlockedExchangeAdd64 intrinsic on _M_IX86
  64. #define __TBB_64BIT_ATOMICS 0
  65. #else
  66. __TBB_MACHINE_DEFINE_ATOMICS_FETCHADD(8,__int64,64)
  67. #endif
  68. inline void __TBB_machine_pause (int32_t delay )
  69. {
  70. while(delay>0)
  71. {
  72. __TBB_compiler_fence();
  73. delay--;
  74. }
  75. }
  76. // API to retrieve/update FPU control setting
  77. #define __TBB_CPU_CTL_ENV_PRESENT 1
  78. namespace tbb {
  79. namespace internal {
  80. template <typename T, size_t S>
  81. struct machine_load_store_relaxed {
  82. static inline T load ( const volatile T& location ) {
  83. const T value = location;
  84. /*
  85. * An extra memory barrier is required for errata #761319
  86. * Please see http://infocenter.arm.com/help/topic/com.arm.doc.uan0004a
  87. */
  88. __TBB_acquire_consistency_helper();
  89. return value;
  90. }
  91. static inline void store ( volatile T& location, T value ) {
  92. location = value;
  93. }
  94. };
  95. class cpu_ctl_env {
  96. private:
  97. unsigned int my_ctl;
  98. public:
  99. bool operator!=( const cpu_ctl_env& ctl ) const { return my_ctl != ctl.my_ctl; }
  100. void get_env() { my_ctl = _control87(0, 0); }
  101. void set_env() const { _control87( my_ctl, ~0U ); }
  102. };
  103. } // namespace internal
  104. } // namespaces tbb
  // Map the generic TBB entry points onto the machine-specific functions.
  #define __TBB_Pause(V)               __TBB_machine_pause(V)
  #define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C)
  #define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C)

  // Everything below is left to the generic implementations.
  // Load/store flavours:
  #define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE            1
  #define __TBB_USE_GENERIC_DWORD_LOAD_STORE                  1
  #define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
  #define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE           1
  // Fetch-and-store / part-word operations:
  #define __TBB_USE_GENERIC_FETCH_STORE                       1
  #define __TBB_USE_GENERIC_PART_WORD_FETCH_STORE             1
  #define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD               1
  // Select the yield primitive:
  //  - regular build: the __yield() intrinsic;
  //  - TBB_WIN32_USE_CL_BUILTINS with __TBB_WIN8UI_SUPPORT: std::this_thread::yield();
  //  - TBB_WIN32_USE_CL_BUILTINS otherwise: SwitchToThread(), declared here.
  #ifndef TBB_WIN32_USE_CL_BUILTINS
      #define __TBB_Yield() __yield()
  #elif __TBB_WIN8UI_SUPPORT
      #include<thread>
      #define __TBB_Yield() std::this_thread::yield()
  #else
      extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
      #define __TBB_Yield() SwitchToThread()
  #endif
  128. // Machine specific atomic operations
  129. #define __TBB_AtomicOR(P,V) __TBB_machine_OR(P,V)
  130. #define __TBB_AtomicAND(P,V) __TBB_machine_AND(P,V)
  131. template <typename T1,typename T2>
  132. inline void __TBB_machine_OR( T1 *operand, T2 addend ) {
  133. _InterlockedOr((long volatile *)operand, (long)addend);
  134. }
  135. template <typename T1,typename T2>
  136. inline void __TBB_machine_AND( T1 *operand, T2 addend ) {
  137. _InterlockedAnd((long volatile *)operand, (long)addend);
  138. }