gcc_arm.h 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216
  1. /*
  2. Copyright (c) 2005-2020 Intel Corporation
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  /*
      Platform isolation layer for the ARMv7-a architecture.
      ARMv8-a (AArch64) builds are redirected to gcc_generic.h.
  */
  16. #ifndef __TBB_machine_H
  17. #error Do not include this file directly; include tbb_machine.h instead
  18. #endif
  19. #if __ARM_ARCH_7A__
  20. #include <sys/param.h>
  21. #include <unistd.h>
  22. #define __TBB_WORDSIZE 4
  // ARM is traditionally little-endian, but both byte orders are detected here.
  // Only the layout of aligned 32-bit words matters for this setting, so any
  // apparent PDP-endianness of 32-bit words at half-word alignment, and any
  // little-endian ordering of big-endian 32-bit words inside 64-bit quantities,
  // can be disregarded.
  #if __BIG_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
  #   define __TBB_ENDIANNESS __TBB_ENDIAN_BIG
  #elif __LITTLE_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
  #   define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
  #elif defined(__BYTE_ORDER__)
      // The compiler reports a byte order that is neither big nor little.
  #   define __TBB_ENDIANNESS __TBB_ENDIAN_UNSUPPORTED
  #else
      // No compile-time information is available.
  #   define __TBB_ENDIANNESS __TBB_ENDIAN_DETECT
  #endif
  // Compiler-level barrier: an empty asm statement with a "memory" clobber.
  #define __TBB_compiler_fence() __asm__ __volatile__("" : : : "memory")

  // Hardware barrier: issues "dmb ish", also with a "memory" clobber.
  #define __TBB_full_memory_fence() __asm__ __volatile__("dmb ish" : : : "memory")

  // All three consistency helpers map onto the full fence above.
  #define __TBB_acquire_consistency_helper() __TBB_full_memory_fence()
  #define __TBB_release_consistency_helper() __TBB_full_memory_fence()
  #define __TBB_control_consistency_helper() __TBB_full_memory_fence()
  42. //--------------------------------------------------
  43. // Compare and swap
  44. //--------------------------------------------------
  45. /**
  46. * Atomic CAS for 32 bit values, if *ptr==comparand, then *ptr=value, returns *ptr
  47. * @param ptr pointer to value in memory to be swapped with value if *ptr==comparand
  48. * @param value value to assign *ptr to if *ptr==comparand
  49. * @param comparand value to compare with *ptr
  50. * @return value originally in memory at ptr, regardless of success
  51. */
  52. static inline int32_t __TBB_machine_cmpswp4(volatile void *ptr, int32_t value, int32_t comparand )
  53. {
  54. int32_t oldval, res;
  55. __TBB_full_memory_fence();
  56. do {
  57. __asm__ __volatile__(
  58. "ldrex %1, [%3]\n"
  59. "mov %0, #0\n"
  60. "cmp %1, %4\n"
  61. "it eq\n"
  62. "strexeq %0, %5, [%3]\n"
  63. : "=&r" (res), "=&r" (oldval), "+Qo" (*(volatile int32_t*)ptr)
  64. : "r" ((volatile int32_t *)ptr), "Ir" (comparand), "r" (value)
  65. : "cc");
  66. } while (res);
  67. __TBB_full_memory_fence();
  68. return oldval;
  69. }
  70. /**
  71. * Atomic CAS for 64 bit values, if *ptr==comparand, then *ptr=value, returns *ptr
  72. * @param ptr pointer to value in memory to be swapped with value if *ptr==comparand
  73. * @param value value to assign *ptr to if *ptr==comparand
  74. * @param comparand value to compare with *ptr
  75. * @return value originally in memory at ptr, regardless of success
  76. */
  77. static inline int64_t __TBB_machine_cmpswp8(volatile void *ptr, int64_t value, int64_t comparand )
  78. {
  79. int64_t oldval;
  80. int32_t res;
  81. __TBB_full_memory_fence();
  82. do {
  83. __asm__ __volatile__(
  84. "mov %0, #0\n"
  85. "ldrexd %1, %H1, [%3]\n"
  86. "cmp %1, %4\n"
  87. "it eq\n"
  88. "cmpeq %H1, %H4\n"
  89. "it eq\n"
  90. "strexdeq %0, %5, %H5, [%3]"
  91. : "=&r" (res), "=&r" (oldval), "+Qo" (*(volatile int64_t*)ptr)
  92. : "r" ((volatile int64_t *)ptr), "r" (comparand), "r" (value)
  93. : "cc");
  94. } while (res);
  95. __TBB_full_memory_fence();
  96. return oldval;
  97. }
  98. static inline int32_t __TBB_machine_fetchadd4(volatile void* ptr, int32_t addend)
  99. {
  100. unsigned long tmp;
  101. int32_t result, tmp2;
  102. __TBB_full_memory_fence();
  103. __asm__ __volatile__(
  104. "1: ldrex %0, [%4]\n"
  105. " add %3, %0, %5\n"
  106. " strex %1, %3, [%4]\n"
  107. " cmp %1, #0\n"
  108. " bne 1b\n"
  109. : "=&r" (result), "=&r" (tmp), "+Qo" (*(volatile int32_t*)ptr), "=&r"(tmp2)
  110. : "r" ((volatile int32_t *)ptr), "Ir" (addend)
  111. : "cc");
  112. __TBB_full_memory_fence();
  113. return result;
  114. }
  115. static inline int64_t __TBB_machine_fetchadd8(volatile void *ptr, int64_t addend)
  116. {
  117. unsigned long tmp;
  118. int64_t result, tmp2;
  119. __TBB_full_memory_fence();
  120. __asm__ __volatile__(
  121. "1: ldrexd %0, %H0, [%4]\n"
  122. " adds %3, %0, %5\n"
  123. " adc %H3, %H0, %H5\n"
  124. " strexd %1, %3, %H3, [%4]\n"
  125. " cmp %1, #0\n"
  126. " bne 1b"
  127. : "=&r" (result), "=&r" (tmp), "+Qo" (*(volatile int64_t*)ptr), "=&r"(tmp2)
  128. : "r" ((volatile int64_t *)ptr), "r" (addend)
  129. : "cc");
  130. __TBB_full_memory_fence();
  131. return result;
  132. }
  133. namespace tbb {
  134. namespace internal {
  135. template <typename T, size_t S>
  136. struct machine_load_store_relaxed {
  137. static inline T load ( const volatile T& location ) {
  138. const T value = location;
  139. /*
  140. * An extra memory barrier is required for errata #761319
  141. * Please see http://infocenter.arm.com/help/topic/com.arm.doc.uan0004a
  142. */
  143. __TBB_acquire_consistency_helper();
  144. return value;
  145. }
  146. static inline void store ( volatile T& location, T value ) {
  147. location = value;
  148. }
  149. };
  150. }} // namespaces internal, tbb
  // Compare-and-swap is provided by the machine-specific routines in this file.
  #define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C)
  #define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C)

  // Everything below is requested from the generic implementations instead.
  // Part-word operations:
  #define __TBB_USE_GENERIC_PART_WORD_CAS                     1
  #define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD               1
  #define __TBB_USE_GENERIC_PART_WORD_FETCH_STORE             1
  // Full-word fetch-and-store:
  #define __TBB_USE_GENERIC_FETCH_STORE                       1
  // Load/store flavours:
  #define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE            1
  #define __TBB_USE_GENERIC_DWORD_LOAD_STORE                  1
  #define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
  162. #elif defined __aarch64__
  163. // Generic gcc implementations are fine for ARMv8-a except __TBB_PAUSE.
  164. #include "gcc_generic.h"
  165. #else
  166. #error compilation requires an ARMv7-a or ARMv8-a architecture.
  167. #endif // __ARM_ARCH_7A__
  168. inline void __TBB_machine_pause (int32_t delay)
  169. {
  170. while(delay>0)
  171. {
  172. __asm__ __volatile__("yield" ::: "memory");
  173. delay--;
  174. }
  175. }
  176. #define __TBB_Pause(V) __TBB_machine_pause(V)