fma4intrin.h 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236
  1. /*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------===
  2. *
  3. * Permission is hereby granted, free of charge, to any person obtaining a copy
  4. * of this software and associated documentation files (the "Software"), to deal
  5. * in the Software without restriction, including without limitation the rights
  6. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  7. * copies of the Software, and to permit persons to whom the Software is
  8. * furnished to do so, subject to the following conditions:
  9. *
  10. * The above copyright notice and this permission notice shall be included in
  11. * all copies or substantial portions of the Software.
  12. *
  13. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  18. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  19. * THE SOFTWARE.
  20. *
  21. *===-----------------------------------------------------------------------===
  22. */
  23. #ifndef __X86INTRIN_H
  24. #error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead."
  25. #endif
  26. #ifndef __FMA4INTRIN_H
  27. #define __FMA4INTRIN_H
  28. #ifndef __FMA4__
  29. # error "FMA4 instruction set is not enabled"
  30. #else
  31. #include <pmmintrin.h>
  /* Attribute list attached to every intrinsic definition in this header:
   * __always_inline__ together with __nodebug__.  The macro is local to this
   * file and is #undef'd again at the end of the header.
   */
  #define __DEFAULT_FN_ATTRS \
    __attribute__((__always_inline__, __nodebug__))
  34. static __inline__ __m128 __DEFAULT_FN_ATTRS
  35. _mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)
  36. {
  37. return (__m128)__builtin_ia32_vfmaddps(__A, __B, __C);
  38. }
  39. static __inline__ __m128d __DEFAULT_FN_ATTRS
  40. _mm_macc_pd(__m128d __A, __m128d __B, __m128d __C)
  41. {
  42. return (__m128d)__builtin_ia32_vfmaddpd(__A, __B, __C);
  43. }
  44. static __inline__ __m128 __DEFAULT_FN_ATTRS
  45. _mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)
  46. {
  47. return (__m128)__builtin_ia32_vfmaddss(__A, __B, __C);
  48. }
  49. static __inline__ __m128d __DEFAULT_FN_ATTRS
  50. _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)
  51. {
  52. return (__m128d)__builtin_ia32_vfmaddsd(__A, __B, __C);
  53. }
  54. static __inline__ __m128 __DEFAULT_FN_ATTRS
  55. _mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
  56. {
  57. return (__m128)__builtin_ia32_vfmsubps(__A, __B, __C);
  58. }
  59. static __inline__ __m128d __DEFAULT_FN_ATTRS
  60. _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)
  61. {
  62. return (__m128d)__builtin_ia32_vfmsubpd(__A, __B, __C);
  63. }
  64. static __inline__ __m128 __DEFAULT_FN_ATTRS
  65. _mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)
  66. {
  67. return (__m128)__builtin_ia32_vfmsubss(__A, __B, __C);
  68. }
  69. static __inline__ __m128d __DEFAULT_FN_ATTRS
  70. _mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
  71. {
  72. return (__m128d)__builtin_ia32_vfmsubsd(__A, __B, __C);
  73. }
  74. static __inline__ __m128 __DEFAULT_FN_ATTRS
  75. _mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)
  76. {
  77. return (__m128)__builtin_ia32_vfnmaddps(__A, __B, __C);
  78. }
  79. static __inline__ __m128d __DEFAULT_FN_ATTRS
  80. _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)
  81. {
  82. return (__m128d)__builtin_ia32_vfnmaddpd(__A, __B, __C);
  83. }
  84. static __inline__ __m128 __DEFAULT_FN_ATTRS
  85. _mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)
  86. {
  87. return (__m128)__builtin_ia32_vfnmaddss(__A, __B, __C);
  88. }
  89. static __inline__ __m128d __DEFAULT_FN_ATTRS
  90. _mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
  91. {
  92. return (__m128d)__builtin_ia32_vfnmaddsd(__A, __B, __C);
  93. }
  94. static __inline__ __m128 __DEFAULT_FN_ATTRS
  95. _mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)
  96. {
  97. return (__m128)__builtin_ia32_vfnmsubps(__A, __B, __C);
  98. }
  99. static __inline__ __m128d __DEFAULT_FN_ATTRS
  100. _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)
  101. {
  102. return (__m128d)__builtin_ia32_vfnmsubpd(__A, __B, __C);
  103. }
  104. static __inline__ __m128 __DEFAULT_FN_ATTRS
  105. _mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)
  106. {
  107. return (__m128)__builtin_ia32_vfnmsubss(__A, __B, __C);
  108. }
  109. static __inline__ __m128d __DEFAULT_FN_ATTRS
  110. _mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)
  111. {
  112. return (__m128d)__builtin_ia32_vfnmsubsd(__A, __B, __C);
  113. }
  114. static __inline__ __m128 __DEFAULT_FN_ATTRS
  115. _mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C)
  116. {
  117. return (__m128)__builtin_ia32_vfmaddsubps(__A, __B, __C);
  118. }
  119. static __inline__ __m128d __DEFAULT_FN_ATTRS
  120. _mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C)
  121. {
  122. return (__m128d)__builtin_ia32_vfmaddsubpd(__A, __B, __C);
  123. }
  124. static __inline__ __m128 __DEFAULT_FN_ATTRS
  125. _mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C)
  126. {
  127. return (__m128)__builtin_ia32_vfmsubaddps(__A, __B, __C);
  128. }
  129. static __inline__ __m128d __DEFAULT_FN_ATTRS
  130. _mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C)
  131. {
  132. return (__m128d)__builtin_ia32_vfmsubaddpd(__A, __B, __C);
  133. }
  134. static __inline__ __m256 __DEFAULT_FN_ATTRS
  135. _mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C)
  136. {
  137. return (__m256)__builtin_ia32_vfmaddps256(__A, __B, __C);
  138. }
  139. static __inline__ __m256d __DEFAULT_FN_ATTRS
  140. _mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C)
  141. {
  142. return (__m256d)__builtin_ia32_vfmaddpd256(__A, __B, __C);
  143. }
  144. static __inline__ __m256 __DEFAULT_FN_ATTRS
  145. _mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)
  146. {
  147. return (__m256)__builtin_ia32_vfmsubps256(__A, __B, __C);
  148. }
  149. static __inline__ __m256d __DEFAULT_FN_ATTRS
  150. _mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C)
  151. {
  152. return (__m256d)__builtin_ia32_vfmsubpd256(__A, __B, __C);
  153. }
  154. static __inline__ __m256 __DEFAULT_FN_ATTRS
  155. _mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C)
  156. {
  157. return (__m256)__builtin_ia32_vfnmaddps256(__A, __B, __C);
  158. }
  159. static __inline__ __m256d __DEFAULT_FN_ATTRS
  160. _mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C)
  161. {
  162. return (__m256d)__builtin_ia32_vfnmaddpd256(__A, __B, __C);
  163. }
  164. static __inline__ __m256 __DEFAULT_FN_ATTRS
  165. _mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C)
  166. {
  167. return (__m256)__builtin_ia32_vfnmsubps256(__A, __B, __C);
  168. }
  169. static __inline__ __m256d __DEFAULT_FN_ATTRS
  170. _mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C)
  171. {
  172. return (__m256d)__builtin_ia32_vfnmsubpd256(__A, __B, __C);
  173. }
  174. static __inline__ __m256 __DEFAULT_FN_ATTRS
  175. _mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C)
  176. {
  177. return (__m256)__builtin_ia32_vfmaddsubps256(__A, __B, __C);
  178. }
  179. static __inline__ __m256d __DEFAULT_FN_ATTRS
  180. _mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C)
  181. {
  182. return (__m256d)__builtin_ia32_vfmaddsubpd256(__A, __B, __C);
  183. }
  184. static __inline__ __m256 __DEFAULT_FN_ATTRS
  185. _mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C)
  186. {
  187. return (__m256)__builtin_ia32_vfmsubaddps256(__A, __B, __C);
  188. }
  189. static __inline__ __m256d __DEFAULT_FN_ATTRS
  190. _mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C)
  191. {
  192. return (__m256d)__builtin_ia32_vfmsubaddpd256(__A, __B, __C);
  193. }
  194. #undef __DEFAULT_FN_ATTRS
  195. #endif /* __FMA4__ */
  196. #endif /* __FMA4INTRIN_H */