math_sycl.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "../sys/platform.h"
  5. #include "../sys/intrinsics.h"
  6. #include "constants.h"
  7. #include <cmath>
  8. namespace embree
  9. {
  10. __forceinline bool isvalid ( const float& v ) {
  11. return (v > -FLT_LARGE) & (v < +FLT_LARGE);
  12. }
  /*! Reinterprets the bit pattern of a float as an int (no numeric conversion). */
  __forceinline int cast_f2i(float f)
  {
    const int bits = __builtin_bit_cast(int, f);
    return bits;
  }
  /*! Reinterprets the bit pattern of an int as a float (no numeric conversion). */
  __forceinline float cast_i2f(int i)
  {
    const float value = __builtin_bit_cast(float, i);
    return value;
  }
  /*! Numeric conversion float -> int (truncates toward zero). */
  __forceinline int toInt (const float& a)
  {
    return static_cast<int>(a);
  }

  /*! Numeric conversion int -> float. */
  __forceinline float toFloat(const int& a)
  {
    return static_cast<float>(a);
  }
  /*! Bit-level reinterpretation of an int as a float. */
  __forceinline float asFloat(const int a)
  {
    const float reinterpreted = __builtin_bit_cast(float, a);
    return reinterpreted;
  }

  /*! Bit-level reinterpretation of a float as an int. */
  __forceinline int asInt (const float a)
  {
    const int reinterpreted = __builtin_bit_cast(int, a);
    return reinterpreted;
  }
  23. //__forceinline bool finite ( const float x ) { return _finite(x) != 0; }
  /*! Returns -1.0f for x < 0 and +1.0f otherwise (including x == 0). */
  __forceinline float sign ( const float x )
  {
    if (x < 0)
      return -1.0f;
    return 1.0f;
  }

  /*! Returns the square of x. */
  __forceinline float sqr ( const float x )
  {
    const float squared = x * x;
    return squared;
  }
  26. __forceinline float rcp ( const float x ) {
  27. return sycl::native::recip(x);
  28. }
  /*! Returns a float whose bit pattern contains only the sign bit (bit 31) of a. */
  __forceinline float signmsk(const float a)
  {
    const int      rawBits  = __builtin_bit_cast(int, a);
    const unsigned signOnly = rawBits & 0x80000000;   // int & unsigned -> unsigned, as before
    const int      signBits = signOnly;               // same conversion the asFloat(int) call performed
    return __builtin_bit_cast(float, signBits);
  }
  30. //__forceinline float signmsk ( const float x ) {
  31. // return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(0x80000000))));
  32. //}
  33. //__forceinline float xorf( const float x, const float y ) {
  34. // return _mm_cvtss_f32(_mm_xor_ps(_mm_set_ss(x),_mm_set_ss(y)));
  35. //}
  36. //__forceinline float andf( const float x, const unsigned y ) {
  37. // return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(y))));
  38. //}
  39. __forceinline float rsqrt( const float x ) {
  40. return sycl::rsqrt(x);
  41. }
  42. //__forceinline float nextafter(float x, float y) { if ((x<y) == (x>0)) return x*(1.1f+float(ulp)); else return x*(0.9f-float(ulp)); }
  43. //__forceinline double nextafter(double x, double y) { return _nextafter(x, y); }
  44. //__forceinline int roundf(float f) { return (int)(f + 0.5f); }
  45. __forceinline float abs ( const float x ) { return sycl::fabs(x); }
  46. __forceinline float acos ( const float x ) { return sycl::acos(x); }
  47. __forceinline float asin ( const float x ) { return sycl::asin(x); }
  48. __forceinline float atan ( const float x ) { return sycl::atan(x); }
  49. __forceinline float atan2( const float y, const float x ) { return sycl::atan2(y, x); }
  50. __forceinline float cos ( const float x ) { return sycl::cos(x); }
  51. __forceinline float cosh ( const float x ) { return sycl::cosh(x); }
  52. __forceinline float exp ( const float x ) { return sycl::exp(x); }
  53. __forceinline float fmod ( const float x, const float y ) { return sycl::fmod(x, y); }
  54. __forceinline float log ( const float x ) { return sycl::log(x); }
  55. __forceinline float log10( const float x ) { return sycl::log10(x); }
  56. __forceinline float pow ( const float x, const float y ) { return sycl::pow(x, y); }
  57. __forceinline float sin ( const float x ) { return sycl::sin(x); }
  58. __forceinline float sinh ( const float x ) { return sycl::sinh(x); }
  59. __forceinline float sqrt ( const float x ) { return sycl::sqrt(x); }
  60. __forceinline float tan ( const float x ) { return sycl::tan(x); }
  61. __forceinline float tanh ( const float x ) { return sycl::tanh(x); }
  62. __forceinline float floor( const float x ) { return sycl::floor(x); }
  63. __forceinline float ceil ( const float x ) { return sycl::ceil(x); }
  64. __forceinline float frac ( const float x ) { return x-floor(x); }
  65. //__forceinline double abs ( const double x ) { return ::fabs(x); }
  66. //__forceinline double sign ( const double x ) { return x<0?-1.0:1.0; }
  67. //__forceinline double acos ( const double x ) { return ::acos (x); }
  68. //__forceinline double asin ( const double x ) { return ::asin (x); }
  69. //__forceinline double atan ( const double x ) { return ::atan (x); }
  70. //__forceinline double atan2( const double y, const double x ) { return ::atan2(y, x); }
  71. //__forceinline double cos ( const double x ) { return ::cos (x); }
  72. //__forceinline double cosh ( const double x ) { return ::cosh (x); }
  73. //__forceinline double exp ( const double x ) { return ::exp (x); }
  74. //__forceinline double fmod ( const double x, const double y ) { return ::fmod (x, y); }
  75. //__forceinline double log ( const double x ) { return ::log (x); }
  76. //__forceinline double log10( const double x ) { return ::log10(x); }
  77. //__forceinline double pow ( const double x, const double y ) { return ::pow (x, y); }
  78. //__forceinline double rcp ( const double x ) { return 1.0/x; }
  79. //__forceinline double rsqrt( const double x ) { return 1.0/::sqrt(x); }
  80. //__forceinline double sin ( const double x ) { return ::sin (x); }
  81. //__forceinline double sinh ( const double x ) { return ::sinh (x); }
  82. //__forceinline double sqr ( const double x ) { return x*x; }
  83. //__forceinline double sqrt ( const double x ) { return ::sqrt (x); }
  84. //__forceinline double tan ( const double x ) { return ::tan (x); }
  85. //__forceinline double tanh ( const double x ) { return ::tanh (x); }
  86. //__forceinline double floor( const double x ) { return ::floor (x); }
  87. //__forceinline double ceil ( const double x ) { return ::ceil (x); }
  88. /*
  89. #if defined(__SSE4_1__)
  90. __forceinline float mini(float a, float b) {
  91. const __m128i ai = _mm_castps_si128(_mm_set_ss(a));
  92. const __m128i bi = _mm_castps_si128(_mm_set_ss(b));
  93. const __m128i ci = _mm_min_epi32(ai,bi);
  94. return _mm_cvtss_f32(_mm_castsi128_ps(ci));
  95. }
  96. #endif
  97. #if defined(__SSE4_1__)
  98. __forceinline float maxi(float a, float b) {
  99. const __m128i ai = _mm_castps_si128(_mm_set_ss(a));
  100. const __m128i bi = _mm_castps_si128(_mm_set_ss(b));
  101. const __m128i ci = _mm_max_epi32(ai,bi);
  102. return _mm_cvtss_f32(_mm_castsi128_ps(ci));
  103. }
  104. #endif
  105. */
  /*! Returns a+a (uses only operator+ of T). */
  template<typename T>
  __forceinline T twice(const T& a)
  {
    return a + a;
  }
  108. __forceinline int min(int a, int b) { return sycl::min(a,b); }
  109. __forceinline unsigned min(unsigned a, unsigned b) { return sycl::min(a,b); }
  110. __forceinline int64_t min(int64_t a, int64_t b) { return sycl::min(a,b); }
  111. __forceinline float min(float a, float b) { return sycl::fmin(a,b); }
  112. __forceinline double min(double a, double b) { return sycl::fmin(a,b); }
  113. #if defined(__X86_64__)
  114. __forceinline size_t min(size_t a, size_t b) { return sycl::min(a,b); }
  115. #endif
  116. template<typename T> __forceinline T min(const T& a, const T& b, const T& c) { return min(min(a,b),c); }
  117. template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d) { return min(min(a,b),min(c,d)); }
  118. template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d, const T& e) { return min(min(min(a,b),min(c,d)),e); }
  119. // template<typename T> __forceinline T mini(const T& a, const T& b, const T& c) { return mini(mini(a,b),c); }
  120. // template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d) { return mini(mini(a,b),mini(c,d)); }
  121. // template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d, const T& e) { return mini(mini(mini(a,b),mini(c,d)),e); }
  122. __forceinline int max(int a, int b) { return sycl::max(a,b); }
  123. __forceinline unsigned max(unsigned a, unsigned b) { return sycl::max(a,b); }
  124. __forceinline int64_t max(int64_t a, int64_t b) { return sycl::max(a,b); }
  125. __forceinline float max(float a, float b) { return sycl::fmax(a,b); }
  126. __forceinline double max(double a, double b) { return sycl::fmax(a,b); }
  127. #if defined(__X86_64__)
  128. __forceinline size_t max(size_t a, size_t b) { return sycl::max(a,b); }
  129. #endif
  130. template<typename T> __forceinline T max(const T& a, const T& b, const T& c) { return max(max(a,b),c); }
  131. template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d) { return max(max(a,b),max(c,d)); }
  132. template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d, const T& e) { return max(max(max(a,b),max(c,d)),e); }
  133. // template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c) { return maxi(maxi(a,b),c); }
  134. // template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d) { return maxi(maxi(a,b),maxi(c,d)); }
  135. // template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d, const T& e) { return maxi(maxi(maxi(a,b),maxi(c,d)),e); }
  136. template<typename T> __forceinline T clamp(const T& x, const T& lower = T(zero), const T& upper = T(one)) { return max(min(x,upper),lower); }
  137. template<typename T> __forceinline T clampz(const T& x, const T& upper) { return max(T(zero), min(x,upper)); }
  /*! Converts an angle from degrees to radians (multiplies by pi/180). */
  template<typename T>
  __forceinline T deg2rad ( const T& x )
  {
    const T radiansPerDegree = T(1.74532925199432957692e-2f);
    return x * radiansPerDegree;
  }

  /*! Converts an angle from radians to degrees (multiplies by 180/pi). */
  template<typename T>
  __forceinline T rad2deg ( const T& x )
  {
    const T degreesPerRadian = T(5.72957795130823208768e1f);
    return x * degreesPerRadian;
  }
  140. template<typename T> __forceinline T sin2cos ( const T& x ) { return sqrt(max(T(zero),T(one)-x*x)); }
  141. template<typename T> __forceinline T cos2sin ( const T& x ) { return sin2cos(x); }
  142. __forceinline float madd ( const float a, const float b, const float c) { return +sycl::fma(+a,b,+c); }
  143. __forceinline float msub ( const float a, const float b, const float c) { return +sycl::fma(+a,b,-c); }
  144. __forceinline float nmadd ( const float a, const float b, const float c) { return +sycl::fma(-a,b,+c); }
  145. __forceinline float nmsub ( const float a, const float b, const float c) { return -sycl::fma(+a,b,+c); }
  146. /*! random functions */
  147. /*
  148. template<typename T> T random() { return T(0); }
  149. template<> __forceinline int random() { return int(rand()); }
  150. template<> __forceinline uint32_t random() { return uint32_t(rand()) ^ (uint32_t(rand()) << 16); }
  151. template<> __forceinline float random() { return rand()/float(RAND_MAX); }
  152. template<> __forceinline double random() { return rand()/double(RAND_MAX); }
  153. */
  /*! selects: return t when s is true, otherwise f */
  __forceinline bool select(bool s, bool t , bool f)
  {
    if (s) return t;
    return f;
  }

  __forceinline int select(bool s, int t, int f)
  {
    if (s) return t;
    return f;
  }

  __forceinline float select(bool s, float t, float f)
  {
    if (s) return t;
    return f;
  }

  /*! scalar mask reductions: with a single lane, all/any are the value itself
   *  and none is its negation */
  __forceinline bool none(bool s)
  {
    return !s;
  }

  __forceinline bool all (bool s)
  {
    return s;
  }

  __forceinline bool any (bool s)
  {
    return s;
  }

  /*! converts the scalar mask to an unsigned value (1 for true, 0 for false) */
  __forceinline unsigned movemask (bool s)
  {
    return static_cast<unsigned>(s);
  }
  162. __forceinline float lerp(const float v0, const float v1, const float t) {
  163. return madd(1.0f-t,v0,t*v1);
  164. }
  165. template<typename T>
  166. __forceinline T lerp2(const float x0, const float x1, const float x2, const float x3, const T& u, const T& v) {
  167. return madd((1.0f-u),madd((1.0f-v),T(x0),v*T(x2)),u*madd((1.0f-v),T(x1),v*T(x3)));
  168. }
  /*! exchange: swaps the values of a and b through a temporary copy */
  template<typename T>
  __forceinline void xchg ( T& a, T& b )
  {
    const T saved = a;
    a = b;
    b = saved;
  }
  /* load/store */
  template<typename Ty> struct mem;

  /*! Masked scalar float memory access. A false mask makes loads return 0.0f
   *  and stores do nothing; the pointer is only dereferenced when mask is true.
   *  The aligned and unaligned variants are implemented identically. */
  template<> struct mem<float>
  {
    static __forceinline float load (bool mask, const void* ptr)
    {
      if (!mask) return 0.0f;
      return *static_cast<const float*>(ptr);
    }

    static __forceinline float loadu(bool mask, const void* ptr)
    {
      if (!mask) return 0.0f;
      return *static_cast<const float*>(ptr);
    }

    static __forceinline void store (bool mask, void* ptr, const float v)
    {
      if (!mask) return;
      *static_cast<float*>(ptr) = v;
    }

    static __forceinline void storeu(bool mask, void* ptr, const float v)
    {
      if (!mask) return;
      *static_cast<float*>(ptr) = v;
    }
  };
  /*! bit reverse operation
   *
   *  Reverses the order of the 32 bits of vin (bit 0 <-> bit 31, bit 1 <-> bit 30, ...)
   *  by swapping progressively larger groups: single bits, pairs, nibbles,
   *  bytes and finally the two 16-bit halves. The masks are 32-bit constants,
   *  so T is expected to be a 32-bit integer (or a type with equivalent
   *  >>, <<, & and | operators).
   */
  template<class T>
  __forceinline T bitReverse(const T& vin)
  {
    T v = vin;
    v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);  // swap adjacent bits
    v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);  // swap bit pairs
    v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);  // swap nibbles
    v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);  // swap bytes
    // Swap the 16-bit halves. The down-shifted half must be masked like in the
    // steps above: for a signed T the right shift sign-extends, which would
    // otherwise set the upper 16 result bits whenever bit 31 is set here.
    v = ((v >> 16) & 0x0000FFFF) | (v << 16);
    return v;
  }
  /*! bit interleave operation
   *
   *  Interleaves the low 10 bits of x, y and z into a 30-bit code: bit i of x
   *  lands on result bit 3*i, bit i of y on 3*i+1 and bit i of z on 3*i+2.
   *  Input bits above bit 9 are ignored.
   */
  template<class T>
  __forceinline T bitInterleave(const T& xin, const T& yin, const T& zin)
  {
    // Spreads the low 10 bits of a value so that two zero bits separate
    // consecutive input bits.
    const auto spread10 = [](const T& in) -> T {
      // Keep only the 10 bits that are interleaved. Without this, input bits
      // 24 and 25 pass every mask below and corrupt the result.
      T v = in & 0x3ff;
      v = (v | (v << 16)) & 0x030000FF;
      v = (v | (v <<  8)) & 0x0300F00F;
      v = (v | (v <<  4)) & 0x030C30C3;
      v = (v | (v <<  2)) & 0x09249249;
      return v;
    };

    const T x = spread10(xin);
    const T y = spread10(yin);
    const T z = spread10(zin);
    return x | (y << 1) | (z << 2);
  }
  /*! bit interleave operation for 64bit data types
   *
   *  Interleaves the low 21 bits of x, y and z into a 63-bit code: bit i of x
   *  lands on result bit 3*i, bit i of y on 3*i+1 and bit i of z on 3*i+2.
   */
  template<class T>
  __forceinline T bitInterleave64(const T& xin, const T& yin, const T& zin)
  {
    // Spreads the low 21 bits of a value so that two zero bits separate
    // consecutive input bits.
    const auto spread21 = [](const T& in) -> T {
      T v = in & 0x1fffff;
      v = (v | v << 32) & 0x1f00000000ffff;
      v = (v | v << 16) & 0x1f0000ff0000ff;
      v = (v | v <<  8) & 0x100f00f00f00f00f;
      v = (v | v <<  4) & 0x10c30c30c30c30c3;
      v = (v | v <<  2) & 0x1249249249249249;
      return v;
    };

    const T x = spread21(xin);
    const T y = spread21(yin);
    const T z = spread21(zin);
    return x | (y << 1) | (z << 2);
  }
  233. }