vec3fa_sycl.h 31 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "../sys/alloc.h"
  5. #include "emath.h"
  6. #include "../simd/sse.h"
  7. namespace embree
  8. {
  9. ////////////////////////////////////////////////////////////////////////////////
  10. /// SYCL Vec3fa Type
  11. ////////////////////////////////////////////////////////////////////////////////
  12. struct __aligned(16) Vec3fa
  13. {
  14. //ALIGNED_STRUCT_(16);
  15. typedef float Scalar;
  16. enum { N = 3 };
  17. struct { float x,y,z, do_not_use; };
  18. ////////////////////////////////////////////////////////////////////////////////
  19. /// Constructors, Assignment & Cast Operators
  20. ////////////////////////////////////////////////////////////////////////////////
  21. __forceinline Vec3fa( ) {}
  22. //__forceinline Vec3fa( const __m128 a ) : m128(a) {}
  23. //__forceinline explicit Vec3fa(const vfloat4& a) : x(a[0]), y(a[1]), z(a[2]) {}
  24. __forceinline Vec3fa ( const Vec3<float>& other ) { x = other.x; y = other.y; z = other.z; }
  25. //__forceinline Vec3fa& operator =( const Vec3<float>& other ) { x = other.x; y = other.y; z = other.z; return *this; }
  26. __forceinline Vec3fa ( const Vec3fa& other ) { x = other.x; y = other.y; z = other.z; }
  27. __forceinline Vec3fa& operator =( const Vec3fa& other ) { x = other.x; y = other.y; z = other.z; return *this; }
  28. __forceinline explicit Vec3fa( const float a ) : x(a), y(a), z(a) {}
  29. __forceinline Vec3fa( const float x, const float y, const float z) : x(x), y(y), z(z) {}
  30. __forceinline explicit Vec3fa( const Vec3ia& a ) : x((float)a.x), y((float)a.y), z((float)a.z) {}
  31. //__forceinline operator const __m128&() const { return m128; }
  32. //__forceinline operator __m128&() { return m128; }
  33. __forceinline operator vfloat4() const { return vfloat4(x,y,z,0.0f); } // FIXME: we should not need this!!
  34. //friend __forceinline Vec3fa copy_a( const Vec3fa& a, const Vec3fa& b ) { Vec3fa c = a; c.a = b.a; return c; }
  35. ////////////////////////////////////////////////////////////////////////////////
  36. /// Loads and Stores
  37. ////////////////////////////////////////////////////////////////////////////////
  38. static __forceinline Vec3fa load( const void* const a ) {
  39. const float* ptr = (const float*)a;
  40. return Vec3fa(ptr[0],ptr[1],ptr[2]);
  41. }
  42. static __forceinline Vec3fa loadu( const void* const a ) {
  43. const float* ptr = (const float*)a;
  44. return Vec3fa(ptr[0],ptr[1],ptr[2]);
  45. }
  46. static __forceinline void storeu ( void* a, const Vec3fa& v ) {
  47. float* ptr = (float*)a;
  48. ptr[0] = v.x; ptr[1] = v.y; ptr[2] = v.z;
  49. }
  50. ////////////////////////////////////////////////////////////////////////////////
  51. /// Constants
  52. ////////////////////////////////////////////////////////////////////////////////
  53. __forceinline Vec3fa( ZeroTy ) : x(0.0f), y(0.0f), z(0.0f) {}
  54. __forceinline Vec3fa( OneTy ) : x(1.0f), y(1.0f), z(1.0f) {}
  55. __forceinline Vec3fa( PosInfTy ) : x(+INFINITY), y(+INFINITY), z(+INFINITY) {}
  56. __forceinline Vec3fa( NegInfTy ) : x(-INFINITY), y(-INFINITY), z(-INFINITY) {}
  57. ////////////////////////////////////////////////////////////////////////////////
  58. /// Array Access
  59. ////////////////////////////////////////////////////////////////////////////////
  60. __forceinline const float& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
  61. __forceinline float& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
  62. };
  63. ////////////////////////////////////////////////////////////////////////////////
  64. /// Unary Operators
  65. ////////////////////////////////////////////////////////////////////////////////
  66. __forceinline Vec3fa operator +( const Vec3fa& a ) { return a; }
  67. __forceinline Vec3fa operator -( const Vec3fa& a ) { return Vec3fa(-a.x,-a.y,-a.z); }
  68. __forceinline Vec3fa abs ( const Vec3fa& a ) { return Vec3fa(sycl::fabs(a.x),sycl::fabs(a.y),sycl::fabs(a.z)); }
  69. __forceinline Vec3fa sign ( const Vec3fa& a ) { return Vec3fa(sycl::sign(a.x),sycl::sign(a.y),sycl::sign(a.z)); }
  70. //__forceinline Vec3fa rcp ( const Vec3fa& a ) { return Vec3fa(sycl::recip(a.x),sycl::recip(a.y),sycl::recip(a.z)); }
  71. __forceinline Vec3fa rcp ( const Vec3fa& a ) { return Vec3fa(__sycl_std::__invoke_native_recip<float>(a.x),__sycl_std::__invoke_native_recip<float>(a.y),__sycl_std::__invoke_native_recip<float>(a.z)); }
  72. __forceinline Vec3fa sqrt ( const Vec3fa& a ) { return Vec3fa(sycl::sqrt(a.x),sycl::sqrt(a.y),sycl::sqrt(a.z)); }
  73. __forceinline Vec3fa sqr ( const Vec3fa& a ) { return Vec3fa(a.x*a.x,a.y*a.y,a.z*a.z); }
  74. __forceinline Vec3fa rsqrt( const Vec3fa& a ) { return Vec3fa(sycl::rsqrt(a.x),sycl::rsqrt(a.y),sycl::rsqrt(a.z)); }
  75. __forceinline Vec3fa zero_fix(const Vec3fa& a) {
  76. const float x = sycl::fabs(a.x) < min_rcp_input ? min_rcp_input : a.x;
  77. const float y = sycl::fabs(a.y) < min_rcp_input ? min_rcp_input : a.y;
  78. const float z = sycl::fabs(a.z) < min_rcp_input ? min_rcp_input : a.z;
  79. return Vec3fa(x,y,z);
  80. }
  81. __forceinline Vec3fa rcp_safe(const Vec3fa& a) {
  82. return rcp(zero_fix(a));
  83. }
  84. __forceinline Vec3fa log ( const Vec3fa& a ) {
  85. return Vec3fa(sycl::log(a.x),sycl::log(a.y),sycl::log(a.z));
  86. }
  87. __forceinline Vec3fa exp ( const Vec3fa& a ) {
  88. return Vec3fa(sycl::exp(a.x),sycl::exp(a.y),sycl::exp(a.z));
  89. }
  90. ////////////////////////////////////////////////////////////////////////////////
  91. /// Binary Operators
  92. ////////////////////////////////////////////////////////////////////////////////
  93. __forceinline Vec3fa operator +( const Vec3fa& a, const Vec3fa& b ) { return Vec3fa(a.x+b.x, a.y+b.y, a.z+b.z); }
  94. __forceinline Vec3fa operator -( const Vec3fa& a, const Vec3fa& b ) { return Vec3fa(a.x-b.x, a.y-b.y, a.z-b.z); }
  95. __forceinline Vec3fa operator *( const Vec3fa& a, const Vec3fa& b ) { return Vec3fa(a.x*b.x, a.y*b.y, a.z*b.z); }
  96. __forceinline Vec3fa operator *( const Vec3fa& a, const float b ) { return a * Vec3fa(b); }
  97. __forceinline Vec3fa operator *( const float a, const Vec3fa& b ) { return Vec3fa(a) * b; }
  98. __forceinline Vec3fa operator /( const Vec3fa& a, const Vec3fa& b ) { return Vec3fa(a.x/b.x, a.y/b.y, a.z/b.z); }
  99. __forceinline Vec3fa operator /( const Vec3fa& a, const float b ) { return Vec3fa(a.x/b, a.y/b, a.z/b); }
  100. __forceinline Vec3fa operator /( const float a, const Vec3fa& b ) { return Vec3fa(a/b.x, a/b.y, a/b.z); }
  101. __forceinline Vec3fa min( const Vec3fa& a, const Vec3fa& b ) {
  102. return Vec3fa(sycl::fmin(a.x,b.x), sycl::fmin(a.y,b.y), sycl::fmin(a.z,b.z));
  103. }
  104. __forceinline Vec3fa max( const Vec3fa& a, const Vec3fa& b ) {
  105. return Vec3fa(sycl::fmax(a.x,b.x), sycl::fmax(a.y,b.y), sycl::fmax(a.z,b.z));
  106. }
  107. /*
  108. #if defined(__SSE4_1__)
  109. __forceinline Vec3fa mini(const Vec3fa& a, const Vec3fa& b) {
  110. const vint4 ai = _mm_castps_si128(a);
  111. const vint4 bi = _mm_castps_si128(b);
  112. const vint4 ci = _mm_min_epi32(ai,bi);
  113. return _mm_castsi128_ps(ci);
  114. }
  115. #endif
  116. #if defined(__SSE4_1__)
  117. __forceinline Vec3fa maxi(const Vec3fa& a, const Vec3fa& b) {
  118. const vint4 ai = _mm_castps_si128(a);
  119. const vint4 bi = _mm_castps_si128(b);
  120. const vint4 ci = _mm_max_epi32(ai,bi);
  121. return _mm_castsi128_ps(ci);
  122. }
  123. #endif
  124. */
  125. __forceinline Vec3fa pow ( const Vec3fa& a, const float& b ) {
  126. return Vec3fa(powf(a.x,b),powf(a.y,b),powf(a.z,b));
  127. }
  128. ////////////////////////////////////////////////////////////////////////////////
  129. /// Ternary Operators
  130. ////////////////////////////////////////////////////////////////////////////////
  131. __forceinline Vec3fa madd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return Vec3fa(madd(a.x,b.x,c.x), madd(a.y,b.y,c.y), madd(a.z,b.z,c.z)); }
  132. __forceinline Vec3fa msub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return Vec3fa(msub(a.x,b.x,c.x), msub(a.y,b.y,c.y), msub(a.z,b.z,c.z)); }
  133. __forceinline Vec3fa nmadd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return Vec3fa(nmadd(a.x,b.x,c.x), nmadd(a.y,b.y,c.y), nmadd(a.z,b.z,c.z)); }
  134. __forceinline Vec3fa nmsub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return Vec3fa(nmsub(a.x,b.x,c.x), nmsub(a.y,b.y,c.y), nmsub(a.z,b.z,c.z)); }
  135. __forceinline Vec3fa madd ( const float a, const Vec3fa& b, const Vec3fa& c) { return madd(Vec3fa(a),b,c); }
  136. __forceinline Vec3fa msub ( const float a, const Vec3fa& b, const Vec3fa& c) { return msub(Vec3fa(a),b,c); }
  137. __forceinline Vec3fa nmadd ( const float a, const Vec3fa& b, const Vec3fa& c) { return nmadd(Vec3fa(a),b,c); }
  138. __forceinline Vec3fa nmsub ( const float a, const Vec3fa& b, const Vec3fa& c) { return nmsub(Vec3fa(a),b,c); }
  139. ////////////////////////////////////////////////////////////////////////////////
  140. /// Assignment Operators
  141. ////////////////////////////////////////////////////////////////////////////////
  142. __forceinline Vec3fa& operator +=( Vec3fa& a, const Vec3fa& b ) { return a = a + b; }
  143. __forceinline Vec3fa& operator -=( Vec3fa& a, const Vec3fa& b ) { return a = a - b; }
  144. __forceinline Vec3fa& operator *=( Vec3fa& a, const Vec3fa& b ) { return a = a * b; }
  145. __forceinline Vec3fa& operator *=( Vec3fa& a, const float b ) { return a = a * b; }
  146. __forceinline Vec3fa& operator /=( Vec3fa& a, const Vec3fa& b ) { return a = a / b; }
  147. __forceinline Vec3fa& operator /=( Vec3fa& a, const float b ) { return a = a / b; }
  148. ////////////////////////////////////////////////////////////////////////////////
  149. /// Reductions
  150. ////////////////////////////////////////////////////////////////////////////////
  151. __forceinline float reduce_add(const Vec3fa& v) { return v.x+v.y+v.z; }
  152. __forceinline float reduce_mul(const Vec3fa& v) { return v.x*v.y*v.z; }
  153. __forceinline float reduce_min(const Vec3fa& v) { return sycl::fmin(sycl::fmin(v.x,v.y),v.z); }
  154. __forceinline float reduce_max(const Vec3fa& v) { return sycl::fmax(sycl::fmax(v.x,v.y),v.z); }
  155. ////////////////////////////////////////////////////////////////////////////////
  156. /// Comparison Operators
  157. ////////////////////////////////////////////////////////////////////////////////
  158. __forceinline bool operator ==( const Vec3fa& a, const Vec3fa& b ) { return a.x == b.x && a.y == b.y && a.z == b.z; }
  159. __forceinline bool operator !=( const Vec3fa& a, const Vec3fa& b ) { return a.x != b.x || a.y != b.y || a.z != b.z; }
  160. __forceinline Vec3ba eq_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x == b.x, a.y == b.y, a.z == b.z); }
  161. __forceinline Vec3ba neq_mask(const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x != b.x, a.y != b.y, a.z != b.z); }
  162. __forceinline Vec3ba lt_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x < b.x, a.y < b.y, a.z < b.z); }
  163. __forceinline Vec3ba le_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x <= b.x, a.y <= b.y, a.z <= b.z); }
  164. __forceinline Vec3ba gt_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x > b.x, a.y > b.y, a.z > b.z); }
  165. __forceinline Vec3ba ge_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x >= b.x, a.y >= b.y, a.z >= b.z); }
  166. __forceinline bool isvalid ( const Vec3fa& v ) {
  167. return all(gt_mask(v,Vec3fa(-FLT_LARGE)) & lt_mask(v,Vec3fa(+FLT_LARGE)));
  168. }
  169. __forceinline bool is_finite ( const Vec3fa& a ) {
  170. return all(ge_mask(a,Vec3fa(-FLT_MAX)) & le_mask(a,Vec3fa(+FLT_MAX)));
  171. }
  172. ////////////////////////////////////////////////////////////////////////////////
  173. /// Euclidean Space Operators
  174. ////////////////////////////////////////////////////////////////////////////////
  175. __forceinline float dot ( const Vec3fa& a, const Vec3fa& b ) {
  176. return reduce_add(a*b);
  177. }
  178. __forceinline Vec3fa cross ( const Vec3fa& a, const Vec3fa& b ) {
  179. return Vec3fa(msub(a.y,b.z,a.z*b.y), msub(a.z,b.x,a.x*b.z), msub(a.x,b.y,a.y*b.x));
  180. }
  181. __forceinline float sqr_length ( const Vec3fa& a ) { return dot(a,a); }
  182. __forceinline float rcp_length ( const Vec3fa& a ) { return rsqrt(dot(a,a)); }
  183. __forceinline float rcp_length2( const Vec3fa& a ) { return rcp(dot(a,a)); }
  184. __forceinline float length ( const Vec3fa& a ) { return sqrt(dot(a,a)); }
  185. __forceinline Vec3fa normalize( const Vec3fa& a ) { return a*rsqrt(dot(a,a)); }
  186. __forceinline float distance ( const Vec3fa& a, const Vec3fa& b ) { return length(a-b); }
  187. __forceinline float halfArea ( const Vec3fa& d ) { return madd(d.x,(d.y+d.z),d.y*d.z); }
  188. __forceinline float area ( const Vec3fa& d ) { return 2.0f*halfArea(d); }
  189. __forceinline Vec3fa normalize_safe( const Vec3fa& a ) {
  190. const float d = dot(a,a); if (unlikely(d == 0.0f)) return a; else return a*rsqrt(d);
  191. }
  192. /*! differentiated normalization */
  193. __forceinline Vec3fa dnormalize(const Vec3fa& p, const Vec3fa& dp)
  194. {
  195. const float pp = dot(p,p);
  196. const float pdp = dot(p,dp);
  197. return (pp*dp-pdp*p)*rcp(pp)*rsqrt(pp);
  198. }
  199. ////////////////////////////////////////////////////////////////////////////////
  200. /// Select
  201. ////////////////////////////////////////////////////////////////////////////////
  202. __forceinline Vec3fa select( bool s, const Vec3fa& t, const Vec3fa& f ) {
  203. return Vec3fa(s ? t.x : f.x, s ? t.y : f.y, s ? t.z : f.z);
  204. }
  205. __forceinline Vec3fa select( const Vec3ba& s, const Vec3fa& t, const Vec3fa& f ) {
  206. return Vec3fa(s.x ? t.x : f.x, s.y ? t.y : f.y, s.z ? t.z : f.z);
  207. }
  208. __forceinline Vec3fa lerp(const Vec3fa& v0, const Vec3fa& v1, const float t) {
  209. return madd(1.0f-t,v0,t*v1);
  210. }
  211. __forceinline int maxDim ( const Vec3fa& a )
  212. {
  213. const Vec3fa b = abs(a);
  214. if (b.x > b.y) {
  215. if (b.x > b.z) return 0; else return 2;
  216. } else {
  217. if (b.y > b.z) return 1; else return 2;
  218. }
  219. }
  220. ////////////////////////////////////////////////////////////////////////////////
  221. /// Rounding Functions
  222. ////////////////////////////////////////////////////////////////////////////////
  223. __forceinline Vec3fa trunc( const Vec3fa& a ) { return Vec3fa(sycl::trunc(a.x),sycl::trunc(a.y),sycl::trunc(a.z)); }
  224. __forceinline Vec3fa floor( const Vec3fa& a ) { return Vec3fa(sycl::floor(a.x),sycl::floor(a.y),sycl::floor(a.z)); }
  225. __forceinline Vec3fa ceil ( const Vec3fa& a ) { return Vec3fa(sycl::ceil (a.x),sycl::ceil (a.y),sycl::ceil (a.z)); }
  226. ////////////////////////////////////////////////////////////////////////////////
  227. /// Output Operators
  228. ////////////////////////////////////////////////////////////////////////////////
  229. inline embree_ostream operator<<(embree_ostream cout, const Vec3fa& a) {
  230. return cout << "(" << a.x << ", " << a.y << ", " << a.z << ")";
  231. }
  232. __forceinline Vec2fa::Vec2fa(const Vec3fa& a)
  233. : x(a.x), y(a.y) {}
  234. __forceinline Vec3ia::Vec3ia( const Vec3fa& a )
  235. : x((int)a.x), y((int)a.y), z((int)a.z) {}
  236. typedef Vec3fa Vec3fa_t;
  237. ////////////////////////////////////////////////////////////////////////////////
  238. /// SYCL Vec3fx Type
  239. ////////////////////////////////////////////////////////////////////////////////
  240. struct __aligned(16) Vec3fx
  241. {
  242. //ALIGNED_STRUCT_(16);
  243. typedef float Scalar;
  244. enum { N = 3 };
  245. struct { float x,y,z; union { int a; unsigned u; float w; }; };
  246. ////////////////////////////////////////////////////////////////////////////////
  247. /// Constructors, Assignment & Cast Operators
  248. ////////////////////////////////////////////////////////////////////////////////
  249. __forceinline Vec3fx( ) {}
  250. //__forceinline Vec3fx( const __m128 a ) : m128(a) {}
  251. __forceinline explicit Vec3fx(const vfloat4& a) : x(a[0]), y(a[1]), z(a[2]), w(a[3]) {}
  252. __forceinline explicit Vec3fx(const Vec3fa& v) : x(v.x), y(v.y), z(v.z), w(0.0f) {}
  253. __forceinline operator Vec3fa() const { return Vec3fa(x,y,z); }
  254. __forceinline explicit Vec3fx ( const Vec3<float>& other ) { x = other.x; y = other.y; z = other.z; }
  255. //__forceinline Vec3fx& operator =( const Vec3<float>& other ) { x = other.x; y = other.y; z = other.z; return *this; }
  256. //__forceinline Vec3fx ( const Vec3fx& other ) { *(sycl::float4*)this = *(const sycl::float4*)&other; }
  257. //__forceinline Vec3fx& operator =( const Vec3fx& other ) { *(sycl::float4*)this = *(const sycl::float4*)&other; return *this; }
  258. __forceinline explicit Vec3fx( const float a ) : x(a), y(a), z(a), w(a) {}
  259. __forceinline Vec3fx( const float x, const float y, const float z) : x(x), y(y), z(z), w(z) {}
  260. __forceinline Vec3fx( const Vec3fa& other, const int a1) : x(other.x), y(other.y), z(other.z), a(a1) {}
  261. __forceinline Vec3fx( const Vec3fa& other, const unsigned a1) : x(other.x), y(other.y), z(other.z), u(a1) {}
  262. __forceinline Vec3fx( const Vec3fa& other, const float w1) : x(other.x), y(other.y), z(other.z), w(w1) {}
  263. //__forceinline Vec3fx( const float x, const float y, const float z, const int a) : x(x), y(y), z(z), a(a) {} // not working properly!
  264. //__forceinline Vec3fx( const float x, const float y, const float z, const unsigned a) : x(x), y(y), z(z), u(a) {} // not working properly!
  265. __forceinline Vec3fx( const float x, const float y, const float z, const float w) : x(x), y(y), z(z), w(w) {}
  266. __forceinline explicit Vec3fx( const Vec3ia& a ) : x((float)a.x), y((float)a.y), z((float)a.z), w(0.0f) {}
  267. //__forceinline operator const __m128&() const { return m128; }
  268. //__forceinline operator __m128&() { return m128; }
  269. __forceinline operator vfloat4() const { return vfloat4(x,y,z,w); }
  270. //friend __forceinline Vec3fx copy_a( const Vec3fx& a, const Vec3fx& b ) { Vec3fx c = a; c.a = b.a; return c; }
  271. ////////////////////////////////////////////////////////////////////////////////
  272. /// Loads and Stores
  273. ////////////////////////////////////////////////////////////////////////////////
  274. static __forceinline Vec3fx load( const void* const a ) {
  275. const float* ptr = (const float*)a;
  276. return Vec3fx(ptr[0],ptr[1],ptr[2],ptr[3]);
  277. }
  278. static __forceinline Vec3fx loadu( const void* const a ) {
  279. const float* ptr = (const float*)a;
  280. return Vec3fx(ptr[0],ptr[1],ptr[2],ptr[3]);
  281. }
  282. static __forceinline void storeu ( void* a, const Vec3fx& v ) {
  283. float* ptr = (float*)a;
  284. ptr[0] = v.x; ptr[1] = v.y; ptr[2] = v.z; ptr[3] = v.w;
  285. }
  286. ////////////////////////////////////////////////////////////////////////////////
  287. /// Constants
  288. ////////////////////////////////////////////////////////////////////////////////
  289. __forceinline Vec3fx( ZeroTy ) : x(0.0f), y(0.0f), z(0.0f), w(0.0f) {}
  290. __forceinline Vec3fx( OneTy ) : x(1.0f), y(1.0f), z(1.0f), w(1.0f) {}
  291. __forceinline Vec3fx( PosInfTy ) : x(+INFINITY), y(+INFINITY), z(+INFINITY), w(+INFINITY) {}
  292. __forceinline Vec3fx( NegInfTy ) : x(-INFINITY), y(-INFINITY), z(-INFINITY), w(-INFINITY) {}
  293. ////////////////////////////////////////////////////////////////////////////////
  294. /// Array Access
  295. ////////////////////////////////////////////////////////////////////////////////
  296. __forceinline const float& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
  297. __forceinline float& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
  298. };
  299. ////////////////////////////////////////////////////////////////////////////////
  300. /// Unary Operators
  301. ////////////////////////////////////////////////////////////////////////////////
  302. __forceinline Vec3fx operator +( const Vec3fx& a ) { return a; }
  303. __forceinline Vec3fx operator -( const Vec3fx& a ) { return Vec3fx(-a.x,-a.y,-a.z,-a.w); }
  304. __forceinline Vec3fx abs ( const Vec3fx& a ) { return Vec3fx(sycl::fabs(a.x),sycl::fabs(a.y),sycl::fabs(a.z),sycl::fabs(a.w)); }
  305. __forceinline Vec3fx sign ( const Vec3fx& a ) { return Vec3fx(sycl::sign(a.x),sycl::sign(a.y),sycl::sign(a.z),sycl::sign(a.z)); }
  306. //__forceinline Vec3fx rcp ( const Vec3fx& a ) { return Vec3fx(sycl::recip(a.x),sycl::recip(a.y),sycl::recip(a.z)); }
  307. __forceinline Vec3fx rcp ( const Vec3fx& a ) { return Vec3fx(__sycl_std::__invoke_native_recip<float>(a.x),__sycl_std::__invoke_native_recip<float>(a.y),__sycl_std::__invoke_native_recip<float>(a.z),__sycl_std::__invoke_native_recip<float>(a.w)); }
  308. __forceinline Vec3fx sqrt ( const Vec3fx& a ) { return Vec3fx(sycl::sqrt(a.x),sycl::sqrt(a.y),sycl::sqrt(a.z),sycl::sqrt(a.w)); }
  309. __forceinline Vec3fx sqr ( const Vec3fx& a ) { return Vec3fx(a.x*a.x,a.y*a.y,a.z*a.z,a.w*a.w); }
  310. __forceinline Vec3fx rsqrt( const Vec3fx& a ) { return Vec3fx(sycl::rsqrt(a.x),sycl::rsqrt(a.y),sycl::rsqrt(a.z),sycl::rsqrt(a.w)); }
  311. __forceinline Vec3fx zero_fix(const Vec3fx& a) {
  312. const float x = sycl::fabs(a.x) < min_rcp_input ? min_rcp_input : a.x;
  313. const float y = sycl::fabs(a.y) < min_rcp_input ? min_rcp_input : a.y;
  314. const float z = sycl::fabs(a.z) < min_rcp_input ? min_rcp_input : a.z;
  315. return Vec3fx(x,y,z);
  316. }
  317. __forceinline Vec3fx rcp_safe(const Vec3fx& a) {
  318. return rcp(zero_fix(a));
  319. }
  320. __forceinline Vec3fx log ( const Vec3fx& a ) {
  321. return Vec3fx(sycl::log(a.x),sycl::log(a.y),sycl::log(a.z));
  322. }
  323. __forceinline Vec3fx exp ( const Vec3fx& a ) {
  324. return Vec3fx(sycl::exp(a.x),sycl::exp(a.y),sycl::exp(a.z));
  325. }
  326. ////////////////////////////////////////////////////////////////////////////////
  327. /// Binary Operators
  328. ////////////////////////////////////////////////////////////////////////////////
  329. __forceinline Vec3fx operator +( const Vec3fx& a, const Vec3fx& b ) { return Vec3fx(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w); }
  330. __forceinline Vec3fx operator -( const Vec3fx& a, const Vec3fx& b ) { return Vec3fx(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w); }
  331. __forceinline Vec3fx operator *( const Vec3fx& a, const Vec3fx& b ) { return Vec3fx(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w); }
  332. __forceinline Vec3fx operator *( const Vec3fx& a, const float b ) { return a * Vec3fx(b); }
  333. __forceinline Vec3fx operator *( const float a, const Vec3fx& b ) { return Vec3fx(a) * b; }
  334. __forceinline Vec3fx operator /( const Vec3fx& a, const Vec3fx& b ) { return Vec3fx(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w); }
  335. __forceinline Vec3fx operator /( const Vec3fx& a, const float b ) { return Vec3fx(a.x/b, a.y/b, a.z/b, a.w/b); }
  336. __forceinline Vec3fx operator /( const float a, const Vec3fx& b ) { return Vec3fx(a/b.x, a/b.y, a/b.z, a/b.w); }
  337. __forceinline Vec3fx min( const Vec3fx& a, const Vec3fx& b ) {
  338. return Vec3fx(sycl::fmin(a.x,b.x), sycl::fmin(a.y,b.y), sycl::fmin(a.z,b.z), sycl::fmin(a.w,b.w));
  339. }
  340. __forceinline Vec3fx max( const Vec3fx& a, const Vec3fx& b ) {
  341. return Vec3fx(sycl::fmax(a.x,b.x), sycl::fmax(a.y,b.y), sycl::fmax(a.z,b.z), sycl::fmax(a.w,b.w));
  342. }
  343. /*
  344. #if defined(__SSE4_1__)
  345. __forceinline Vec3fx mini(const Vec3fx& a, const Vec3fx& b) {
  346. const vint4 ai = _mm_castps_si128(a);
  347. const vint4 bi = _mm_castps_si128(b);
  348. const vint4 ci = _mm_min_epi32(ai,bi);
  349. return _mm_castsi128_ps(ci);
  350. }
  351. #endif
  352. #if defined(__SSE4_1__)
  353. __forceinline Vec3fx maxi(const Vec3fx& a, const Vec3fx& b) {
  354. const vint4 ai = _mm_castps_si128(a);
  355. const vint4 bi = _mm_castps_si128(b);
  356. const vint4 ci = _mm_max_epi32(ai,bi);
  357. return _mm_castsi128_ps(ci);
  358. }
  359. #endif
  360. __forceinline Vec3fx pow ( const Vec3fx& a, const float& b ) {
  361. return Vec3fx(powf(a.x,b),powf(a.y,b),powf(a.z,b));
  362. }
  363. */
  364. ////////////////////////////////////////////////////////////////////////////////
  365. /// Ternary Operators
  366. ////////////////////////////////////////////////////////////////////////////////
  367. __forceinline Vec3fx madd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return Vec3fx(madd(a.x,b.x,c.x), madd(a.y,b.y,c.y), madd(a.z,b.z,c.z), madd(a.w,b.w,c.w)); }
  368. __forceinline Vec3fx msub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return Vec3fx(msub(a.x,b.x,c.x), msub(a.y,b.y,c.y), msub(a.z,b.z,c.z), msub(a.w,b.w,c.w)); }
  369. __forceinline Vec3fx nmadd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return Vec3fx(nmadd(a.x,b.x,c.x), nmadd(a.y,b.y,c.y), nmadd(a.z,b.z,c.z), nmadd(a.w,b.w,c.w)); }
  370. __forceinline Vec3fx nmsub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return Vec3fx(nmsub(a.x,b.x,c.x), nmsub(a.y,b.y,c.y), nmsub(a.z,b.z,c.z), nmsub(a.w,b.w,c.w)); }
  371. __forceinline Vec3fx madd ( const float a, const Vec3fx& b, const Vec3fx& c) { return madd(Vec3fx(a),b,c); }
  372. __forceinline Vec3fx msub ( const float a, const Vec3fx& b, const Vec3fx& c) { return msub(Vec3fx(a),b,c); }
  373. __forceinline Vec3fx nmadd ( const float a, const Vec3fx& b, const Vec3fx& c) { return nmadd(Vec3fx(a),b,c); }
  374. __forceinline Vec3fx nmsub ( const float a, const Vec3fx& b, const Vec3fx& c) { return nmsub(Vec3fx(a),b,c); }
  375. ////////////////////////////////////////////////////////////////////////////////
  376. /// Assignment Operators
  377. ////////////////////////////////////////////////////////////////////////////////
  378. __forceinline Vec3fx& operator +=( Vec3fx& a, const Vec3fx& b ) { return a = a + b; }
  379. __forceinline Vec3fx& operator -=( Vec3fx& a, const Vec3fx& b ) { return a = a - b; }
  380. __forceinline Vec3fx& operator *=( Vec3fx& a, const Vec3fx& b ) { return a = a * b; }
  381. __forceinline Vec3fx& operator *=( Vec3fx& a, const float b ) { return a = a * b; }
  382. __forceinline Vec3fx& operator /=( Vec3fx& a, const Vec3fx& b ) { return a = a / b; }
  383. __forceinline Vec3fx& operator /=( Vec3fx& a, const float b ) { return a = a / b; }
  384. ////////////////////////////////////////////////////////////////////////////////
  385. /// Reductions
  386. ////////////////////////////////////////////////////////////////////////////////
  387. __forceinline float reduce_add(const Vec3fx& v) { return v.x+v.y+v.z; }
  388. __forceinline float reduce_mul(const Vec3fx& v) { return v.x*v.y*v.z; }
  389. __forceinline float reduce_min(const Vec3fx& v) { return sycl::fmin(sycl::fmin(v.x,v.y),v.z); }
  390. __forceinline float reduce_max(const Vec3fx& v) { return sycl::fmax(sycl::fmax(v.x,v.y),v.z); }
  391. ////////////////////////////////////////////////////////////////////////////////
  392. /// Comparison Operators
  393. ////////////////////////////////////////////////////////////////////////////////
  394. __forceinline bool operator ==( const Vec3fx& a, const Vec3fx& b ) { return a.x == b.x && a.y == b.y && a.z == b.z; }
  395. __forceinline bool operator !=( const Vec3fx& a, const Vec3fx& b ) { return a.x != b.x || a.y != b.y || a.z != b.z; }
  396. __forceinline Vec3ba eq_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x == b.x, a.y == b.y, a.z == b.z); }
  397. __forceinline Vec3ba neq_mask(const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x != b.x, a.y != b.y, a.z != b.z); }
  398. __forceinline Vec3ba lt_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x < b.x, a.y < b.y, a.z < b.z); }
  399. __forceinline Vec3ba le_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x <= b.x, a.y <= b.y, a.z <= b.z); }
  400. __forceinline Vec3ba gt_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x > b.x, a.y > b.y, a.z > b.z); }
  401. __forceinline Vec3ba ge_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x >= b.x, a.y >= b.y, a.z >= b.z); }
  402. __forceinline bool isvalid ( const Vec3fx& v ) {
  403. return all(gt_mask(v,Vec3fx(-FLT_LARGE)) & lt_mask(v,Vec3fx(+FLT_LARGE)));
  404. }
  405. __forceinline bool is_finite ( const Vec3fx& a ) {
  406. return all(ge_mask(a,Vec3fx(-FLT_MAX)) & le_mask(a,Vec3fx(+FLT_MAX)));
  407. }
  408. __forceinline bool isvalid4 ( const Vec3fx& v ) {
  409. const bool valid_x = v.x >= -FLT_LARGE & v.x <= +FLT_LARGE;
  410. const bool valid_y = v.y >= -FLT_LARGE & v.y <= +FLT_LARGE;
  411. const bool valid_z = v.z >= -FLT_LARGE & v.z <= +FLT_LARGE;
  412. const bool valid_w = v.w >= -FLT_LARGE & v.w <= +FLT_LARGE;
  413. return valid_x & valid_y & valid_z & valid_w;
  414. }
  415. __forceinline bool is_finite4 ( const Vec3fx& v ) {
  416. const bool finite_x = v.x >= -FLT_MAX & v.x <= +FLT_MAX;
  417. const bool finite_y = v.y >= -FLT_MAX & v.y <= +FLT_MAX;
  418. const bool finite_z = v.z >= -FLT_MAX & v.z <= +FLT_MAX;
  419. const bool finite_w = v.w >= -FLT_MAX & v.w <= +FLT_MAX;
  420. return finite_x & finite_y & finite_z & finite_w;
  421. }
  422. ////////////////////////////////////////////////////////////////////////////////
  423. /// Euclidean Space Operators
  424. ////////////////////////////////////////////////////////////////////////////////
  425. __forceinline float dot ( const Vec3fx& a, const Vec3fx& b ) {
  426. return reduce_add(a*b);
  427. }
  428. __forceinline Vec3fx cross ( const Vec3fx& a, const Vec3fx& b ) {
  429. return Vec3fx(msub(a.y,b.z,a.z*b.y), msub(a.z,b.x,a.x*b.z), msub(a.x,b.y,a.y*b.x));
  430. }
  431. __forceinline float sqr_length ( const Vec3fx& a ) { return dot(a,a); }
  432. __forceinline float rcp_length ( const Vec3fx& a ) { return rsqrt(dot(a,a)); }
  433. __forceinline float rcp_length2( const Vec3fx& a ) { return rcp(dot(a,a)); }
  434. __forceinline float length ( const Vec3fx& a ) { return sqrt(dot(a,a)); }
  435. __forceinline Vec3fx normalize( const Vec3fx& a ) { return a*rsqrt(dot(a,a)); }
  436. __forceinline float distance ( const Vec3fx& a, const Vec3fx& b ) { return length(a-b); }
  437. __forceinline float halfArea ( const Vec3fx& d ) { return madd(d.x,(d.y+d.z),d.y*d.z); }
  438. __forceinline float area ( const Vec3fx& d ) { return 2.0f*halfArea(d); }
  439. __forceinline Vec3fx normalize_safe( const Vec3fx& a ) {
  440. const float d = dot(a,a); if (unlikely(d == 0.0f)) return a; else return a*rsqrt(d);
  441. }
  442. /*! differentiated normalization */
  443. __forceinline Vec3fx dnormalize(const Vec3fx& p, const Vec3fx& dp)
  444. {
  445. const float pp = dot(p,p);
  446. const float pdp = dot(p,dp);
  447. return (pp*dp-pdp*p)*rcp(pp)*rsqrt(pp);
  448. }
  449. ////////////////////////////////////////////////////////////////////////////////
  450. /// Select
  451. ////////////////////////////////////////////////////////////////////////////////
  452. __forceinline Vec3fx select( bool s, const Vec3fx& t, const Vec3fx& f ) {
  453. return Vec3fx(s ? t.x : f.x, s ? t.y : f.y, s ? t.z : f.z, s ? t.w : f.w);
  454. }
  455. __forceinline Vec3fx select( const Vec3ba& s, const Vec3fx& t, const Vec3fx& f ) {
  456. return Vec3fx(s.x ? t.x : f.x, s.y ? t.y : f.y, s.z ? t.z : f.z);
  457. }
  458. __forceinline Vec3fx lerp(const Vec3fx& v0, const Vec3fx& v1, const float t) {
  459. return madd(1.0f-t,v0,t*v1);
  460. }
  461. __forceinline int maxDim ( const Vec3fx& a )
  462. {
  463. const Vec3fx b = abs(a);
  464. if (b.x > b.y) {
  465. if (b.x > b.z) return 0; else return 2;
  466. } else {
  467. if (b.y > b.z) return 1; else return 2;
  468. }
  469. }
  470. ////////////////////////////////////////////////////////////////////////////////
  471. /// Rounding Functions
  472. ////////////////////////////////////////////////////////////////////////////////
  473. __forceinline Vec3fx trunc( const Vec3fx& a ) { return Vec3fx(sycl::trunc(a.x),sycl::trunc(a.y),sycl::trunc(a.z),sycl::trunc(a.w)); }
  474. __forceinline Vec3fx floor( const Vec3fx& a ) { return Vec3fx(sycl::floor(a.x),sycl::floor(a.y),sycl::floor(a.z),sycl::floor(a.w)); }
  475. __forceinline Vec3fx ceil ( const Vec3fx& a ) { return Vec3fx(sycl::ceil (a.x),sycl::ceil (a.y),sycl::ceil (a.z),sycl::ceil (a.w)); }
  476. ////////////////////////////////////////////////////////////////////////////////
  477. /// Output Operators
  478. ////////////////////////////////////////////////////////////////////////////////
  479. inline embree_ostream operator<<(embree_ostream cout, const Vec3fx& a) {
  480. return cout << "(" << a.x << ", " << a.y << ", " << a.z << "," << a.w << ")";
  481. }
  482. typedef Vec3fx Vec3ff;
  483. //__forceinline Vec2fa::Vec2fa(const Vec3fx& a)
  484. // : x(a.x), y(a.y) {}
  485. //__forceinline Vec3ia::Vec3ia( const Vec3fx& a )
  486. // : x((int)a.x), y((int)a.y), z((int)a.z) {}
  487. }