ray.h 65 KB


  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "default.h"
  5. #include "instance_stack.h"
  6. // FIXME: if ray gets separated into ray* and hit, uload4 needs to be adjusted
  7. namespace embree
  8. {
  9. /* Ray structure for K rays */
  10. template<int K>
  11. struct RayK
  12. {
  13. /* Default construction does nothing */
  14. __forceinline RayK() {}
  15. /* Constructs a ray from origin, direction, and ray segment. Near
  16. * has to be smaller than far */
  17. __forceinline RayK(const Vec3vf<K>& org, const Vec3vf<K>& dir,
  18. const vfloat<K>& tnear = zero, const vfloat<K>& tfar = inf,
  19. const vfloat<K>& time = zero, const vint<K>& mask = -1, const vint<K>& id = 0, const vint<K>& flags = 0)
  20. : org(org), dir(dir), _tnear(tnear), tfar(tfar), _time(time), mask(mask), id(id), flags(flags) {}
  21. /* Returns the size of the ray */
  22. static __forceinline size_t size() { return K; }
  23. /* Calculates if this is a valid ray that does not cause issues during traversal */
  24. __forceinline vbool<K> valid() const
  25. {
  26. const vbool<K> vx = (abs(org.x) <= vfloat<K>(FLT_LARGE)) & (abs(dir.x) <= vfloat<K>(FLT_LARGE));
  27. const vbool<K> vy = (abs(org.y) <= vfloat<K>(FLT_LARGE)) & (abs(dir.y) <= vfloat<K>(FLT_LARGE));
  28. const vbool<K> vz = (abs(org.z) <= vfloat<K>(FLT_LARGE)) & (abs(dir.z) <= vfloat<K>(FLT_LARGE));
  29. const vbool<K> vn = abs(tnear()) <= vfloat<K>(inf);
  30. const vbool<K> vf = abs(tfar) <= vfloat<K>(inf);
  31. return vx & vy & vz & vn & vf;
  32. }
  33. __forceinline void get(RayK<1>* ray) const;
  34. __forceinline void get(size_t i, RayK<1>& ray) const;
  35. __forceinline void set(const RayK<1>* ray);
  36. __forceinline void set(size_t i, const RayK<1>& ray);
  37. __forceinline void copy(size_t dest, size_t source);
  38. __forceinline vint<K> octant() const
  39. {
  40. return select(dir.x < 0.0f, vint<K>(1), vint<K>(zero)) |
  41. select(dir.y < 0.0f, vint<K>(2), vint<K>(zero)) |
  42. select(dir.z < 0.0f, vint<K>(4), vint<K>(zero));
  43. }
  44. /* Ray data */
  45. Vec3vf<K> org; // ray origin
  46. vfloat<K> _tnear; // start of ray segment
  47. Vec3vf<K> dir; // ray direction
  48. vfloat<K> _time; // time of this ray for motion blur
  49. vfloat<K> tfar; // end of ray segment
  50. vint<K> mask; // used to mask out objects during traversal
  51. vint<K> id;
  52. vint<K> flags;
  53. __forceinline vfloat<K>& tnear() { return _tnear; }
  54. __forceinline vfloat<K>& time() { return _time; }
  55. __forceinline const vfloat<K>& tnear() const { return _tnear; }
  56. __forceinline const vfloat<K>& time() const { return _time; }
  57. };
  58. /* Ray+hit structure for K rays */
  59. template<int K>
  60. struct RayHitK : RayK<K>
  61. {
  62. using RayK<K>::org;
  63. using RayK<K>::_tnear;
  64. using RayK<K>::dir;
  65. using RayK<K>::_time;
  66. using RayK<K>::tfar;
  67. using RayK<K>::mask;
  68. using RayK<K>::id;
  69. using RayK<K>::flags;
  70. using RayK<K>::tnear;
  71. using RayK<K>::time;
  72. /* Default construction does nothing */
  73. __forceinline RayHitK() {}
  74. /* Constructs a ray from origin, direction, and ray segment. Near
  75. * has to be smaller than far */
  76. __forceinline RayHitK(const Vec3vf<K>& org, const Vec3vf<K>& dir,
  77. const vfloat<K>& tnear = zero, const vfloat<K>& tfar = inf,
  78. const vfloat<K>& time = zero, const vint<K>& mask = -1, const vint<K>& id = 0, const vint<K>& flags = 0)
  79. : RayK<K>(org, dir, tnear, tfar, time, mask, id, flags),
  80. geomID(RTC_INVALID_GEOMETRY_ID)
  81. {
  82. for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) {
  83. instID[l] = RTC_INVALID_GEOMETRY_ID;
  84. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  85. instPrimID[l] = RTC_INVALID_GEOMETRY_ID;
  86. #endif
  87. }
  88. }
  89. __forceinline RayHitK(const RayK<K>& ray)
  90. : RayK<K>(ray),
  91. geomID(RTC_INVALID_GEOMETRY_ID)
  92. {
  93. for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) {
  94. instID[l] = RTC_INVALID_GEOMETRY_ID;
  95. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  96. instPrimID[l] = RTC_INVALID_GEOMETRY_ID;
  97. #endif
  98. }
  99. }
  100. __forceinline RayHitK<K>& operator =(const RayK<K>& ray)
  101. {
  102. org = ray.org;
  103. _tnear = ray._tnear;
  104. dir = ray.dir;
  105. _time = ray._time;
  106. tfar = ray.tfar;
  107. mask = ray.mask;
  108. id = ray.id;
  109. flags = ray.flags;
  110. geomID = RTC_INVALID_GEOMETRY_ID;
  111. for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) {
  112. instID[l] = RTC_INVALID_GEOMETRY_ID;
  113. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  114. instPrimID[l] = RTC_INVALID_GEOMETRY_ID;
  115. #endif
  116. }
  117. return *this;
  118. }
  119. /* Calculates if the hit is valid */
  120. __forceinline void verifyHit(const vbool<K>& valid0) const
  121. {
  122. vbool<K> valid = valid0 & geomID != vuint<K>(RTC_INVALID_GEOMETRY_ID);
  123. const vbool<K> vt = (abs(tfar) <= vfloat<K>(FLT_LARGE)) | (tfar == vfloat<K>(neg_inf));
  124. const vbool<K> vu = (abs(u) <= vfloat<K>(FLT_LARGE));
  125. const vbool<K> vv = (abs(u) <= vfloat<K>(FLT_LARGE));
  126. const vbool<K> vnx = abs(Ng.x) <= vfloat<K>(FLT_LARGE);
  127. const vbool<K> vny = abs(Ng.y) <= vfloat<K>(FLT_LARGE);
  128. const vbool<K> vnz = abs(Ng.z) <= vfloat<K>(FLT_LARGE);
  129. if (any(valid & !vt)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid t");
  130. if (any(valid & !vu)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid u");
  131. if (any(valid & !vv)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid v");
  132. if (any(valid & !vnx)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid Ng.x");
  133. if (any(valid & !vny)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid Ng.y");
  134. if (any(valid & !vnz)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid Ng.z");
  135. }
  136. __forceinline void get(RayHitK<1>* ray) const;
  137. __forceinline void get(size_t i, RayHitK<1>& ray) const;
  138. __forceinline void set(const RayHitK<1>* ray);
  139. __forceinline void set(size_t i, const RayHitK<1>& ray);
  140. __forceinline void copy(size_t dest, size_t source);
  141. /* Hit data */
  142. Vec3vf<K> Ng; // geometry normal
  143. vfloat<K> u; // barycentric u coordinate of hit
  144. vfloat<K> v; // barycentric v coordinate of hit
  145. vuint<K> primID; // primitive ID
  146. vuint<K> geomID; // geometry ID
  147. vuint<K> instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID
  148. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  149. vuint<K> instPrimID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance prim ID
  150. #endif
  151. };
  152. /* Specialization for a single ray */
  153. template<>
  154. struct RayK<1>
  155. {
  156. /* Default construction does nothing */
  157. __forceinline RayK() {}
  158. /* Constructs a ray from origin, direction, and ray segment. Near
  159. * has to be smaller than far */
  160. __forceinline RayK(const Vec3fa& org, const Vec3fa& dir, float tnear = zero, float tfar = inf, float time = zero, int mask = -1, int id = 0, int flags = 0)
  161. : org(org,tnear), dir(dir,time), tfar(tfar), mask(mask), id(id), flags(flags) {}
  162. /* Calculates if this is a valid ray that does not cause issues during traversal */
  163. __forceinline bool valid() const {
  164. return all(le_mask(abs(Vec3fa(org)), Vec3fa(FLT_LARGE)) & le_mask(abs(Vec3fa(dir)), Vec3fa(FLT_LARGE))) && abs(tnear()) <= float(inf) && abs(tfar) <= float(inf);
  165. }
  166. /* checks if occlusion ray is done */
  167. __forceinline bool occluded() const {
  168. return tfar < 0.0f;
  169. }
  170. /* Ray data */
  171. Vec3ff org; // 3 floats for ray origin, 1 float for tnear
  172. //float tnear; // start of ray segment
  173. Vec3ff dir; // 3 floats for ray direction, 1 float for time
  174. // float time;
  175. float tfar; // end of ray segment
  176. int mask; // used to mask out objects during traversal
  177. int id; // ray ID
  178. int flags; // ray flags
  179. __forceinline float& tnear() { return org.w; };
  180. __forceinline const float& tnear() const { return org.w; };
  181. __forceinline float& time() { return dir.w; };
  182. __forceinline const float& time() const { return dir.w; };
  183. };
  184. template<>
  185. struct RayHitK<1> : RayK<1>
  186. {
  187. /* Default construction does nothing */
  188. __forceinline RayHitK() {}
  189. /* Constructs a ray from origin, direction, and ray segment. Near
  190. * has to be smaller than far */
  191. __forceinline RayHitK(const Vec3fa& org, const Vec3fa& dir, float tnear = zero, float tfar = inf, float time = zero, int mask = -1, int id = 0, int flags = 0)
  192. : RayK<1>(org, dir, tnear, tfar, time, mask, id, flags),
  193. geomID(RTC_INVALID_GEOMETRY_ID) {}
  194. __forceinline RayHitK(const RayK<1>& ray)
  195. : RayK<1>(ray),
  196. geomID(RTC_INVALID_GEOMETRY_ID) {}
  197. __forceinline RayHitK<1>& operator =(const RayK<1>& ray)
  198. {
  199. org = ray.org;
  200. dir = ray.dir;
  201. tfar = ray.tfar;
  202. mask = ray.mask;
  203. id = ray.id;
  204. flags = ray.flags;
  205. geomID = RTC_INVALID_GEOMETRY_ID;
  206. return *this;
  207. }
  208. /* Calculates if the hit is valid */
  209. __forceinline void verifyHit() const
  210. {
  211. if (geomID == RTC_INVALID_GEOMETRY_ID) return;
  212. const bool vt = (abs(tfar) <= FLT_LARGE) || (tfar == float(neg_inf));
  213. const bool vu = (abs(u) <= FLT_LARGE);
  214. const bool vv = (abs(u) <= FLT_LARGE);
  215. const bool vnx = abs(Ng.x) <= FLT_LARGE;
  216. const bool vny = abs(Ng.y) <= FLT_LARGE;
  217. const bool vnz = abs(Ng.z) <= FLT_LARGE;
  218. if (!vt) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid t");
  219. if (!vu) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid u");
  220. if (!vv) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid v");
  221. if (!vnx) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid Ng.x");
  222. if (!vny) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid Ng.y");
  223. if (!vnz) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid Ng.z");
  224. }
  225. /* Hit data */
  226. Vec3f Ng; // not normalized geometry normal
  227. float u; // barycentric u coordinate of hit
  228. float v; // barycentric v coordinate of hit
  229. unsigned int primID; // primitive ID
  230. unsigned int geomID; // geometry ID
  231. unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID
  232. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  233. unsigned int instPrimID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance primitive ID
  234. #endif
  235. };
  236. /* Converts ray packet to single rays */
  237. template<int K>
  238. __forceinline void RayK<K>::get(RayK<1>* ray) const
  239. {
  240. for (size_t i = 0; i < K; i++) // FIXME: use SIMD transpose
  241. {
  242. ray[i].org.x = org.x[i]; ray[i].org.y = org.y[i]; ray[i].org.z = org.z[i]; ray[i].tnear() = tnear()[i];
  243. ray[i].dir.x = dir.x[i]; ray[i].dir.y = dir.y[i]; ray[i].dir.z = dir.z[i]; ray[i].time() = time()[i];
  244. ray[i].tfar = tfar[i]; ray[i].mask = mask[i]; ray[i].id = id[i]; ray[i].flags = flags[i];
  245. }
  246. }
  247. template<int K>
  248. __forceinline void RayHitK<K>::get(RayHitK<1>* ray) const
  249. {
  250. // FIXME: use SIMD transpose
  251. for (size_t i = 0; i < K; i++)
  252. get(i, ray[i]);
  253. }
  254. /* Extracts a single ray out of a ray packet*/
  255. template<int K>
  256. __forceinline void RayK<K>::get(size_t i, RayK<1>& ray) const
  257. {
  258. ray.org.x = org.x[i]; ray.org.y = org.y[i]; ray.org.z = org.z[i]; ray.tnear() = tnear()[i];
  259. ray.dir.x = dir.x[i]; ray.dir.y = dir.y[i]; ray.dir.z = dir.z[i]; ray.time() = time()[i];
  260. ray.tfar = tfar[i]; ray.mask = mask[i]; ray.id = id[i]; ray.flags = flags[i];
  261. }
  262. template<int K>
  263. __forceinline void RayHitK<K>::get(size_t i, RayHitK<1>& ray) const
  264. {
  265. ray.org.x = org.x[i]; ray.org.y = org.y[i]; ray.org.z = org.z[i]; ray.tnear() = tnear()[i];
  266. ray.dir.x = dir.x[i]; ray.dir.y = dir.y[i]; ray.dir.z = dir.z[i]; ray.tfar = tfar[i]; ray.time() = time()[i];
  267. ray.mask = mask[i]; ray.id = id[i]; ray.flags = flags[i];
  268. ray.Ng.x = Ng.x[i]; ray.Ng.y = Ng.y[i]; ray.Ng.z = Ng.z[i];
  269. ray.u = u[i]; ray.v = v[i];
  270. ray.primID = primID[i]; ray.geomID = geomID[i];
  271. instance_id_stack::copy_VU<K>(instID, ray.instID, i);
  272. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  273. instance_id_stack::copy_VU<K>(instPrimID, ray.instPrimID, i);
  274. #endif
  275. }
  276. /* Converts single rays to ray packet */
  277. template<int K>
  278. __forceinline void RayK<K>::set(const RayK<1>* ray)
  279. {
  280. // FIXME: use SIMD transpose
  281. for (size_t i = 0; i < K; i++)
  282. set(i, ray[i]);
  283. }
  284. template<int K>
  285. __forceinline void RayHitK<K>::set(const RayHitK<1>* ray)
  286. {
  287. // FIXME: use SIMD transpose
  288. for (size_t i = 0; i < K; i++)
  289. set(i, ray[i]);
  290. }
  291. /* inserts a single ray into a ray packet element */
  292. template<int K>
  293. __forceinline void RayK<K>::set(size_t i, const RayK<1>& ray)
  294. {
  295. org.x[i] = ray.org.x; org.y[i] = ray.org.y; org.z[i] = ray.org.z; tnear()[i] = ray.tnear();
  296. dir.x[i] = ray.dir.x; dir.y[i] = ray.dir.y; dir.z[i] = ray.dir.z; time()[i] = ray.time();
  297. tfar[i] = ray.tfar; mask[i] = ray.mask; id[i] = ray.id; flags[i] = ray.flags;
  298. }
  299. template<int K>
  300. __forceinline void RayHitK<K>::set(size_t i, const RayHitK<1>& ray)
  301. {
  302. org.x[i] = ray.org.x; org.y[i] = ray.org.y; org.z[i] = ray.org.z; tnear()[i] = ray.tnear();
  303. dir.x[i] = ray.dir.x; dir.y[i] = ray.dir.y; dir.z[i] = ray.dir.z; time()[i] = ray.time();
  304. tfar[i] = ray.tfar; mask[i] = ray.mask; id[i] = ray.id; flags[i] = ray.flags;
  305. Ng.x[i] = ray.Ng.x; Ng.y[i] = ray.Ng.y; Ng.z[i] = ray.Ng.z;
  306. u[i] = ray.u; v[i] = ray.v;
  307. primID[i] = ray.primID; geomID[i] = ray.geomID;
  308. instance_id_stack::copy_UV<K>(ray.instID, instID, i);
  309. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  310. instance_id_stack::copy_UV<K>(ray.instPrimID, instPrimID, i);
  311. #endif
  312. }
  313. /* copies a ray packet element into another element*/
  314. template<int K>
  315. __forceinline void RayK<K>::copy(size_t dest, size_t source)
  316. {
  317. org.x[dest] = org.x[source]; org.y[dest] = org.y[source]; org.z[dest] = org.z[source]; tnear()[dest] = tnear()[source];
  318. dir.x[dest] = dir.x[source]; dir.y[dest] = dir.y[source]; dir.z[dest] = dir.z[source]; time()[dest] = time()[source];
  319. tfar [dest] = tfar[source]; mask[dest] = mask[source]; id[dest] = id[source]; flags[dest] = flags[source];
  320. }
  321. template<int K>
  322. __forceinline void RayHitK<K>::copy(size_t dest, size_t source)
  323. {
  324. org.x[dest] = org.x[source]; org.y[dest] = org.y[source]; org.z[dest] = org.z[source]; tnear()[dest] = tnear()[source];
  325. dir.x[dest] = dir.x[source]; dir.y[dest] = dir.y[source]; dir.z[dest] = dir.z[source]; time()[dest] = time()[source];
  326. tfar [dest] = tfar[source]; mask[dest] = mask[source]; id[dest] = id[source]; flags[dest] = flags[source];
  327. Ng.x[dest] = Ng.x[source]; Ng.y[dest] = Ng.y[source]; Ng.z[dest] = Ng.z[source];
  328. u[dest] = u[source]; v[dest] = v[source];
  329. primID[dest] = primID[source]; geomID[dest] = geomID[source];
  330. instance_id_stack::copy_VV<K>(instID, instID, source, dest);
  331. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  332. instance_id_stack::copy_VV<K>(instPrimID, instPrimID, source, dest);
  333. #endif
  334. }
  335. /* Shortcuts */
  336. typedef RayK<1> Ray;
  337. typedef RayK<4> Ray4;
  338. typedef RayK<8> Ray8;
  339. typedef RayK<16> Ray16;
  340. typedef RayK<VSIZEX> Rayx;
  341. struct RayN;
  342. typedef RayHitK<1> RayHit;
  343. typedef RayHitK<4> RayHit4;
  344. typedef RayHitK<8> RayHit8;
  345. typedef RayHitK<16> RayHit16;
  346. typedef RayHitK<VSIZEX> RayHitx;
  347. struct RayHitN;
  348. template<int K, bool intersect>
  349. struct RayTypeHelper;
  350. template<int K>
  351. struct RayTypeHelper<K, true>
  352. {
  353. typedef RayHitK<K> Ty;
  354. };
  355. template<int K>
  356. struct RayTypeHelper<K, false>
  357. {
  358. typedef RayK<K> Ty;
  359. };
  360. template<bool intersect>
  361. using RayType = typename RayTypeHelper<1, intersect>::Ty;
  362. template<int K, bool intersect>
  363. using RayTypeK = typename RayTypeHelper<K, intersect>::Ty;
  364. /* Outputs ray to stream */
  365. template<int K>
  366. __forceinline embree_ostream operator <<(embree_ostream cout, const RayK<K>& ray)
  367. {
  368. return cout << "{ " << embree_endl
  369. << " org = " << ray.org << embree_endl
  370. << " dir = " << ray.dir << embree_endl
  371. << " near = " << ray.tnear() << embree_endl
  372. << " far = " << ray.tfar << embree_endl
  373. << " time = " << ray.time() << embree_endl
  374. << " mask = " << ray.mask << embree_endl
  375. << " id = " << ray.id << embree_endl
  376. << " flags = " << ray.flags << embree_endl
  377. << "}";
  378. }
  379. template<int K>
  380. __forceinline embree_ostream operator <<(embree_ostream cout, const RayHitK<K>& ray)
  381. {
  382. cout << "{ " << embree_endl
  383. << " org = " << ray.org << embree_endl
  384. << " dir = " << ray.dir << embree_endl
  385. << " near = " << ray.tnear() << embree_endl
  386. << " far = " << ray.tfar << embree_endl
  387. << " time = " << ray.time() << embree_endl
  388. << " mask = " << ray.mask << embree_endl
  389. << " id = " << ray.id << embree_endl
  390. << " flags = " << ray.flags << embree_endl
  391. << " Ng = " << ray.Ng
  392. << " u = " << ray.u << embree_endl
  393. << " v = " << ray.v << embree_endl
  394. << " primID = " << ray.primID << embree_endl
  395. << " geomID = " << ray.geomID << embree_endl
  396. << " instID =";
  397. for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
  398. {
  399. cout << " " << ray.instID[l];
  400. }
  401. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  402. cout << " instPrimID =";
  403. for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
  404. {
  405. cout << " " << ray.instPrimID[l];
  406. }
  407. #endif
  408. cout << embree_endl;
  409. return cout << "}";
  410. }
  411. struct RayStreamSOA
  412. {
  413. __forceinline RayStreamSOA(void* rays, size_t N)
  414. : ptr((char*)rays), N(N) {}
  415. /* ray data access functions */
  416. __forceinline float* org_x(size_t offset = 0) { return (float*)&ptr[0*4*N+offset]; } // x coordinate of ray origin
  417. __forceinline float* org_y(size_t offset = 0) { return (float*)&ptr[1*4*N+offset]; } // y coordinate of ray origin
  418. __forceinline float* org_z(size_t offset = 0) { return (float*)&ptr[2*4*N+offset]; }; // z coordinate of ray origin
  419. __forceinline float* tnear(size_t offset = 0) { return (float*)&ptr[3*4*N+offset]; }; // start of ray segment
  420. __forceinline float* dir_x(size_t offset = 0) { return (float*)&ptr[4*4*N+offset]; }; // x coordinate of ray direction
  421. __forceinline float* dir_y(size_t offset = 0) { return (float*)&ptr[5*4*N+offset]; }; // y coordinate of ray direction
  422. __forceinline float* dir_z(size_t offset = 0) { return (float*)&ptr[6*4*N+offset]; }; // z coordinate of ray direction
  423. __forceinline float* time (size_t offset = 0) { return (float*)&ptr[7*4*N+offset]; }; // time of this ray for motion blur
  424. __forceinline float* tfar (size_t offset = 0) { return (float*)&ptr[8*4*N+offset]; }; // end of ray segment (set to hit distance)
  425. __forceinline int* mask (size_t offset = 0) { return (int*)&ptr[9*4*N+offset]; }; // used to mask out objects during traversal (optional)
  426. __forceinline int* id (size_t offset = 0) { return (int*)&ptr[10*4*N+offset]; }; // id
  427. __forceinline int* flags(size_t offset = 0) { return (int*)&ptr[11*4*N+offset]; }; // flags
  428. /* hit data access functions */
  429. __forceinline float* Ng_x(size_t offset = 0) { return (float*)&ptr[12*4*N+offset]; }; // x coordinate of geometry normal
  430. __forceinline float* Ng_y(size_t offset = 0) { return (float*)&ptr[13*4*N+offset]; }; // y coordinate of geometry normal
  431. __forceinline float* Ng_z(size_t offset = 0) { return (float*)&ptr[14*4*N+offset]; }; // z coordinate of geometry normal
  432. __forceinline float* u(size_t offset = 0) { return (float*)&ptr[15*4*N+offset]; }; // barycentric u coordinate of hit
  433. __forceinline float* v(size_t offset = 0) { return (float*)&ptr[16*4*N+offset]; }; // barycentric v coordinate of hit
  434. __forceinline unsigned int* primID(size_t offset = 0) { return (unsigned int*)&ptr[17*4*N+offset]; }; // primitive ID
  435. __forceinline unsigned int* geomID(size_t offset = 0) { return (unsigned int*)&ptr[18*4*N+offset]; }; // geometry ID
  436. __forceinline unsigned int* instID(size_t level, size_t offset = 0) { return (unsigned int*)&ptr[19*4*N+level*4*N+offset]; }; // instance ID
  437. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  438. __forceinline unsigned int* instPrimID(size_t level, size_t offset = 0) { return (unsigned int*)&ptr[19*4*N+RTC_MAX_INSTANCE_LEVEL_COUNT*4*N+level*4*N+offset]; }; // instance primitive ID
  439. #endif
  440. __forceinline Ray getRayByOffset(size_t offset)
  441. {
  442. Ray ray;
  443. ray.org.x = org_x(offset)[0];
  444. ray.org.y = org_y(offset)[0];
  445. ray.org.z = org_z(offset)[0];
  446. ray.tnear() = tnear(offset)[0];
  447. ray.dir.x = dir_x(offset)[0];
  448. ray.dir.y = dir_y(offset)[0];
  449. ray.dir.z = dir_z(offset)[0];
  450. ray.time() = time(offset)[0];
  451. ray.tfar = tfar(offset)[0];
  452. ray.mask = mask(offset)[0];
  453. ray.id = id(offset)[0];
  454. ray.flags = flags(offset)[0];
  455. return ray;
  456. }
  457. template<int K>
  458. __forceinline RayK<K> getRayByOffset(size_t offset)
  459. {
  460. RayK<K> ray;
  461. ray.org.x = vfloat<K>::loadu(org_x(offset));
  462. ray.org.y = vfloat<K>::loadu(org_y(offset));
  463. ray.org.z = vfloat<K>::loadu(org_z(offset));
  464. ray.tnear = vfloat<K>::loadu(tnear(offset));
  465. ray.dir.x = vfloat<K>::loadu(dir_x(offset));
  466. ray.dir.y = vfloat<K>::loadu(dir_y(offset));
  467. ray.dir.z = vfloat<K>::loadu(dir_z(offset));
  468. ray.time = vfloat<K>::loadu(time(offset));
  469. ray.tfar = vfloat<K>::loadu(tfar(offset));
  470. ray.mask = vint<K>::loadu(mask(offset));
  471. ray.id = vint<K>::loadu(id(offset));
  472. ray.flags = vint<K>::loadu(flags(offset));
  473. return ray;
  474. }
  475. template<int K>
  476. __forceinline RayK<K> getRayByOffset(const vbool<K>& valid, size_t offset)
  477. {
  478. RayK<K> ray;
  479. ray.org.x = vfloat<K>::loadu(valid, org_x(offset));
  480. ray.org.y = vfloat<K>::loadu(valid, org_y(offset));
  481. ray.org.z = vfloat<K>::loadu(valid, org_z(offset));
  482. ray.tnear() = vfloat<K>::loadu(valid, tnear(offset));
  483. ray.dir.x = vfloat<K>::loadu(valid, dir_x(offset));
  484. ray.dir.y = vfloat<K>::loadu(valid, dir_y(offset));
  485. ray.dir.z = vfloat<K>::loadu(valid, dir_z(offset));
  486. ray.time() = vfloat<K>::loadu(valid, time(offset));
  487. ray.tfar = vfloat<K>::loadu(valid, tfar(offset));
  488. #if !defined(__AVX__)
  489. /* SSE: some ray members must be loaded with scalar instructions to ensure that we don't cause memory faults,
  490. because the SSE masked loads always access the entire vector */
  491. if (unlikely(!all(valid)))
  492. {
  493. ray.mask = zero;
  494. ray.id = zero;
  495. ray.flags = zero;
  496. for (size_t k = 0; k < K; k++)
  497. {
  498. if (likely(valid[k]))
  499. {
  500. ray.mask[k] = mask(offset)[k];
  501. ray.id[k] = id(offset)[k];
  502. ray.flags[k] = flags(offset)[k];
  503. }
  504. }
  505. }
  506. else
  507. #endif
  508. {
  509. ray.mask = vint<K>::loadu(valid, mask(offset));
  510. ray.id = vint<K>::loadu(valid, id(offset));
  511. ray.flags = vint<K>::loadu(valid, flags(offset));
  512. }
  513. return ray;
  514. }
  515. template<int K>
  516. __forceinline void setHitByOffset(const vbool<K>& valid_i, size_t offset, const RayHitK<K>& ray)
  517. {
  518. /*
  519. * valid_i: stores which of the input rays exist (do not access nonexistent rays!)
  520. * valid: stores which of the rays actually hit something.
  521. */
  522. vbool<K> valid = valid_i;
  523. valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
  524. if (likely(any(valid)))
  525. {
  526. vfloat<K>::storeu(valid, tfar(offset), ray.tfar);
  527. vfloat<K>::storeu(valid, Ng_x(offset), ray.Ng.x);
  528. vfloat<K>::storeu(valid, Ng_y(offset), ray.Ng.y);
  529. vfloat<K>::storeu(valid, Ng_z(offset), ray.Ng.z);
  530. vfloat<K>::storeu(valid, u(offset), ray.u);
  531. vfloat<K>::storeu(valid, v(offset), ray.v);
  532. #if !defined(__AVX__)
  533. /* SSE: some ray members must be stored with scalar instructions to ensure that we don't cause memory faults,
  534. because the SSE masked stores always access the entire vector */
  535. if (unlikely(!all(valid_i)))
  536. {
  537. for (size_t k = 0; k < K; k++)
  538. {
  539. if (likely(valid[k]))
  540. {
  541. primID(offset)[k] = ray.primID[k];
  542. geomID(offset)[k] = ray.geomID[k];
  543. instID(0, offset)[k] = ray.instID[0][k];
  544. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  545. instPrimID(0, offset)[k] = ray.instPrimID[0][k];
  546. #endif
  547. #if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
  548. for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1][k] != RTC_INVALID_GEOMETRY_ID; ++l) {
  549. instID(l, offset)[k] = ray.instID[l][k];
  550. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  551. instPrimID(l, offset)[k] = ray.instPrimID[l][k];
  552. #endif
  553. }
  554. #endif
  555. }
  556. }
  557. }
  558. else
  559. #endif
  560. {
  561. vuint<K>::storeu(valid, primID(offset), ray.primID);
  562. vuint<K>::storeu(valid, geomID(offset), ray.geomID);
  563. vuint<K>::storeu(valid, instID(0, offset), ray.instID[0]);
  564. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  565. vuint<K>::storeu(valid, instPrimID(0, offset), ray.instPrimID[0]);
  566. #endif
  567. #if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
  568. for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l) {
  569. vuint<K>::storeu(valid, instID(l, offset), ray.instID[l]);
  570. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  571. vuint<K>::storeu(valid, instPrimID(l, offset), ray.instPrimID[l]);
  572. #endif
  573. }
  574. #endif
  575. }
  576. }
  577. }
  578. template<int K>
  579. __forceinline void setHitByOffset(const vbool<K>& valid_i, size_t offset, const RayK<K>& ray)
  580. {
  581. vbool<K> valid = valid_i;
  582. valid &= (ray.tfar < 0.0f);
  583. if (likely(any(valid)))
  584. vfloat<K>::storeu(valid, tfar(offset), ray.tfar);
  585. }
  586. __forceinline size_t getOctantByOffset(size_t offset)
  587. {
  588. const float dx = dir_x(offset)[0];
  589. const float dy = dir_y(offset)[0];
  590. const float dz = dir_z(offset)[0];
  591. const size_t octantID = (dx < 0.0f ? 1 : 0) + (dy < 0.0f ? 2 : 0) + (dz < 0.0f ? 4 : 0);
  592. return octantID;
  593. }
  594. __forceinline bool isValidByOffset(size_t offset)
  595. {
  596. const float nnear = tnear(offset)[0];
  597. const float ffar = tfar(offset)[0];
  598. return nnear <= ffar;
  599. }
  600. template<int K>
  601. __forceinline RayK<K> getRayByOffset(const vbool<K>& valid, const vint<K>& offset)
  602. {
  603. RayK<K> ray;
  604. #if defined(__AVX2__)
  605. ray.org.x = vfloat<K>::template gather<1>(valid, org_x(), offset);
  606. ray.org.y = vfloat<K>::template gather<1>(valid, org_y(), offset);
  607. ray.org.z = vfloat<K>::template gather<1>(valid, org_z(), offset);
  608. ray.tnear() = vfloat<K>::template gather<1>(valid, tnear(), offset);
  609. ray.dir.x = vfloat<K>::template gather<1>(valid, dir_x(), offset);
  610. ray.dir.y = vfloat<K>::template gather<1>(valid, dir_y(), offset);
  611. ray.dir.z = vfloat<K>::template gather<1>(valid, dir_z(), offset);
  612. ray.time() = vfloat<K>::template gather<1>(valid, time(), offset);
  613. ray.tfar = vfloat<K>::template gather<1>(valid, tfar(), offset);
  614. ray.mask = vint<K>::template gather<1>(valid, mask(), offset);
  615. ray.id = vint<K>::template gather<1>(valid, id(), offset);
  616. ray.flags = vint<K>::template gather<1>(valid, flags(), offset);
  617. #else
  618. ray.org = zero;
  619. ray.tnear() = zero;
  620. ray.dir = zero;
  621. ray.time() = zero;
  622. ray.tfar = zero;
  623. ray.mask = zero;
  624. ray.id = zero;
  625. ray.flags = zero;
  626. for (size_t k = 0; k < K; k++)
  627. {
  628. if (likely(valid[k]))
  629. {
  630. const size_t ofs = offset[k];
  631. ray.org.x[k] = *org_x(ofs);
  632. ray.org.y[k] = *org_y(ofs);
  633. ray.org.z[k] = *org_z(ofs);
  634. ray.tnear()[k] = *tnear(ofs);
  635. ray.dir.x[k] = *dir_x(ofs);
  636. ray.dir.y[k] = *dir_y(ofs);
  637. ray.dir.z[k] = *dir_z(ofs);
  638. ray.time()[k] = *time(ofs);
  639. ray.tfar[k] = *tfar(ofs);
  640. ray.mask[k] = *mask(ofs);
  641. ray.id[k] = *id(ofs);
  642. ray.flags[k] = *flags(ofs);
  643. }
  644. }
  645. #endif
  646. return ray;
  647. }
  648. template<int K>
  649. __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayHitK<K>& ray)
  650. {
  651. vbool<K> valid = valid_i;
  652. valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
  653. if (likely(any(valid)))
  654. {
  655. #if defined(__AVX512F__)
  656. vfloat<K>::template scatter<1>(valid, tfar(), offset, ray.tfar);
  657. vfloat<K>::template scatter<1>(valid, Ng_x(), offset, ray.Ng.x);
  658. vfloat<K>::template scatter<1>(valid, Ng_y(), offset, ray.Ng.y);
  659. vfloat<K>::template scatter<1>(valid, Ng_z(), offset, ray.Ng.z);
  660. vfloat<K>::template scatter<1>(valid, u(), offset, ray.u);
  661. vfloat<K>::template scatter<1>(valid, v(), offset, ray.v);
  662. vuint<K>::template scatter<1>(valid, primID(), offset, ray.primID);
  663. vuint<K>::template scatter<1>(valid, geomID(), offset, ray.geomID);
  664. vuint<K>::template scatter<1>(valid, instID(0), offset, ray.instID[0]);
  665. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  666. vuint<K>::template scatter<1>(valid, instPrimID(0), offset, ray.instPrimID[0]);
  667. #endif
  668. #if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
  669. for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l) {
  670. vuint<K>::template scatter<1>(valid, instID(l), offset, ray.instID[l]);
  671. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  672. vuint<K>::template scatter<1>(valid, instPrimID(l), offset, ray.instPrimID[l]);
  673. #endif
  674. }
  675. #endif
  676. #else
  677. size_t valid_bits = movemask(valid);
  678. while (valid_bits != 0)
  679. {
  680. const size_t k = bscf(valid_bits);
  681. const size_t ofs = offset[k];
  682. *tfar(ofs) = ray.tfar[k];
  683. *Ng_x(ofs) = ray.Ng.x[k];
  684. *Ng_y(ofs) = ray.Ng.y[k];
  685. *Ng_z(ofs) = ray.Ng.z[k];
  686. *u(ofs) = ray.u[k];
  687. *v(ofs) = ray.v[k];
  688. *primID(ofs) = ray.primID[k];
  689. *geomID(ofs) = ray.geomID[k];
  690. *instID(0, ofs) = ray.instID[0][k];
  691. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  692. *instPrimID(0, ofs) = ray.instPrimID[0][k];
  693. #endif
  694. #if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
  695. for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1][k] != RTC_INVALID_GEOMETRY_ID; ++l) {
  696. *instID(l, ofs) = ray.instID[l][k];
  697. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  698. *instPrimID(l, ofs) = ray.instPrimID[l][k];
  699. #endif
  700. }
  701. #endif
  702. }
  703. #endif
  704. }
  705. }
  706. template<int K>
  707. __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayK<K>& ray)
  708. {
  709. vbool<K> valid = valid_i;
  710. valid &= (ray.tfar < 0.0f);
  711. if (likely(any(valid)))
  712. {
  713. #if defined(__AVX512F__)
  714. vfloat<K>::template scatter<1>(valid, tfar(), offset, ray.tfar);
  715. #else
  716. size_t valid_bits = movemask(valid);
  717. while (valid_bits != 0)
  718. {
  719. const size_t k = bscf(valid_bits);
  720. const size_t ofs = offset[k];
  721. *tfar(ofs) = ray.tfar[k];
  722. }
  723. #endif
  724. }
  725. }
  726. char* __restrict__ ptr;
  727. size_t N;
  728. };
  729. template<size_t MAX_K>
  730. struct StackRayStreamSOA : public RayStreamSOA
  731. {
  732. __forceinline StackRayStreamSOA(size_t K)
  733. : RayStreamSOA(data, K) { assert(K <= MAX_K); }
  734. char data[MAX_K / 4 * sizeof(RayHit4)];
  735. };
  736. struct RayStreamSOP
  737. {
  738. template<class T>
  739. __forceinline void init(T& t)
  740. {
  741. org_x = (float*)&t.org.x;
  742. org_y = (float*)&t.org.y;
  743. org_z = (float*)&t.org.z;
  744. tnear = (float*)&t.tnear;
  745. dir_x = (float*)&t.dir.x;
  746. dir_y = (float*)&t.dir.y;
  747. dir_z = (float*)&t.dir.z;
  748. time = (float*)&t.time;
  749. tfar = (float*)&t.tfar;
  750. mask = (unsigned int*)&t.mask;
  751. id = (unsigned int*)&t.id;
  752. flags = (unsigned int*)&t.flags;
  753. Ng_x = (float*)&t.Ng.x;
  754. Ng_y = (float*)&t.Ng.y;
  755. Ng_z = (float*)&t.Ng.z;
  756. u = (float*)&t.u;
  757. v = (float*)&t.v;
  758. primID = (unsigned int*)&t.primID;
  759. geomID = (unsigned int*)&t.geomID;
  760. for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) {
  761. instID[l] = (unsigned int*)&t.instID[l];
  762. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  763. instPrimID[l] = (unsigned int*)&t.instPrimID[l];
  764. #endif
  765. }
  766. }
  767. __forceinline Ray getRayByOffset(size_t offset)
  768. {
  769. Ray ray;
  770. ray.org.x = *(float* __restrict__)((char*)org_x + offset);
  771. ray.org.y = *(float* __restrict__)((char*)org_y + offset);
  772. ray.org.z = *(float* __restrict__)((char*)org_z + offset);
  773. ray.dir.x = *(float* __restrict__)((char*)dir_x + offset);
  774. ray.dir.y = *(float* __restrict__)((char*)dir_y + offset);
  775. ray.dir.z = *(float* __restrict__)((char*)dir_z + offset);
  776. ray.tfar = *(float* __restrict__)((char*)tfar + offset);
  777. ray.tnear() = tnear ? *(float* __restrict__)((char*)tnear + offset) : 0.0f;
  778. ray.time() = time ? *(float* __restrict__)((char*)time + offset) : 0.0f;
  779. ray.mask = mask ? *(unsigned int* __restrict__)((char*)mask + offset) : -1;
  780. ray.id = id ? *(unsigned int* __restrict__)((char*)id + offset) : -1;
  781. ray.flags = flags ? *(unsigned int* __restrict__)((char*)flags + offset) : -1;
  782. return ray;
  783. }
  784. template<int K>
  785. __forceinline RayK<K> getRayByOffset(const vbool<K>& valid, size_t offset)
  786. {
  787. RayK<K> ray;
  788. ray.org.x = vfloat<K>::loadu(valid, (float* __restrict__)((char*)org_x + offset));
  789. ray.org.y = vfloat<K>::loadu(valid, (float* __restrict__)((char*)org_y + offset));
  790. ray.org.z = vfloat<K>::loadu(valid, (float* __restrict__)((char*)org_z + offset));
  791. ray.dir.x = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_x + offset));
  792. ray.dir.y = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_y + offset));
  793. ray.dir.z = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_z + offset));
  794. ray.tfar = vfloat<K>::loadu(valid, (float* __restrict__)((char*)tfar + offset));
  795. ray.tnear() = tnear ? vfloat<K>::loadu(valid, (float* __restrict__)((char*)tnear + offset)) : 0.0f;
  796. ray.time() = time ? vfloat<K>::loadu(valid, (float* __restrict__)((char*)time + offset)) : 0.0f;
  797. ray.mask = mask ? vint<K>::loadu(valid, (const void* __restrict__)((char*)mask + offset)) : -1;
  798. ray.id = id ? vint<K>::loadu(valid, (const void* __restrict__)((char*)id + offset)) : -1;
  799. ray.flags = flags ? vint<K>::loadu(valid, (const void* __restrict__)((char*)flags + offset)) : -1;
  800. return ray;
  801. }
  802. template<int K>
  803. __forceinline Vec3vf<K> getDirByOffset(const vbool<K>& valid, size_t offset)
  804. {
  805. Vec3vf<K> dir;
  806. dir.x = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_x + offset));
  807. dir.y = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_y + offset));
  808. dir.z = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_z + offset));
  809. return dir;
  810. }
  811. __forceinline void setHitByOffset(size_t offset, const RayHit& ray)
  812. {
  813. if (ray.geomID != RTC_INVALID_GEOMETRY_ID)
  814. {
  815. *(float* __restrict__)((char*)tfar + offset) = ray.tfar;
  816. if (likely(Ng_x)) *(float* __restrict__)((char*)Ng_x + offset) = ray.Ng.x;
  817. if (likely(Ng_y)) *(float* __restrict__)((char*)Ng_y + offset) = ray.Ng.y;
  818. if (likely(Ng_z)) *(float* __restrict__)((char*)Ng_z + offset) = ray.Ng.z;
  819. *(float* __restrict__)((char*)u + offset) = ray.u;
  820. *(float* __restrict__)((char*)v + offset) = ray.v;
  821. *(unsigned int* __restrict__)((char*)geomID + offset) = ray.geomID;
  822. *(unsigned int* __restrict__)((char*)primID + offset) = ray.primID;
  823. if (likely(instID[0])) {
  824. *(unsigned int* __restrict__)((char*)instID[0] + offset) = ray.instID[0];
  825. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  826. *(unsigned int* __restrict__)((char*)instPrimID[0] + offset) = ray.instPrimID[0];
  827. #endif
  828. #if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
  829. for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID; ++l) {
  830. *(unsigned int* __restrict__)((char*)instID[l] + offset) = ray.instID[l];
  831. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  832. *(unsigned int* __restrict__)((char*)instPrimID[l] + offset) = ray.instPrimID[l];
  833. #endif
  834. }
  835. #endif
  836. }
  837. }
  838. }
  839. __forceinline void setHitByOffset(size_t offset, const Ray& ray)
  840. {
  841. *(float* __restrict__)((char*)tfar + offset) = ray.tfar;
  842. }
  843. template<int K>
  844. __forceinline void setHitByOffset(const vbool<K>& valid_i, size_t offset, const RayHitK<K>& ray)
  845. {
  846. vbool<K> valid = valid_i;
  847. valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
  848. if (likely(any(valid)))
  849. {
  850. vfloat<K>::storeu(valid, (float* __restrict__)((char*)tfar + offset), ray.tfar);
  851. if (likely(Ng_x)) vfloat<K>::storeu(valid, (float* __restrict__)((char*)Ng_x + offset), ray.Ng.x);
  852. if (likely(Ng_y)) vfloat<K>::storeu(valid, (float* __restrict__)((char*)Ng_y + offset), ray.Ng.y);
  853. if (likely(Ng_z)) vfloat<K>::storeu(valid, (float* __restrict__)((char*)Ng_z + offset), ray.Ng.z);
  854. vfloat<K>::storeu(valid, (float* __restrict__)((char*)u + offset), ray.u);
  855. vfloat<K>::storeu(valid, (float* __restrict__)((char*)v + offset), ray.v);
  856. vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)primID + offset), ray.primID);
  857. vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)geomID + offset), ray.geomID);
  858. if (likely(instID[0])) {
  859. vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)instID[0] + offset), ray.instID[0]);
  860. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  861. vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)instPrimID[0] + offset), ray.instPrimID[0]);
  862. #endif
  863. #if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
  864. for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l) {
  865. vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)instID[l] + offset), ray.instID[l]);
  866. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  867. vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)instPrimID[l] + offset), ray.instPrimID[l]);
  868. #endif
  869. }
  870. #endif
  871. }
  872. }
  873. }
  874. template<int K>
  875. __forceinline void setHitByOffset(const vbool<K>& valid_i, size_t offset, const RayK<K>& ray)
  876. {
  877. vbool<K> valid = valid_i;
  878. valid &= (ray.tfar < 0.0f);
  879. if (likely(any(valid)))
  880. vfloat<K>::storeu(valid, (float* __restrict__)((char*)tfar + offset), ray.tfar);
  881. }
  882. __forceinline size_t getOctantByOffset(size_t offset)
  883. {
  884. const float dx = *(float* __restrict__)((char*)dir_x + offset);
  885. const float dy = *(float* __restrict__)((char*)dir_y + offset);
  886. const float dz = *(float* __restrict__)((char*)dir_z + offset);
  887. const size_t octantID = (dx < 0.0f ? 1 : 0) + (dy < 0.0f ? 2 : 0) + (dz < 0.0f ? 4 : 0);
  888. return octantID;
  889. }
  890. __forceinline bool isValidByOffset(size_t offset)
  891. {
  892. const float nnear = tnear ? *(float* __restrict__)((char*)tnear + offset) : 0.0f;
  893. const float ffar = *(float* __restrict__)((char*)tfar + offset);
  894. return nnear <= ffar;
  895. }
  896. template<int K>
  897. __forceinline vbool<K> isValidByOffset(const vbool<K>& valid, size_t offset)
  898. {
  899. const vfloat<K> nnear = tnear ? vfloat<K>::loadu(valid, (float* __restrict__)((char*)tnear + offset)) : 0.0f;
  900. const vfloat<K> ffar = vfloat<K>::loadu(valid, (float* __restrict__)((char*)tfar + offset));
  901. return nnear <= ffar;
  902. }
  903. template<int K>
  904. __forceinline RayK<K> getRayByOffset(const vbool<K>& valid, const vint<K>& offset)
  905. {
  906. RayK<K> ray;
  907. #if defined(__AVX2__)
  908. ray.org.x = vfloat<K>::template gather<1>(valid, org_x, offset);
  909. ray.org.y = vfloat<K>::template gather<1>(valid, org_y, offset);
  910. ray.org.z = vfloat<K>::template gather<1>(valid, org_z, offset);
  911. ray.dir.x = vfloat<K>::template gather<1>(valid, dir_x, offset);
  912. ray.dir.y = vfloat<K>::template gather<1>(valid, dir_y, offset);
  913. ray.dir.z = vfloat<K>::template gather<1>(valid, dir_z, offset);
  914. ray.tfar = vfloat<K>::template gather<1>(valid, tfar, offset);
  915. ray.tnear() = tnear ? vfloat<K>::template gather<1>(valid, tnear, offset) : vfloat<K>(zero);
  916. ray.time() = time ? vfloat<K>::template gather<1>(valid, time, offset) : vfloat<K>(zero);
  917. ray.mask = mask ? vint<K>::template gather<1>(valid, (int*)mask, offset) : vint<K>(-1);
  918. ray.id = id ? vint<K>::template gather<1>(valid, (int*)id, offset) : vint<K>(-1);
  919. ray.flags = flags ? vint<K>::template gather<1>(valid, (int*)flags, offset) : vint<K>(-1);
  920. #else
  921. ray.org = zero;
  922. ray.tnear() = zero;
  923. ray.dir = zero;
  924. ray.tfar = zero;
  925. ray.time() = zero;
  926. ray.mask = zero;
  927. ray.id = zero;
  928. ray.flags = zero;
  929. for (size_t k = 0; k < K; k++)
  930. {
  931. if (likely(valid[k]))
  932. {
  933. const size_t ofs = offset[k];
  934. ray.org.x[k] = *(float* __restrict__)((char*)org_x + ofs);
  935. ray.org.y[k] = *(float* __restrict__)((char*)org_y + ofs);
  936. ray.org.z[k] = *(float* __restrict__)((char*)org_z + ofs);
  937. ray.dir.x[k] = *(float* __restrict__)((char*)dir_x + ofs);
  938. ray.dir.y[k] = *(float* __restrict__)((char*)dir_y + ofs);
  939. ray.dir.z[k] = *(float* __restrict__)((char*)dir_z + ofs);
  940. ray.tfar[k] = *(float* __restrict__)((char*)tfar + ofs);
  941. ray.tnear()[k] = tnear ? *(float* __restrict__)((char*)tnear + ofs) : 0.0f;
  942. ray.time()[k] = time ? *(float* __restrict__)((char*)time + ofs) : 0.0f;
  943. ray.mask[k] = mask ? *(int* __restrict__)((char*)mask + ofs) : -1;
  944. ray.id[k] = id ? *(int* __restrict__)((char*)id + ofs) : -1;
  945. ray.flags[k] = flags ? *(int* __restrict__)((char*)flags + ofs) : -1;
  946. }
  947. }
  948. #endif
  949. return ray;
  950. }
  951. template<int K>
  952. __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayHitK<K>& ray)
  953. {
  954. vbool<K> valid = valid_i;
  955. valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
  956. if (likely(any(valid)))
  957. {
  958. #if defined(__AVX512F__)
  959. vfloat<K>::template scatter<1>(valid, tfar, offset, ray.tfar);
  960. if (likely(Ng_x)) vfloat<K>::template scatter<1>(valid, Ng_x, offset, ray.Ng.x);
  961. if (likely(Ng_y)) vfloat<K>::template scatter<1>(valid, Ng_y, offset, ray.Ng.y);
  962. if (likely(Ng_z)) vfloat<K>::template scatter<1>(valid, Ng_z, offset, ray.Ng.z);
  963. vfloat<K>::template scatter<1>(valid, u, offset, ray.u);
  964. vfloat<K>::template scatter<1>(valid, v, offset, ray.v);
  965. vuint<K>::template scatter<1>(valid, (unsigned int*)geomID, offset, ray.geomID);
  966. vuint<K>::template scatter<1>(valid, (unsigned int*)primID, offset, ray.primID);
  967. if (likely(instID[0])) {
  968. vuint<K>::template scatter<1>(valid, (unsigned int*)instID[0], offset, ray.instID[0]);
  969. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  970. vuint<K>::template scatter<1>(valid, (unsigned int*)instPrimID[0], offset, ray.instPrimID[0]);
  971. #endif
  972. #if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
  973. for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l) {
  974. vuint<K>::template scatter<1>(valid, (unsigned int*)instID[l], offset, ray.instID[l]);
  975. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  976. vuint<K>::template scatter<1>(valid, (unsigned int*)instPrimID[l], offset, ray.instPrimID[l]);
  977. #endif
  978. }
  979. #endif
  980. }
  981. #else
  982. size_t valid_bits = movemask(valid);
  983. while (valid_bits != 0)
  984. {
  985. const size_t k = bscf(valid_bits);
  986. const size_t ofs = offset[k];
  987. *(float* __restrict__)((char*)tfar + ofs) = ray.tfar[k];
  988. if (likely(Ng_x)) *(float* __restrict__)((char*)Ng_x + ofs) = ray.Ng.x[k];
  989. if (likely(Ng_y)) *(float* __restrict__)((char*)Ng_y + ofs) = ray.Ng.y[k];
  990. if (likely(Ng_z)) *(float* __restrict__)((char*)Ng_z + ofs) = ray.Ng.z[k];
  991. *(float* __restrict__)((char*)u + ofs) = ray.u[k];
  992. *(float* __restrict__)((char*)v + ofs) = ray.v[k];
  993. *(unsigned int* __restrict__)((char*)primID + ofs) = ray.primID[k];
  994. *(unsigned int* __restrict__)((char*)geomID + ofs) = ray.geomID[k];
  995. if (likely(instID[0])) {
  996. *(unsigned int* __restrict__)((char*)instID[0] + ofs) = ray.instID[0][k];
  997. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  998. *(unsigned int* __restrict__)((char*)instPrimID[0] + ofs) = ray.instPrimID[0][k];
  999. #endif
  1000. #if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
  1001. for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1][k] != RTC_INVALID_GEOMETRY_ID; ++l) {
  1002. *(unsigned int* __restrict__)((char*)instID[l] + ofs) = ray.instID[l][k];
  1003. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  1004. *(unsigned int* __restrict__)((char*)instPrimID[l] + ofs) = ray.instPrimID[l][k];
  1005. #endif
  1006. }
  1007. #endif
  1008. }
  1009. }
  1010. #endif
  1011. }
  1012. }
  1013. template<int K>
  1014. __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayK<K>& ray)
  1015. {
  1016. vbool<K> valid = valid_i;
  1017. valid &= (ray.tfar < 0.0f);
  1018. if (likely(any(valid)))
  1019. {
  1020. #if defined(__AVX512F__)
  1021. vfloat<K>::template scatter<1>(valid, tfar, offset, ray.tfar);
  1022. #else
  1023. size_t valid_bits = movemask(valid);
  1024. while (valid_bits != 0)
  1025. {
  1026. const size_t k = bscf(valid_bits);
  1027. const size_t ofs = offset[k];
  1028. *(float* __restrict__)((char*)tfar + ofs) = ray.tfar[k];
  1029. }
  1030. #endif
  1031. }
  1032. }
  1033. /* ray data */
  1034. float* __restrict__ org_x; // x coordinate of ray origin
  1035. float* __restrict__ org_y; // y coordinate of ray origin
  1036. float* __restrict__ org_z; // z coordinate of ray origin
  1037. float* __restrict__ tnear; // start of ray segment (optional)
  1038. float* __restrict__ dir_x; // x coordinate of ray direction
  1039. float* __restrict__ dir_y; // y coordinate of ray direction
  1040. float* __restrict__ dir_z; // z coordinate of ray direction
  1041. float* __restrict__ time; // time of this ray for motion blur (optional)
  1042. float* __restrict__ tfar; // end of ray segment (set to hit distance)
  1043. unsigned int* __restrict__ mask; // used to mask out objects during traversal (optional)
  1044. unsigned int* __restrict__ id; // ray ID
  1045. unsigned int* __restrict__ flags; // ray flags
  1046. /* hit data */
  1047. float* __restrict__ Ng_x; // x coordinate of geometry normal (optional)
  1048. float* __restrict__ Ng_y; // y coordinate of geometry normal (optional)
  1049. float* __restrict__ Ng_z; // z coordinate of geometry normal (optional)
  1050. float* __restrict__ u; // barycentric u coordinate of hit
  1051. float* __restrict__ v; // barycentric v coordinate of hit
  1052. unsigned int* __restrict__ primID; // primitive ID
  1053. unsigned int* __restrict__ geomID; // geometry ID
  1054. unsigned int* __restrict__ instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID
  1055. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  1056. unsigned int* __restrict__ instPrimID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance primitive ID (optional)
  1057. #endif
  1058. };
  1059. struct RayStreamAOS
  1060. {
  1061. __forceinline RayStreamAOS(void* rays)
  1062. : ptr((Ray*)rays) {}
  1063. __forceinline Ray& getRayByOffset(size_t offset)
  1064. {
  1065. return *(Ray*)((char*)ptr + offset);
  1066. }
  1067. template<int K>
  1068. __forceinline RayK<K> getRayByOffset(const vint<K>& offset);
  1069. template<int K>
  1070. __forceinline RayK<K> getRayByOffset(const vbool<K>& valid, const vint<K>& offset)
  1071. {
  1072. const vint<K> valid_offset = select(valid, offset, vintx(zero));
  1073. return getRayByOffset<K>(valid_offset);
  1074. }
  1075. template<int K>
  1076. __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayHitK<K>& ray)
  1077. {
  1078. vbool<K> valid = valid_i;
  1079. valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
  1080. if (likely(any(valid)))
  1081. {
  1082. #if defined(__AVX512F__)
  1083. vfloat<K>::template scatter<1>(valid, &ptr->tfar, offset, ray.tfar);
  1084. vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->Ng.x, offset, ray.Ng.x);
  1085. vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->Ng.y, offset, ray.Ng.y);
  1086. vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->Ng.z, offset, ray.Ng.z);
  1087. vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->u, offset, ray.u);
  1088. vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->v, offset, ray.v);
  1089. vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->primID, offset, ray.primID);
  1090. vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->geomID, offset, ray.geomID);
  1091. vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->instID[0], offset, ray.instID[0]);
  1092. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  1093. vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->instPrimID[0], offset, ray.instPrimID[0]);
  1094. #endif
  1095. #if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
  1096. for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l) {
  1097. vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->instID[l], offset, ray.instID[l]);
  1098. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  1099. vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->instPrimID[l], offset, ray.instPrimID[l]);
  1100. #endif
  1101. }
  1102. #endif
  1103. #else
  1104. size_t valid_bits = movemask(valid);
  1105. while (valid_bits != 0)
  1106. {
  1107. const size_t k = bscf(valid_bits);
  1108. RayHit* __restrict__ ray_k = (RayHit*)((char*)ptr + offset[k]);
  1109. ray_k->tfar = ray.tfar[k];
  1110. ray_k->Ng.x = ray.Ng.x[k];
  1111. ray_k->Ng.y = ray.Ng.y[k];
  1112. ray_k->Ng.z = ray.Ng.z[k];
  1113. ray_k->u = ray.u[k];
  1114. ray_k->v = ray.v[k];
  1115. ray_k->primID = ray.primID[k];
  1116. ray_k->geomID = ray.geomID[k];
  1117. instance_id_stack::copy_VU<K>(ray.instID, ray_k->instID, k);
  1118. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  1119. instance_id_stack::copy_VU<K>(ray.instPrimID, ray_k->instPrimID, k);
  1120. #endif
  1121. }
  1122. #endif
  1123. }
  1124. }
  1125. template<int K>
  1126. __forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayK<K>& ray)
  1127. {
  1128. vbool<K> valid = valid_i;
  1129. valid &= (ray.tfar < 0.0f);
  1130. if (likely(any(valid)))
  1131. {
  1132. #if defined(__AVX512F__)
  1133. vfloat<K>::template scatter<1>(valid, &ptr->tfar, offset, ray.tfar);
  1134. #else
  1135. size_t valid_bits = movemask(valid);
  1136. while (valid_bits != 0)
  1137. {
  1138. const size_t k = bscf(valid_bits);
  1139. Ray* __restrict__ ray_k = (Ray*)((char*)ptr + offset[k]);
  1140. ray_k->tfar = ray.tfar[k];
  1141. }
  1142. #endif
  1143. }
  1144. }
  1145. Ray* __restrict__ ptr;
  1146. };
  1147. template<>
  1148. __forceinline Ray4 RayStreamAOS::getRayByOffset<4>(const vint4& offset)
  1149. {
  1150. Ray4 ray;
  1151. /* load and transpose: org.x, org.y, org.z, tnear */
  1152. const vfloat4 a0 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[0]))->org);
  1153. const vfloat4 a1 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[1]))->org);
  1154. const vfloat4 a2 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[2]))->org);
  1155. const vfloat4 a3 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[3]))->org);
  1156. transpose(a0,a1,a2,a3, ray.org.x, ray.org.y, ray.org.z, ray.tnear());
  1157. /* load and transpose: dir.x, dir.y, dir.z, time */
  1158. const vfloat4 b0 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[0]))->dir);
  1159. const vfloat4 b1 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[1]))->dir);
  1160. const vfloat4 b2 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[2]))->dir);
  1161. const vfloat4 b3 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[3]))->dir);
  1162. transpose(b0,b1,b2,b3, ray.dir.x, ray.dir.y, ray.dir.z, ray.time());
  1163. /* load and transpose: tfar, mask, id, flags */
  1164. const vfloat4 c0 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[0]))->tfar);
  1165. const vfloat4 c1 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[1]))->tfar);
  1166. const vfloat4 c2 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[2]))->tfar);
  1167. const vfloat4 c3 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[3]))->tfar);
  1168. vfloat4 maskf, idf, flagsf;
  1169. transpose(c0,c1,c2,c3, ray.tfar, maskf, idf, flagsf);
  1170. ray.mask = asInt(maskf);
  1171. ray.id = asInt(idf);
  1172. ray.flags = asInt(flagsf);
  1173. return ray;
  1174. }
  1175. #if defined(__AVX__)
  1176. template<>
  1177. __forceinline Ray8 RayStreamAOS::getRayByOffset<8>(const vint8& offset)
  1178. {
  1179. Ray8 ray;
  1180. /* load and transpose: org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */
  1181. const vfloat8 ab0 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[0]))->org);
  1182. const vfloat8 ab1 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[1]))->org);
  1183. const vfloat8 ab2 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[2]))->org);
  1184. const vfloat8 ab3 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[3]))->org);
  1185. const vfloat8 ab4 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[4]))->org);
  1186. const vfloat8 ab5 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[5]))->org);
  1187. const vfloat8 ab6 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[6]))->org);
  1188. const vfloat8 ab7 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[7]))->org);
  1189. transpose(ab0,ab1,ab2,ab3,ab4,ab5,ab6,ab7, ray.org.x, ray.org.y, ray.org.z, ray.tnear(), ray.dir.x, ray.dir.y, ray.dir.z, ray.time());
  1190. /* load and transpose: tfar, mask, id, flags */
  1191. const vfloat4 c0 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[0]))->tfar);
  1192. const vfloat4 c1 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[1]))->tfar);
  1193. const vfloat4 c2 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[2]))->tfar);
  1194. const vfloat4 c3 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[3]))->tfar);
  1195. const vfloat4 c4 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[4]))->tfar);
  1196. const vfloat4 c5 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[5]))->tfar);
  1197. const vfloat4 c6 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[6]))->tfar);
  1198. const vfloat4 c7 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[7]))->tfar);
  1199. vfloat8 maskf, idf, flagsf;
  1200. transpose(c0,c1,c2,c3,c4,c5,c6,c7, ray.tfar, maskf, idf, flagsf);
  1201. ray.mask = asInt(maskf);
  1202. ray.id = asInt(idf);
  1203. ray.flags = asInt(flagsf);
  1204. return ray;
  1205. }
  1206. #endif
  1207. #if defined(__AVX512F__)
  1208. template<>
  1209. __forceinline Ray16 RayStreamAOS::getRayByOffset<16>(const vint16& offset)
  1210. {
  1211. Ray16 ray;
  1212. /* load and transpose: org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */
  1213. const vfloat8 ab0 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 0]))->org);
  1214. const vfloat8 ab1 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 1]))->org);
  1215. const vfloat8 ab2 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 2]))->org);
  1216. const vfloat8 ab3 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 3]))->org);
  1217. const vfloat8 ab4 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 4]))->org);
  1218. const vfloat8 ab5 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 5]))->org);
  1219. const vfloat8 ab6 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 6]))->org);
  1220. const vfloat8 ab7 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 7]))->org);
  1221. const vfloat8 ab8 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 8]))->org);
  1222. const vfloat8 ab9 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 9]))->org);
  1223. const vfloat8 ab10 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[10]))->org);
  1224. const vfloat8 ab11 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[11]))->org);
  1225. const vfloat8 ab12 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[12]))->org);
  1226. const vfloat8 ab13 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[13]))->org);
  1227. const vfloat8 ab14 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[14]))->org);
  1228. const vfloat8 ab15 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[15]))->org);
  1229. transpose(ab0,ab1,ab2,ab3,ab4,ab5,ab6,ab7,ab8,ab9,ab10,ab11,ab12,ab13,ab14,ab15,
  1230. ray.org.x, ray.org.y, ray.org.z, ray.tnear(), ray.dir.x, ray.dir.y, ray.dir.z, ray.time());
  1231. /* load and transpose: tfar, mask, id, flags */
  1232. const vfloat4 c0 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 0]))->tfar);
  1233. const vfloat4 c1 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 1]))->tfar);
  1234. const vfloat4 c2 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 2]))->tfar);
  1235. const vfloat4 c3 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 3]))->tfar);
  1236. const vfloat4 c4 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 4]))->tfar);
  1237. const vfloat4 c5 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 5]))->tfar);
  1238. const vfloat4 c6 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 6]))->tfar);
  1239. const vfloat4 c7 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 7]))->tfar);
  1240. const vfloat4 c8 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 8]))->tfar);
  1241. const vfloat4 c9 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 9]))->tfar);
  1242. const vfloat4 c10 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[10]))->tfar);
  1243. const vfloat4 c11 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[11]))->tfar);
  1244. const vfloat4 c12 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[12]))->tfar);
  1245. const vfloat4 c13 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[13]))->tfar);
  1246. const vfloat4 c14 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[14]))->tfar);
  1247. const vfloat4 c15 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[15]))->tfar);
  1248. vfloat16 maskf, idf, flagsf;
  1249. transpose(c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,
  1250. ray.tfar, maskf, idf, flagsf);
  1251. ray.mask = asInt(maskf);
  1252. ray.id = asInt(idf);
  1253. ray.flags = asInt(flagsf);
  1254. return ray;
  1255. }
  1256. #endif
  1257. struct RayStreamAOP
  1258. {
  1259. __forceinline RayStreamAOP(void* rays)
  1260. : ptr((Ray**)rays) {}
  1261. __forceinline Ray& getRayByIndex(size_t index)
  1262. {
  1263. return *ptr[index];
  1264. }
  1265. template<int K>
  1266. __forceinline RayK<K> getRayByIndex(const vint<K>& index);
  1267. template<int K>
  1268. __forceinline RayK<K> getRayByIndex(const vbool<K>& valid, const vint<K>& index)
  1269. {
  1270. const vint<K> valid_index = select(valid, index, vintx(zero));
  1271. return getRayByIndex<K>(valid_index);
  1272. }
  1273. template<int K>
  1274. __forceinline void setHitByIndex(const vbool<K>& valid_i, const vint<K>& index, const RayHitK<K>& ray)
  1275. {
  1276. vbool<K> valid = valid_i;
  1277. valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
  1278. if (likely(any(valid)))
  1279. {
  1280. size_t valid_bits = movemask(valid);
  1281. while (valid_bits != 0)
  1282. {
  1283. const size_t k = bscf(valid_bits);
  1284. RayHit* __restrict__ ray_k = (RayHit*)ptr[index[k]];
  1285. ray_k->tfar = ray.tfar[k];
  1286. ray_k->Ng.x = ray.Ng.x[k];
  1287. ray_k->Ng.y = ray.Ng.y[k];
  1288. ray_k->Ng.z = ray.Ng.z[k];
  1289. ray_k->u = ray.u[k];
  1290. ray_k->v = ray.v[k];
  1291. ray_k->primID = ray.primID[k];
  1292. ray_k->geomID = ray.geomID[k];
  1293. instance_id_stack::copy_VU<K>(ray.instID, ray_k->instID, k);
  1294. #if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
  1295. instance_id_stack::copy_VU<K>(ray.instPrimID, ray_k->instPrimID, k);
  1296. #endif
  1297. }
  1298. }
  1299. }
  1300. template<int K>
  1301. __forceinline void setHitByIndex(const vbool<K>& valid_i, const vint<K>& index, const RayK<K>& ray)
  1302. {
  1303. vbool<K> valid = valid_i;
  1304. valid &= (ray.tfar < 0.0f);
  1305. if (likely(any(valid)))
  1306. {
  1307. size_t valid_bits = movemask(valid);
  1308. while (valid_bits != 0)
  1309. {
  1310. const size_t k = bscf(valid_bits);
  1311. Ray* __restrict__ ray_k = ptr[index[k]];
  1312. ray_k->tfar = ray.tfar[k];
  1313. }
  1314. }
  1315. }
  1316. Ray** __restrict__ ptr;
  1317. };
  1318. template<>
  1319. __forceinline Ray4 RayStreamAOP::getRayByIndex<4>(const vint4& index)
  1320. {
  1321. Ray4 ray;
  1322. /* load and transpose: org.x, org.y, org.z, tnear */
  1323. const vfloat4 a0 = vfloat4::loadu(&ptr[index[0]]->org);
  1324. const vfloat4 a1 = vfloat4::loadu(&ptr[index[1]]->org);
  1325. const vfloat4 a2 = vfloat4::loadu(&ptr[index[2]]->org);
  1326. const vfloat4 a3 = vfloat4::loadu(&ptr[index[3]]->org);
  1327. transpose(a0,a1,a2,a3, ray.org.x, ray.org.y, ray.org.z, ray.tnear());
  1328. /* load and transpose: dir.x, dir.y, dir.z, time */
  1329. const vfloat4 b0 = vfloat4::loadu(&ptr[index[0]]->dir);
  1330. const vfloat4 b1 = vfloat4::loadu(&ptr[index[1]]->dir);
  1331. const vfloat4 b2 = vfloat4::loadu(&ptr[index[2]]->dir);
  1332. const vfloat4 b3 = vfloat4::loadu(&ptr[index[3]]->dir);
  1333. transpose(b0,b1,b2,b3, ray.dir.x, ray.dir.y, ray.dir.z, ray.time());
  1334. /* load and transpose: tfar, mask, id, flags */
  1335. const vfloat4 c0 = vfloat4::loadu(&ptr[index[0]]->tfar);
  1336. const vfloat4 c1 = vfloat4::loadu(&ptr[index[1]]->tfar);
  1337. const vfloat4 c2 = vfloat4::loadu(&ptr[index[2]]->tfar);
  1338. const vfloat4 c3 = vfloat4::loadu(&ptr[index[3]]->tfar);
  1339. vfloat4 maskf, idf, flagsf;
  1340. transpose(c0,c1,c2,c3, ray.tfar, maskf, idf, flagsf);
  1341. ray.mask = asInt(maskf);
  1342. ray.id = asInt(idf);
  1343. ray.flags = asInt(flagsf);
  1344. return ray;
  1345. }
  #if defined(__AVX__)
  /* Gathers the eight rays referenced by 'index' through the pointer
   * array and returns them packed into a single Ray8. */
  template<>
  __forceinline Ray8 RayStreamAOP::getRayByIndex<8>(const vint8& index)
  {
    /* look up the source ray of each lane once */
    Ray* const src0 = ptr[index[0]];
    Ray* const src1 = ptr[index[1]];
    Ray* const src2 = ptr[index[2]];
    Ray* const src3 = ptr[index[3]];
    Ray* const src4 = ptr[index[4]];
    Ray* const src5 = ptr[index[5]];
    Ray* const src6 = ptr[index[6]];
    Ray* const src7 = ptr[index[7]];

    Ray8 packet;

    /* one 8-wide row per ray, starting at org:
     * org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */
    const vfloat8 headRow0 = vfloat8::loadu(&src0->org);
    const vfloat8 headRow1 = vfloat8::loadu(&src1->org);
    const vfloat8 headRow2 = vfloat8::loadu(&src2->org);
    const vfloat8 headRow3 = vfloat8::loadu(&src3->org);
    const vfloat8 headRow4 = vfloat8::loadu(&src4->org);
    const vfloat8 headRow5 = vfloat8::loadu(&src5->org);
    const vfloat8 headRow6 = vfloat8::loadu(&src6->org);
    const vfloat8 headRow7 = vfloat8::loadu(&src7->org);
    transpose(headRow0,headRow1,headRow2,headRow3,headRow4,headRow5,headRow6,headRow7,
              packet.org.x, packet.org.y, packet.org.z, packet.tnear(),
              packet.dir.x, packet.dir.y, packet.dir.z, packet.time());

    /* one 4-wide row per ray, starting at tfar: tfar, mask, id, flags */
    const vfloat4 tailRow0 = vfloat4::loadu(&src0->tfar);
    const vfloat4 tailRow1 = vfloat4::loadu(&src1->tfar);
    const vfloat4 tailRow2 = vfloat4::loadu(&src2->tfar);
    const vfloat4 tailRow3 = vfloat4::loadu(&src3->tfar);
    const vfloat4 tailRow4 = vfloat4::loadu(&src4->tfar);
    const vfloat4 tailRow5 = vfloat4::loadu(&src5->tfar);
    const vfloat4 tailRow6 = vfloat4::loadu(&src6->tfar);
    const vfloat4 tailRow7 = vfloat4::loadu(&src7->tfar);

    /* the integer fields pass through the float transpose and are
     * turned back into integer vectors with asInt afterwards */
    vfloat8 maskLanes, idLanes, flagsLanes;
    transpose(tailRow0,tailRow1,tailRow2,tailRow3,tailRow4,tailRow5,tailRow6,tailRow7,
              packet.tfar, maskLanes, idLanes, flagsLanes);
    packet.mask  = asInt(maskLanes);
    packet.id    = asInt(idLanes);
    packet.flags = asInt(flagsLanes);

    return packet;
  }
  #endif
  #if defined(__AVX512F__)
  /* Gathers the sixteen rays referenced by 'index' through the pointer
   * array and returns them packed into a single Ray16. */
  template<>
  __forceinline Ray16 RayStreamAOP::getRayByIndex<16>(const vint16& index)
  {
    /* look up the source ray of each lane once */
    Ray* const src0  = ptr[index[0]];
    Ray* const src1  = ptr[index[1]];
    Ray* const src2  = ptr[index[2]];
    Ray* const src3  = ptr[index[3]];
    Ray* const src4  = ptr[index[4]];
    Ray* const src5  = ptr[index[5]];
    Ray* const src6  = ptr[index[6]];
    Ray* const src7  = ptr[index[7]];
    Ray* const src8  = ptr[index[8]];
    Ray* const src9  = ptr[index[9]];
    Ray* const src10 = ptr[index[10]];
    Ray* const src11 = ptr[index[11]];
    Ray* const src12 = ptr[index[12]];
    Ray* const src13 = ptr[index[13]];
    Ray* const src14 = ptr[index[14]];
    Ray* const src15 = ptr[index[15]];

    Ray16 packet;

    /* one 8-wide row per ray, starting at org:
     * org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */
    const vfloat8 headRow0  = vfloat8::loadu(&src0->org);
    const vfloat8 headRow1  = vfloat8::loadu(&src1->org);
    const vfloat8 headRow2  = vfloat8::loadu(&src2->org);
    const vfloat8 headRow3  = vfloat8::loadu(&src3->org);
    const vfloat8 headRow4  = vfloat8::loadu(&src4->org);
    const vfloat8 headRow5  = vfloat8::loadu(&src5->org);
    const vfloat8 headRow6  = vfloat8::loadu(&src6->org);
    const vfloat8 headRow7  = vfloat8::loadu(&src7->org);
    const vfloat8 headRow8  = vfloat8::loadu(&src8->org);
    const vfloat8 headRow9  = vfloat8::loadu(&src9->org);
    const vfloat8 headRow10 = vfloat8::loadu(&src10->org);
    const vfloat8 headRow11 = vfloat8::loadu(&src11->org);
    const vfloat8 headRow12 = vfloat8::loadu(&src12->org);
    const vfloat8 headRow13 = vfloat8::loadu(&src13->org);
    const vfloat8 headRow14 = vfloat8::loadu(&src14->org);
    const vfloat8 headRow15 = vfloat8::loadu(&src15->org);
    transpose(headRow0,headRow1,headRow2,headRow3,headRow4,headRow5,headRow6,headRow7,
              headRow8,headRow9,headRow10,headRow11,headRow12,headRow13,headRow14,headRow15,
              packet.org.x, packet.org.y, packet.org.z, packet.tnear(),
              packet.dir.x, packet.dir.y, packet.dir.z, packet.time());

    /* one 4-wide row per ray, starting at tfar: tfar, mask, id, flags */
    const vfloat4 tailRow0  = vfloat4::loadu(&src0->tfar);
    const vfloat4 tailRow1  = vfloat4::loadu(&src1->tfar);
    const vfloat4 tailRow2  = vfloat4::loadu(&src2->tfar);
    const vfloat4 tailRow3  = vfloat4::loadu(&src3->tfar);
    const vfloat4 tailRow4  = vfloat4::loadu(&src4->tfar);
    const vfloat4 tailRow5  = vfloat4::loadu(&src5->tfar);
    const vfloat4 tailRow6  = vfloat4::loadu(&src6->tfar);
    const vfloat4 tailRow7  = vfloat4::loadu(&src7->tfar);
    const vfloat4 tailRow8  = vfloat4::loadu(&src8->tfar);
    const vfloat4 tailRow9  = vfloat4::loadu(&src9->tfar);
    const vfloat4 tailRow10 = vfloat4::loadu(&src10->tfar);
    const vfloat4 tailRow11 = vfloat4::loadu(&src11->tfar);
    const vfloat4 tailRow12 = vfloat4::loadu(&src12->tfar);
    const vfloat4 tailRow13 = vfloat4::loadu(&src13->tfar);
    const vfloat4 tailRow14 = vfloat4::loadu(&src14->tfar);
    const vfloat4 tailRow15 = vfloat4::loadu(&src15->tfar);

    /* the integer fields pass through the float transpose and are
     * turned back into integer vectors with asInt afterwards */
    vfloat16 maskLanes, idLanes, flagsLanes;
    transpose(tailRow0,tailRow1,tailRow2,tailRow3,tailRow4,tailRow5,tailRow6,tailRow7,
              tailRow8,tailRow9,tailRow10,tailRow11,tailRow12,tailRow13,tailRow14,tailRow15,
              packet.tfar, maskLanes, idLanes, flagsLanes);
    packet.mask  = asInt(maskLanes);
    packet.id    = asInt(idLanes);
    packet.flags = asInt(flagsLanes);

    return packet;
  }
  #endif
  1428. }