grid_soa_intersector_packet.h 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "grid_soa.h"
  5. #include "../common/ray.h"
  6. #include "triangle_intersector_pluecker.h"
  7. namespace embree
  8. {
  9. namespace isa
  10. {
  11. template<int K>
  12. struct MapUV0
  13. {
  14. const float* const grid_uv;
  15. size_t ofs00, ofs01, ofs10, ofs11;
  16. __forceinline MapUV0(const float* const grid_uv, size_t ofs00, size_t ofs01, size_t ofs10, size_t ofs11)
  17. : grid_uv(grid_uv), ofs00(ofs00), ofs01(ofs01), ofs10(ofs10), ofs11(ofs11) {}
  18. __forceinline void operator() (vfloat<K>& u, vfloat<K>& v, Vec3vf<K>& Ng) const {
  19. const vfloat<K> uv00(grid_uv[ofs00]);
  20. const vfloat<K> uv01(grid_uv[ofs01]);
  21. const vfloat<K> uv10(grid_uv[ofs10]);
  22. const vfloat<K> uv11(grid_uv[ofs11]);
  23. const Vec2vf<K> uv0 = GridSOA::decodeUV(uv00);
  24. const Vec2vf<K> uv1 = GridSOA::decodeUV(uv01);
  25. const Vec2vf<K> uv2 = GridSOA::decodeUV(uv10);
  26. const Vec2vf<K> uv = madd(u,uv1,madd(v,uv2,(1.0f-u-v)*uv0));
  27. u = uv[0]; v = uv[1];
  28. }
  29. };
  30. template<int K>
  31. struct MapUV1
  32. {
  33. const float* const grid_uv;
  34. size_t ofs00, ofs01, ofs10, ofs11;
  35. __forceinline MapUV1(const float* const grid_uv, size_t ofs00, size_t ofs01, size_t ofs10, size_t ofs11)
  36. : grid_uv(grid_uv), ofs00(ofs00), ofs01(ofs01), ofs10(ofs10), ofs11(ofs11) {}
  37. __forceinline void operator() (vfloat<K>& u, vfloat<K>& v, Vec3vf<K>& Ng) const {
  38. const vfloat<K> uv00(grid_uv[ofs00]);
  39. const vfloat<K> uv01(grid_uv[ofs01]);
  40. const vfloat<K> uv10(grid_uv[ofs10]);
  41. const vfloat<K> uv11(grid_uv[ofs11]);
  42. const Vec2vf<K> uv0 = GridSOA::decodeUV(uv10);
  43. const Vec2vf<K> uv1 = GridSOA::decodeUV(uv01);
  44. const Vec2vf<K> uv2 = GridSOA::decodeUV(uv11);
  45. const Vec2vf<K> uv = madd(u,uv1,madd(v,uv2,(1.0f-u-v)*uv0));
  46. u = uv[0]; v = uv[1];
  47. }
  48. };
  49. template<int K>
  50. class GridSOAIntersectorK
  51. {
  52. public:
  53. typedef void Primitive;
  54. class Precalculations
  55. {
  56. #if defined(__AVX__)
  57. static const int M = 8;
  58. #else
  59. static const int M = 4;
  60. #endif
  61. public:
  62. __forceinline Precalculations (const vbool<K>& valid, const RayK<K>& ray)
  63. : grid(nullptr), intersector(valid,ray) {}
  64. public:
  65. GridSOA* grid;
  66. PlueckerIntersectorK<M,K> intersector; // FIXME: use quad intersector
  67. };
  68. /*! Intersect a ray with the primitive. */
  69. static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)
  70. {
  71. const size_t dim_offset = pre.grid->dim_offset;
  72. const size_t line_offset = pre.grid->width;
  73. const float* const grid_x = pre.grid->decodeLeaf(0,prim);
  74. const float* const grid_y = grid_x + 1 * dim_offset;
  75. const float* const grid_z = grid_x + 2 * dim_offset;
  76. const float* const grid_uv = grid_x + 3 * dim_offset;
  77. const size_t max_x = pre.grid->width == 2 ? 1 : 2;
  78. const size_t max_y = pre.grid->height == 2 ? 1 : 2;
  79. for (size_t y=0; y<max_y; y++)
  80. {
  81. for (size_t x=0; x<max_x; x++)
  82. {
  83. const size_t ofs00 = (y+0)*line_offset+(x+0);
  84. const size_t ofs01 = (y+0)*line_offset+(x+1);
  85. const size_t ofs10 = (y+1)*line_offset+(x+0);
  86. const size_t ofs11 = (y+1)*line_offset+(x+1);
  87. const Vec3vf<K> p00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
  88. const Vec3vf<K> p01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
  89. const Vec3vf<K> p10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
  90. const Vec3vf<K> p11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);
  91. pre.intersector.intersectK(valid_i,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));
  92. pre.intersector.intersectK(valid_i,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));
  93. }
  94. }
  95. }
  96. /*! Test if the ray is occluded by the primitive */
  97. static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)
  98. {
  99. const size_t dim_offset = pre.grid->dim_offset;
  100. const size_t line_offset = pre.grid->width;
  101. const float* const grid_x = pre.grid->decodeLeaf(0,prim);
  102. const float* const grid_y = grid_x + 1 * dim_offset;
  103. const float* const grid_z = grid_x + 2 * dim_offset;
  104. const float* const grid_uv = grid_x + 3 * dim_offset;
  105. vbool<K> valid = valid_i;
  106. const size_t max_x = pre.grid->width == 2 ? 1 : 2;
  107. const size_t max_y = pre.grid->height == 2 ? 1 : 2;
  108. for (size_t y=0; y<max_y; y++)
  109. {
  110. for (size_t x=0; x<max_x; x++)
  111. {
  112. const size_t ofs00 = (y+0)*line_offset+(x+0);
  113. const size_t ofs01 = (y+0)*line_offset+(x+1);
  114. const size_t ofs10 = (y+1)*line_offset+(x+0);
  115. const size_t ofs11 = (y+1)*line_offset+(x+1);
  116. const Vec3vf<K> p00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
  117. const Vec3vf<K> p01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
  118. const Vec3vf<K> p10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
  119. const Vec3vf<K> p11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);
  120. pre.intersector.intersectK(valid,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID()));
  121. if (none(valid)) break;
  122. pre.intersector.intersectK(valid,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID()));
  123. if (none(valid)) break;
  124. }
  125. }
  126. return !valid;
  127. }
  128. template<typename Loader>
  129. static __forceinline void intersect(RayHitK<K>& ray, size_t k,
  130. RayQueryContext* context,
  131. const float* const grid_x,
  132. const size_t line_offset,
  133. const size_t lines,
  134. Precalculations& pre)
  135. {
  136. typedef typename Loader::vfloat vfloat;
  137. const size_t dim_offset = pre.grid->dim_offset;
  138. const float* const grid_y = grid_x + 1 * dim_offset;
  139. const float* const grid_z = grid_x + 2 * dim_offset;
  140. const float* const grid_uv = grid_x + 3 * dim_offset;
  141. Vec3<vfloat> v0, v1, v2; Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,v0,v1,v2);
  142. pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Intersect1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID()));
  143. };
  144. template<typename Loader>
  145. static __forceinline bool occluded(RayK<K>& ray, size_t k,
  146. RayQueryContext* context,
  147. const float* const grid_x,
  148. const size_t line_offset,
  149. const size_t lines,
  150. Precalculations& pre)
  151. {
  152. typedef typename Loader::vfloat vfloat;
  153. const size_t dim_offset = pre.grid->dim_offset;
  154. const float* const grid_y = grid_x + 1 * dim_offset;
  155. const float* const grid_z = grid_x + 2 * dim_offset;
  156. const float* const grid_uv = grid_x + 3 * dim_offset;
  157. Vec3<vfloat> v0, v1, v2; Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,v0,v1,v2);
  158. return pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Occluded1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID()));
  159. }
  160. /*! Intersect a ray with the primitive. */
  161. static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)
  162. {
  163. const size_t line_offset = pre.grid->width;
  164. const size_t lines = pre.grid->height;
  165. const float* const grid_x = pre.grid->decodeLeaf(0,prim);
  166. #if defined(__AVX__)
  167. intersect<GridSOA::Gather3x3>( ray, k, context, grid_x, line_offset, lines, pre);
  168. #else
  169. intersect<GridSOA::Gather2x3>(ray, k, context, grid_x , line_offset, lines, pre);
  170. if (likely(lines > 2))
  171. intersect<GridSOA::Gather2x3>(ray, k, context, grid_x+line_offset, line_offset, lines, pre);
  172. #endif
  173. }
  174. /*! Test if the ray is occluded by the primitive */
  175. static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)
  176. {
  177. const size_t line_offset = pre.grid->width;
  178. const size_t lines = pre.grid->height;
  179. const float* const grid_x = pre.grid->decodeLeaf(0,prim);
  180. #if defined(__AVX__)
  181. return occluded<GridSOA::Gather3x3>( ray, k, context, grid_x, line_offset, lines, pre);
  182. #else
  183. if (occluded<GridSOA::Gather2x3>(ray, k, context, grid_x , line_offset, lines, pre)) return true;
  184. if (likely(lines > 2))
  185. if (occluded<GridSOA::Gather2x3>(ray, k, context, grid_x+line_offset, line_offset, lines, pre)) return true;
  186. #endif
  187. return false;
  188. }
  189. };
  190. template<int K>
  191. class GridSOAMBIntersectorK
  192. {
  193. public:
  194. typedef void Primitive;
  195. typedef typename GridSOAIntersectorK<K>::Precalculations Precalculations;
  196. /*! Intersect a ray with the primitive. */
  197. static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)
  198. {
  199. vfloat<K> vftime;
  200. vint<K> vitime = getTimeSegment<K>(ray.time(), vfloat<K>((float)(pre.grid->time_steps-1)), vftime);
  201. vbool<K> valid1 = valid_i;
  202. while (any(valid1)) {
  203. const size_t j = bsf(movemask(valid1));
  204. const int itime = vitime[j];
  205. const vbool<K> valid2 = valid1 & (itime == vitime);
  206. valid1 = valid1 & !valid2;
  207. intersect(valid2,pre,ray,vftime,itime,context,prim,lazy_node);
  208. }
  209. }
  210. /*! Intersect a ray with the primitive. */
  211. static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, const vfloat<K>& ftime, int itime, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)
  212. {
  213. const size_t grid_offset = pre.grid->gridBytes >> 2;
  214. const size_t dim_offset = pre.grid->dim_offset;
  215. const size_t line_offset = pre.grid->width;
  216. const float* const grid_x = pre.grid->decodeLeaf(itime,prim);
  217. const float* const grid_y = grid_x + 1 * dim_offset;
  218. const float* const grid_z = grid_x + 2 * dim_offset;
  219. const float* const grid_uv = grid_x + 3 * dim_offset;
  220. const size_t max_x = pre.grid->width == 2 ? 1 : 2;
  221. const size_t max_y = pre.grid->height == 2 ? 1 : 2;
  222. for (size_t y=0; y<max_y; y++)
  223. {
  224. for (size_t x=0; x<max_x; x++)
  225. {
  226. size_t ofs00 = (y+0)*line_offset+(x+0);
  227. size_t ofs01 = (y+0)*line_offset+(x+1);
  228. size_t ofs10 = (y+1)*line_offset+(x+0);
  229. size_t ofs11 = (y+1)*line_offset+(x+1);
  230. const Vec3vf<K> a00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
  231. const Vec3vf<K> a01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
  232. const Vec3vf<K> a10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
  233. const Vec3vf<K> a11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);
  234. ofs00 += grid_offset;
  235. ofs01 += grid_offset;
  236. ofs10 += grid_offset;
  237. ofs11 += grid_offset;
  238. const Vec3vf<K> b00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
  239. const Vec3vf<K> b01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
  240. const Vec3vf<K> b10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
  241. const Vec3vf<K> b11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);
  242. const Vec3vf<K> p00 = lerp(a00,b00,ftime);
  243. const Vec3vf<K> p01 = lerp(a01,b01,ftime);
  244. const Vec3vf<K> p10 = lerp(a10,b10,ftime);
  245. const Vec3vf<K> p11 = lerp(a11,b11,ftime);
  246. pre.intersector.intersectK(valid_i,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));
  247. pre.intersector.intersectK(valid_i,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));
  248. }
  249. }
  250. }
  251. /*! Test if the ray is occluded by the primitive */
  252. static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)
  253. {
  254. vfloat<K> vftime;
  255. vint<K> vitime = getTimeSegment<K>(ray.time(), vfloat<K>((float)(pre.grid->time_steps-1)), vftime);
  256. vbool<K> valid_o = valid_i;
  257. vbool<K> valid1 = valid_i;
  258. while (any(valid1)) {
  259. const int j = int(bsf(movemask(valid1)));
  260. const int itime = vitime[j];
  261. const vbool<K> valid2 = valid1 & (itime == vitime);
  262. valid1 = valid1 & !valid2;
  263. valid_o &= !valid2 | occluded(valid2,pre,ray,vftime,itime,context,prim,lazy_node);
  264. }
  265. return !valid_o;
  266. }
  267. /*! Test if the ray is occluded by the primitive */
  268. static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, const vfloat<K>& ftime, int itime, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)
  269. {
  270. const size_t grid_offset = pre.grid->gridBytes >> 2;
  271. const size_t dim_offset = pre.grid->dim_offset;
  272. const size_t line_offset = pre.grid->width;
  273. const float* const grid_x = pre.grid->decodeLeaf(itime,prim);
  274. const float* const grid_y = grid_x + 1 * dim_offset;
  275. const float* const grid_z = grid_x + 2 * dim_offset;
  276. const float* const grid_uv = grid_x + 3 * dim_offset;
  277. vbool<K> valid = valid_i;
  278. const size_t max_x = pre.grid->width == 2 ? 1 : 2;
  279. const size_t max_y = pre.grid->height == 2 ? 1 : 2;
  280. for (size_t y=0; y<max_y; y++)
  281. {
  282. for (size_t x=0; x<max_x; x++)
  283. {
  284. size_t ofs00 = (y+0)*line_offset+(x+0);
  285. size_t ofs01 = (y+0)*line_offset+(x+1);
  286. size_t ofs10 = (y+1)*line_offset+(x+0);
  287. size_t ofs11 = (y+1)*line_offset+(x+1);
  288. const Vec3vf<K> a00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
  289. const Vec3vf<K> a01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
  290. const Vec3vf<K> a10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
  291. const Vec3vf<K> a11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);
  292. ofs00 += grid_offset;
  293. ofs01 += grid_offset;
  294. ofs10 += grid_offset;
  295. ofs11 += grid_offset;
  296. const Vec3vf<K> b00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
  297. const Vec3vf<K> b01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
  298. const Vec3vf<K> b10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
  299. const Vec3vf<K> b11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);
  300. const Vec3vf<K> p00 = lerp(a00,b00,ftime);
  301. const Vec3vf<K> p01 = lerp(a01,b01,ftime);
  302. const Vec3vf<K> p10 = lerp(a10,b10,ftime);
  303. const Vec3vf<K> p11 = lerp(a11,b11,ftime);
  304. pre.intersector.intersectK(valid,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID()));
  305. if (none(valid)) break;
  306. pre.intersector.intersectK(valid,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID()));
  307. if (none(valid)) break;
  308. }
  309. }
  310. return valid;
  311. }
  312. template<typename Loader>
  313. static __forceinline void intersect(RayHitK<K>& ray, size_t k,
  314. const float ftime,
  315. RayQueryContext* context,
  316. const float* const grid_x,
  317. const size_t line_offset,
  318. const size_t lines,
  319. Precalculations& pre)
  320. {
  321. typedef typename Loader::vfloat vfloat;
  322. const size_t grid_offset = pre.grid->gridBytes >> 2;
  323. const size_t dim_offset = pre.grid->dim_offset;
  324. const float* const grid_y = grid_x + 1 * dim_offset;
  325. const float* const grid_z = grid_x + 2 * dim_offset;
  326. const float* const grid_uv = grid_x + 3 * dim_offset;
  327. Vec3<vfloat> a0, a1, a2;
  328. Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,a0,a1,a2);
  329. Vec3<vfloat> b0, b1, b2;
  330. Loader::gather(grid_x+grid_offset,grid_y+grid_offset,grid_z+grid_offset,line_offset,lines,b0,b1,b2);
  331. Vec3<vfloat> v0 = lerp(a0,b0,vfloat(ftime));
  332. Vec3<vfloat> v1 = lerp(a1,b1,vfloat(ftime));
  333. Vec3<vfloat> v2 = lerp(a2,b2,vfloat(ftime));
  334. pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Intersect1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID()));
  335. };
  336. template<typename Loader>
  337. static __forceinline bool occluded(RayK<K>& ray, size_t k,
  338. const float ftime,
  339. RayQueryContext* context,
  340. const float* const grid_x,
  341. const size_t line_offset,
  342. const size_t lines,
  343. Precalculations& pre)
  344. {
  345. typedef typename Loader::vfloat vfloat;
  346. const size_t grid_offset = pre.grid->gridBytes >> 2;
  347. const size_t dim_offset = pre.grid->dim_offset;
  348. const float* const grid_y = grid_x + 1 * dim_offset;
  349. const float* const grid_z = grid_x + 2 * dim_offset;
  350. const float* const grid_uv = grid_x + 3 * dim_offset;
  351. Vec3<vfloat> a0, a1, a2;
  352. Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,a0,a1,a2);
  353. Vec3<vfloat> b0, b1, b2;
  354. Loader::gather(grid_x+grid_offset,grid_y+grid_offset,grid_z+grid_offset,line_offset,lines,b0,b1,b2);
  355. Vec3<vfloat> v0 = lerp(a0,b0,vfloat(ftime));
  356. Vec3<vfloat> v1 = lerp(a1,b1,vfloat(ftime));
  357. Vec3<vfloat> v2 = lerp(a2,b2,vfloat(ftime));
  358. return pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Occluded1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID()));
  359. }
  360. /*! Intersect a ray with the primitive. */
  361. static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)
  362. {
  363. float ftime;
  364. int itime = getTimeSegment(ray.time()[k], float(pre.grid->time_steps-1), ftime);
  365. const size_t line_offset = pre.grid->width;
  366. const size_t lines = pre.grid->height;
  367. const float* const grid_x = pre.grid->decodeLeaf(itime,prim);
  368. #if defined(__AVX__)
  369. intersect<GridSOA::Gather3x3>( ray, k, ftime, context, grid_x, line_offset, lines, pre);
  370. #else
  371. intersect<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x, line_offset, lines, pre);
  372. if (likely(lines > 2))
  373. intersect<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x+line_offset, line_offset, lines, pre);
  374. #endif
  375. }
  376. /*! Test if the ray is occluded by the primitive */
  377. static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)
  378. {
  379. float ftime;
  380. int itime = getTimeSegment(ray.time()[k], float(pre.grid->time_steps-1), ftime);
  381. const size_t line_offset = pre.grid->width;
  382. const size_t lines = pre.grid->height;
  383. const float* const grid_x = pre.grid->decodeLeaf(itime,prim);
  384. #if defined(__AVX__)
  385. return occluded<GridSOA::Gather3x3>( ray, k, ftime, context, grid_x, line_offset, lines, pre);
  386. #else
  387. if (occluded<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x, line_offset, lines, pre)) return true;
  388. if (likely(lines > 2))
  389. if (occluded<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x+line_offset, line_offset, lines, pre)) return true;
  390. #endif
  391. return false;
  392. }
  393. };
  394. }
  395. }