// grid_soa_intersector.h
  1. // ======================================================================== //
  2. // Copyright 2009-2017 Intel Corporation //
  3. // //
  4. // Licensed under the Apache License, Version 2.0 (the "License"); //
  5. // you may not use this file except in compliance with the License. //
  6. // You may obtain a copy of the License at //
  7. // //
  8. // http://www.apache.org/licenses/LICENSE-2.0 //
  9. // //
  10. // Unless required by applicable law or agreed to in writing, software //
  11. // distributed under the License is distributed on an "AS IS" BASIS, //
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
  13. // See the License for the specific language governing permissions and //
  14. // limitations under the License. //
  15. // ======================================================================== //
  16. #pragma once
  17. #include "grid_soa.h"
  18. #include "../common/ray.h"
  19. #include "triangle_intersector_pluecker.h"
  20. namespace embree
  21. {
  22. namespace isa
  23. {
  24. template<int K>
  25. class GridSOAIntersectorK
  26. {
  27. public:
  28. typedef void Primitive;
  29. typedef Vec3<vfloat<K>> Vec3vfK;
  30. class PrecalculationsBase
  31. {
  32. #if defined(__AVX__)
  33. static const int M = 8;
  34. #else
  35. static const int M = 4;
  36. #endif
  37. public:
  38. __forceinline PrecalculationsBase (const vbool<K>& valid, const RayK<K>& ray)
  39. : grid(nullptr), intersector(valid,ray) {}
  40. public:
  41. GridSOA* grid;
  42. PlueckerIntersectorK<M,K> intersector; // FIXME: use quad intersector
  43. };
  44. typedef IntersectorKPrecalculations<K,PrecalculationsBase> Precalculations;
  45. struct MapUV0
  46. {
  47. const float* const grid_uv;
  48. size_t ofs00, ofs01, ofs10, ofs11;
  49. __forceinline MapUV0(const float* const grid_uv, size_t ofs00, size_t ofs01, size_t ofs10, size_t ofs11)
  50. : grid_uv(grid_uv), ofs00(ofs00), ofs01(ofs01), ofs10(ofs10), ofs11(ofs11) {}
  51. __forceinline void operator() (vfloat<K>& u, vfloat<K>& v) const {
  52. const vfloat<K> uv00(grid_uv[ofs00]);
  53. const vfloat<K> uv01(grid_uv[ofs01]);
  54. const vfloat<K> uv10(grid_uv[ofs10]);
  55. const vfloat<K> uv11(grid_uv[ofs11]);
  56. const Vec2<vfloat<K>> uv0 = GridSOA::decodeUV(uv00);
  57. const Vec2<vfloat<K>> uv1 = GridSOA::decodeUV(uv01);
  58. const Vec2<vfloat<K>> uv2 = GridSOA::decodeUV(uv10);
  59. const Vec2<vfloat<K>> uv = madd(u,uv1,madd(v,uv2,(1.0f-u-v)*uv0));
  60. u = uv[0]; v = uv[1];
  61. }
  62. };
  63. struct MapUV1
  64. {
  65. const float* const grid_uv;
  66. size_t ofs00, ofs01, ofs10, ofs11;
  67. __forceinline MapUV1(const float* const grid_uv, size_t ofs00, size_t ofs01, size_t ofs10, size_t ofs11)
  68. : grid_uv(grid_uv), ofs00(ofs00), ofs01(ofs01), ofs10(ofs10), ofs11(ofs11) {}
  69. __forceinline void operator() (vfloat<K>& u, vfloat<K>& v) const {
  70. const vfloat<K> uv00(grid_uv[ofs00]);
  71. const vfloat<K> uv01(grid_uv[ofs01]);
  72. const vfloat<K> uv10(grid_uv[ofs10]);
  73. const vfloat<K> uv11(grid_uv[ofs11]);
  74. const Vec2<vfloat<K>> uv0 = GridSOA::decodeUV(uv10);
  75. const Vec2<vfloat<K>> uv1 = GridSOA::decodeUV(uv01);
  76. const Vec2<vfloat<K>> uv2 = GridSOA::decodeUV(uv11);
  77. const Vec2<vfloat<K>> uv = madd(u,uv1,madd(v,uv2,(1.0f-u-v)*uv0));
  78. u = uv[0]; v = uv[1];
  79. }
  80. };
  81. /*! Intersect a ray with the primitive. */
  82. static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t ty, size_t& lazy_node)
  83. {
  84. const size_t dim_offset = pre.grid->dim_offset;
  85. const size_t line_offset = pre.grid->width;
  86. const float* const grid_x = pre.grid->decodeLeaf(0,prim);
  87. const float* const grid_y = grid_x + 1 * dim_offset;
  88. const float* const grid_z = grid_x + 2 * dim_offset;
  89. const float* const grid_uv = grid_x + 3 * dim_offset;
  90. for (size_t y=0; y<2; y++)
  91. {
  92. for (size_t x=0; x<2; x++)
  93. {
  94. const size_t ofs00 = (y+0)*line_offset+(x+0);
  95. const size_t ofs01 = (y+0)*line_offset+(x+1);
  96. const size_t ofs10 = (y+1)*line_offset+(x+0);
  97. const size_t ofs11 = (y+1)*line_offset+(x+1);
  98. const Vec3vfK p00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
  99. const Vec3vfK p01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
  100. const Vec3vfK p10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
  101. const Vec3vfK p11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);
  102. pre.intersector.intersectK(valid_i,ray,p00,p01,p10,MapUV0(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID,pre.grid->primID));
  103. pre.intersector.intersectK(valid_i,ray,p10,p01,p11,MapUV1(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID,pre.grid->primID));
  104. }
  105. }
  106. }
  107. /*! Test if the ray is occluded by the primitive */
  108. static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t ty, size_t& lazy_node)
  109. {
  110. const size_t dim_offset = pre.grid->dim_offset;
  111. const size_t line_offset = pre.grid->width;
  112. const float* const grid_x = pre.grid->decodeLeaf(0,prim);
  113. const float* const grid_y = grid_x + 1 * dim_offset;
  114. const float* const grid_z = grid_x + 2 * dim_offset;
  115. const float* const grid_uv = grid_x + 3 * dim_offset;
  116. vbool<K> valid = valid_i;
  117. for (size_t y=0; y<2; y++)
  118. {
  119. for (size_t x=0; x<2; x++)
  120. {
  121. const size_t ofs00 = (y+0)*line_offset+(x+0);
  122. const size_t ofs01 = (y+0)*line_offset+(x+1);
  123. const size_t ofs10 = (y+1)*line_offset+(x+0);
  124. const size_t ofs11 = (y+1)*line_offset+(x+1);
  125. const Vec3vfK p00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
  126. const Vec3vfK p01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
  127. const Vec3vfK p10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
  128. const Vec3vfK p11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);
  129. pre.intersector.intersectK(valid,ray,p00,p01,p10,MapUV0(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID,pre.grid->primID));
  130. if (none(valid)) break;
  131. pre.intersector.intersectK(valid,ray,p10,p01,p11,MapUV1(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID,pre.grid->primID));
  132. if (none(valid)) break;
  133. }
  134. }
  135. return !valid;
  136. }
  137. template<typename Loader>
  138. static __forceinline void intersect(RayK<K>& ray, size_t k,
  139. IntersectContext* context,
  140. const float* const grid_x,
  141. const size_t line_offset,
  142. const size_t lines,
  143. Precalculations& pre)
  144. {
  145. typedef typename Loader::vfloat vfloat;
  146. const size_t dim_offset = pre.grid->dim_offset;
  147. const float* const grid_y = grid_x + 1 * dim_offset;
  148. const float* const grid_z = grid_x + 2 * dim_offset;
  149. const float* const grid_uv = grid_x + 3 * dim_offset;
  150. Vec3<vfloat> v0, v1, v2; Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,v0,v1,v2);
  151. pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Intersect1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID,pre.grid->primID));
  152. };
  153. template<typename Loader>
  154. static __forceinline bool occluded(RayK<K>& ray, size_t k,
  155. IntersectContext* context,
  156. const float* const grid_x,
  157. const size_t line_offset,
  158. const size_t lines,
  159. Precalculations& pre)
  160. {
  161. typedef typename Loader::vfloat vfloat;
  162. const size_t dim_offset = pre.grid->dim_offset;
  163. const float* const grid_y = grid_x + 1 * dim_offset;
  164. const float* const grid_z = grid_x + 2 * dim_offset;
  165. const float* const grid_uv = grid_x + 3 * dim_offset;
  166. Vec3<vfloat> v0, v1, v2; Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,v0,v1,v2);
  167. return pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Occluded1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID,pre.grid->primID));
  168. }
  169. /*! Intersect a ray with the primitive. */
  170. static __forceinline void intersect(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, size_t& lazy_node)
  171. {
  172. const size_t line_offset = pre.grid->width;
  173. const size_t lines = pre.grid->height;
  174. const float* const grid_x = pre.grid->decodeLeaf(0,prim);
  175. #if defined(__AVX__)
  176. intersect<GridSOA::Gather3x3>( ray, k, context, grid_x, line_offset, lines, pre);
  177. #else
  178. intersect<GridSOA::Gather2x3>(ray, k, context, grid_x , line_offset, lines, pre);
  179. if (likely(lines > 2))
  180. intersect<GridSOA::Gather2x3>(ray, k, context, grid_x+line_offset, line_offset, lines, pre);
  181. #endif
  182. }
  183. /*! Test if the ray is occluded by the primitive */
  184. static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, size_t& lazy_node)
  185. {
  186. const size_t line_offset = pre.grid->width;
  187. const size_t lines = pre.grid->height;
  188. const float* const grid_x = pre.grid->decodeLeaf(0,prim);
  189. #if defined(__AVX__)
  190. return occluded<GridSOA::Gather3x3>( ray, k, context, grid_x, line_offset, lines, pre);
  191. #else
  192. if (occluded<GridSOA::Gather2x3>(ray, k, context, grid_x , line_offset, lines, pre)) return true;
  193. if (likely(lines > 2))
  194. if (occluded<GridSOA::Gather2x3>(ray, k, context, grid_x+line_offset, line_offset, lines, pre)) return true;
  195. #endif
  196. return false;
  197. }
  198. };
  199. template<int K>
  200. class GridSOAMBlurIntersectorK
  201. {
  202. public:
  203. typedef void Primitive;
  204. typedef Vec3<vfloat<K>> Vec3vfK;
  205. typedef typename GridSOAIntersectorK<K>::PrecalculationsBase PrecalculationsBase;
  206. typedef IntersectorKPrecalculationsMB<K,PrecalculationsBase> Precalculations;
  207. template<typename Loader>
  208. static __forceinline void intersect(RayK<K>& ray, size_t k,
  209. const float ftime,
  210. IntersectContext* context,
  211. const float* const grid_x,
  212. const size_t line_offset,
  213. const size_t lines,
  214. Precalculations& pre)
  215. {
  216. typedef typename Loader::vfloat vfloat;
  217. const size_t grid_offset = pre.grid->gridBytes >> 2;
  218. const size_t dim_offset = pre.grid->dim_offset;
  219. const float* const grid_y = grid_x + 1 * dim_offset;
  220. const float* const grid_z = grid_x + 2 * dim_offset;
  221. const float* const grid_uv = grid_x + 3 * dim_offset;
  222. Vec3<vfloat> a0, a1, a2;
  223. Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,a0,a1,a2);
  224. Vec3<vfloat> b0, b1, b2;
  225. Loader::gather(grid_x+grid_offset,grid_y+grid_offset,grid_z+grid_offset,line_offset,lines,b0,b1,b2);
  226. Vec3<vfloat> v0 = lerp(a0,b0,vfloat(ftime));
  227. Vec3<vfloat> v1 = lerp(a1,b1,vfloat(ftime));
  228. Vec3<vfloat> v2 = lerp(a2,b2,vfloat(ftime));
  229. pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Intersect1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID,pre.grid->primID));
  230. };
  231. template<typename Loader>
  232. static __forceinline bool occluded(RayK<K>& ray, size_t k,
  233. const float ftime,
  234. IntersectContext* context,
  235. const float* const grid_x,
  236. const size_t line_offset,
  237. const size_t lines,
  238. Precalculations& pre)
  239. {
  240. typedef typename Loader::vfloat vfloat;
  241. const size_t grid_offset = pre.grid->gridBytes >> 2;
  242. const size_t dim_offset = pre.grid->dim_offset;
  243. const float* const grid_y = grid_x + 1 * dim_offset;
  244. const float* const grid_z = grid_x + 2 * dim_offset;
  245. const float* const grid_uv = grid_x + 3 * dim_offset;
  246. Vec3<vfloat> a0, a1, a2;
  247. Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,a0,a1,a2);
  248. Vec3<vfloat> b0, b1, b2;
  249. Loader::gather(grid_x+grid_offset,grid_y+grid_offset,grid_z+grid_offset,line_offset,lines,b0,b1,b2);
  250. Vec3<vfloat> v0 = lerp(a0,b0,vfloat(ftime));
  251. Vec3<vfloat> v1 = lerp(a1,b1,vfloat(ftime));
  252. Vec3<vfloat> v2 = lerp(a2,b2,vfloat(ftime));
  253. return pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Occluded1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID,pre.grid->primID));
  254. }
  255. /*! Intersect a ray with the primitive. */
  256. static __forceinline void intersect(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, size_t& lazy_node)
  257. {
  258. float ftime;
  259. const size_t itime = getTimeSegment(ray.time[k], float(pre.grid->time_steps-1), ftime);
  260. const size_t line_offset = pre.grid->width;
  261. const size_t lines = pre.grid->height;
  262. const float* const grid_x = pre.grid->decodeLeaf(itime,prim);
  263. #if defined(__AVX__)
  264. intersect<GridSOA::Gather3x3>( ray, k, ftime, context, grid_x, line_offset, lines, pre);
  265. #else
  266. intersect<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x , line_offset, lines, pre);
  267. if (likely(lines > 2))
  268. intersect<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x+line_offset, line_offset, lines, pre);
  269. #endif
  270. }
  271. /*! Test if the ray is occluded by the primitive */
  272. static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, size_t& lazy_node)
  273. {
  274. float ftime;
  275. const size_t itime = getTimeSegment(ray.time[k], float(pre.grid->time_steps-1), ftime);
  276. const size_t line_offset = pre.grid->width;
  277. const size_t lines = pre.grid->height;
  278. const float* const grid_x = pre.grid->decodeLeaf(itime,prim);
  279. #if defined(__AVX__)
  280. return occluded<GridSOA::Gather3x3>( ray, k, ftime, context, grid_x, line_offset, lines, pre);
  281. #else
  282. if (occluded<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x , line_offset, lines, pre)) return true;
  283. if (likely(lines > 2))
  284. if (occluded<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x+line_offset, line_offset, lines, pre)) return true;
  285. #endif
  286. return false;
  287. }
  288. };
  289. }
  290. }