quad_intersector_moeller.h 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582
  1. // ======================================================================== //
  2. // Copyright 2009-2017 Intel Corporation //
  3. // //
  4. // Licensed under the Apache License, Version 2.0 (the "License"); //
  5. // you may not use this file except in compliance with the License. //
  6. // You may obtain a copy of the License at //
  7. // //
  8. // http://www.apache.org/licenses/LICENSE-2.0 //
  9. // //
  10. // Unless required by applicable law or agreed to in writing, software //
  11. // distributed under the License is distributed on an "AS IS" BASIS, //
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
  13. // See the License for the specific language governing permissions and //
  14. // limitations under the License. //
  15. // ======================================================================== //
  16. #pragma once
  17. #include "quadv.h"
  18. #include "triangle_intersector_moeller.h"
  19. namespace embree
  20. {
  21. namespace isa
  22. {
  23. template<int M>
  24. struct QuadHitM
  25. {
  26. __forceinline QuadHitM(const vbool<M>& valid,
  27. const vfloat<M>& U,
  28. const vfloat<M>& V,
  29. const vfloat<M>& T,
  30. const vfloat<M>& absDen,
  31. const Vec3<vfloat<M>>& Ng,
  32. const vbool<M>& flags)
  33. : U(U), V(V), T(T), absDen(absDen), tri_Ng(Ng), valid(valid), flags(flags) {}
  34. __forceinline void finalize()
  35. {
  36. const vfloat<M> rcpAbsDen = rcp(absDen);
  37. vt = T * rcpAbsDen;
  38. const vfloat<M> u = U * rcpAbsDen;
  39. const vfloat<M> v = V * rcpAbsDen;
  40. const vfloat<M> u1 = vfloat<M>(1.0f) - u;
  41. const vfloat<M> v1 = vfloat<M>(1.0f) - v;
  42. #if !defined(__AVX__) || defined(EMBREE_BACKFACE_CULLING)
  43. vu = select(flags,u1,u);
  44. vv = select(flags,v1,v);
  45. vNg = Vec3<vfloat<M>>(tri_Ng.x,tri_Ng.y,tri_Ng.z);
  46. #else
  47. const vfloat<M> flip = select(flags,vfloat<M>(-1.0f),vfloat<M>(1.0f));
  48. vv = select(flags,u1,v);
  49. vu = select(flags,v1,u);
  50. vNg = Vec3<vfloat<M>>(flip*tri_Ng.x,flip*tri_Ng.y,flip*tri_Ng.z);
  51. #endif
  52. }
  53. __forceinline Vec2f uv(const size_t i)
  54. {
  55. const float u = vu[i];
  56. const float v = vv[i];
  57. return Vec2f(u,v);
  58. }
  59. __forceinline float t(const size_t i) { return vt[i]; }
  60. __forceinline Vec3fa Ng(const size_t i) { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); }
  61. private:
  62. vfloat<M> U;
  63. vfloat<M> V;
  64. vfloat<M> T;
  65. vfloat<M> absDen;
  66. Vec3<vfloat<M>> tri_Ng;
  67. public:
  68. vbool<M> valid;
  69. vfloat<M> vu;
  70. vfloat<M> vv;
  71. vfloat<M> vt;
  72. Vec3<vfloat<M>> vNg;
  73. public:
  74. const vbool<M> flags;
  75. };
  76. template<int K>
  77. struct QuadHitK
  78. {
  79. __forceinline QuadHitK(const vfloat<K>& U,
  80. const vfloat<K>& V,
  81. const vfloat<K>& T,
  82. const vfloat<K>& absDen,
  83. const Vec3<vfloat<K>>& Ng,
  84. const vbool<K>& flags)
  85. : U(U), V(V), T(T), absDen(absDen), flags(flags), tri_Ng(Ng) {}
  86. __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3<vfloat<K>>> operator() () const
  87. {
  88. const vfloat<K> rcpAbsDen = rcp(absDen);
  89. const vfloat<K> t = T * rcpAbsDen;
  90. const vfloat<K> u0 = U * rcpAbsDen;
  91. const vfloat<K> v0 = V * rcpAbsDen;
  92. const vfloat<K> u1 = vfloat<K>(1.0f) - u0;
  93. const vfloat<K> v1 = vfloat<K>(1.0f) - v0;
  94. const vfloat<K> u = select(flags,u1,u0);
  95. const vfloat<K> v = select(flags,v1,v0);
  96. const Vec3<vfloat<K>> Ng(tri_Ng.x,tri_Ng.y,tri_Ng.z);
  97. return std::make_tuple(u,v,t,Ng);
  98. }
  99. private:
  100. const vfloat<K> U;
  101. const vfloat<K> V;
  102. const vfloat<K> T;
  103. const vfloat<K> absDen;
  104. const vbool<K> flags;
  105. const Vec3<vfloat<K>> tri_Ng;
  106. };
  107. /* ----------------------------- */
  108. /* -- single ray intersectors -- */
  109. /* ----------------------------- */
  110. template<int M, bool filter>
  111. struct QuadMIntersector1MoellerTrumbore;
  112. /*! Intersects M quads with 1 ray */
  113. template<int M, bool filter>
  114. struct QuadMIntersector1MoellerTrumbore
  115. {
  116. __forceinline QuadMIntersector1MoellerTrumbore() {}
  117. __forceinline QuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {}
  118. __forceinline void intersect(Ray& ray, IntersectContext* context,
  119. const Vec3<vfloat<M>>& v0, const Vec3<vfloat<M>>& v1, const Vec3<vfloat<M>>& v2, const Vec3<vfloat<M>>& v3,
  120. const vint<M>& geomID, const vint<M>& primID) const
  121. {
  122. MoellerTrumboreHitM<M> hit;
  123. MoellerTrumboreIntersector1<M> intersector(ray,nullptr);
  124. Intersect1EpilogM<M,M,filter> epilog(ray,context,geomID,primID);
  125. /* intersect first triangle */
  126. if (intersector.intersect(ray,v0,v1,v3,hit))
  127. epilog(hit.valid,hit);
  128. /* intersect second triangle */
  129. if (intersector.intersect(ray,v2,v3,v1,hit))
  130. {
  131. hit.U = hit.absDen - hit.U;
  132. hit.V = hit.absDen - hit.V;
  133. epilog(hit.valid,hit);
  134. }
  135. }
  136. __forceinline bool occluded(Ray& ray, IntersectContext* context,
  137. const Vec3<vfloat<M>>& v0, const Vec3<vfloat<M>>& v1, const Vec3<vfloat<M>>& v2, const Vec3<vfloat<M>>& v3,
  138. const vint<M>& geomID, const vint<M>& primID) const
  139. {
  140. MoellerTrumboreHitM<M> hit;
  141. MoellerTrumboreIntersector1<M> intersector(ray,nullptr);
  142. Occluded1EpilogM<M,M,filter> epilog(ray,context,geomID,primID);
  143. /* intersect first triangle */
  144. if (intersector.intersect(ray,v0,v1,v3,hit))
  145. {
  146. if (epilog(hit.valid,hit))
  147. return true;
  148. }
  149. /* intersect second triangle */
  150. if (intersector.intersect(ray,v2,v3,v1,hit))
  151. {
  152. hit.U = hit.absDen - hit.U;
  153. hit.V = hit.absDen - hit.V;
  154. if (epilog(hit.valid,hit))
  155. return true;
  156. }
  157. return false;
  158. }
  159. };
  160. #if defined(__AVX512F__)
  161. /*! Intersects 4 quads with 1 ray using AVX512 */
  162. template<bool filter>
  163. struct QuadMIntersector1MoellerTrumbore<4,filter>
  164. {
  165. __forceinline QuadMIntersector1MoellerTrumbore() {}
  166. __forceinline QuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {}
  167. template<typename Epilog>
  168. __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const
  169. {
  170. const Vec3vf16 vtx0(select(0x0f0f,vfloat16(v0.x),vfloat16(v2.x)),
  171. select(0x0f0f,vfloat16(v0.y),vfloat16(v2.y)),
  172. select(0x0f0f,vfloat16(v0.z),vfloat16(v2.z)));
  173. #if !defined(EMBREE_BACKFACE_CULLING)
  174. const Vec3vf16 vtx1(vfloat16(v1.x),vfloat16(v1.y),vfloat16(v1.z));
  175. const Vec3vf16 vtx2(vfloat16(v3.x),vfloat16(v3.y),vfloat16(v3.z));
  176. #else
  177. const Vec3vf16 vtx1(select(0x0f0f,vfloat16(v1.x),vfloat16(v3.x)),
  178. select(0x0f0f,vfloat16(v1.y),vfloat16(v3.y)),
  179. select(0x0f0f,vfloat16(v1.z),vfloat16(v3.z)));
  180. const Vec3vf16 vtx2(select(0x0f0f,vfloat16(v3.x),vfloat16(v1.x)),
  181. select(0x0f0f,vfloat16(v3.y),vfloat16(v1.y)),
  182. select(0x0f0f,vfloat16(v3.z),vfloat16(v1.z)));
  183. #endif
  184. const vbool16 flags(0xf0f0);
  185. MoellerTrumboreHitM<16> hit;
  186. MoellerTrumboreIntersector1<16> intersector(ray,nullptr);
  187. if (unlikely(intersector.intersect(ray,vtx0,vtx1,vtx2,hit)))
  188. {
  189. vfloat16 U = hit.U, V = hit.V, absDen = hit.absDen;
  190. #if !defined(EMBREE_BACKFACE_CULLING)
  191. hit.U = select(flags,absDen-V,U);
  192. hit.V = select(flags,absDen-U,V);
  193. hit.vNg *= select(flags,vfloat16(-1.0f),vfloat16(1.0f)); // FIXME: use XOR
  194. #else
  195. hit.U = select(flags,absDen-U,U);
  196. hit.V = select(flags,absDen-V,V);
  197. #endif
  198. if (likely(epilog(hit.valid,hit)))
  199. return true;
  200. }
  201. return false;
  202. }
  203. __forceinline bool intersect(Ray& ray, IntersectContext* context,
  204. const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
  205. const vint4& geomID, const vint4& primID) const
  206. {
  207. return intersect(ray,v0,v1,v2,v3,Intersect1EpilogM<8,16,filter>(ray,context,vint8(geomID),vint8(primID)));
  208. }
  209. __forceinline bool occluded(Ray& ray, IntersectContext* context,
  210. const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
  211. const vint4& geomID, const vint4& primID) const
  212. {
  213. return intersect(ray,v0,v1,v2,v3,Occluded1EpilogM<8,16,filter>(ray,context,vint8(geomID),vint8(primID)));
  214. }
  215. };
  216. #elif defined (__AVX__)
  217. /*! Intersects 4 quads with 1 ray using AVX */
  218. template<bool filter>
  219. struct QuadMIntersector1MoellerTrumbore<4,filter>
  220. {
  221. __forceinline QuadMIntersector1MoellerTrumbore() {}
  222. __forceinline QuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {}
  223. template<typename Epilog>
  224. __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const
  225. {
  226. const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z));
  227. #if !defined(EMBREE_BACKFACE_CULLING)
  228. const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z));
  229. const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z));
  230. #else
  231. const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z));
  232. const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z));
  233. #endif
  234. MoellerTrumboreHitM<8> hit;
  235. MoellerTrumboreIntersector1<8> intersector(ray,nullptr);
  236. const vbool8 flags(0,0,0,0,1,1,1,1);
  237. if (unlikely(intersector.intersect(ray,vtx0,vtx1,vtx2,hit)))
  238. {
  239. vfloat8 U = hit.U, V = hit.V, absDen = hit.absDen;
  240. #if !defined(EMBREE_BACKFACE_CULLING)
  241. hit.U = select(flags,absDen-V,U);
  242. hit.V = select(flags,absDen-U,V);
  243. hit.vNg *= select(flags,vfloat8(-1.0f),vfloat8(1.0f)); // FIXME: use XOR
  244. #else
  245. hit.U = select(flags,absDen-U,U);
  246. hit.V = select(flags,absDen-V,V);
  247. #endif
  248. if (unlikely(epilog(hit.valid,hit)))
  249. return true;
  250. }
  251. return false;
  252. }
  253. __forceinline bool intersect(Ray& ray, IntersectContext* context,
  254. const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
  255. const vint4& geomID, const vint4& primID) const
  256. {
  257. return intersect(ray,v0,v1,v2,v3,Intersect1EpilogM<8,8,filter>(ray,context,vint8(geomID),vint8(primID)));
  258. }
  259. __forceinline bool occluded(Ray& ray, IntersectContext* context,
  260. const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
  261. const vint4& geomID, const vint4& primID) const
  262. {
  263. return intersect(ray,v0,v1,v2,v3,Occluded1EpilogM<8,8,filter>(ray,context,vint8(geomID),vint8(primID)));
  264. }
  265. };
  266. #endif
  267. /* ----------------------------- */
  268. /* -- ray packet intersectors -- */
  269. /* ----------------------------- */
  270. struct MoellerTrumboreIntersector1KTriangleM
  271. {
  272. /*! Intersect k'th ray from ray packet of size K with M triangles. */
  273. template<int M, int K, typename Epilog>
  274. static __forceinline bool intersect(RayK<K>& ray,
  275. size_t k,
  276. const Vec3<vfloat<M>>& tri_v0,
  277. const Vec3<vfloat<M>>& tri_e1,
  278. const Vec3<vfloat<M>>& tri_e2,
  279. const Vec3<vfloat<M>>& tri_Ng,
  280. const vbool<M>& flags,
  281. const Epilog& epilog)
  282. {
  283. /* calculate denominator */
  284. typedef Vec3<vfloat<M>> Vec3vfM;
  285. const Vec3vfM O = broadcast<vfloat<M>>(ray.org,k);
  286. const Vec3vfM D = broadcast<vfloat<M>>(ray.dir,k);
  287. const Vec3vfM C = Vec3vfM(tri_v0) - O;
  288. const Vec3vfM R = cross(D,C);
  289. const vfloat<M> den = dot(Vec3vfM(tri_Ng),D);
  290. const vfloat<M> absDen = abs(den);
  291. const vfloat<M> sgnDen = signmsk(den);
  292. /* perform edge tests */
  293. const vfloat<M> U = dot(R,Vec3vfM(tri_e2)) ^ sgnDen;
  294. const vfloat<M> V = dot(R,Vec3vfM(tri_e1)) ^ sgnDen;
  295. /* perform backface culling */
  296. #if defined(EMBREE_BACKFACE_CULLING)
  297. vbool<M> valid = (den < vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
  298. #else
  299. vbool<M> valid = (den != vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen);
  300. #endif
  301. if (likely(none(valid))) return false;
  302. /* perform depth test */
  303. const vfloat<M> T = dot(Vec3vfM(tri_Ng),C) ^ sgnDen;
  304. valid &= (T > absDen*vfloat<M>(ray.tnear[k])) & (T < absDen*vfloat<M>(ray.tfar[k]));
  305. if (likely(none(valid))) return false;
  306. /* calculate hit information */
  307. QuadHitM<M> hit(valid,U,V,T,absDen,tri_Ng,flags);
  308. return epilog(valid,hit);
  309. }
  310. template<int M, int K, typename Epilog>
  311. static __forceinline bool intersect1(RayK<K>& ray,
  312. size_t k,
  313. const Vec3<vfloat<M>>& v0,
  314. const Vec3<vfloat<M>>& v1,
  315. const Vec3<vfloat<M>>& v2,
  316. const vbool<M>& flags,
  317. const Epilog& epilog)
  318. {
  319. const Vec3<vfloat<M>> e1 = v0-v1;
  320. const Vec3<vfloat<M>> e2 = v2-v0;
  321. const Vec3<vfloat<M>> Ng = cross(e1,e2);
  322. return intersect(ray,k,v0,e1,e2,Ng,flags,epilog);
  323. }
  324. };
  325. template<int M, int K, bool filter>
  326. struct QuadMIntersectorKMoellerTrumboreBase
  327. {
  328. __forceinline QuadMIntersectorKMoellerTrumboreBase(const vbool<K>& valid, const RayK<K>& ray) {}
  329. /*! Intersects K rays with one of M triangles. */
  330. template<typename Epilog>
  331. __forceinline vbool<K> intersectK(const vbool<K>& valid0,
  332. RayK<K>& ray,
  333. const Vec3<vfloat<K>>& tri_v0,
  334. const Vec3<vfloat<K>>& tri_e1,
  335. const Vec3<vfloat<K>>& tri_e2,
  336. const Vec3<vfloat<K>>& tri_Ng,
  337. const vbool<K>& flags,
  338. const Epilog& epilog) const
  339. {
  340. /* type shortcuts */
  341. typedef Vec3<vfloat<K>> Vec3vfK;
  342. /* calculate denominator */
  343. vbool<K> valid = valid0;
  344. const Vec3vfK C = tri_v0 - ray.org;
  345. const Vec3vfK R = cross(ray.dir,C);
  346. const vfloat<K> den = dot(tri_Ng,ray.dir);
  347. const vfloat<K> absDen = abs(den);
  348. const vfloat<K> sgnDen = signmsk(den);
  349. /* test against edge p2 p0 */
  350. const vfloat<K> U = dot(R,tri_e2) ^ sgnDen;
  351. valid &= U >= 0.0f;
  352. if (likely(none(valid))) return false;
  353. /* test against edge p0 p1 */
  354. const vfloat<K> V = dot(R,tri_e1) ^ sgnDen;
  355. valid &= V >= 0.0f;
  356. if (likely(none(valid))) return false;
  357. /* test against edge p1 p2 */
  358. const vfloat<K> W = absDen-U-V;
  359. valid &= W >= 0.0f;
  360. if (likely(none(valid))) return false;
  361. /* perform depth test */
  362. const vfloat<K> T = dot(tri_Ng,C) ^ sgnDen;
  363. valid &= (T >= absDen*ray.tnear) & (absDen*ray.tfar >= T);
  364. if (unlikely(none(valid))) return false;
  365. /* perform backface culling */
  366. #if defined(EMBREE_BACKFACE_CULLING)
  367. valid &= den < vfloat<K>(zero);
  368. if (unlikely(none(valid))) return false;
  369. #else
  370. valid &= den != vfloat<K>(zero);
  371. if (unlikely(none(valid))) return false;
  372. #endif
  373. /* calculate hit information */
  374. QuadHitK<K> hit(U,V,T,absDen,tri_Ng,flags);
  375. return epilog(valid,hit);
  376. }
  377. /*! Intersects K rays with one of M quads. */
  378. template<typename Epilog>
  379. __forceinline vbool<K> intersectK(const vbool<K>& valid0,
  380. RayK<K>& ray,
  381. const Vec3<vfloat<K>>& tri_v0,
  382. const Vec3<vfloat<K>>& tri_v1,
  383. const Vec3<vfloat<K>>& tri_v2,
  384. const vbool<K>& flags,
  385. const Epilog& epilog) const
  386. {
  387. typedef Vec3<vfloat<K>> Vec3vfK;
  388. const Vec3vfK e1 = tri_v0-tri_v1;
  389. const Vec3vfK e2 = tri_v2-tri_v0;
  390. const Vec3vfK Ng = cross(e1,e2);
  391. return intersectK(valid0,ray,tri_v0,e1,e2,Ng,flags,epilog);
  392. }
  393. /*! Intersects K rays with one of M quads. */
  394. template<typename Epilog>
  395. __forceinline bool intersectK(const vbool<K>& valid0,
  396. RayK<K>& ray,
  397. const Vec3<vfloat<K>>& v0,
  398. const Vec3<vfloat<K>>& v1,
  399. const Vec3<vfloat<K>>& v2,
  400. const Vec3<vfloat<K>>& v3,
  401. const Epilog& epilog) const
  402. {
  403. intersectK(valid0,ray,v0,v1,v3,vbool<K>(false),epilog);
  404. if (none(valid0)) return true;
  405. intersectK(valid0,ray,v2,v3,v1,vbool<K>(true ),epilog);
  406. return none(valid0);
  407. }
  408. };
  409. template<int M, int K, bool filter>
  410. struct QuadMIntersectorKMoellerTrumbore : public QuadMIntersectorKMoellerTrumboreBase<M,K,filter>
  411. {
  412. __forceinline QuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray)
  413. : QuadMIntersectorKMoellerTrumboreBase<M,K,filter>(valid,ray) {}
  414. __forceinline void intersect1(RayK<K>& ray, size_t k, IntersectContext* context,
  415. const Vec3<vfloat<M>>& v0, const Vec3<vfloat<M>>& v1, const Vec3<vfloat<M>>& v2, const Vec3<vfloat<M>>& v3,
  416. const vint<M>& geomID, const vint<M>& primID) const
  417. {
  418. Intersect1KEpilogM<M,M,K,filter> epilog(ray,k,context,geomID,primID);
  419. MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,v0,v1,v3,vbool<M>(false),epilog);
  420. MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,v2,v3,v1,vbool<M>(true ),epilog);
  421. }
  422. __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context,
  423. const Vec3<vfloat<M>>& v0, const Vec3<vfloat<M>>& v1, const Vec3<vfloat<M>>& v2, const Vec3<vfloat<M>>& v3,
  424. const vint<M>& geomID, const vint<M>& primID) const
  425. {
  426. Occluded1KEpilogM<M,M,K,filter> epilog(ray,k,context,geomID,primID);
  427. if (MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,v0,v1,v3,vbool<M>(false),epilog)) return true;
  428. if (MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,v2,v3,v1,vbool<M>(true ),epilog)) return true;
  429. return false;
  430. }
  431. };
  432. #if defined(__AVX512F__)
  433. /*! Intersects 4 quads with 1 ray using AVX512 */
  434. template<int K, bool filter>
  435. struct QuadMIntersectorKMoellerTrumbore<4,K,filter> : public QuadMIntersectorKMoellerTrumboreBase<4,K,filter>
  436. {
  437. __forceinline QuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray)
  438. : QuadMIntersectorKMoellerTrumboreBase<4,K,filter>(valid,ray) {}
  439. template<typename Epilog>
  440. __forceinline bool intersect1(RayK<K>& ray, size_t k,
  441. const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const
  442. {
  443. const Vec3vf16 vtx0(select(0x0f0f,vfloat16(v0.x),vfloat16(v2.x)),
  444. select(0x0f0f,vfloat16(v0.y),vfloat16(v2.y)),
  445. select(0x0f0f,vfloat16(v0.z),vfloat16(v2.z)));
  446. #if !defined(EMBREE_BACKFACE_CULLING)
  447. const Vec3vf16 vtx1(vfloat16(v1.x),vfloat16(v1.y),vfloat16(v1.z));
  448. const Vec3vf16 vtx2(vfloat16(v3.x),vfloat16(v3.y),vfloat16(v3.z));
  449. #else
  450. const Vec3vf16 vtx1(select(0x0f0f,vfloat16(v1.x),vfloat16(v3.x)),
  451. select(0x0f0f,vfloat16(v1.y),vfloat16(v3.y)),
  452. select(0x0f0f,vfloat16(v1.z),vfloat16(v3.z)));
  453. const Vec3vf16 vtx2(select(0x0f0f,vfloat16(v3.x),vfloat16(v1.x)),
  454. select(0x0f0f,vfloat16(v3.y),vfloat16(v1.y)),
  455. select(0x0f0f,vfloat16(v3.z),vfloat16(v1.z)));
  456. #endif
  457. const vbool16 flags(0xf0f0);
  458. return MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,vtx0,vtx1,vtx2,flags,epilog);
  459. }
  460. __forceinline bool intersect1(RayK<K>& ray, size_t k, IntersectContext* context,
  461. const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
  462. const vint4& geomID, const vint4& primID) const
  463. {
  464. return intersect1(ray,k,v0,v1,v2,v3,Intersect1KEpilogM<8,16,K,filter>(ray,k,context,vint8(geomID),vint8(primID)));
  465. }
  466. __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context,
  467. const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
  468. const vint4& geomID, const vint4& primID) const
  469. {
  470. return intersect1(ray,k,v0,v1,v2,v3,Occluded1KEpilogM<8,16,K,filter>(ray,k,context,vint8(geomID),vint8(primID)));
  471. }
  472. };
  473. #elif defined (__AVX__)
  474. /*! Intersects 4 quads with 1 ray using AVX */
  475. template<int K, bool filter>
  476. struct QuadMIntersectorKMoellerTrumbore<4,K,filter> : public QuadMIntersectorKMoellerTrumboreBase<4,K,filter>
  477. {
  478. __forceinline QuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray)
  479. : QuadMIntersectorKMoellerTrumboreBase<4,K,filter>(valid,ray) {}
  480. template<typename Epilog>
  481. __forceinline bool intersect1(RayK<K>& ray, size_t k,
  482. const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const
  483. {
  484. const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z));
  485. #if !defined(EMBREE_BACKFACE_CULLING)
  486. const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z));
  487. const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z));
  488. #else
  489. const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z));
  490. const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z));
  491. #endif
  492. const vbool8 flags(0,0,0,0,1,1,1,1);
  493. return MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,vtx0,vtx1,vtx2,flags,epilog);
  494. }
  495. __forceinline bool intersect1(RayK<K>& ray, size_t k, IntersectContext* context,
  496. const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
  497. const vint4& geomID, const vint4& primID) const
  498. {
  499. return intersect1(ray,k,v0,v1,v2,v3,Intersect1KEpilogM<8,8,K,filter>(ray,k,context,vint8(geomID),vint8(primID)));
  500. }
  501. __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context,
  502. const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3,
  503. const vint4& geomID, const vint4& primID) const
  504. {
  505. return intersect1(ray,k,v0,v1,v2,v3,Occluded1KEpilogM<8,8,K,filter>(ray,k,context,vint8(geomID),vint8(primID)));
  506. }
  507. };
  508. #endif
  509. }
  510. }