node_intersector_packet.h 38 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "node_intersector.h"
  5. namespace embree
  6. {
  7. namespace isa
  8. {
  9. //////////////////////////////////////////////////////////////////////////////////////
  10. // Ray packet structure used in hybrid traversal
  11. //////////////////////////////////////////////////////////////////////////////////////
  12. template<int K, bool robust>
  13. struct TravRayK;
  14. /* Fast variant */
  15. template<int K>
  16. struct TravRayK<K, false>
  17. {
  18. __forceinline TravRayK() {}
  19. __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
  20. {
  21. init(ray_org, ray_dir, N);
  22. }
  23. __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
  24. {
  25. init(ray_org, ray_dir, N);
  26. tnear = ray_tnear;
  27. tfar = ray_tfar;
  28. }
  29. __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
  30. {
  31. org = ray_org;
  32. dir = ray_dir;
  33. rdir = rcp_safe(ray_dir);
  34. #if defined(__AVX2__) || defined(__ARM_NEON)
  35. org_rdir = org * rdir;
  36. #endif
  37. if (N)
  38. {
  39. const int size = sizeof(float)*N;
  40. nearXYZ.x = select(rdir.x >= 0.0f, vint<K>(0*size), vint<K>(1*size));
  41. nearXYZ.y = select(rdir.y >= 0.0f, vint<K>(2*size), vint<K>(3*size));
  42. nearXYZ.z = select(rdir.z >= 0.0f, vint<K>(4*size), vint<K>(5*size));
  43. }
  44. }
  45. Vec3vf<K> org;
  46. Vec3vf<K> dir;
  47. Vec3vf<K> rdir;
  48. #if defined(__AVX2__) || defined(__ARM_NEON)
  49. Vec3vf<K> org_rdir;
  50. #endif
  51. Vec3vi<K> nearXYZ;
  52. vfloat<K> tnear;
  53. vfloat<K> tfar;
  54. };
  55. template<int K>
  56. using TravRayKFast = TravRayK<K, false>;
  57. /* Robust variant */
  58. template<int K>
  59. struct TravRayK<K, true>
  60. {
  61. __forceinline TravRayK() {}
  62. __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
  63. {
  64. init(ray_org, ray_dir, N);
  65. }
  66. __forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
  67. {
  68. init(ray_org, ray_dir, N);
  69. tnear = ray_tnear;
  70. tfar = ray_tfar;
  71. }
  72. __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
  73. {
  74. org = ray_org;
  75. dir = ray_dir;
  76. rdir = vfloat<K>(1.0f)/(zero_fix(ray_dir));
  77. if (N)
  78. {
  79. const int size = sizeof(float)*N;
  80. nearXYZ.x = select(rdir.x >= 0.0f, vint<K>(0*size), vint<K>(1*size));
  81. nearXYZ.y = select(rdir.y >= 0.0f, vint<K>(2*size), vint<K>(3*size));
  82. nearXYZ.z = select(rdir.z >= 0.0f, vint<K>(4*size), vint<K>(5*size));
  83. }
  84. }
  85. Vec3vf<K> org;
  86. Vec3vf<K> dir;
  87. Vec3vf<K> rdir;
  88. Vec3vi<K> nearXYZ;
  89. vfloat<K> tnear;
  90. vfloat<K> tfar;
  91. };
  92. template<int K>
  93. using TravRayKRobust = TravRayK<K, true>;
  94. //////////////////////////////////////////////////////////////////////////////////////
  95. // Fast AABBNode intersection
  96. //////////////////////////////////////////////////////////////////////////////////////
  97. template<int N, int K>
  98. __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::AABBNode* node, size_t i,
  99. const TravRayKFast<K>& ray, vfloat<K>& dist)
  100. {
  101. #if defined(__AVX2__) || defined(__ARM_NEON)
  102. const vfloat<K> lclipMinX = msub(node->lower_x[i], ray.rdir.x, ray.org_rdir.x);
  103. const vfloat<K> lclipMinY = msub(node->lower_y[i], ray.rdir.y, ray.org_rdir.y);
  104. const vfloat<K> lclipMinZ = msub(node->lower_z[i], ray.rdir.z, ray.org_rdir.z);
  105. const vfloat<K> lclipMaxX = msub(node->upper_x[i], ray.rdir.x, ray.org_rdir.x);
  106. const vfloat<K> lclipMaxY = msub(node->upper_y[i], ray.rdir.y, ray.org_rdir.y);
  107. const vfloat<K> lclipMaxZ = msub(node->upper_z[i], ray.rdir.z, ray.org_rdir.z);
  108. #else
  109. const vfloat<K> lclipMinX = (node->lower_x[i] - ray.org.x) * ray.rdir.x;
  110. const vfloat<K> lclipMinY = (node->lower_y[i] - ray.org.y) * ray.rdir.y;
  111. const vfloat<K> lclipMinZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z;
  112. const vfloat<K> lclipMaxX = (node->upper_x[i] - ray.org.x) * ray.rdir.x;
  113. const vfloat<K> lclipMaxY = (node->upper_y[i] - ray.org.y) * ray.rdir.y;
  114. const vfloat<K> lclipMaxZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z;
  115. #endif
  116. #if defined(__AVX512F__) // SKX
  117. if (K == 16)
  118. {
  119. /* use mixed float/int min/max */
  120. const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
  121. const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
  122. const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
  123. dist = lnearP;
  124. return lhit;
  125. }
  126. else
  127. #endif
  128. {
  129. const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
  130. const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
  131. #if defined(__AVX512F__) // SKX
  132. const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
  133. #else
  134. const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
  135. #endif
  136. dist = lnearP;
  137. return lhit;
  138. }
  139. }
  140. //////////////////////////////////////////////////////////////////////////////////////
  141. // Robust AABBNode intersection
  142. //////////////////////////////////////////////////////////////////////////////////////
  143. template<int N, int K>
  144. __forceinline vbool<K> intersectNodeKRobust(const typename BVHN<N>::AABBNode* node, size_t i,
  145. const TravRayKRobust<K>& ray, vfloat<K>& dist)
  146. {
  147. // FIXME: use per instruction rounding for AVX512
  148. const vfloat<K> lclipMinX = (node->lower_x[i] - ray.org.x) * ray.rdir.x;
  149. const vfloat<K> lclipMinY = (node->lower_y[i] - ray.org.y) * ray.rdir.y;
  150. const vfloat<K> lclipMinZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z;
  151. const vfloat<K> lclipMaxX = (node->upper_x[i] - ray.org.x) * ray.rdir.x;
  152. const vfloat<K> lclipMaxY = (node->upper_y[i] - ray.org.y) * ray.rdir.y;
  153. const vfloat<K> lclipMaxZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z;
  154. const float round_up = 1.0f+3.0f*float(ulp);
  155. const float round_down = 1.0f-3.0f*float(ulp);
  156. const vfloat<K> lnearP = round_down*max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ));
  157. const vfloat<K> lfarP = round_up *min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ));
  158. const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
  159. dist = lnearP;
  160. return lhit;
  161. }
  162. //////////////////////////////////////////////////////////////////////////////////////
  163. // Fast AABBNodeMB intersection
  164. //////////////////////////////////////////////////////////////////////////////////////
  165. template<int N, int K>
  166. __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::AABBNodeMB* node, const size_t i,
  167. const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
  168. {
  169. const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
  170. const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
  171. const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
  172. const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
  173. const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
  174. const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
  175. #if defined(__AVX2__) || defined(__ARM_NEON)
  176. const vfloat<K> lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x);
  177. const vfloat<K> lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y);
  178. const vfloat<K> lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z);
  179. const vfloat<K> lclipMaxX = msub(vupper_x, ray.rdir.x, ray.org_rdir.x);
  180. const vfloat<K> lclipMaxY = msub(vupper_y, ray.rdir.y, ray.org_rdir.y);
  181. const vfloat<K> lclipMaxZ = msub(vupper_z, ray.rdir.z, ray.org_rdir.z);
  182. #else
  183. const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
  184. const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
  185. const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
  186. const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
  187. const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
  188. const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
  189. #endif
  190. #if defined(__AVX512F__) // SKX
  191. if (K == 16)
  192. {
  193. /* use mixed float/int min/max */
  194. const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
  195. const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
  196. const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
  197. dist = lnearP;
  198. return lhit;
  199. }
  200. else
  201. #endif
  202. {
  203. const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
  204. const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
  205. #if defined(__AVX512F__) // SKX
  206. const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
  207. #else
  208. const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
  209. #endif
  210. dist = lnearP;
  211. return lhit;
  212. }
  213. }
  214. //////////////////////////////////////////////////////////////////////////////////////
  215. // Robust AABBNodeMB intersection
  216. //////////////////////////////////////////////////////////////////////////////////////
  217. template<int N, int K>
  218. __forceinline vbool<K> intersectNodeKRobust(const typename BVHN<N>::AABBNodeMB* node, const size_t i,
  219. const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
  220. {
  221. const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
  222. const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
  223. const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
  224. const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
  225. const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
  226. const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
  227. const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
  228. const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
  229. const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
  230. const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
  231. const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
  232. const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
  233. const float round_up = 1.0f+3.0f*float(ulp);
  234. const float round_down = 1.0f-3.0f*float(ulp);
  235. #if defined(__AVX512F__) // SKX
  236. if (K == 16)
  237. {
  238. const vfloat<K> lnearP = round_down*maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
  239. const vfloat<K> lfarP = round_up *mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
  240. const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
  241. dist = lnearP;
  242. return lhit;
  243. }
  244. else
  245. #endif
  246. {
  247. const vfloat<K> lnearP = round_down*maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
  248. const vfloat<K> lfarP = round_up *mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
  249. const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
  250. dist = lnearP;
  251. return lhit;
  252. }
  253. }
  254. //////////////////////////////////////////////////////////////////////////////////////
  255. // Fast AABBNodeMB4D intersection
  256. //////////////////////////////////////////////////////////////////////////////////////
  257. template<int N, int K>
  258. __forceinline vbool<K> intersectNodeKMB4D(const typename BVHN<N>::NodeRef ref, const size_t i,
  259. const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
  260. {
  261. const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
  262. const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
  263. const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
  264. const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
  265. const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
  266. const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
  267. const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
  268. #if defined(__AVX2__) || defined(__ARM_NEON)
  269. const vfloat<K> lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x);
  270. const vfloat<K> lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y);
  271. const vfloat<K> lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z);
  272. const vfloat<K> lclipMaxX = msub(vupper_x, ray.rdir.x, ray.org_rdir.x);
  273. const vfloat<K> lclipMaxY = msub(vupper_y, ray.rdir.y, ray.org_rdir.y);
  274. const vfloat<K> lclipMaxZ = msub(vupper_z, ray.rdir.z, ray.org_rdir.z);
  275. #else
  276. const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
  277. const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
  278. const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
  279. const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
  280. const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
  281. const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
  282. #endif
  283. const vfloat<K> lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
  284. const vfloat<K> lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
  285. vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
  286. if (unlikely(ref.isAABBNodeMB4D())) {
  287. const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
  288. lhit = lhit & (vfloat<K>(node1->lower_t[i]) <= time) & (time < vfloat<K>(node1->upper_t[i]));
  289. }
  290. dist = lnearP;
  291. return lhit;
  292. }
  293. //////////////////////////////////////////////////////////////////////////////////////
  294. // Robust AABBNodeMB4D intersection
  295. //////////////////////////////////////////////////////////////////////////////////////
  296. template<int N, int K>
  297. __forceinline vbool<K> intersectNodeKMB4DRobust(const typename BVHN<N>::NodeRef ref, const size_t i,
  298. const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
  299. {
  300. const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
  301. const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
  302. const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
  303. const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
  304. const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
  305. const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
  306. const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
  307. const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
  308. const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
  309. const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
  310. const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
  311. const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
  312. const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
  313. const float round_up = 1.0f+3.0f*float(ulp);
  314. const float round_down = 1.0f-3.0f*float(ulp);
  315. const vfloat<K> lnearP = round_down*maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
  316. const vfloat<K> lfarP = round_up *mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
  317. vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
  318. if (unlikely(ref.isAABBNodeMB4D())) {
  319. const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
  320. lhit = lhit & (vfloat<K>(node1->lower_t[i]) <= time) & (time < vfloat<K>(node1->upper_t[i]));
  321. }
  322. dist = lnearP;
  323. return lhit;
  324. }
  325. //////////////////////////////////////////////////////////////////////////////////////
  326. // Fast OBBNode intersection
  327. //////////////////////////////////////////////////////////////////////////////////////
  328. template<int N, int K, bool robust>
  329. __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::OBBNode* node, const size_t i,
  330. const TravRayK<K,robust>& ray, vfloat<K>& dist)
  331. {
  332. const AffineSpace3vf<K> naabb(Vec3f(node->naabb.l.vx.x[i], node->naabb.l.vx.y[i], node->naabb.l.vx.z[i]),
  333. Vec3f(node->naabb.l.vy.x[i], node->naabb.l.vy.y[i], node->naabb.l.vy.z[i]),
  334. Vec3f(node->naabb.l.vz.x[i], node->naabb.l.vz.y[i], node->naabb.l.vz.z[i]),
  335. Vec3f(node->naabb.p .x[i], node->naabb.p .y[i], node->naabb.p .z[i]));
  336. const Vec3vf<K> dir = xfmVector(naabb, ray.dir);
  337. const Vec3vf<K> nrdir = Vec3vf<K>(vfloat<K>(-1.0f)) * rcp_safe(dir); // FIXME: negate instead of mul with -1?
  338. const Vec3vf<K> org = xfmPoint(naabb, ray.org);
  339. const vfloat<K> lclipMinX = org.x * nrdir.x; // (Vec3fa(zero) - org) * rdir;
  340. const vfloat<K> lclipMinY = org.y * nrdir.y;
  341. const vfloat<K> lclipMinZ = org.z * nrdir.z;
  342. const vfloat<K> lclipMaxX = lclipMinX - nrdir.x; // (Vec3fa(one) - org) * rdir;
  343. const vfloat<K> lclipMaxY = lclipMinY - nrdir.y;
  344. const vfloat<K> lclipMaxZ = lclipMinZ - nrdir.z;
  345. vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
  346. vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
  347. if (robust) {
  348. lnearP = lnearP*vfloat<K>(1.0f-3.0f*float(ulp));
  349. lfarP = lfarP *vfloat<K>(1.0f+3.0f*float(ulp));
  350. }
  351. const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
  352. dist = lnearP;
  353. return lhit;
  354. }
  355. //////////////////////////////////////////////////////////////////////////////////////
  356. // Fast OBBNodeMB intersection
  357. //////////////////////////////////////////////////////////////////////////////////////
  358. template<int N, int K, bool robust>
  359. __forceinline vbool<K> intersectNodeK(const typename BVHN<N>::OBBNodeMB* node, const size_t i,
  360. const TravRayK<K,robust>& ray, const vfloat<K>& time, vfloat<K>& dist)
  361. {
  362. const AffineSpace3vf<K> xfm(Vec3f(node->space0.l.vx.x[i], node->space0.l.vx.y[i], node->space0.l.vx.z[i]),
  363. Vec3f(node->space0.l.vy.x[i], node->space0.l.vy.y[i], node->space0.l.vy.z[i]),
  364. Vec3f(node->space0.l.vz.x[i], node->space0.l.vz.y[i], node->space0.l.vz.z[i]),
  365. Vec3f(node->space0.p .x[i], node->space0.p .y[i], node->space0.p .z[i]));
  366. const Vec3vf<K> b0_lower = zero;
  367. const Vec3vf<K> b0_upper = one;
  368. const Vec3vf<K> b1_lower(node->b1.lower.x[i], node->b1.lower.y[i], node->b1.lower.z[i]);
  369. const Vec3vf<K> b1_upper(node->b1.upper.x[i], node->b1.upper.y[i], node->b1.upper.z[i]);
  370. const Vec3vf<K> lower = lerp(b0_lower, b1_lower, time);
  371. const Vec3vf<K> upper = lerp(b0_upper, b1_upper, time);
  372. const Vec3vf<K> dir = xfmVector(xfm, ray.dir);
  373. const Vec3vf<K> rdir = rcp_safe(dir);
  374. const Vec3vf<K> org = xfmPoint(xfm, ray.org);
  375. const vfloat<K> lclipMinX = (lower.x - org.x) * rdir.x;
  376. const vfloat<K> lclipMinY = (lower.y - org.y) * rdir.y;
  377. const vfloat<K> lclipMinZ = (lower.z - org.z) * rdir.z;
  378. const vfloat<K> lclipMaxX = (upper.x - org.x) * rdir.x;
  379. const vfloat<K> lclipMaxY = (upper.y - org.y) * rdir.y;
  380. const vfloat<K> lclipMaxZ = (upper.z - org.z) * rdir.z;
  381. vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
  382. vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
  383. if (robust) {
  384. lnearP = lnearP*vfloat<K>(1.0f-3.0f*float(ulp));
  385. lfarP = lfarP *vfloat<K>(1.0f+3.0f*float(ulp));
  386. }
  387. const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
  388. dist = lnearP;
  389. return lhit;
  390. }
  391. //////////////////////////////////////////////////////////////////////////////////////
  392. // QuantizedBaseNode intersection
  393. //////////////////////////////////////////////////////////////////////////////////////
  394. template<int N, int K>
  395. __forceinline vbool<K> intersectQuantizedNodeK(const typename BVHN<N>::QuantizedBaseNode* node, size_t i,
  396. const TravRayK<K,false>& ray, vfloat<K>& dist)
  397. {
  398. assert(movemask(node->validMask()) & ((size_t)1 << i));
  399. const vfloat<N> lower_x = node->dequantizeLowerX();
  400. const vfloat<N> upper_x = node->dequantizeUpperX();
  401. const vfloat<N> lower_y = node->dequantizeLowerY();
  402. const vfloat<N> upper_y = node->dequantizeUpperY();
  403. const vfloat<N> lower_z = node->dequantizeLowerZ();
  404. const vfloat<N> upper_z = node->dequantizeUpperZ();
  405. #if defined(__AVX2__) || defined(__ARM_NEON)
  406. const vfloat<K> lclipMinX = msub(lower_x[i], ray.rdir.x, ray.org_rdir.x);
  407. const vfloat<K> lclipMinY = msub(lower_y[i], ray.rdir.y, ray.org_rdir.y);
  408. const vfloat<K> lclipMinZ = msub(lower_z[i], ray.rdir.z, ray.org_rdir.z);
  409. const vfloat<K> lclipMaxX = msub(upper_x[i], ray.rdir.x, ray.org_rdir.x);
  410. const vfloat<K> lclipMaxY = msub(upper_y[i], ray.rdir.y, ray.org_rdir.y);
  411. const vfloat<K> lclipMaxZ = msub(upper_z[i], ray.rdir.z, ray.org_rdir.z);
  412. #else
  413. const vfloat<K> lclipMinX = (lower_x[i] - ray.org.x) * ray.rdir.x;
  414. const vfloat<K> lclipMinY = (lower_y[i] - ray.org.y) * ray.rdir.y;
  415. const vfloat<K> lclipMinZ = (lower_z[i] - ray.org.z) * ray.rdir.z;
  416. const vfloat<K> lclipMaxX = (upper_x[i] - ray.org.x) * ray.rdir.x;
  417. const vfloat<K> lclipMaxY = (upper_y[i] - ray.org.y) * ray.rdir.y;
  418. const vfloat<K> lclipMaxZ = (upper_z[i] - ray.org.z) * ray.rdir.z;
  419. #endif
  420. #if defined(__AVX512F__) // SKX
  421. if (K == 16)
  422. {
  423. /* use mixed float/int min/max */
  424. const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
  425. const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
  426. const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
  427. dist = lnearP;
  428. return lhit;
  429. }
  430. else
  431. #endif
  432. {
  433. const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
  434. const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
  435. #if defined(__AVX512F__) // SKX
  436. const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
  437. #else
  438. const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
  439. #endif
  440. dist = lnearP;
  441. return lhit;
  442. }
  443. }
  444. template<int N, int K>
  445. __forceinline vbool<K> intersectQuantizedNodeK(const typename BVHN<N>::QuantizedBaseNode* node, size_t i,
  446. const TravRayK<K,true>& ray, vfloat<K>& dist)
  447. {
  448. assert(movemask(node->validMask()) & ((size_t)1 << i));
  449. const vfloat<N> lower_x = node->dequantizeLowerX();
  450. const vfloat<N> upper_x = node->dequantizeUpperX();
  451. const vfloat<N> lower_y = node->dequantizeLowerY();
  452. const vfloat<N> upper_y = node->dequantizeUpperY();
  453. const vfloat<N> lower_z = node->dequantizeLowerZ();
  454. const vfloat<N> upper_z = node->dequantizeUpperZ();
  455. const vfloat<K> lclipMinX = (lower_x[i] - ray.org.x) * ray.rdir.x;
  456. const vfloat<K> lclipMinY = (lower_y[i] - ray.org.y) * ray.rdir.y;
  457. const vfloat<K> lclipMinZ = (lower_z[i] - ray.org.z) * ray.rdir.z;
  458. const vfloat<K> lclipMaxX = (upper_x[i] - ray.org.x) * ray.rdir.x;
  459. const vfloat<K> lclipMaxY = (upper_y[i] - ray.org.y) * ray.rdir.y;
  460. const vfloat<K> lclipMaxZ = (upper_z[i] - ray.org.z) * ray.rdir.z;
  461. const float round_up = 1.0f+3.0f*float(ulp);
  462. const float round_down = 1.0f-3.0f*float(ulp);
  463. const vfloat<K> lnearP = round_down*max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
  464. const vfloat<K> lfarP = round_up *min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
  465. const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
  466. dist = lnearP;
  467. return lhit;
  468. }
  469. template<int N, int K>
  470. __forceinline vbool<K> intersectQuantizedNodeMBK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
  471. const TravRayK<K,false>& ray, const vfloat<K>& time, vfloat<K>& dist)
  472. {
  473. assert(movemask(node->validMask()) & ((size_t)1 << i));
  474. const vfloat<K> lower_x = node->template dequantizeLowerX<K>(i,time);
  475. const vfloat<K> upper_x = node->template dequantizeUpperX<K>(i,time);
  476. const vfloat<K> lower_y = node->template dequantizeLowerY<K>(i,time);
  477. const vfloat<K> upper_y = node->template dequantizeUpperY<K>(i,time);
  478. const vfloat<K> lower_z = node->template dequantizeLowerZ<K>(i,time);
  479. const vfloat<K> upper_z = node->template dequantizeUpperZ<K>(i,time);
  480. #if defined(__AVX2__) || defined(__ARM_NEON)
  481. const vfloat<K> lclipMinX = msub(lower_x, ray.rdir.x, ray.org_rdir.x);
  482. const vfloat<K> lclipMinY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
  483. const vfloat<K> lclipMinZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
  484. const vfloat<K> lclipMaxX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
  485. const vfloat<K> lclipMaxY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
  486. const vfloat<K> lclipMaxZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
  487. #else
  488. const vfloat<K> lclipMinX = (lower_x - ray.org.x) * ray.rdir.x;
  489. const vfloat<K> lclipMinY = (lower_y - ray.org.y) * ray.rdir.y;
  490. const vfloat<K> lclipMinZ = (lower_z - ray.org.z) * ray.rdir.z;
  491. const vfloat<K> lclipMaxX = (upper_x - ray.org.x) * ray.rdir.x;
  492. const vfloat<K> lclipMaxY = (upper_y - ray.org.y) * ray.rdir.y;
  493. const vfloat<K> lclipMaxZ = (upper_z - ray.org.z) * ray.rdir.z;
  494. #endif
  495. const vfloat<K> lnearP = max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
  496. const vfloat<K> lfarP = min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
  497. const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
  498. dist = lnearP;
  499. return lhit;
  500. }
  501. template<int N, int K>
  502. __forceinline vbool<K> intersectQuantizedNodeMBK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
  503. const TravRayK<K,true>& ray, const vfloat<K>& time, vfloat<K>& dist)
  504. {
  505. assert(movemask(node->validMask()) & ((size_t)1 << i));
  506. const vfloat<K> lower_x = node->template dequantizeLowerX<K>(i,time);
  507. const vfloat<K> upper_x = node->template dequantizeUpperX<K>(i,time);
  508. const vfloat<K> lower_y = node->template dequantizeLowerY<K>(i,time);
  509. const vfloat<K> upper_y = node->template dequantizeUpperY<K>(i,time);
  510. const vfloat<K> lower_z = node->template dequantizeLowerZ<K>(i,time);
  511. const vfloat<K> upper_z = node->template dequantizeUpperZ<K>(i,time);
  512. const vfloat<K> lclipMinX = (lower_x - ray.org.x) * ray.rdir.x;
  513. const vfloat<K> lclipMinY = (lower_y - ray.org.y) * ray.rdir.y;
  514. const vfloat<K> lclipMinZ = (lower_z - ray.org.z) * ray.rdir.z;
  515. const vfloat<K> lclipMaxX = (upper_x - ray.org.x) * ray.rdir.x;
  516. const vfloat<K> lclipMaxY = (upper_y - ray.org.y) * ray.rdir.y;
  517. const vfloat<K> lclipMaxZ = (upper_z - ray.org.z) * ray.rdir.z;
  518. const float round_up = 1.0f+3.0f*float(ulp);
  519. const float round_down = 1.0f-3.0f*float(ulp);
  520. const vfloat<K> lnearP = round_down*max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
  521. const vfloat<K> lfarP = round_up *min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
  522. const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
  523. dist = lnearP;
  524. return lhit;
  525. }
  526. //////////////////////////////////////////////////////////////////////////////////////
  527. // Node intersectors used in hybrid traversal
  528. //////////////////////////////////////////////////////////////////////////////////////
  529. /*! Intersects N nodes with K rays */
  530. template<int N, int K, int types, bool robust>
  531. struct BVHNNodeIntersectorK;
  532. template<int N, int K>
  533. struct BVHNNodeIntersectorK<N, K, BVH_AN1, false>
  534. {
  535. /* vmask is both an input and an output parameter! Its initial value should be the parent node
  536. hit mask, which is used for correctly computing the current hit mask. The parent hit mask
  537. is actually required only for motion blur node intersections (because different rays may
  538. have different times), so for regular nodes vmask is simply overwritten. */
  539. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
  540. const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  541. {
  542. vmask = intersectNodeK<N,K>(node.getAABBNode(), i, ray, dist);
  543. return true;
  544. }
  545. };
  546. template<int N, int K>
  547. struct BVHNNodeIntersectorK<N, K, BVH_AN1, true>
  548. {
  549. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
  550. const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  551. {
  552. vmask = intersectNodeKRobust<N,K>(node.getAABBNode(), i, ray, dist);
  553. return true;
  554. }
  555. };
  556. template<int N, int K>
  557. struct BVHNNodeIntersectorK<N, K, BVH_AN2, false>
  558. {
  559. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
  560. const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  561. {
  562. vmask = intersectNodeK<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
  563. return true;
  564. }
  565. };
  566. template<int N, int K>
  567. struct BVHNNodeIntersectorK<N, K, BVH_AN2, true>
  568. {
  569. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
  570. const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  571. {
  572. vmask = intersectNodeKRobust<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
  573. return true;
  574. }
  575. };
  576. template<int N, int K>
  577. struct BVHNNodeIntersectorK<N, K, BVH_AN1_UN1, false>
  578. {
  579. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
  580. const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  581. {
  582. if (likely(node.isAABBNode())) vmask = intersectNodeK<N,K>(node.getAABBNode(), i, ray, dist);
  583. else /*if (unlikely(node.isOBBNode()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNode(), i, ray, dist);
  584. return true;
  585. }
  586. };
  587. template<int N, int K>
  588. struct BVHNNodeIntersectorK<N, K, BVH_AN1_UN1, true>
  589. {
  590. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
  591. const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  592. {
  593. if (likely(node.isAABBNode())) vmask = intersectNodeKRobust<N,K>(node.getAABBNode(), i, ray, dist);
  594. else /*if (unlikely(node.isOBBNode()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNode(), i, ray, dist);
  595. return true;
  596. }
  597. };
  598. template<int N, int K>
  599. struct BVHNNodeIntersectorK<N, K, BVH_AN2_UN2, false>
  600. {
  601. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
  602. const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  603. {
  604. if (likely(node.isAABBNodeMB())) vmask = intersectNodeK<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
  605. else /*if (unlikely(node.isOBBNodeMB()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
  606. return true;
  607. }
  608. };
  609. template<int N, int K>
  610. struct BVHNNodeIntersectorK<N, K, BVH_AN2_UN2, true>
  611. {
  612. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
  613. const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  614. {
  615. if (likely(node.isAABBNodeMB())) vmask = intersectNodeKRobust<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
  616. else /*if (unlikely(node.isOBBNodeMB()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
  617. return true;
  618. }
  619. };
  620. template<int N, int K>
  621. struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D, false>
  622. {
  623. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
  624. const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  625. {
  626. vmask &= intersectNodeKMB4D<N,K>(node, i, ray, time, dist);
  627. return true;
  628. }
  629. };
  630. template<int N, int K>
  631. struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D, true>
  632. {
  633. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
  634. const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  635. {
  636. vmask &= intersectNodeKMB4DRobust<N,K>(node, i, ray, time, dist);
  637. return true;
  638. }
  639. };
  640. template<int N, int K>
  641. struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D_UN2, false>
  642. {
  643. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
  644. const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  645. {
  646. if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) {
  647. vmask &= intersectNodeKMB4D<N,K>(node, i, ray, time, dist);
  648. } else /*if (unlikely(node.isOBBNodeMB()))*/ {
  649. assert(node.isOBBNodeMB());
  650. vmask &= intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
  651. }
  652. return true;
  653. }
  654. };
  655. template<int N, int K>
  656. struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D_UN2, true>
  657. {
  658. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
  659. const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
  660. {
  661. if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) {
  662. vmask &= intersectNodeKMB4DRobust<N,K>(node, i, ray, time, dist);
  663. } else /*if (unlikely(node.isOBBNodeMB()))*/ {
  664. assert(node.isOBBNodeMB());
  665. vmask &= intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
  666. }
  667. return true;
  668. }
  669. };
  670. /*! Intersects N nodes with K rays */
  671. template<int N, int K, bool robust>
  672. struct BVHNQuantizedBaseNodeIntersectorK;
  673. template<int N, int K>
  674. struct BVHNQuantizedBaseNodeIntersectorK<N, K, false>
  675. {
  676. static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNode* node, const size_t i,
  677. const TravRayK<K,false>& ray, vfloat<K>& dist)
  678. {
  679. return intersectQuantizedNodeK<N,K>(node,i,ray,dist);
  680. }
  681. static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
  682. const TravRayK<K,false>& ray, const vfloat<K>& time, vfloat<K>& dist)
  683. {
  684. return intersectQuantizedNodeMBK<N,K>(node,i,ray,time,dist);
  685. }
  686. };
  687. template<int N, int K>
  688. struct BVHNQuantizedBaseNodeIntersectorK<N, K, true>
  689. {
  690. static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNode* node, const size_t i,
  691. const TravRayK<K,true>& ray, vfloat<K>& dist)
  692. {
  693. return intersectQuantizedNodeK<N,K>(node,i,ray,dist);
  694. }
  695. static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
  696. const TravRayK<K,true>& ray, const vfloat<K>& time, vfloat<K>& dist)
  697. {
  698. return intersectQuantizedNodeMBK<N,K>(node,i,ray,time,dist);
  699. }
  700. };
  701. }
  702. }