bvh_intersector_hybrid.cpp 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #include "bvh_intersector_hybrid.h"
  4. #include "bvh_traverser1.h"
  5. #include "node_intersector1.h"
  6. #include "node_intersector_packet.h"
  7. #include "../geometry/intersector_iterators.h"
  8. #include "../geometry/triangle_intersector.h"
  9. #include "../geometry/trianglev_intersector.h"
  10. #include "../geometry/trianglev_mb_intersector.h"
  11. #include "../geometry/trianglei_intersector.h"
  12. #include "../geometry/quadv_intersector.h"
  13. #include "../geometry/quadi_intersector.h"
  14. #include "../geometry/curveNv_intersector.h"
  15. #include "../geometry/curveNi_intersector.h"
  16. #include "../geometry/curveNi_mb_intersector.h"
  17. #include "../geometry/linei_intersector.h"
  18. #include "../geometry/subdivpatch1_intersector.h"
  19. #include "../geometry/object_intersector.h"
  20. #include "../geometry/instance_intersector.h"
  21. #include "../geometry/instance_array_intersector.h"
  22. #include "../geometry/subgrid_intersector.h"
  23. #include "../geometry/subgrid_mb_intersector.h"
  24. #include "../geometry/curve_intersector_virtual.h"
  25. #define SWITCH_DURING_DOWN_TRAVERSAL 1
  26. #define FORCE_SINGLE_MODE 0
  27. #define ENABLE_FAST_COHERENT_CODEPATHS 1
  28. namespace embree
  29. {
  30. namespace isa
  31. {
  32. template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
  33. void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect1(Accel::Intersectors* This,
  34. const BVH* bvh,
  35. NodeRef root,
  36. size_t k,
  37. Precalculations& pre,
  38. RayHitK<K>& ray,
  39. const TravRayK<K, robust>& tray,
  40. RayQueryContext* context)
  41. {
  42. /* stack state */
  43. StackItemT<NodeRef> stack[stackSizeSingle]; // stack of nodes
  44. StackItemT<NodeRef>* stackPtr = stack + 1; // current stack pointer
  45. StackItemT<NodeRef>* stackEnd = stack + stackSizeSingle;
  46. stack[0].ptr = root;
  47. stack[0].dist = neg_inf;
  48. /* load the ray into SIMD registers */
  49. TravRay<N,robust> tray1;
  50. tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]);
  51. /* pop loop */
  52. while (true) pop:
  53. {
  54. /* pop next node */
  55. if (unlikely(stackPtr == stack)) break;
  56. stackPtr--;
  57. NodeRef cur = NodeRef(stackPtr->ptr);
  58. /* if popped node is too far, pop next one */
  59. if (unlikely(*(float*)&stackPtr->dist > ray.tfar[k]))
  60. continue;
  61. /* downtraversal loop */
  62. while (true)
  63. {
  64. /* intersect node */
  65. size_t mask; vfloat<N> tNear;
  66. STAT3(normal.trav_nodes, 1, 1, 1);
  67. bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask);
  68. if (unlikely(!nodeIntersected)) { STAT3(normal.trav_nodes,-1,-1,-1); break; }
  69. /* if no child is hit, pop next node */
  70. if (unlikely(mask == 0))
  71. goto pop;
  72. /* select next child and push other children */
  73. BVHNNodeTraverser1Hit<N, types>::traverseClosestHit(cur, mask, tNear, stackPtr, stackEnd);
  74. }
  75. /* this is a leaf node */
  76. assert(cur != BVH::emptyNode);
  77. STAT3(normal.trav_leaves, 1, 1, 1);
  78. size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
  79. size_t lazy_node = 0;
  80. PrimitiveIntersectorK::intersect(This, pre, ray, k, context, prim, num, tray1, lazy_node);
  81. tray1.tfar = ray.tfar[k];
  82. if (unlikely(lazy_node)) {
  83. stackPtr->ptr = lazy_node;
  84. stackPtr->dist = neg_inf;
  85. stackPtr++;
  86. }
  87. }
  88. }
  89. template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
  90. void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect(vint<K>* __restrict__ valid_i,
  91. Accel::Intersectors* __restrict__ This,
  92. RayHitK<K>& __restrict__ ray,
  93. RayQueryContext* __restrict__ context)
  94. {
  95. BVH* __restrict__ bvh = (BVH*)This->ptr;
  96. /* we may traverse an empty BVH in case all geometry was invalid */
  97. if (bvh->root == BVH::emptyNode)
  98. return;
  99. #if ENABLE_FAST_COHERENT_CODEPATHS == 1
  100. assert(context);
  101. if (unlikely(types == BVH_AN1 && context->user && context->isCoherent()))
  102. {
  103. intersectCoherent(valid_i, This, ray, context);
  104. return;
  105. }
  106. #endif
  107. /* filter out invalid rays */
  108. vbool<K> valid = *valid_i == -1;
  109. #if defined(EMBREE_IGNORE_INVALID_RAYS)
  110. valid &= ray.valid();
  111. #endif
  112. /* return if there are no valid rays */
  113. size_t valid_bits = movemask(valid);
  114. #if defined(__AVX__)
  115. STAT3(normal.trav_hit_boxes[popcnt(movemask(valid))], 1, 1, 1);
  116. #endif
  117. if (unlikely(valid_bits == 0)) return;
  118. /* verify correct input */
  119. assert(all(valid, ray.valid()));
  120. assert(all(valid, ray.tnear() >= 0.0f));
  121. assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
  122. Precalculations pre(valid, ray);
  123. /* load ray */
  124. TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
  125. const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
  126. const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
  127. if (single)
  128. {
  129. tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf));
  130. tray.tfar = select(valid, org_ray_tfar , vfloat<K>(neg_inf));
  131. for (; valid_bits!=0; ) {
  132. const size_t i = bscf(valid_bits);
  133. intersect1(This, bvh, bvh->root, i, pre, ray, tray, context);
  134. }
  135. return;
  136. }
  137. /* determine switch threshold based on flags */
  138. const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent;
  139. vint<K> octant = ray.octant();
  140. octant = select(valid, octant, vint<K>(0xffffffff));
  141. /* test whether we have ray with opposing direction signs in the packet */
  142. bool split = false;
  143. {
  144. size_t bits = valid_bits;
  145. vbool<K> vsplit( false );
  146. do
  147. {
  148. const size_t valid_index = bsf(bits);
  149. vbool<K> octant_valid = octant[valid_index] == octant;
  150. bits &= ~(size_t)movemask(octant_valid);
  151. vsplit |= vint<K>(octant[valid_index]) == (octant^vint<K>(0x7));
  152. } while (bits);
  153. if (any(vsplit)) split = true;
  154. }
  155. do
  156. {
  157. const size_t valid_index = bsf(valid_bits);
  158. const vint<K> diff_octant = vint<K>(octant[valid_index])^octant;
  159. const vint<K> count_diff_octant = \
  160. ((diff_octant >> 2) & 1) +
  161. ((diff_octant >> 1) & 1) +
  162. ((diff_octant >> 0) & 1);
  163. vbool<K> octant_valid = (count_diff_octant <= 1) & (octant != vint<K>(0xffffffff));
  164. if (!single || !split) octant_valid = valid; // deactivate octant sorting in pure chunk mode, otherwise instance traversal performance goes down
  165. octant = select(octant_valid,vint<K>(0xffffffff),octant);
  166. valid_bits &= ~(size_t)movemask(octant_valid);
  167. tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
  168. tray.tfar = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf));
  169. /* allocate stack and push root node */
  170. vfloat<K> stack_near[stackSizeChunk];
  171. NodeRef stack_node[stackSizeChunk];
  172. stack_node[0] = BVH::invalidNode;
  173. stack_near[0] = inf;
  174. stack_node[1] = bvh->root;
  175. stack_near[1] = tray.tnear;
  176. NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk;
  177. NodeRef* __restrict__ sptr_node = stack_node + 2;
  178. vfloat<K>* __restrict__ sptr_near = stack_near + 2;
  179. while (1) pop:
  180. {
  181. /* pop next node from stack */
  182. assert(sptr_node > stack_node);
  183. sptr_node--;
  184. sptr_near--;
  185. NodeRef cur = *sptr_node;
  186. if (unlikely(cur == BVH::invalidNode)) {
  187. assert(sptr_node == stack_node);
  188. break;
  189. }
  190. /* cull node if behind closest hit point */
  191. vfloat<K> curDist = *sptr_near;
  192. const vbool<K> active = curDist < tray.tfar;
  193. if (unlikely(none(active)))
  194. continue;
  195. /* switch to single ray traversal */
  196. #if (!defined(__WIN32__) || defined(__X86_64__)) && ((defined(__aarch64__)) || defined(__SSE4_2__))
  197. #if FORCE_SINGLE_MODE == 0
  198. if (single)
  199. #endif
  200. {
  201. size_t bits = movemask(active);
  202. #if FORCE_SINGLE_MODE == 0
  203. if (unlikely(popcnt(bits) <= switchThreshold))
  204. #endif
  205. {
  206. for (; bits!=0; ) {
  207. const size_t i = bscf(bits);
  208. intersect1(This, bvh, cur, i, pre, ray, tray, context);
  209. }
  210. tray.tfar = min(tray.tfar, ray.tfar);
  211. continue;
  212. }
  213. }
  214. #endif
  215. while (likely(!cur.isLeaf()))
  216. {
  217. /* process nodes */
  218. const vbool<K> valid_node = tray.tfar > curDist;
  219. STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
  220. const NodeRef nodeRef = cur;
  221. const BaseNode* __restrict__ const node = nodeRef.baseNode();
  222. /* set cur to invalid */
  223. cur = BVH::emptyNode;
  224. curDist = pos_inf;
  225. size_t num_child_hits = 0;
  226. for (unsigned i = 0; i < N; i++)
  227. {
  228. const NodeRef child = node->children[i];
  229. if (unlikely(child == BVH::emptyNode)) break;
  230. vfloat<K> lnearP;
  231. vbool<K> lhit = valid_node;
  232. BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
  233. /* if we hit the child we choose to continue with that child if it
  234. is closer than the current next child, or we push it onto the stack */
  235. if (likely(any(lhit)))
  236. {
  237. assert(sptr_node < stackEnd);
  238. assert(child != BVH::emptyNode);
  239. const vfloat<K> childDist = select(lhit, lnearP, inf);
  240. /* push cur node onto stack and continue with hit child */
  241. if (any(childDist < curDist))
  242. {
  243. if (likely(cur != BVH::emptyNode)) {
  244. num_child_hits++;
  245. *sptr_node = cur; sptr_node++;
  246. *sptr_near = curDist; sptr_near++;
  247. }
  248. curDist = childDist;
  249. cur = child;
  250. }
  251. /* push hit child onto stack */
  252. else {
  253. num_child_hits++;
  254. *sptr_node = child; sptr_node++;
  255. *sptr_near = childDist; sptr_near++;
  256. }
  257. }
  258. }
  259. #if defined(__AVX__)
  260. //STAT3(normal.trav_hit_boxes[num_child_hits], 1, 1, 1);
  261. #endif
  262. if (unlikely(cur == BVH::emptyNode))
  263. goto pop;
  264. /* improved distance sorting for 3 or more hits */
  265. if (unlikely(num_child_hits >= 2))
  266. {
  267. if (any(sptr_near[-2] < sptr_near[-1]))
  268. {
  269. std::swap(sptr_near[-2],sptr_near[-1]);
  270. std::swap(sptr_node[-2],sptr_node[-1]);
  271. }
  272. if (unlikely(num_child_hits >= 3))
  273. {
  274. if (any(sptr_near[-3] < sptr_near[-1]))
  275. {
  276. std::swap(sptr_near[-3],sptr_near[-1]);
  277. std::swap(sptr_node[-3],sptr_node[-1]);
  278. }
  279. if (any(sptr_near[-3] < sptr_near[-2]))
  280. {
  281. std::swap(sptr_near[-3],sptr_near[-2]);
  282. std::swap(sptr_node[-3],sptr_node[-2]);
  283. }
  284. }
  285. }
  286. #if SWITCH_DURING_DOWN_TRAVERSAL == 1
  287. if (single)
  288. {
  289. // seems to be the best place for testing utilization
  290. if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold))
  291. {
  292. *sptr_node++ = cur;
  293. *sptr_near++ = curDist;
  294. goto pop;
  295. }
  296. }
  297. #endif
  298. }
  299. /* return if stack is empty */
  300. if (unlikely(cur == BVH::invalidNode)) {
  301. assert(sptr_node == stack_node);
  302. break;
  303. }
  304. /* intersect leaf */
  305. assert(cur != BVH::emptyNode);
  306. const vbool<K> valid_leaf = tray.tfar > curDist;
  307. STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K);
  308. if (unlikely(none(valid_leaf))) continue;
  309. size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
  310. size_t lazy_node = 0;
  311. PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node);
  312. tray.tfar = select(valid_leaf, ray.tfar, tray.tfar);
  313. if (unlikely(lazy_node)) {
  314. *sptr_node = lazy_node; sptr_node++;
  315. *sptr_near = neg_inf; sptr_near++;
  316. }
  317. }
  318. } while(valid_bits);
  319. }
  320. template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
  321. void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersectCoherent(vint<K>* __restrict__ valid_i,
  322. Accel::Intersectors* __restrict__ This,
  323. RayHitK<K>& __restrict__ ray,
  324. RayQueryContext* context)
  325. {
  326. BVH* __restrict__ bvh = (BVH*)This->ptr;
  327. /* filter out invalid rays */
  328. vbool<K> valid = *valid_i == -1;
  329. #if defined(EMBREE_IGNORE_INVALID_RAYS)
  330. valid &= ray.valid();
  331. #endif
  332. /* return if there are no valid rays */
  333. size_t valid_bits = movemask(valid);
  334. if (unlikely(valid_bits == 0)) return;
  335. /* verify correct input */
  336. assert(all(valid, ray.valid()));
  337. assert(all(valid, ray.tnear() >= 0.0f));
  338. assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
  339. Precalculations pre(valid, ray);
  340. /* load ray */
  341. TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
  342. const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
  343. const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
  344. vint<K> octant = ray.octant();
  345. octant = select(valid, octant, vint<K>(0xffffffff));
  346. do
  347. {
  348. const size_t valid_index = bsf(valid_bits);
  349. const vbool<K> octant_valid = octant[valid_index] == octant;
  350. valid_bits &= ~(size_t)movemask(octant_valid);
  351. tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
  352. tray.tfar = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf));
  353. Frustum<robust> frustum;
  354. frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N);
  355. StackItemT<NodeRef> stack[stackSizeSingle]; // stack of nodes
  356. StackItemT<NodeRef>* stackPtr = stack + 1; // current stack pointer
  357. stack[0].ptr = bvh->root;
  358. stack[0].dist = neg_inf;
  359. while (1) pop:
  360. {
  361. /* pop next node from stack */
  362. if (unlikely(stackPtr == stack)) break;
  363. stackPtr--;
  364. NodeRef cur = NodeRef(stackPtr->ptr);
  365. /* cull node if behind closest hit point */
  366. vfloat<K> curDist = *(float*)&stackPtr->dist;
  367. const vbool<K> active = curDist < tray.tfar;
  368. if (unlikely(none(active))) continue;
  369. while (likely(!cur.isLeaf()))
  370. {
  371. /* process nodes */
  372. //STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
  373. const NodeRef nodeRef = cur;
  374. const AABBNode* __restrict__ const node = nodeRef.getAABBNode();
  375. vfloat<N> fmin;
  376. size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin);
  377. if (unlikely(!m_frustum_node)) goto pop;
  378. cur = BVH::emptyNode;
  379. curDist = pos_inf;
  380. #if defined(__AVX__)
  381. //STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1);
  382. #endif
  383. size_t num_child_hits = 0;
  384. do {
  385. const size_t i = bscf(m_frustum_node);
  386. vfloat<K> lnearP;
  387. vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored
  388. STAT3(normal.trav_nodes, 1, 1, 1);
  389. BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
  390. if (likely(any(lhit)))
  391. {
  392. const vfloat<K> childDist = fmin[i];
  393. const NodeRef child = node->child(i);
  394. BVHN<N>::prefetch(child);
  395. if (any(childDist < curDist))
  396. {
  397. if (likely(cur != BVH::emptyNode)) {
  398. num_child_hits++;
  399. stackPtr->ptr = cur;
  400. *(float*)&stackPtr->dist = toScalar(curDist);
  401. stackPtr++;
  402. }
  403. curDist = childDist;
  404. cur = child;
  405. }
  406. /* push hit child onto stack */
  407. else {
  408. num_child_hits++;
  409. stackPtr->ptr = child;
  410. *(float*)&stackPtr->dist = toScalar(childDist);
  411. stackPtr++;
  412. }
  413. }
  414. } while(m_frustum_node);
  415. if (unlikely(cur == BVH::emptyNode)) goto pop;
  416. /* improved distance sorting for 3 or more hits */
  417. if (unlikely(num_child_hits >= 2))
  418. {
  419. if (stackPtr[-2].dist < stackPtr[-1].dist)
  420. std::swap(stackPtr[-2],stackPtr[-1]);
  421. if (unlikely(num_child_hits >= 3))
  422. {
  423. if (stackPtr[-3].dist < stackPtr[-1].dist)
  424. std::swap(stackPtr[-3],stackPtr[-1]);
  425. if (stackPtr[-3].dist < stackPtr[-2].dist)
  426. std::swap(stackPtr[-3],stackPtr[-2]);
  427. }
  428. }
  429. }
  430. /* intersect leaf */
  431. assert(cur != BVH::invalidNode);
  432. assert(cur != BVH::emptyNode);
  433. const vbool<K> valid_leaf = tray.tfar > curDist;
  434. STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K);
  435. if (unlikely(none(valid_leaf))) continue;
  436. size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
  437. size_t lazy_node = 0;
  438. PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node);
  439. /* reduce max distance interval on successful intersection */
  440. if (likely(any((ray.tfar < tray.tfar) & valid_leaf)))
  441. {
  442. tray.tfar = select(valid_leaf, ray.tfar, tray.tfar);
  443. frustum.template updateMaxDist<K>(tray.tfar);
  444. }
  445. if (unlikely(lazy_node)) {
  446. stackPtr->ptr = lazy_node;
  447. stackPtr->dist = neg_inf;
  448. stackPtr++;
  449. }
  450. }
  451. } while(valid_bits);
  452. }
// ===================================================================================================================================================================
// ===================================================================================================================================================================
// ===================================================================================================================================================================
  456. template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
  457. bool BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded1(Accel::Intersectors* This,
  458. const BVH* bvh,
  459. NodeRef root,
  460. size_t k,
  461. Precalculations& pre,
  462. RayK<K>& ray,
  463. const TravRayK<K, robust>& tray,
  464. RayQueryContext* context)
  465. {
  466. /* stack state */
  467. NodeRef stack[stackSizeSingle]; // stack of nodes that still need to get traversed
  468. NodeRef* stackPtr = stack+1; // current stack pointer
  469. NodeRef* stackEnd = stack+stackSizeSingle;
  470. stack[0] = root;
  471. /* load the ray into SIMD registers */
  472. TravRay<N,robust> tray1;
  473. tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]);
  474. /* pop loop */
  475. while (true) pop:
  476. {
  477. /* pop next node */
  478. if (unlikely(stackPtr == stack)) break;
  479. stackPtr--;
  480. NodeRef cur = (NodeRef)*stackPtr;
  481. /* downtraversal loop */
  482. while (true)
  483. {
  484. /* intersect node */
  485. size_t mask; vfloat<N> tNear;
  486. STAT3(shadow.trav_nodes, 1, 1, 1);
  487. bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask);
  488. if (unlikely(!nodeIntersected)) { STAT3(shadow.trav_nodes,-1,-1,-1); break; }
  489. /* if no child is hit, pop next node */
  490. if (unlikely(mask == 0))
  491. goto pop;
  492. /* select next child and push other children */
  493. BVHNNodeTraverser1Hit<N, types>::traverseAnyHit(cur, mask, tNear, stackPtr, stackEnd);
  494. }
  495. /* this is a leaf node */
  496. assert(cur != BVH::emptyNode);
  497. STAT3(shadow.trav_leaves, 1, 1, 1);
  498. size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
  499. size_t lazy_node = 0;
  500. if (PrimitiveIntersectorK::occluded(This, pre, ray, k, context, prim, num, tray1, lazy_node)) {
  501. ray.tfar[k] = neg_inf;
  502. return true;
  503. }
  504. if (unlikely(lazy_node)) {
  505. *stackPtr = lazy_node;
  506. stackPtr++;
  507. }
  508. }
  509. return false;
  510. }
  511. template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
  512. void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded(vint<K>* __restrict__ valid_i,
  513. Accel::Intersectors* __restrict__ This,
  514. RayK<K>& __restrict__ ray,
  515. RayQueryContext* context)
  516. {
  517. BVH* __restrict__ bvh = (BVH*)This->ptr;
  518. /* we may traverse an empty BVH in case all geometry was invalid */
  519. if (bvh->root == BVH::emptyNode)
  520. return;
  521. #if ENABLE_FAST_COHERENT_CODEPATHS == 1
  522. assert(context);
  523. if (unlikely(types == BVH_AN1 && context->user && context->isCoherent()))
  524. {
  525. occludedCoherent(valid_i, This, ray, context);
  526. return;
  527. }
  528. #endif
  529. /* filter out already occluded and invalid rays */
  530. vbool<K> valid = (*valid_i == -1) & (ray.tfar >= 0.0f);
  531. #if defined(EMBREE_IGNORE_INVALID_RAYS)
  532. valid &= ray.valid();
  533. #endif
  534. /* return if there are no valid rays */
  535. const size_t valid_bits = movemask(valid);
  536. if (unlikely(valid_bits == 0)) return;
  537. /* verify correct input */
  538. assert(all(valid, ray.valid()));
  539. assert(all(valid, ray.tnear() >= 0.0f));
  540. assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
  541. Precalculations pre(valid, ray);
  542. /* load ray */
  543. TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
  544. const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
  545. const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
  546. tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf));
  547. tray.tfar = select(valid, org_ray_tfar , vfloat<K>(neg_inf));
  548. vbool<K> terminated = !valid;
  549. const vfloat<K> inf = vfloat<K>(pos_inf);
  550. /* determine switch threshold based on flags */
  551. const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent;
  552. /* allocate stack and push root node */
  553. vfloat<K> stack_near[stackSizeChunk];
  554. NodeRef stack_node[stackSizeChunk];
  555. stack_node[0] = BVH::invalidNode;
  556. stack_near[0] = inf;
  557. stack_node[1] = bvh->root;
  558. stack_near[1] = tray.tnear;
  559. NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk;
  560. NodeRef* __restrict__ sptr_node = stack_node + 2;
  561. vfloat<K>* __restrict__ sptr_near = stack_near + 2;
  562. while (1) pop:
  563. {
  564. /* pop next node from stack */
  565. assert(sptr_node > stack_node);
  566. sptr_node--;
  567. sptr_near--;
  568. NodeRef cur = *sptr_node;
  569. if (unlikely(cur == BVH::invalidNode)) {
  570. assert(sptr_node == stack_node);
  571. break;
  572. }
  573. /* cull node if behind closest hit point */
  574. vfloat<K> curDist = *sptr_near;
  575. const vbool<K> active = curDist < tray.tfar;
  576. if (unlikely(none(active)))
  577. continue;
  578. /* switch to single ray traversal */
  579. #if (!defined(__WIN32__) || defined(__X86_64__)) && ((defined(__aarch64__)) || defined(__SSE4_2__))
  580. #if FORCE_SINGLE_MODE == 0
  581. if (single)
  582. #endif
  583. {
  584. size_t bits = movemask(active);
  585. #if FORCE_SINGLE_MODE == 0
  586. if (unlikely(popcnt(bits) <= switchThreshold))
  587. #endif
  588. {
  589. for (; bits!=0; ) {
  590. const size_t i = bscf(bits);
  591. if (occluded1(This, bvh, cur, i, pre, ray, tray, context))
  592. set(terminated, i);
  593. }
  594. if (all(terminated)) break;
  595. tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar);
  596. continue;
  597. }
  598. }
  599. #endif
  600. while (likely(!cur.isLeaf()))
  601. {
  602. /* process nodes */
  603. const vbool<K> valid_node = tray.tfar > curDist;
  604. STAT3(shadow.trav_nodes, 1, popcnt(valid_node), K);
  605. const NodeRef nodeRef = cur;
  606. const BaseNode* __restrict__ const node = nodeRef.baseNode();
  607. /* set cur to invalid */
  608. cur = BVH::emptyNode;
  609. curDist = pos_inf;
  610. for (unsigned i = 0; i < N; i++)
  611. {
  612. const NodeRef child = node->children[i];
  613. if (unlikely(child == BVH::emptyNode)) break;
  614. vfloat<K> lnearP;
  615. vbool<K> lhit = valid_node;
  616. BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
  617. /* if we hit the child we push the previously hit node onto the stack, and continue with the currently hit child */
  618. if (likely(any(lhit)))
  619. {
  620. assert(sptr_node < stackEnd);
  621. assert(child != BVH::emptyNode);
  622. const vfloat<K> childDist = select(lhit, lnearP, inf);
  623. /* push 'cur' node onto stack and continue with hit child */
  624. if (likely(cur != BVH::emptyNode)) {
  625. *sptr_node = cur; sptr_node++;
  626. *sptr_near = curDist; sptr_near++;
  627. }
  628. curDist = childDist;
  629. cur = child;
  630. }
  631. }
  632. if (unlikely(cur == BVH::emptyNode))
  633. goto pop;
  634. #if SWITCH_DURING_DOWN_TRAVERSAL == 1
  635. if (single)
  636. {
  637. // seems to be the best place for testing utilization
  638. if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold))
  639. {
  640. *sptr_node++ = cur;
  641. *sptr_near++ = curDist;
  642. goto pop;
  643. }
  644. }
  645. #endif
  646. }
  647. /* return if stack is empty */
  648. if (unlikely(cur == BVH::invalidNode)) {
  649. assert(sptr_node == stack_node);
  650. break;
  651. }
  652. /* intersect leaf */
  653. assert(cur != BVH::emptyNode);
  654. const vbool<K> valid_leaf = tray.tfar > curDist;
  655. STAT3(shadow.trav_leaves, 1, popcnt(valid_leaf), K);
  656. if (unlikely(none(valid_leaf))) continue;
  657. size_t items; const Primitive* prim = (Primitive*) cur.leaf(items);
  658. size_t lazy_node = 0;
  659. terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node);
  660. if (all(terminated)) break;
  661. tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays
  662. if (unlikely(lazy_node)) {
  663. *sptr_node = lazy_node; sptr_node++;
  664. *sptr_near = neg_inf; sptr_near++;
  665. }
  666. }
  667. vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf);
  668. }
  669. template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
  670. void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occludedCoherent(vint<K>* __restrict__ valid_i,
  671. Accel::Intersectors* __restrict__ This,
  672. RayK<K>& __restrict__ ray,
  673. RayQueryContext* context)
  674. {
  675. BVH* __restrict__ bvh = (BVH*)This->ptr;
  676. /* filter out invalid rays */
  677. vbool<K> valid = *valid_i == -1;
  678. #if defined(EMBREE_IGNORE_INVALID_RAYS)
  679. valid &= ray.valid();
  680. #endif
  681. /* return if there are no valid rays */
  682. size_t valid_bits = movemask(valid);
  683. if (unlikely(valid_bits == 0)) return;
  684. /* verify correct input */
  685. assert(all(valid, ray.valid()));
  686. assert(all(valid, ray.tnear() >= 0.0f));
  687. assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
  688. Precalculations pre(valid,ray);
  689. /* load ray */
  690. TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
  691. const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
  692. const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
  693. vbool<K> terminated = !valid;
  694. vint<K> octant = ray.octant();
  695. octant = select(valid, octant, vint<K>(0xffffffff));
  696. do
  697. {
  698. const size_t valid_index = bsf(valid_bits);
  699. vbool<K> octant_valid = octant[valid_index] == octant;
  700. valid_bits &= ~(size_t)movemask(octant_valid);
  701. tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
  702. tray.tfar = select(octant_valid, org_ray_tfar, vfloat<K>(neg_inf));
  703. Frustum<robust> frustum;
  704. frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N);
  705. StackItemMaskT<NodeRef> stack[stackSizeSingle]; // stack of nodes
  706. StackItemMaskT<NodeRef>* stackPtr = stack + 1; // current stack pointer
  707. stack[0].ptr = bvh->root;
  708. stack[0].mask = movemask(octant_valid);
  709. while (1) pop:
  710. {
  711. /* pop next node from stack */
  712. if (unlikely(stackPtr == stack)) break;
  713. stackPtr--;
  714. NodeRef cur = NodeRef(stackPtr->ptr);
  715. /* cull node of active rays have already been terminated */
  716. size_t m_active = (size_t)stackPtr->mask & (~(size_t)movemask(terminated));
  717. if (unlikely(m_active == 0)) continue;
  718. while (likely(!cur.isLeaf()))
  719. {
  720. /* process nodes */
  721. //STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
  722. const NodeRef nodeRef = cur;
  723. const AABBNode* __restrict__ const node = nodeRef.getAABBNode();
  724. vfloat<N> fmin;
  725. size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin);
  726. if (unlikely(!m_frustum_node)) goto pop;
  727. cur = BVH::emptyNode;
  728. m_active = 0;
  729. #if defined(__AVX__)
  730. //STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1);
  731. #endif
  732. //size_t num_child_hits = 0;
  733. do {
  734. const size_t i = bscf(m_frustum_node);
  735. vfloat<K> lnearP;
  736. vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored
  737. STAT3(normal.trav_nodes, 1, 1, 1);
  738. BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
  739. if (likely(any(lhit)))
  740. {
  741. const NodeRef child = node->child(i);
  742. assert(child != BVH::emptyNode);
  743. BVHN<N>::prefetch(child);
  744. if (likely(cur != BVH::emptyNode)) {
  745. //num_child_hits++;
  746. stackPtr->ptr = cur;
  747. stackPtr->mask = m_active;
  748. stackPtr++;
  749. }
  750. cur = child;
  751. m_active = movemask(lhit);
  752. }
  753. } while(m_frustum_node);
  754. if (unlikely(cur == BVH::emptyNode)) goto pop;
  755. }
  756. /* intersect leaf */
  757. assert(cur != BVH::invalidNode);
  758. assert(cur != BVH::emptyNode);
  759. #if defined(__AVX__)
  760. STAT3(normal.trav_leaves, 1, popcnt(m_active), K);
  761. #endif
  762. if (unlikely(!m_active)) continue;
  763. size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
  764. size_t lazy_node = 0;
  765. terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node);
  766. octant_valid &= !terminated;
  767. if (unlikely(none(octant_valid))) break;
  768. tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays
  769. if (unlikely(lazy_node)) {
  770. stackPtr->ptr = lazy_node;
  771. stackPtr->mask = movemask(octant_valid);
  772. stackPtr++;
  773. }
  774. }
  775. } while(valid_bits);
  776. vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf);
  777. }
  778. }
  779. }