123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918 |
- // Copyright 2009-2021 Intel Corporation
- // SPDX-License-Identifier: Apache-2.0
- #include "bvh_intersector_hybrid.h"
- #include "bvh_traverser1.h"
- #include "node_intersector1.h"
- #include "node_intersector_packet.h"
- #include "../geometry/intersector_iterators.h"
- #include "../geometry/triangle_intersector.h"
- #include "../geometry/trianglev_intersector.h"
- #include "../geometry/trianglev_mb_intersector.h"
- #include "../geometry/trianglei_intersector.h"
- #include "../geometry/quadv_intersector.h"
- #include "../geometry/quadi_intersector.h"
- #include "../geometry/curveNv_intersector.h"
- #include "../geometry/curveNi_intersector.h"
- #include "../geometry/curveNi_mb_intersector.h"
- #include "../geometry/linei_intersector.h"
- #include "../geometry/subdivpatch1_intersector.h"
- #include "../geometry/object_intersector.h"
- #include "../geometry/instance_intersector.h"
- #include "../geometry/instance_array_intersector.h"
- #include "../geometry/subgrid_intersector.h"
- #include "../geometry/subgrid_mb_intersector.h"
- #include "../geometry/curve_intersector_virtual.h"
- #define SWITCH_DURING_DOWN_TRAVERSAL 1
- #define FORCE_SINGLE_MODE 0
- #define ENABLE_FAST_COHERENT_CODEPATHS 1
- namespace embree
- {
- namespace isa
- {
- template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
- void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect1(Accel::Intersectors* This,
- const BVH* bvh,
- NodeRef root,
- size_t k,
- Precalculations& pre,
- RayHitK<K>& ray,
- const TravRayK<K, robust>& tray,
- RayQueryContext* context)
- {
- /* stack state */
- StackItemT<NodeRef> stack[stackSizeSingle]; // stack of nodes
- StackItemT<NodeRef>* stackPtr = stack + 1; // current stack pointer
- StackItemT<NodeRef>* stackEnd = stack + stackSizeSingle;
- stack[0].ptr = root;
- stack[0].dist = neg_inf;
- /* load the ray into SIMD registers */
- TravRay<N,robust> tray1;
- tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]);
- /* pop loop */
- while (true) pop:
- {
- /* pop next node */
- if (unlikely(stackPtr == stack)) break;
- stackPtr--;
- NodeRef cur = NodeRef(stackPtr->ptr);
- /* if popped node is too far, pop next one */
- if (unlikely(*(float*)&stackPtr->dist > ray.tfar[k]))
- continue;
- /* downtraversal loop */
- while (true)
- {
- /* intersect node */
- size_t mask; vfloat<N> tNear;
- STAT3(normal.trav_nodes, 1, 1, 1);
- bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask);
- if (unlikely(!nodeIntersected)) { STAT3(normal.trav_nodes,-1,-1,-1); break; }
- /* if no child is hit, pop next node */
- if (unlikely(mask == 0))
- goto pop;
- /* select next child and push other children */
- BVHNNodeTraverser1Hit<N, types>::traverseClosestHit(cur, mask, tNear, stackPtr, stackEnd);
- }
- /* this is a leaf node */
- assert(cur != BVH::emptyNode);
- STAT3(normal.trav_leaves, 1, 1, 1);
- size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
- size_t lazy_node = 0;
- PrimitiveIntersectorK::intersect(This, pre, ray, k, context, prim, num, tray1, lazy_node);
- tray1.tfar = ray.tfar[k];
- if (unlikely(lazy_node)) {
- stackPtr->ptr = lazy_node;
- stackPtr->dist = neg_inf;
- stackPtr++;
- }
- }
- }
- template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
- void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect(vint<K>* __restrict__ valid_i,
- Accel::Intersectors* __restrict__ This,
- RayHitK<K>& __restrict__ ray,
- RayQueryContext* __restrict__ context)
- {
- BVH* __restrict__ bvh = (BVH*)This->ptr;
-
- /* we may traverse an empty BVH in case all geometry was invalid */
- if (bvh->root == BVH::emptyNode)
- return;
-
- #if ENABLE_FAST_COHERENT_CODEPATHS == 1
- assert(context);
- if (unlikely(types == BVH_AN1 && context->user && context->isCoherent()))
- {
- intersectCoherent(valid_i, This, ray, context);
- return;
- }
- #endif
- /* filter out invalid rays */
- vbool<K> valid = *valid_i == -1;
- #if defined(EMBREE_IGNORE_INVALID_RAYS)
- valid &= ray.valid();
- #endif
- /* return if there are no valid rays */
- size_t valid_bits = movemask(valid);
- #if defined(__AVX__)
- STAT3(normal.trav_hit_boxes[popcnt(movemask(valid))], 1, 1, 1);
- #endif
- if (unlikely(valid_bits == 0)) return;
- /* verify correct input */
- assert(all(valid, ray.valid()));
- assert(all(valid, ray.tnear() >= 0.0f));
- assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
- Precalculations pre(valid, ray);
- /* load ray */
- TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
- const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
- const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
- if (single)
- {
- tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf));
- tray.tfar = select(valid, org_ray_tfar , vfloat<K>(neg_inf));
-
- for (; valid_bits!=0; ) {
- const size_t i = bscf(valid_bits);
- intersect1(This, bvh, bvh->root, i, pre, ray, tray, context);
- }
- return;
- }
- /* determine switch threshold based on flags */
- const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent;
- vint<K> octant = ray.octant();
- octant = select(valid, octant, vint<K>(0xffffffff));
- /* test whether we have ray with opposing direction signs in the packet */
- bool split = false;
- {
- size_t bits = valid_bits;
- vbool<K> vsplit( false );
- do
- {
- const size_t valid_index = bsf(bits);
- vbool<K> octant_valid = octant[valid_index] == octant;
- bits &= ~(size_t)movemask(octant_valid);
- vsplit |= vint<K>(octant[valid_index]) == (octant^vint<K>(0x7));
- } while (bits);
- if (any(vsplit)) split = true;
- }
- do
- {
- const size_t valid_index = bsf(valid_bits);
- const vint<K> diff_octant = vint<K>(octant[valid_index])^octant;
- const vint<K> count_diff_octant = \
- ((diff_octant >> 2) & 1) +
- ((diff_octant >> 1) & 1) +
- ((diff_octant >> 0) & 1);
- vbool<K> octant_valid = (count_diff_octant <= 1) & (octant != vint<K>(0xffffffff));
- if (!single || !split) octant_valid = valid; // deactivate octant sorting in pure chunk mode, otherwise instance traversal performance goes down
- octant = select(octant_valid,vint<K>(0xffffffff),octant);
- valid_bits &= ~(size_t)movemask(octant_valid);
- tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
- tray.tfar = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf));
- /* allocate stack and push root node */
- vfloat<K> stack_near[stackSizeChunk];
- NodeRef stack_node[stackSizeChunk];
- stack_node[0] = BVH::invalidNode;
- stack_near[0] = inf;
- stack_node[1] = bvh->root;
- stack_near[1] = tray.tnear;
- NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk;
- NodeRef* __restrict__ sptr_node = stack_node + 2;
- vfloat<K>* __restrict__ sptr_near = stack_near + 2;
- while (1) pop:
- {
- /* pop next node from stack */
- assert(sptr_node > stack_node);
- sptr_node--;
- sptr_near--;
- NodeRef cur = *sptr_node;
- if (unlikely(cur == BVH::invalidNode)) {
- assert(sptr_node == stack_node);
- break;
- }
- /* cull node if behind closest hit point */
- vfloat<K> curDist = *sptr_near;
- const vbool<K> active = curDist < tray.tfar;
- if (unlikely(none(active)))
- continue;
- /* switch to single ray traversal */
- #if (!defined(__WIN32__) || defined(__X86_64__)) && ((defined(__aarch64__)) || defined(__SSE4_2__))
- #if FORCE_SINGLE_MODE == 0
- if (single)
- #endif
- {
- size_t bits = movemask(active);
- #if FORCE_SINGLE_MODE == 0
- if (unlikely(popcnt(bits) <= switchThreshold))
- #endif
- {
- for (; bits!=0; ) {
- const size_t i = bscf(bits);
- intersect1(This, bvh, cur, i, pre, ray, tray, context);
- }
- tray.tfar = min(tray.tfar, ray.tfar);
- continue;
- }
- }
- #endif
- while (likely(!cur.isLeaf()))
- {
- /* process nodes */
- const vbool<K> valid_node = tray.tfar > curDist;
- STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
- const NodeRef nodeRef = cur;
- const BaseNode* __restrict__ const node = nodeRef.baseNode();
- /* set cur to invalid */
- cur = BVH::emptyNode;
- curDist = pos_inf;
- size_t num_child_hits = 0;
- for (unsigned i = 0; i < N; i++)
- {
- const NodeRef child = node->children[i];
- if (unlikely(child == BVH::emptyNode)) break;
- vfloat<K> lnearP;
- vbool<K> lhit = valid_node;
- BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
- /* if we hit the child we choose to continue with that child if it
- is closer than the current next child, or we push it onto the stack */
- if (likely(any(lhit)))
- {
- assert(sptr_node < stackEnd);
- assert(child != BVH::emptyNode);
- const vfloat<K> childDist = select(lhit, lnearP, inf);
- /* push cur node onto stack and continue with hit child */
- if (any(childDist < curDist))
- {
- if (likely(cur != BVH::emptyNode)) {
- num_child_hits++;
- *sptr_node = cur; sptr_node++;
- *sptr_near = curDist; sptr_near++;
- }
- curDist = childDist;
- cur = child;
- }
- /* push hit child onto stack */
- else {
- num_child_hits++;
- *sptr_node = child; sptr_node++;
- *sptr_near = childDist; sptr_near++;
- }
- }
- }
- #if defined(__AVX__)
- //STAT3(normal.trav_hit_boxes[num_child_hits], 1, 1, 1);
- #endif
- if (unlikely(cur == BVH::emptyNode))
- goto pop;
-
- /* improved distance sorting for 3 or more hits */
- if (unlikely(num_child_hits >= 2))
- {
- if (any(sptr_near[-2] < sptr_near[-1]))
- {
- std::swap(sptr_near[-2],sptr_near[-1]);
- std::swap(sptr_node[-2],sptr_node[-1]);
- }
- if (unlikely(num_child_hits >= 3))
- {
- if (any(sptr_near[-3] < sptr_near[-1]))
- {
- std::swap(sptr_near[-3],sptr_near[-1]);
- std::swap(sptr_node[-3],sptr_node[-1]);
- }
- if (any(sptr_near[-3] < sptr_near[-2]))
- {
- std::swap(sptr_near[-3],sptr_near[-2]);
- std::swap(sptr_node[-3],sptr_node[-2]);
- }
- }
- }
- #if SWITCH_DURING_DOWN_TRAVERSAL == 1
- if (single)
- {
- // seems to be the best place for testing utilization
- if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold))
- {
- *sptr_node++ = cur;
- *sptr_near++ = curDist;
- goto pop;
- }
- }
- #endif
- }
- /* return if stack is empty */
- if (unlikely(cur == BVH::invalidNode)) {
- assert(sptr_node == stack_node);
- break;
- }
- /* intersect leaf */
- assert(cur != BVH::emptyNode);
- const vbool<K> valid_leaf = tray.tfar > curDist;
- STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K);
- if (unlikely(none(valid_leaf))) continue;
- size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
- size_t lazy_node = 0;
- PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node);
- tray.tfar = select(valid_leaf, ray.tfar, tray.tfar);
- if (unlikely(lazy_node)) {
- *sptr_node = lazy_node; sptr_node++;
- *sptr_near = neg_inf; sptr_near++;
- }
- }
- } while(valid_bits);
- }
- template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
- void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersectCoherent(vint<K>* __restrict__ valid_i,
- Accel::Intersectors* __restrict__ This,
- RayHitK<K>& __restrict__ ray,
- RayQueryContext* context)
- {
- BVH* __restrict__ bvh = (BVH*)This->ptr;
-
- /* filter out invalid rays */
- vbool<K> valid = *valid_i == -1;
- #if defined(EMBREE_IGNORE_INVALID_RAYS)
- valid &= ray.valid();
- #endif
- /* return if there are no valid rays */
- size_t valid_bits = movemask(valid);
- if (unlikely(valid_bits == 0)) return;
- /* verify correct input */
- assert(all(valid, ray.valid()));
- assert(all(valid, ray.tnear() >= 0.0f));
- assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
- Precalculations pre(valid, ray);
- /* load ray */
- TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
- const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
- const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
- vint<K> octant = ray.octant();
- octant = select(valid, octant, vint<K>(0xffffffff));
- do
- {
- const size_t valid_index = bsf(valid_bits);
- const vbool<K> octant_valid = octant[valid_index] == octant;
- valid_bits &= ~(size_t)movemask(octant_valid);
- tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
- tray.tfar = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf));
- Frustum<robust> frustum;
- frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N);
- StackItemT<NodeRef> stack[stackSizeSingle]; // stack of nodes
- StackItemT<NodeRef>* stackPtr = stack + 1; // current stack pointer
- stack[0].ptr = bvh->root;
- stack[0].dist = neg_inf;
- while (1) pop:
- {
- /* pop next node from stack */
- if (unlikely(stackPtr == stack)) break;
- stackPtr--;
- NodeRef cur = NodeRef(stackPtr->ptr);
- /* cull node if behind closest hit point */
- vfloat<K> curDist = *(float*)&stackPtr->dist;
- const vbool<K> active = curDist < tray.tfar;
- if (unlikely(none(active))) continue;
- while (likely(!cur.isLeaf()))
- {
- /* process nodes */
- //STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
- const NodeRef nodeRef = cur;
- const AABBNode* __restrict__ const node = nodeRef.getAABBNode();
- vfloat<N> fmin;
- size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin);
- if (unlikely(!m_frustum_node)) goto pop;
- cur = BVH::emptyNode;
- curDist = pos_inf;
-
- #if defined(__AVX__)
- //STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1);
- #endif
- size_t num_child_hits = 0;
- do {
- const size_t i = bscf(m_frustum_node);
- vfloat<K> lnearP;
- vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored
- STAT3(normal.trav_nodes, 1, 1, 1);
- BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
- if (likely(any(lhit)))
- {
- const vfloat<K> childDist = fmin[i];
- const NodeRef child = node->child(i);
- BVHN<N>::prefetch(child);
- if (any(childDist < curDist))
- {
- if (likely(cur != BVH::emptyNode)) {
- num_child_hits++;
- stackPtr->ptr = cur;
- *(float*)&stackPtr->dist = toScalar(curDist);
- stackPtr++;
- }
- curDist = childDist;
- cur = child;
- }
- /* push hit child onto stack */
- else {
- num_child_hits++;
- stackPtr->ptr = child;
- *(float*)&stackPtr->dist = toScalar(childDist);
- stackPtr++;
- }
- }
- } while(m_frustum_node);
- if (unlikely(cur == BVH::emptyNode)) goto pop;
- /* improved distance sorting for 3 or more hits */
- if (unlikely(num_child_hits >= 2))
- {
- if (stackPtr[-2].dist < stackPtr[-1].dist)
- std::swap(stackPtr[-2],stackPtr[-1]);
- if (unlikely(num_child_hits >= 3))
- {
- if (stackPtr[-3].dist < stackPtr[-1].dist)
- std::swap(stackPtr[-3],stackPtr[-1]);
- if (stackPtr[-3].dist < stackPtr[-2].dist)
- std::swap(stackPtr[-3],stackPtr[-2]);
- }
- }
- }
- /* intersect leaf */
- assert(cur != BVH::invalidNode);
- assert(cur != BVH::emptyNode);
- const vbool<K> valid_leaf = tray.tfar > curDist;
- STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K);
- if (unlikely(none(valid_leaf))) continue;
- size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
- size_t lazy_node = 0;
- PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node);
- /* reduce max distance interval on successful intersection */
- if (likely(any((ray.tfar < tray.tfar) & valid_leaf)))
- {
- tray.tfar = select(valid_leaf, ray.tfar, tray.tfar);
- frustum.template updateMaxDist<K>(tray.tfar);
- }
- if (unlikely(lazy_node)) {
- stackPtr->ptr = lazy_node;
- stackPtr->dist = neg_inf;
- stackPtr++;
- }
- }
-
- } while(valid_bits);
- }
- // ===================================================================================================================================================================
- // ===================================================================================================================================================================
- // ===================================================================================================================================================================
- template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
- bool BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded1(Accel::Intersectors* This,
- const BVH* bvh,
- NodeRef root,
- size_t k,
- Precalculations& pre,
- RayK<K>& ray,
- const TravRayK<K, robust>& tray,
- RayQueryContext* context)
- {
- /* stack state */
- NodeRef stack[stackSizeSingle]; // stack of nodes that still need to get traversed
- NodeRef* stackPtr = stack+1; // current stack pointer
- NodeRef* stackEnd = stack+stackSizeSingle;
- stack[0] = root;
- /* load the ray into SIMD registers */
- TravRay<N,robust> tray1;
- tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]);
- /* pop loop */
- while (true) pop:
- {
- /* pop next node */
- if (unlikely(stackPtr == stack)) break;
- stackPtr--;
- NodeRef cur = (NodeRef)*stackPtr;
- /* downtraversal loop */
- while (true)
- {
- /* intersect node */
- size_t mask; vfloat<N> tNear;
- STAT3(shadow.trav_nodes, 1, 1, 1);
- bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask);
- if (unlikely(!nodeIntersected)) { STAT3(shadow.trav_nodes,-1,-1,-1); break; }
- /* if no child is hit, pop next node */
- if (unlikely(mask == 0))
- goto pop;
- /* select next child and push other children */
- BVHNNodeTraverser1Hit<N, types>::traverseAnyHit(cur, mask, tNear, stackPtr, stackEnd);
- }
- /* this is a leaf node */
- assert(cur != BVH::emptyNode);
- STAT3(shadow.trav_leaves, 1, 1, 1);
- size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
- size_t lazy_node = 0;
- if (PrimitiveIntersectorK::occluded(This, pre, ray, k, context, prim, num, tray1, lazy_node)) {
- ray.tfar[k] = neg_inf;
- return true;
- }
- if (unlikely(lazy_node)) {
- *stackPtr = lazy_node;
- stackPtr++;
- }
- }
- return false;
- }
- template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
- void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded(vint<K>* __restrict__ valid_i,
- Accel::Intersectors* __restrict__ This,
- RayK<K>& __restrict__ ray,
- RayQueryContext* context)
- {
- BVH* __restrict__ bvh = (BVH*)This->ptr;
-
- /* we may traverse an empty BVH in case all geometry was invalid */
- if (bvh->root == BVH::emptyNode)
- return;
-
- #if ENABLE_FAST_COHERENT_CODEPATHS == 1
- assert(context);
- if (unlikely(types == BVH_AN1 && context->user && context->isCoherent()))
- {
- occludedCoherent(valid_i, This, ray, context);
- return;
- }
- #endif
- /* filter out already occluded and invalid rays */
- vbool<K> valid = (*valid_i == -1) & (ray.tfar >= 0.0f);
- #if defined(EMBREE_IGNORE_INVALID_RAYS)
- valid &= ray.valid();
- #endif
- /* return if there are no valid rays */
- const size_t valid_bits = movemask(valid);
- if (unlikely(valid_bits == 0)) return;
- /* verify correct input */
- assert(all(valid, ray.valid()));
- assert(all(valid, ray.tnear() >= 0.0f));
- assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
- Precalculations pre(valid, ray);
- /* load ray */
- TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
- const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
- const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
- tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf));
- tray.tfar = select(valid, org_ray_tfar , vfloat<K>(neg_inf));
- vbool<K> terminated = !valid;
- const vfloat<K> inf = vfloat<K>(pos_inf);
- /* determine switch threshold based on flags */
- const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent;
- /* allocate stack and push root node */
- vfloat<K> stack_near[stackSizeChunk];
- NodeRef stack_node[stackSizeChunk];
- stack_node[0] = BVH::invalidNode;
- stack_near[0] = inf;
- stack_node[1] = bvh->root;
- stack_near[1] = tray.tnear;
- NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk;
- NodeRef* __restrict__ sptr_node = stack_node + 2;
- vfloat<K>* __restrict__ sptr_near = stack_near + 2;
- while (1) pop:
- {
- /* pop next node from stack */
- assert(sptr_node > stack_node);
- sptr_node--;
- sptr_near--;
- NodeRef cur = *sptr_node;
- if (unlikely(cur == BVH::invalidNode)) {
- assert(sptr_node == stack_node);
- break;
- }
- /* cull node if behind closest hit point */
- vfloat<K> curDist = *sptr_near;
- const vbool<K> active = curDist < tray.tfar;
- if (unlikely(none(active)))
- continue;
- /* switch to single ray traversal */
- #if (!defined(__WIN32__) || defined(__X86_64__)) && ((defined(__aarch64__)) || defined(__SSE4_2__))
- #if FORCE_SINGLE_MODE == 0
- if (single)
- #endif
- {
- size_t bits = movemask(active);
- #if FORCE_SINGLE_MODE == 0
- if (unlikely(popcnt(bits) <= switchThreshold))
- #endif
- {
- for (; bits!=0; ) {
- const size_t i = bscf(bits);
- if (occluded1(This, bvh, cur, i, pre, ray, tray, context))
- set(terminated, i);
- }
- if (all(terminated)) break;
- tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar);
- continue;
- }
- }
- #endif
- while (likely(!cur.isLeaf()))
- {
- /* process nodes */
- const vbool<K> valid_node = tray.tfar > curDist;
- STAT3(shadow.trav_nodes, 1, popcnt(valid_node), K);
- const NodeRef nodeRef = cur;
- const BaseNode* __restrict__ const node = nodeRef.baseNode();
- /* set cur to invalid */
- cur = BVH::emptyNode;
- curDist = pos_inf;
- for (unsigned i = 0; i < N; i++)
- {
- const NodeRef child = node->children[i];
- if (unlikely(child == BVH::emptyNode)) break;
- vfloat<K> lnearP;
- vbool<K> lhit = valid_node;
- BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
- /* if we hit the child we push the previously hit node onto the stack, and continue with the currently hit child */
- if (likely(any(lhit)))
- {
- assert(sptr_node < stackEnd);
- assert(child != BVH::emptyNode);
- const vfloat<K> childDist = select(lhit, lnearP, inf);
- /* push 'cur' node onto stack and continue with hit child */
- if (likely(cur != BVH::emptyNode)) {
- *sptr_node = cur; sptr_node++;
- *sptr_near = curDist; sptr_near++;
- }
- curDist = childDist;
- cur = child;
- }
- }
- if (unlikely(cur == BVH::emptyNode))
- goto pop;
- #if SWITCH_DURING_DOWN_TRAVERSAL == 1
- if (single)
- {
- // seems to be the best place for testing utilization
- if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold))
- {
- *sptr_node++ = cur;
- *sptr_near++ = curDist;
- goto pop;
- }
- }
- #endif
- }
- /* return if stack is empty */
- if (unlikely(cur == BVH::invalidNode)) {
- assert(sptr_node == stack_node);
- break;
- }
- /* intersect leaf */
- assert(cur != BVH::emptyNode);
- const vbool<K> valid_leaf = tray.tfar > curDist;
- STAT3(shadow.trav_leaves, 1, popcnt(valid_leaf), K);
- if (unlikely(none(valid_leaf))) continue;
- size_t items; const Primitive* prim = (Primitive*) cur.leaf(items);
- size_t lazy_node = 0;
- terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node);
- if (all(terminated)) break;
- tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays
- if (unlikely(lazy_node)) {
- *sptr_node = lazy_node; sptr_node++;
- *sptr_near = neg_inf; sptr_near++;
- }
- }
- vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf);
- }
- template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
- void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occludedCoherent(vint<K>* __restrict__ valid_i,
- Accel::Intersectors* __restrict__ This,
- RayK<K>& __restrict__ ray,
- RayQueryContext* context)
- {
- BVH* __restrict__ bvh = (BVH*)This->ptr;
-
- /* filter out invalid rays */
- vbool<K> valid = *valid_i == -1;
- #if defined(EMBREE_IGNORE_INVALID_RAYS)
- valid &= ray.valid();
- #endif
- /* return if there are no valid rays */
- size_t valid_bits = movemask(valid);
- if (unlikely(valid_bits == 0)) return;
- /* verify correct input */
- assert(all(valid, ray.valid()));
- assert(all(valid, ray.tnear() >= 0.0f));
- assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
- Precalculations pre(valid,ray);
- /* load ray */
- TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
- const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
- const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
- vbool<K> terminated = !valid;
- vint<K> octant = ray.octant();
- octant = select(valid, octant, vint<K>(0xffffffff));
- do
- {
- const size_t valid_index = bsf(valid_bits);
- vbool<K> octant_valid = octant[valid_index] == octant;
- valid_bits &= ~(size_t)movemask(octant_valid);
- tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
- tray.tfar = select(octant_valid, org_ray_tfar, vfloat<K>(neg_inf));
- Frustum<robust> frustum;
- frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N);
- StackItemMaskT<NodeRef> stack[stackSizeSingle]; // stack of nodes
- StackItemMaskT<NodeRef>* stackPtr = stack + 1; // current stack pointer
- stack[0].ptr = bvh->root;
- stack[0].mask = movemask(octant_valid);
- while (1) pop:
- {
- /* pop next node from stack */
- if (unlikely(stackPtr == stack)) break;
- stackPtr--;
- NodeRef cur = NodeRef(stackPtr->ptr);
- /* cull node of active rays have already been terminated */
- size_t m_active = (size_t)stackPtr->mask & (~(size_t)movemask(terminated));
- if (unlikely(m_active == 0)) continue;
- while (likely(!cur.isLeaf()))
- {
- /* process nodes */
- //STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
- const NodeRef nodeRef = cur;
- const AABBNode* __restrict__ const node = nodeRef.getAABBNode();
- vfloat<N> fmin;
- size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin);
- if (unlikely(!m_frustum_node)) goto pop;
- cur = BVH::emptyNode;
- m_active = 0;
- #if defined(__AVX__)
- //STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1);
- #endif
- //size_t num_child_hits = 0;
- do {
- const size_t i = bscf(m_frustum_node);
- vfloat<K> lnearP;
- vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored
- STAT3(normal.trav_nodes, 1, 1, 1);
- BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
- if (likely(any(lhit)))
- {
- const NodeRef child = node->child(i);
- assert(child != BVH::emptyNode);
- BVHN<N>::prefetch(child);
- if (likely(cur != BVH::emptyNode)) {
- //num_child_hits++;
- stackPtr->ptr = cur;
- stackPtr->mask = m_active;
- stackPtr++;
- }
- cur = child;
- m_active = movemask(lhit);
- }
- } while(m_frustum_node);
- if (unlikely(cur == BVH::emptyNode)) goto pop;
- }
- /* intersect leaf */
- assert(cur != BVH::invalidNode);
- assert(cur != BVH::emptyNode);
- #if defined(__AVX__)
- STAT3(normal.trav_leaves, 1, popcnt(m_active), K);
- #endif
- if (unlikely(!m_active)) continue;
- size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
- size_t lazy_node = 0;
- terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node);
- octant_valid &= !terminated;
- if (unlikely(none(octant_valid))) break;
- tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays
- if (unlikely(lazy_node)) {
- stackPtr->ptr = lazy_node;
- stackPtr->mask = movemask(octant_valid);
- stackPtr++;
- }
- }
- } while(valid_bits);
- vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf);
- }
- }
- }
|