// bvh_builder_msmblur.h
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #define MBLUR_NUM_TEMPORAL_BINS 2
  5. #define MBLUR_NUM_OBJECT_BINS 32
  6. #include "../bvh/bvh.h"
  7. #include "../common/primref_mb.h"
  8. #include "heuristic_binning_array_aligned.h"
  9. #include "heuristic_timesplit_array.h"
  10. namespace embree
  11. {
  12. namespace isa
  13. {
  14. template<typename T>
  15. struct SharedVector
  16. {
  17. __forceinline SharedVector() {}
  18. __forceinline SharedVector(T* ptr, size_t refCount = 1)
  19. : prims(ptr), refCount(refCount) {}
  20. __forceinline void incRef() {
  21. refCount++;
  22. }
  23. __forceinline void decRef()
  24. {
  25. if (--refCount == 0)
  26. delete prims;
  27. }
  28. T* prims;
  29. size_t refCount;
  30. };
/*! Tracks the child build records of a node under construction, together
 *  with reference-counted ownership of the primitive vectors the records
 *  point into. Temporal splits allocate fresh primitive vectors; this list
 *  keeps each vector alive exactly as long as some child still uses it. */
template<typename BuildRecord, int MAX_BRANCHING_FACTOR>
struct LocalChildListT
{
  typedef SharedVector<mvector<PrimRefMB>> SharedPrimRefVector;

  /*! starts with a single child, the local subtree root */
  __forceinline LocalChildListT (const BuildRecord& record)
    : numChildren(1), numSharedPrimVecs(1)
  {
    /* the local root will be freed in the ancestor where it was created (thus refCount is 2) */
    children[0] = record;
    primvecs[0] = new (&sharedPrimVecs[0]) SharedPrimRefVector(record.prims.prims, 2);
  }

  /*! releases one reference per child; vectors whose count hits zero are freed */
  __forceinline ~LocalChildListT()
  {
    for (size_t i = 0; i < numChildren; i++)
      primvecs[i]->decRef();
  }

  __forceinline BuildRecord& operator[] ( const size_t i ) {
    return children[i];
  }

  /*! current number of children */
  __forceinline size_t size() const {
    return numChildren;
  }

  /*! Replaces child 'bestChild' by the two records resulting from its split.
   *  Each new record either keeps sharing the parent's primitive vector
   *  (incRef) or adopts a freshly allocated one. 'new_vector' is the vector
   *  allocated by a temporal split (or null); ownership passes to the
   *  SharedPrimRefVector bookkeeping, hence the release() below. */
  __forceinline void split(ssize_t bestChild, const BuildRecord& lrecord, const BuildRecord& rrecord, std::unique_ptr<mvector<PrimRefMB>> new_vector)
  {
    SharedPrimRefVector* bsharedPrimVec = primvecs[bestChild];
    if (lrecord.prims.prims == bsharedPrimVec->prims) {
      primvecs[bestChild] = bsharedPrimVec;
      bsharedPrimVec->incRef();
    }
    else {
      primvecs[bestChild] = new (&sharedPrimVecs[numSharedPrimVecs++]) SharedPrimRefVector(lrecord.prims.prims);
    }
    if (rrecord.prims.prims == bsharedPrimVec->prims) {
      primvecs[numChildren] = bsharedPrimVec;
      bsharedPrimVec->incRef();
    }
    else {
      primvecs[numChildren] = new (&sharedPrimVecs[numSharedPrimVecs++]) SharedPrimRefVector(rrecord.prims.prims);
    }
    /* the split child no longer references its old vector */
    bsharedPrimVec->decRef();
    /* ownership was transferred to the ref-counting above; do not double-free */
    new_vector.release();

    children[bestChild] = lrecord;
    children[numChildren] = rrecord;
    numChildren++;
  }

public:
  array_t<BuildRecord,MAX_BRANCHING_FACTOR> children;          //!< child build records
  array_t<SharedPrimRefVector*,MAX_BRANCHING_FACTOR> primvecs; //!< per-child primitive vector ownership
  size_t numChildren;
  /* each split can add at most two new shared vectors, hence 2x capacity */
  array_t<SharedPrimRefVector,2*MAX_BRANCHING_FACTOR> sharedPrimVecs;
  size_t numSharedPrimVecs;
};
  83. template<typename Mesh>
  84. struct RecalculatePrimRef
  85. {
  86. Scene* scene;
  87. __forceinline RecalculatePrimRef (Scene* scene)
  88. : scene(scene) {}
  89. __forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const
  90. {
  91. const unsigned geomID = prim.geomID();
  92. const unsigned primID = prim.primID();
  93. const Mesh* mesh = scene->get<Mesh>(geomID);
  94. const LBBox3fa lbounds = mesh->linearBounds(primID, time_range);
  95. const range<int> tbounds = mesh->timeSegmentRange(time_range);
  96. return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
  97. }
  98. // __noinline is workaround for ICC16 bug under MacOSX
  99. __noinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const
  100. {
  101. const unsigned geomID = prim.geomID();
  102. const unsigned primID = prim.primID();
  103. const Mesh* mesh = scene->get<Mesh>(geomID);
  104. const LBBox3fa lbounds = mesh->linearBounds(space, primID, time_range);
  105. const range<int> tbounds = mesh->timeSegmentRange(time_range);
  106. return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
  107. }
  108. __forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const {
  109. return scene->get<Mesh>(prim.geomID())->linearBounds(prim.primID(), time_range);
  110. }
  111. // __noinline is workaround for ICC16 bug under MacOSX
  112. __noinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const {
  113. return scene->get<Mesh>(prim.geomID())->linearBounds(space, prim.primID(), time_range);
  114. }
  115. };
  116. struct VirtualRecalculatePrimRef
  117. {
  118. Scene* scene;
  119. __forceinline VirtualRecalculatePrimRef (Scene* scene)
  120. : scene(scene) {}
  121. __forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const
  122. {
  123. const unsigned geomID = prim.geomID();
  124. const unsigned primID = prim.primID();
  125. const Geometry* mesh = scene->get(geomID);
  126. const LBBox3fa lbounds = mesh->vlinearBounds(primID, time_range);
  127. const range<int> tbounds = mesh->timeSegmentRange(time_range);
  128. return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
  129. }
  130. __forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const
  131. {
  132. const unsigned geomID = prim.geomID();
  133. const unsigned primID = prim.primID();
  134. const Geometry* mesh = scene->get(geomID);
  135. const LBBox3fa lbounds = mesh->vlinearBounds(space, primID, time_range);
  136. const range<int> tbounds = mesh->timeSegmentRange(time_range);
  137. return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
  138. }
  139. __forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const {
  140. return scene->get(prim.geomID())->vlinearBounds(prim.primID(), time_range);
  141. }
  142. __forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const {
  143. return scene->get(prim.geomID())->vlinearBounds(space, prim.primID(), time_range);
  144. }
  145. };
  146. struct BVHBuilderMSMBlur
  147. {
  148. /*! settings for msmblur builder */
  149. struct Settings
  150. {
  151. /*! default settings */
  152. Settings ()
  153. : branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(8),
  154. travCost(1.0f), intCost(1.0f), singleLeafTimeSegment(false),
  155. singleThreadThreshold(1024) {}
  156. Settings (size_t sahBlockSize, size_t minLeafSize, size_t maxLeafSize, float travCost, float intCost, size_t singleThreadThreshold)
  157. : branchingFactor(2), maxDepth(32), logBlockSize(bsr(sahBlockSize)), minLeafSize(minLeafSize), maxLeafSize(maxLeafSize),
  158. travCost(travCost), intCost(intCost), singleThreadThreshold(singleThreadThreshold)
  159. {
  160. minLeafSize = min(minLeafSize,maxLeafSize);
  161. }
  162. public:
  163. size_t branchingFactor; //!< branching factor of BVH to build
  164. size_t maxDepth; //!< maximum depth of BVH to build
  165. size_t logBlockSize; //!< log2 of blocksize for SAH heuristic
  166. size_t minLeafSize; //!< minimum size of a leaf
  167. size_t maxLeafSize; //!< maximum size of a leaf
  168. float travCost; //!< estimated cost of one traversal step
  169. float intCost; //!< estimated cost of one primitive intersection
  170. bool singleLeafTimeSegment; //!< split time to single time range
  171. size_t singleThreadThreshold; //!< threshold when we switch to single threaded build
  172. };
  173. struct BuildRecord
  174. {
  175. public:
  176. __forceinline BuildRecord () {}
  177. __forceinline BuildRecord (size_t depth)
  178. : depth(depth) {}
  179. __forceinline BuildRecord (const SetMB& prims, size_t depth)
  180. : depth(depth), prims(prims) {}
  181. __forceinline friend bool operator< (const BuildRecord& a, const BuildRecord& b) {
  182. return a.prims.size() < b.prims.size();
  183. }
  184. __forceinline size_t size() const {
  185. return prims.size();
  186. }
  187. public:
  188. size_t depth; //!< Depth of the root of this subtree.
  189. SetMB prims; //!< The list of primitives.
  190. };
  191. struct BuildRecordSplit : public BuildRecord
  192. {
  193. __forceinline BuildRecordSplit () {}
  194. __forceinline BuildRecordSplit (size_t depth)
  195. : BuildRecord(depth) {}
  196. __forceinline BuildRecordSplit (const BuildRecord& record, const BinSplit<MBLUR_NUM_OBJECT_BINS>& split)
  197. : BuildRecord(record), split(split) {}
  198. BinSplit<MBLUR_NUM_OBJECT_BINS> split;
  199. };
/*! Recursive multi-segment motion blur BVH builder.
 *
 *  Builds a BVH over PrimRefMB primitives by recursively applying either
 *  binning-based object splits or temporal splits of the time range.
 *  Temporal splits allocate new primitive vectors whose lifetime is managed
 *  via LocalChildListT/SharedVector reference counting. Node and leaf
 *  creation is delegated to the user-supplied functors. */
template<
  typename NodeRef,
  typename RecalculatePrimRef,
  typename Allocator,
  typename CreateAllocFunc,
  typename CreateNodeFunc,
  typename SetNodeFunc,
  typename CreateLeafFunc,
  typename ProgressMonitor>

class BuilderT
{
  ALIGNED_CLASS_(16);

  static const size_t MAX_BRANCHING_FACTOR = 16;  //!< maximum supported BVH branching factor
  static const size_t MIN_LARGE_LEAF_LEVELS = 8;  //!< create balanced tree if we are that many levels before the maximum tree depth

  typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
  typedef BinSplit<MBLUR_NUM_OBJECT_BINS> Split;
  typedef mvector<PrimRefMB>* PrimRefVector;
  typedef SharedVector<mvector<PrimRefMB>> SharedPrimRefVector;
  typedef LocalChildListT<BuildRecord,MAX_BRANCHING_FACTOR> LocalChildList;
  typedef LocalChildListT<BuildRecordSplit,MAX_BRANCHING_FACTOR> LocalChildListSplit;

public:

  /*! stores settings, split heuristics, and the user callbacks */
  BuilderT (MemoryMonitorInterface* device,
            const RecalculatePrimRef recalculatePrimRef,
            const CreateAllocFunc createAlloc,
            const CreateNodeFunc createNode,
            const SetNodeFunc setNode,
            const CreateLeafFunc createLeaf,
            const ProgressMonitor progressMonitor,
            const Settings& settings)
    : cfg(settings),
      heuristicObjectSplit(),
      heuristicTemporalSplit(device, recalculatePrimRef),
      recalculatePrimRef(recalculatePrimRef), createAlloc(createAlloc), createNode(createNode), setNode(setNode), createLeaf(createLeaf),
      progressMonitor(progressMonitor)
  {
    if (cfg.branchingFactor > MAX_BRANCHING_FACTOR)
      throw_RTCError(RTC_ERROR_UNKNOWN,"bvh_builder: branching factor too large");
  }

  /*! finds the best split */
  const Split find(const SetMB& set)
  {
    /* first try standard object split */
    const Split object_split = heuristicObjectSplit.find(set,cfg.logBlockSize);
    const float object_split_sah = object_split.splitSAH();

    /* test temporal splits only when object split was bad */
    const float leaf_sah = set.leafSAH(cfg.logBlockSize);
    if (object_split_sah < 0.50f*leaf_sah)
      return object_split;

    /* do temporal splits only if the time range is big enough */
    if (set.time_range.size() > 1.01f/float(set.max_num_time_segments))
    {
      const Split temporal_split = heuristicTemporalSplit.find(set,cfg.logBlockSize);
      const float temporal_split_sah = temporal_split.splitSAH();

      /* take temporal split if it improved SAH */
      if (temporal_split_sah < object_split_sah)
        return temporal_split;
    }
    return object_split;
  }

  /*! array partitioning; returns a newly allocated primitive vector only
   *  for temporal splits (null otherwise) */
  __forceinline std::unique_ptr<mvector<PrimRefMB>> split(const Split& split, const SetMB& set, SetMB& lset, SetMB& rset)
  {
    /* perform object split */
    if (likely(split.data == Split::SPLIT_OBJECT)) {
      heuristicObjectSplit.split(split,set,lset,rset);
    }
    /* perform temporal split */
    else if (likely(split.data == Split::SPLIT_TEMPORAL)) {
      return heuristicTemporalSplit.split(split,set,lset,rset);
    }
    /* perform fallback split */
    else if (unlikely(split.data == Split::SPLIT_FALLBACK)) {
      set.deterministic_order();
      splitFallback(set,lset,rset);
    }
    /* split by geometry */
    else if (unlikely(split.data == Split::SPLIT_GEOMID)) {
      set.deterministic_order();
      splitByGeometry(set,lset,rset);
    }
    else
      assert(false);

    return std::unique_ptr<mvector<PrimRefMB>>();
  }

  /*! finds the best fallback split */
  __noinline Split findFallback(const SetMB& set)
  {
    /* split if primitives are not from same geometry */
    if (!sameGeometry(set))
      return Split(0.0f,Split::SPLIT_GEOMID);

    /* if a leaf can only hold a single time-segment, we might have to do additional temporal splits */
    if (cfg.singleLeafTimeSegment)
    {
      /* test if one primitive has more than one time segment in time range, if so split time */
      for (size_t i=set.begin(); i<set.end(); i++)
      {
        const PrimRefMB& prim = (*set.prims)[i];
        const range<int> itime_range = prim.timeSegmentRange(set.time_range);
        const int localTimeSegments = itime_range.size();
        assert(localTimeSegments > 0);
        if (localTimeSegments > 1) {
          /* split at the center of the primitive's segment range */
          const int icenter = (itime_range.begin() + itime_range.end())/2;
          const float splitTime = prim.timeStep(icenter);
          return Split(0.0f,(unsigned)Split::SPLIT_TEMPORAL,0,splitTime);
        }
      }
    }

    /* otherwise return fallback split */
    return Split(0.0f,Split::SPLIT_FALLBACK);
  }

  /*! performs fallback split: halves the range at the median index */
  void splitFallback(const SetMB& set, SetMB& lset, SetMB& rset)
  {
    mvector<PrimRefMB>& prims = *set.prims;

    const size_t begin = set.begin();
    const size_t end = set.end();
    const size_t center = (begin + end)/2;

    PrimInfoMB linfo = empty;
    for (size_t i=begin; i<center; i++)
      linfo.add_primref(prims[i]);

    PrimInfoMB rinfo = empty;
    for (size_t i=center; i<end; i++)
      rinfo.add_primref(prims[i]);

    new (&lset) SetMB(linfo,set.prims,range<size_t>(begin,center),set.time_range);
    new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,end  ),set.time_range);
  }

  /*! checks if all primitives are from the same geometry */
  __forceinline bool sameGeometry(const SetMB& set)
  {
    if (set.size() == 0) return true;
    mvector<PrimRefMB>& prims = *set.prims;
    const size_t begin = set.begin();
    const size_t end = set.end();
    unsigned int firstGeomID = prims[begin].geomID();
    for (size_t i=begin+1; i<end; i++) {
      if (prims[i].geomID() != firstGeomID){
        return false;
      }
    }
    return true;
  }

  /* split by geometry ID: partitions primitives of the first geometry to the left */
  void splitByGeometry(const SetMB& set, SetMB& lset, SetMB& rset)
  {
    assert(set.size() > 1);

    mvector<PrimRefMB>& prims = *set.prims;
    const size_t begin = set.begin();
    const size_t end = set.end();

    PrimInfoMB left(empty);
    PrimInfoMB right(empty);
    unsigned int geomID = prims[begin].geomID();
    size_t center = serial_partitioning(prims.data(),begin,end,left,right,
                                        [&] ( const PrimRefMB& prim ) { return prim.geomID() == geomID; },
                                        [ ] ( PrimInfoMB& dst, const PrimRefMB& prim ) { dst.add_primref(prim); });

    new (&lset) SetMB(left, set.prims,range<size_t>(begin,center),set.time_range);
    new (&rset) SetMB(right,set.prims,range<size_t>(center,end  ),set.time_range);
  }

  /*! creates a leaf (or a small balanced subtree of leaves) using only
   *  fallback splits; used near the depth limit and for small records */
  const NodeRecordMB4D createLargeLeaf(const BuildRecord& in, Allocator alloc)
  {
    /* this should never occur but is a fatal error */
    if (in.depth > cfg.maxDepth)
      throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");

    /* replace already found split by fallback split */
    const BuildRecordSplit current(BuildRecord(in.prims,in.depth),findFallback(in.prims));

    /* special case when directly creating leaf without any splits that could shrink time_range */
    bool force_split = false;
    if (current.depth == 1 && current.size() > 0)
    {
      BBox1f c = empty;
      BBox1f p = current.prims.time_range;
      for (size_t i=current.prims.begin(); i<current.prims.end(); i++) {
        mvector<PrimRefMB>& prims = *current.prims.prims;
        c.extend(prims[i].time_range);
      }
      /* force a split when the primitives cover a smaller time range than the set */
      force_split = c.lower > p.lower || c.upper < p.upper;
    }

    /* create leaf for few primitives */
    if (current.size() <= cfg.maxLeafSize && current.split.data < Split::SPLIT_ENFORCE && !force_split)
      return createLeaf(current,alloc);

    /* fill all children by always splitting the largest one */
    bool hasTimeSplits = false;
    NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
    LocalChildListSplit children(current);

    do {
      /* find best child with largest bounding box area */
      /* -1 wraps to SIZE_MAX here; the comparison below relies on the same conversion */
      size_t bestChild = -1;
      size_t bestSize = 0;
      for (size_t i=0; i<children.size(); i++)
      {
        /* ignore leaves as they cannot get split */
        if (children[i].size() <= cfg.maxLeafSize && children[i].split.data < Split::SPLIT_ENFORCE && !force_split)
          continue;
        force_split = false;

        /* remember child with largest size */
        if (children[i].size() > bestSize) {
          bestSize = children[i].size();
          bestChild = i;
        }
      }
      if (bestChild == -1) break;

      /* perform best found split */
      BuildRecordSplit& brecord = children[bestChild];
      BuildRecordSplit lrecord(current.depth+1);
      BuildRecordSplit rrecord(current.depth+1);
      std::unique_ptr<mvector<PrimRefMB>> new_vector = split(brecord.split,brecord.prims,lrecord.prims,rrecord.prims);
      hasTimeSplits |= new_vector != nullptr;

      /* find new splits */
      lrecord.split = findFallback(lrecord.prims);
      rrecord.split = findFallback(rrecord.prims);
      children.split(bestChild,lrecord,rrecord,std::move(new_vector));

    } while (children.size() < cfg.branchingFactor);

    /* detect time_ranges that have shrunken */
    for (size_t i=0; i<children.size(); i++) {
      const BBox1f c = children[i].prims.time_range;
      const BBox1f p = in.prims.time_range;
      hasTimeSplits |= c.lower > p.lower || c.upper < p.upper;
    }

    /* create node */
    auto node = createNode(children.children.data(),children.numChildren,alloc,hasTimeSplits);

    /* recurse into each child and perform reduction */
    LBBox3fa gbounds = empty;
    for (size_t i=0; i<children.size(); i++) {
      values[i] = createLargeLeaf(children[i],alloc);
      gbounds.extend(values[i].lbounds);
    }

    setNode(current,children.children.data(),node,values,children.numChildren);

    /* calculate geometry bounds of this node */
    /* after time splits the reduced gbounds are not valid for the full set, so recompute */
    if (hasTimeSplits)
      return NodeRecordMB4D(node,current.prims.linearBounds(recalculatePrimRef),current.prims.time_range);
    else
      return NodeRecordMB4D(node,gbounds,current.prims.time_range);
  }

  /*! recursively builds the subtree for 'current' and returns its node record */
  const NodeRecordMB4D recurse(const BuildRecord& current, Allocator alloc, bool toplevel)
  {
    /* get thread local allocator */
    if (!alloc)
      alloc = createAlloc();

    /* call memory monitor function to signal progress */
    if (toplevel && current.size() <= cfg.singleThreadThreshold)
      progressMonitor(current.size());

    /*! find best split */
    const Split csplit = find(current.prims);

    /*! compute leaf and split cost */
    const float leafSAH  = cfg.intCost*current.prims.leafSAH(cfg.logBlockSize);
    const float splitSAH = cfg.travCost*current.prims.halfArea()+cfg.intCost*csplit.splitSAH();
    assert((current.size() == 0) || ((leafSAH >= 0) && (splitSAH >= 0)));

    /*! create a leaf node when threshold reached or SAH tells us to stop */
    if (current.size() <= cfg.minLeafSize || current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || (current.size() <= cfg.maxLeafSize && leafSAH <= splitSAH)) {
      current.prims.deterministic_order();
      return createLargeLeaf(current,alloc);
    }

    /*! perform initial split */
    SetMB lprims,rprims;
    std::unique_ptr<mvector<PrimRefMB>> new_vector = split(csplit,current.prims,lprims,rprims);
    bool hasTimeSplits = new_vector != nullptr;
    NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
    LocalChildList children(current);
    {
      BuildRecord lrecord(lprims,current.depth+1);
      BuildRecord rrecord(rprims,current.depth+1);
      children.split(0,lrecord,rrecord,std::move(new_vector));
    }

    /*! split until node is full or SAH tells us to stop */
    while (children.size() < cfg.branchingFactor)
    {
      /*! find best child to split (largest expected half area) */
      float bestArea = neg_inf;
      ssize_t bestChild = -1;
      for (size_t i=0; i<children.size(); i++)
      {
        if (children[i].size() <= cfg.minLeafSize) continue;
        if (expectedApproxHalfArea(children[i].prims.geomBounds) > bestArea) {
          bestChild = i; bestArea = expectedApproxHalfArea(children[i].prims.geomBounds);
        }
      }
      if (bestChild == -1) break;

      /* perform split */
      BuildRecord& brecord = children[bestChild];
      BuildRecord lrecord(current.depth+1);
      BuildRecord rrecord(current.depth+1);
      Split csplit = find(brecord.prims);
      std::unique_ptr<mvector<PrimRefMB>> new_vector = split(csplit,brecord.prims,lrecord.prims,rrecord.prims);
      hasTimeSplits |= new_vector != nullptr;
      children.split(bestChild,lrecord,rrecord,std::move(new_vector));
    }

    /* detect time_ranges that have shrunken */
    for (size_t i=0; i<children.size(); i++) {
      const BBox1f c = children[i].prims.time_range;
      const BBox1f p = current.prims.time_range;
      hasTimeSplits |= c.lower > p.lower || c.upper < p.upper;
    }

    /* sort buildrecords for simpler shadow ray traversal */
    //std::sort(&children[0],&children[children.size()],std::greater<BuildRecord>()); // FIXME: reduces traversal performance of bvh8.triangle4 (need to verified) !!

    /*! create an inner node */
    auto node = createNode(children.children.data(), children.numChildren, alloc, hasTimeSplits);
    LBBox3fa gbounds = empty;

    /* spawn tasks */
    if (unlikely(current.size() > cfg.singleThreadThreshold))
    {
      /*! parallel_for is faster than spawing sub-tasks */
      parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
        for (size_t i=r.begin(); i<r.end(); i++) {
          values[i] = recurse(children[i],nullptr,true);
          _mm_mfence(); // to allow non-temporal stores during build
        }
      });

      /*! merge bounding boxes */
      for (size_t i=0; i<children.size(); i++)
        gbounds.extend(values[i].lbounds);
    }

    /* recurse into each child */
    else
    {
      /* iterating in reverse keeps allocation order friendly to the thread-local allocator
         (presumably; upstream kept both variants) */
      //for (size_t i=0; i<children.size(); i++)
      for (ssize_t i=children.size()-1; i>=0; i--) {
        values[i] = recurse(children[i],alloc,false);
        gbounds.extend(values[i].lbounds);
      }
    }

    setNode(current,children.children.data(),node,values,children.numChildren);

    /* calculate geometry bounds of this node */
    /* after time splits the merged child bounds do not cover the full time range, so recompute */
    if (unlikely(hasTimeSplits))
      return NodeRecordMB4D(node,current.prims.linearBounds(recalculatePrimRef),current.prims.time_range);
    else
      return NodeRecordMB4D(node,gbounds,current.prims.time_range);
  }

  /*! builder entry function */
  __forceinline const NodeRecordMB4D operator() (mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo)
  {
    const SetMB set(pinfo,&prims);
    auto ret = recurse(BuildRecord(set,1),nullptr,true);
    _mm_mfence(); // to allow non-temporal stores during build
    return ret;
  }

private:
  Settings cfg;                                                                                 //!< build settings
  HeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> heuristicObjectSplit;                //!< object-split binner
  HeuristicMBlurTemporalSplit<PrimRefMB,RecalculatePrimRef,MBLUR_NUM_TEMPORAL_BINS> heuristicTemporalSplit; //!< temporal-split heuristic
  const RecalculatePrimRef recalculatePrimRef;
  const CreateAllocFunc createAlloc;
  const CreateNodeFunc createNode;
  const SetNodeFunc setNode;
  const CreateLeafFunc createLeaf;
  const ProgressMonitor progressMonitor;
};
  545. template<typename NodeRef,
  546. typename RecalculatePrimRef,
  547. typename CreateAllocFunc,
  548. typename CreateNodeFunc,
  549. typename SetNodeFunc,
  550. typename CreateLeafFunc,
  551. typename ProgressMonitorFunc>
  552. static const BVHNodeRecordMB4D<NodeRef> build(mvector<PrimRefMB>& prims,
  553. const PrimInfoMB& pinfo,
  554. MemoryMonitorInterface* device,
  555. const RecalculatePrimRef recalculatePrimRef,
  556. const CreateAllocFunc createAlloc,
  557. const CreateNodeFunc createNode,
  558. const SetNodeFunc setNode,
  559. const CreateLeafFunc createLeaf,
  560. const ProgressMonitorFunc progressMonitor,
  561. const Settings& settings)
  562. {
  563. typedef BuilderT<
  564. NodeRef,
  565. RecalculatePrimRef,
  566. decltype(createAlloc()),
  567. CreateAllocFunc,
  568. CreateNodeFunc,
  569. SetNodeFunc,
  570. CreateLeafFunc,
  571. ProgressMonitorFunc> Builder;
  572. Builder builder(device,
  573. recalculatePrimRef,
  574. createAlloc,
  575. createNode,
  576. setNode,
  577. createLeaf,
  578. progressMonitor,
  579. settings);
  580. return builder(prims,pinfo);
  581. }
  582. };
  583. }
  584. }