heuristic_spatial_array.h 26 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579
  1. // ======================================================================== //
  2. // Copyright 2009-2017 Intel Corporation //
  3. // //
  4. // Licensed under the Apache License, Version 2.0 (the "License"); //
  5. // you may not use this file except in compliance with the License. //
  6. // You may obtain a copy of the License at //
  7. // //
  8. // http://www.apache.org/licenses/LICENSE-2.0 //
  9. // //
  10. // Unless required by applicable law or agreed to in writing, software //
  11. // distributed under the License is distributed on an "AS IS" BASIS, //
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
  13. // See the License for the specific language governing permissions and //
  14. // limitations under the License. //
  15. // ======================================================================== //
  16. #pragma once
  17. #include "heuristic_binning.h"
  18. #include "heuristic_spatial.h"
  19. namespace embree
  20. {
  21. namespace isa
  22. {
  23. #if 0
  24. #define SPATIAL_ASPLIT_OVERLAP_THRESHOLD 0.2f
  25. #define SPATIAL_ASPLIT_SAH_THRESHOLD 0.95f
  26. #define SPATIAL_ASPLIT_AREA_THRESHOLD 0.0f
  27. #else
  28. #define SPATIAL_ASPLIT_OVERLAP_THRESHOLD 0.1f
  29. #define SPATIAL_ASPLIT_SAH_THRESHOLD 0.99f
  30. #define SPATIAL_ASPLIT_AREA_THRESHOLD 0.000005f
  31. #endif
  32. struct PrimInfoExtRange : public CentGeomBBox3fa, public extended_range<size_t>
  33. {
  34. __forceinline PrimInfoExtRange() {
  35. }
  36. __forceinline PrimInfoExtRange(EmptyTy)
  37. : CentGeomBBox3fa(EmptyTy()), extended_range<size_t>(0,0,0) {}
  38. __forceinline PrimInfoExtRange(size_t begin, size_t end, size_t ext_end, const BBox3fa& geomBounds, const BBox3fa& centBounds)
  39. : CentGeomBBox3fa(geomBounds,centBounds), extended_range<size_t>(begin,end,ext_end) {}
  40. __forceinline float leafSAH() const {
  41. return expectedApproxHalfArea(geomBounds)*float(size());
  42. }
  43. __forceinline float leafSAH(size_t block_shift) const {
  44. return expectedApproxHalfArea(geomBounds)*float((size()+(size_t(1)<<block_shift)-1) >> block_shift);
  45. }
  46. };
  47. template<typename ObjectSplit, typename SpatialSplit>
  48. struct Split2
  49. {
  50. __forceinline Split2 () {}
  51. __forceinline Split2 (const Split2& other)
  52. {
  53. spatial = other.spatial;
  54. sah = other.sah;
  55. if (spatial) spatialSplit() = other.spatialSplit();
  56. else objectSplit() = other.objectSplit();
  57. }
  58. __forceinline Split2& operator= (const Split2& other)
  59. {
  60. spatial = other.spatial;
  61. sah = other.sah;
  62. if (spatial) spatialSplit() = other.spatialSplit();
  63. else objectSplit() = other.objectSplit();
  64. return *this;
  65. }
  66. __forceinline ObjectSplit& objectSplit() { return *( ObjectSplit*)data; }
  67. __forceinline const ObjectSplit& objectSplit() const { return *(const ObjectSplit*)data; }
  68. __forceinline SpatialSplit& spatialSplit() { return *( SpatialSplit*)data; }
  69. __forceinline const SpatialSplit& spatialSplit() const { return *(const SpatialSplit*)data; }
  70. __forceinline Split2 (const ObjectSplit& objectSplit, float sah)
  71. : spatial(false), sah(sah)
  72. {
  73. new (data) ObjectSplit(objectSplit);
  74. }
  75. __forceinline Split2 (const SpatialSplit& spatialSplit, float sah)
  76. : spatial(true), sah(sah)
  77. {
  78. new (data) SpatialSplit(spatialSplit);
  79. }
  80. __forceinline float splitSAH() const {
  81. return sah;
  82. }
  83. __forceinline bool valid() const {
  84. return sah < float(inf);
  85. }
  86. public:
  87. __aligned(64) char data[sizeof(ObjectSplit) > sizeof(SpatialSplit) ? sizeof(ObjectSplit) : sizeof(SpatialSplit)];
  88. bool spatial;
  89. float sah;
  90. };
  91. /*! Performs standard object binning */
  92. #if defined(__AVX512F__)
  93. template<typename PrimitiveSplitterFactory, typename PrimRef, size_t OBJECT_BINS = 16, size_t SPATIAL_BINS = 16>
  94. #else
  95. template<typename PrimitiveSplitterFactory, typename PrimRef, size_t OBJECT_BINS = 32, size_t SPATIAL_BINS = 16>
  96. #endif
  97. struct HeuristicArraySpatialSAH
  98. {
  99. typedef BinSplit<OBJECT_BINS> ObjectSplit;
  100. typedef BinInfoT<OBJECT_BINS,PrimRef,BBox3fa> ObjectBinner;
  101. typedef SpatialBinSplit<SPATIAL_BINS> SpatialSplit;
  102. typedef SpatialBinInfo<SPATIAL_BINS,PrimRef> SpatialBinner;
  103. //typedef extended_range<size_t> Set;
  104. typedef Split2<ObjectSplit,SpatialSplit> Split;
  105. #if defined(__AVX512ER__) // KNL
  106. static const size_t PARALLEL_THRESHOLD = 3*1024;
  107. static const size_t PARALLEL_FIND_BLOCK_SIZE = 768;
  108. static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
  109. #else
  110. static const size_t PARALLEL_THRESHOLD = 3*1024;
  111. static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
  112. static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
  113. #endif
  114. static const size_t MOVE_STEP_SIZE = 64;
  115. static const size_t CREATE_SPLITS_STEP_SIZE = 64;
  116. __forceinline HeuristicArraySpatialSAH ()
  117. : prims0(nullptr) {}
  118. /*! remember prim array */
  119. __forceinline HeuristicArraySpatialSAH (const PrimitiveSplitterFactory& splitterFactory, PrimRef* prims0, const CentGeomBBox3fa& root_info)
  120. : prims0(prims0), splitterFactory(splitterFactory), root_info(root_info) {}
  121. /*! compute extended ranges */
  122. __noinline void setExtentedRanges(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset, const size_t lweight, const size_t rweight)
  123. {
  124. assert(set.ext_range_size() > 0);
  125. const float left_factor = (float)lweight / (lweight + rweight);
  126. const size_t ext_range_size = set.ext_range_size();
  127. const size_t left_ext_range_size = min((size_t)(floorf(left_factor * ext_range_size)),ext_range_size);
  128. const size_t right_ext_range_size = ext_range_size - left_ext_range_size;
  129. lset.set_ext_range(lset.end() + left_ext_range_size);
  130. rset.set_ext_range(rset.end() + right_ext_range_size);
  131. }
  132. /*! move ranges */
  133. __noinline void moveExtentedRange(const PrimInfoExtRange& set, const PrimInfoExtRange& lset, PrimInfoExtRange& rset)
  134. {
  135. const size_t left_ext_range_size = lset.ext_range_size();
  136. const size_t right_size = rset.size();
  137. /* has the left child an extended range? */
  138. if (left_ext_range_size > 0)
  139. {
  140. /* left extended range smaller than right range ? */
  141. if (left_ext_range_size < right_size)
  142. {
  143. /* only move a small part of the beginning of the right range to the end */
  144. parallel_for( rset.begin(), rset.begin()+left_ext_range_size, MOVE_STEP_SIZE, [&](const range<size_t>& r) {
  145. for (size_t i=r.begin(); i<r.end(); i++)
  146. prims0[i+right_size] = prims0[i];
  147. });
  148. }
  149. else
  150. {
  151. /* no overlap, move entire right range to new location, can be made fully parallel */
  152. parallel_for( rset.begin(), rset.end(), MOVE_STEP_SIZE, [&](const range<size_t>& r) {
  153. for (size_t i=r.begin(); i<r.end(); i++)
  154. prims0[i+left_ext_range_size] = prims0[i];
  155. });
  156. }
  157. /* update right range */
  158. assert(rset.ext_end() + left_ext_range_size == set.ext_end());
  159. rset.move_right(left_ext_range_size);
  160. }
  161. }
  162. /*! finds the best split */
  163. const Split find(const PrimInfoExtRange& set, const size_t logBlockSize)
  164. {
  165. SplitInfo oinfo;
  166. const ObjectSplit object_split = object_find(set,logBlockSize,oinfo);
  167. const float object_split_sah = object_split.splitSAH();
  168. if (unlikely(set.has_ext_range()))
  169. {
  170. const BBox3fa overlap = intersect(oinfo.leftBounds, oinfo.rightBounds);
  171. /* do only spatial splits if the child bounds overlap */
  172. if (safeArea(overlap) >= SPATIAL_ASPLIT_AREA_THRESHOLD*safeArea(root_info.geomBounds) &&
  173. safeArea(overlap) >= SPATIAL_ASPLIT_OVERLAP_THRESHOLD*safeArea(set.geomBounds))
  174. {
  175. const SpatialSplit spatial_split = spatial_find(set, logBlockSize);
  176. const float spatial_split_sah = spatial_split.splitSAH();
  177. /* valid spatial split, better SAH and number of splits do not exceed extended range */
  178. if (spatial_split_sah < SPATIAL_ASPLIT_SAH_THRESHOLD*object_split_sah &&
  179. spatial_split.left + spatial_split.right - set.size() <= set.ext_range_size())
  180. {
  181. return Split(spatial_split,spatial_split_sah);
  182. }
  183. }
  184. }
  185. return Split(object_split,object_split_sah);
  186. }
  187. /*! finds the best object split */
  188. __forceinline const ObjectSplit object_find(const PrimInfoExtRange& set, const size_t logBlockSize, SplitInfo &info)
  189. {
  190. if (set.size() < PARALLEL_THRESHOLD) return sequential_object_find(set,logBlockSize,info);
  191. else return parallel_object_find (set,logBlockSize,info);
  192. }
  193. /*! finds the best object split */
  194. __noinline const ObjectSplit sequential_object_find(const PrimInfoExtRange& set, const size_t logBlockSize, SplitInfo &info)
  195. {
  196. ObjectBinner binner(empty);
  197. const BinMapping<OBJECT_BINS> mapping(set);
  198. binner.bin(prims0,set.begin(),set.end(),mapping);
  199. ObjectSplit s = binner.best(mapping,logBlockSize);
  200. binner.getSplitInfo(mapping, s, info);
  201. return s;
  202. }
  203. /*! finds the best split */
  204. __noinline const ObjectSplit parallel_object_find(const PrimInfoExtRange& set, const size_t logBlockSize, SplitInfo &info)
  205. {
  206. ObjectBinner binner(empty);
  207. const BinMapping<OBJECT_BINS> mapping(set);
  208. const BinMapping<OBJECT_BINS>& _mapping = mapping; // CLANG 3.4 parser bug workaround
  209. binner = parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,binner,
  210. [&] (const range<size_t>& r) -> ObjectBinner { ObjectBinner binner(empty); binner.bin(prims0+r.begin(),r.size(),_mapping); return binner; },
  211. [&] (const ObjectBinner& b0, const ObjectBinner& b1) -> ObjectBinner { ObjectBinner r = b0; r.merge(b1,_mapping.size()); return r; });
  212. ObjectSplit s = binner.best(mapping,logBlockSize);
  213. binner.getSplitInfo(mapping, s, info);
  214. return s;
  215. }
  216. /*! finds the best spatial split */
  217. __forceinline const SpatialSplit spatial_find(const PrimInfoExtRange& set, const size_t logBlockSize)
  218. {
  219. if (set.size() < PARALLEL_THRESHOLD) return sequential_spatial_find(set, logBlockSize);
  220. else return parallel_spatial_find (set, logBlockSize);
  221. }
  222. /*! finds the best spatial split */
  223. __noinline const SpatialSplit sequential_spatial_find(const PrimInfoExtRange& set, const size_t logBlockSize)
  224. {
  225. SpatialBinner binner(empty);
  226. const SpatialBinMapping<SPATIAL_BINS> mapping(set);
  227. binner.bin2(splitterFactory,prims0,set.begin(),set.end(),mapping);
  228. /* todo: best spatial split not exeeding the extended range does not provide any benefit ?*/
  229. return binner.best(mapping,logBlockSize); //,set.ext_size());
  230. }
  231. __noinline const SpatialSplit parallel_spatial_find(const PrimInfoExtRange& set, const size_t logBlockSize)
  232. {
  233. SpatialBinner binner(empty);
  234. const SpatialBinMapping<SPATIAL_BINS> mapping(set);
  235. const SpatialBinMapping<SPATIAL_BINS>& _mapping = mapping; // CLANG 3.4 parser bug workaround
  236. binner = parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,binner,
  237. [&] (const range<size_t>& r) -> SpatialBinner {
  238. SpatialBinner binner(empty);
  239. binner.bin2(splitterFactory,prims0,r.begin(),r.end(),_mapping);
  240. return binner; },
  241. [&] (const SpatialBinner& b0, const SpatialBinner& b1) -> SpatialBinner { return SpatialBinner::reduce(b0,b1); });
  242. /* todo: best spatial split not exeeding the extended range does not provide any benefit ?*/
  243. return binner.best(mapping,logBlockSize); //,set.ext_size());
  244. }
  245. /*! subdivides primitives based on a spatial split */
  246. __noinline void create_spatial_splits(PrimInfoExtRange& set, const SpatialSplit& split, const SpatialBinMapping<SPATIAL_BINS> &mapping)
  247. {
  248. assert(set.has_ext_range());
  249. const size_t max_ext_range_size = set.ext_range_size();
  250. const size_t ext_range_start = set.end();
  251. /* atomic counter for number of primref splits */
  252. std::atomic<size_t> ext_elements;
  253. ext_elements.store(0);
  254. const float fpos = split.mapping.pos(split.pos,split.dim);
  255. parallel_for( set.begin(), set.end(), CREATE_SPLITS_STEP_SIZE, [&](const range<size_t>& r) {
  256. for (size_t i=r.begin();i<r.end();i++)
  257. {
  258. const unsigned int splits = prims0[i].geomID() >> 24;
  259. if (likely(splits <= 1)) continue; /* todo: does this ever happen ? */
  260. //int bin0 = split.mapping.bin(prims0[i].lower)[split.dim];
  261. //int bin1 = split.mapping.bin(prims0[i].upper)[split.dim];
  262. //if (unlikely(bin0 < split.pos && bin1 >= split.pos))
  263. if (unlikely(prims0[i].lower[split.dim] < fpos && prims0[i].upper[split.dim] > fpos))
  264. {
  265. assert(splits > 1);
  266. PrimRef left,right;
  267. const auto splitter = splitterFactory(prims0[i]);
  268. splitter(prims0[i],split.dim,fpos,left,right);
  269. // no empty splits
  270. if (unlikely(left.bounds().empty() || right.bounds().empty())) continue;
  271. left.lower.a = (left.lower.a & 0x00FFFFFF) | (splits << 24);
  272. right.lower.a = (right.lower.a & 0x00FFFFFF) | (splits << 24);
  273. const size_t ID = ext_elements.fetch_add(1);
  274. /* break if the number of subdivided elements are greater than the maximal allowed size */
  275. if (unlikely(ID >= max_ext_range_size))
  276. break;
  277. /* only write within the correct bounds */
  278. assert(ID < max_ext_range_size);
  279. prims0[i] = left;
  280. prims0[ext_range_start+ID] = right;
  281. }
  282. }
  283. });
  284. const size_t numExtElements = min(max_ext_range_size,ext_elements.load());
  285. assert(set.end()+numExtElements<=set.ext_end());
  286. set._end += numExtElements;
  287. }
  288. /*! array partitioning */
  289. void split(const Split& split, const PrimInfoExtRange& set_i, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
  290. {
  291. PrimInfoExtRange set = set_i;
  292. /* valid split */
  293. if (unlikely(!split.valid())) {
  294. deterministic_order(set);
  295. return splitFallback(set,lset,rset);
  296. }
  297. std::pair<size_t,size_t> ext_weights(0,0);
  298. if (unlikely(split.spatial))
  299. {
  300. create_spatial_splits(set,split.spatialSplit(), split.spatialSplit().mapping);
  301. /* spatial split */
  302. if (likely(set.size() < PARALLEL_THRESHOLD))
  303. ext_weights = sequential_spatial_split(split.spatialSplit(),set,lset,rset);
  304. else
  305. ext_weights = parallel_spatial_split(split.spatialSplit(),set,lset,rset);
  306. }
  307. else
  308. {
  309. /* object split */
  310. if (likely(set.size() < PARALLEL_THRESHOLD))
  311. ext_weights = sequential_object_split(split.objectSplit(),set,lset,rset);
  312. else
  313. ext_weights = parallel_object_split(split.objectSplit(),set,lset,rset);
  314. }
  315. /* if we have an extended range, set extended child ranges and move right split range */
  316. if (unlikely(set.has_ext_range()))
  317. {
  318. setExtentedRanges(set,lset,rset,ext_weights.first,ext_weights.second);
  319. moveExtentedRange(set,lset,rset);
  320. }
  321. }
  322. /*! array partitioning */
  323. std::pair<size_t,size_t> sequential_object_split(const ObjectSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
  324. {
  325. const size_t begin = set.begin();
  326. const size_t end = set.end();
  327. PrimInfo local_left(empty);
  328. PrimInfo local_right(empty);
  329. const unsigned int splitPos = split.pos;
  330. const unsigned int splitDim = split.dim;
  331. const unsigned int splitDimMask = (unsigned int)1 << splitDim;
  332. #if defined(__AVX512F__)
  333. const vint16 vSplitPos(splitPos);
  334. const vbool16 vSplitMask( splitDimMask );
  335. #else
  336. const vint4 vSplitPos(splitPos);
  337. const vbool4 vSplitMask( (int)splitDimMask );
  338. #endif
  339. size_t center = serial_partitioning(prims0,
  340. begin,end,local_left,local_right,
  341. [&] (const PrimRef& ref) {
  342. return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask);
  343. },
  344. [] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add(ref.bounds(),ref.lower.a >> 24); });
  345. const size_t left_weight = local_left.end;
  346. const size_t right_weight = local_right.end;
  347. new (&lset) PrimInfoExtRange(begin,center,center,local_left.geomBounds,local_left.centBounds);
  348. new (&rset) PrimInfoExtRange(center,end,end,local_right.geomBounds,local_right.centBounds);
  349. assert(area(lset.geomBounds) >= 0.0f);
  350. assert(area(rset.geomBounds) >= 0.0f);
  351. return std::pair<size_t,size_t>(left_weight,right_weight);
  352. }
  353. /*! array partitioning */
  354. __noinline std::pair<size_t,size_t> sequential_spatial_split(const SpatialSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
  355. {
  356. const size_t begin = set.begin();
  357. const size_t end = set.end();
  358. PrimInfo local_left(empty);
  359. PrimInfo local_right(empty);
  360. const unsigned int splitPos = split.pos;
  361. const unsigned int splitDim = split.dim;
  362. const unsigned int splitDimMask = (unsigned int)1 << splitDim;
  363. /* init spatial mapping */
  364. const SpatialBinMapping<SPATIAL_BINS> mapping = split.mapping;
  365. const vint4 vSplitPos(splitPos);
  366. const vbool4 vSplitMask( (int)splitDimMask );
  367. size_t center = serial_partitioning(prims0,
  368. begin,end,local_left,local_right,
  369. [&] (const PrimRef& ref) {
  370. const Vec3fa c = ref.bounds().center();
  371. return any(((vint4)mapping.bin(c) < vSplitPos) & vSplitMask);
  372. },
  373. [] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add(ref.bounds(),ref.lower.a >> 24); });
  374. const size_t left_weight = local_left.end;
  375. const size_t right_weight = local_right.end;
  376. new (&lset) PrimInfoExtRange(begin,center,center,local_left.geomBounds,local_left.centBounds);
  377. new (&rset) PrimInfoExtRange(center,end,end,local_right.geomBounds,local_right.centBounds);
  378. assert(area(lset.geomBounds) >= 0.0f);
  379. assert(area(rset.geomBounds) >= 0.0f);
  380. return std::pair<size_t,size_t>(left_weight,right_weight);
  381. }
  382. /*! array partitioning */
  383. __noinline std::pair<size_t,size_t> parallel_object_split(const ObjectSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
  384. {
  385. const size_t begin = set.begin();
  386. const size_t end = set.end();
  387. PrimInfo left; left.reset();
  388. PrimInfo right; right.reset();
  389. const unsigned int splitPos = split.pos;
  390. const unsigned int splitDim = split.dim;
  391. const unsigned int splitDimMask = (unsigned int)1 << splitDim;
  392. #if defined(__AVX512F__)
  393. const vint16 vSplitPos(splitPos);
  394. const vbool16 vSplitMask( (int)splitDimMask );
  395. #else
  396. const vint4 vSplitPos(splitPos);
  397. const vbool4 vSplitMask( (int)splitDimMask );
  398. #endif
  399. auto isLeft = [&] (const PrimRef &ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); };
  400. const size_t center = parallel_partitioning(
  401. prims0,begin,end,EmptyTy(),left,right,isLeft,
  402. [] (PrimInfo &pinfo,const PrimRef &ref) { pinfo.add(ref.bounds(),ref.lower.a >> 24); },
  403. [] (PrimInfo &pinfo0,const PrimInfo &pinfo1) { pinfo0.merge(pinfo1); },
  404. PARALLEL_PARTITION_BLOCK_SIZE);
  405. const size_t left_weight = left.end;
  406. const size_t right_weight = right.end;
  407. left.begin = begin; left.end = center;
  408. right.begin = center; right.end = end;
  409. new (&lset) PrimInfoExtRange(begin,center,center,left.geomBounds,left.centBounds);
  410. new (&rset) PrimInfoExtRange(center,end,end,right.geomBounds,right.centBounds);
  411. assert(area(left.geomBounds) >= 0.0f);
  412. assert(area(right.geomBounds) >= 0.0f);
  413. return std::pair<size_t,size_t>(left_weight,right_weight);
  414. }
  415. /*! array partitioning */
  416. __noinline std::pair<size_t,size_t> parallel_spatial_split(const SpatialSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
  417. {
  418. const size_t begin = set.begin();
  419. const size_t end = set.end();
  420. PrimInfo left; left.reset();
  421. PrimInfo right; right.reset();
  422. const unsigned int splitPos = split.pos;
  423. const unsigned int splitDim = split.dim;
  424. const unsigned int splitDimMask = (unsigned int)1 << splitDim;
  425. /* init spatial mapping */
  426. const SpatialBinMapping<SPATIAL_BINS>& mapping = split.mapping;
  427. const vint4 vSplitPos(splitPos);
  428. const vbool4 vSplitMask( (int)splitDimMask );
  429. auto isLeft = [&] (const PrimRef &ref) {
  430. const Vec3fa c = ref.bounds().center();
  431. return any(((vint4)mapping.bin(c) < vSplitPos) & vSplitMask); };
  432. const size_t center = parallel_partitioning(
  433. prims0,begin,end,EmptyTy(),left,right,isLeft,
  434. [] (PrimInfo &pinfo,const PrimRef &ref) { pinfo.add(ref.bounds(),ref.lower.a >> 24); },
  435. [] (PrimInfo &pinfo0,const PrimInfo &pinfo1) { pinfo0.merge(pinfo1); },
  436. PARALLEL_PARTITION_BLOCK_SIZE);
  437. const size_t left_weight = left.end;
  438. const size_t right_weight = right.end;
  439. left.begin = begin; left.end = center;
  440. right.begin = center; right.end = end;
  441. new (&lset) PrimInfoExtRange(begin,center,center,left.geomBounds,left.centBounds);
  442. new (&rset) PrimInfoExtRange(center,end,end,right.geomBounds,right.centBounds);
  443. assert(area(left.geomBounds) >= 0.0f);
  444. assert(area(right.geomBounds) >= 0.0f);
  445. return std::pair<size_t,size_t>(left_weight,right_weight);
  446. }
  447. void deterministic_order(const PrimInfoExtRange& set)
  448. {
  449. /* required as parallel partition destroys original primitive order */
  450. std::sort(&prims0[set.begin()],&prims0[set.end()]);
  451. }
  452. void splitFallback(const PrimInfoExtRange& set,
  453. PrimInfoExtRange& lset,
  454. PrimInfoExtRange& rset)
  455. {
  456. const size_t begin = set.begin();
  457. const size_t end = set.end();
  458. const size_t center = (begin + end)/2;
  459. PrimInfo left(empty);
  460. for (size_t i=begin; i<center; i++) {
  461. left.add(prims0[i].bounds(),prims0[i].lower.a >> 24);
  462. }
  463. const size_t lweight = left.end;
  464. PrimInfo right(empty);
  465. for (size_t i=center; i<end; i++) {
  466. right.add(prims0[i].bounds(),prims0[i].lower.a >> 24);
  467. }
  468. const size_t rweight = right.end;
  469. new (&lset) PrimInfoExtRange(begin,center,center,left.geomBounds,left.centBounds);
  470. new (&rset) PrimInfoExtRange(center,end,end,right.geomBounds,right.centBounds);
  471. /* if we have an extended range */
  472. if (set.has_ext_range()) {
  473. setExtentedRanges(set,lset,rset,lweight,rweight);
  474. moveExtentedRange(set,lset,rset);
  475. }
  476. }
  477. private:
  478. PrimRef* const prims0;
  479. const PrimitiveSplitterFactory& splitterFactory;
  480. const CentGeomBBox3fa& root_info;
  481. };
  482. }
  483. }