heuristic_binning_array_aligned.h 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "heuristic_binning.h"
  5. namespace embree
  6. {
  7. namespace isa
  8. {
  9. struct PrimInfoRange : public CentGeomBBox3fa, public range<size_t>
  10. {
  11. __forceinline PrimInfoRange () {
  12. }
  13. __forceinline PrimInfoRange(const PrimInfo& pinfo)
  14. : CentGeomBBox3fa(pinfo), range<size_t>(pinfo.begin,pinfo.end) {}
  15. __forceinline PrimInfoRange(EmptyTy)
  16. : CentGeomBBox3fa(EmptyTy()), range<size_t>(0,0) {}
  17. __forceinline PrimInfoRange (size_t begin, size_t end, const CentGeomBBox3fa& centGeomBounds)
  18. : CentGeomBBox3fa(centGeomBounds), range<size_t>(begin,end) {}
  19. __forceinline PrimInfoRange (range<size_t> r, const CentGeomBBox3fa& centGeomBounds)
  20. : CentGeomBBox3fa(centGeomBounds), range<size_t>(r) {}
  21. __forceinline float leafSAH() const {
  22. return expectedApproxHalfArea(geomBounds)*float(size());
  23. }
  24. __forceinline float leafSAH(size_t block_shift) const {
  25. return expectedApproxHalfArea(geomBounds)*float((size()+(size_t(1)<<block_shift)-1) >> block_shift);
  26. }
  27. __forceinline range<size_t> get_range() const {
  28. return range<size_t>(begin(),end());
  29. }
  30. template<typename PrimRef>
  31. __forceinline void add_primref(const PrimRef& prim)
  32. {
  33. CentGeomBBox3fa::extend_primref(prim);
  34. _end++;
  35. }
  36. };
  37. inline void performFallbackSplit(PrimRef* const prims, const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo)
  38. {
  39. const size_t begin = pinfo.begin();
  40. const size_t end = pinfo.end();
  41. const size_t center = (begin + end)/2;
  42. CentGeomBBox3fa left(empty);
  43. for (size_t i=begin; i<center; i++)
  44. left.extend_center2(prims[i]);
  45. new (&linfo) PrimInfoRange(begin,center,left);
  46. CentGeomBBox3fa right(empty);
  47. for (size_t i=center; i<end; i++)
  48. right.extend_center2(prims[i]);
  49. new (&rinfo) PrimInfoRange(center,end,right);
  50. }
  51. template<typename Type, typename getTypeFunc>
  52. inline void performTypeSplit(const getTypeFunc& getType, Type type, PrimRef* const prims, range<size_t> range, PrimInfoRange& linfo, PrimInfoRange& rinfo)
  53. {
  54. CentGeomBBox3fa local_left(empty), local_right(empty);
  55. auto isLeft = [&] (const PrimRef& ref) { return type == getType(ref.geomID()); };
  56. const size_t center = serial_partitioning(prims,range.begin(),range.end(),local_left,local_right,isLeft,CentGeomBBox3fa::extend_ref);
  57. linfo = PrimInfoRange(make_range(range.begin(),center ),local_left);
  58. rinfo = PrimInfoRange(make_range(center ,range.end()),local_right);
  59. }
  60. /*! Performs standard object binning */
  61. template<typename PrimRef, size_t BINS>
  62. struct HeuristicArrayBinningSAH
  63. {
  64. typedef BinSplit<BINS> Split;
  65. typedef BinInfoT<BINS,PrimRef,BBox3fa> Binner;
  66. typedef range<size_t> Set;
  67. static const size_t PARALLEL_THRESHOLD = 3 * 1024;
  68. static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
  69. static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
  70. __forceinline HeuristicArrayBinningSAH ()
  71. : prims(nullptr) {}
  72. /*! remember prim array */
  73. __forceinline HeuristicArrayBinningSAH (PrimRef* prims)
  74. : prims(prims) {}
  75. /*! finds the best split */
  76. __noinline const Split find(const PrimInfoRange& pinfo, const size_t logBlockSize)
  77. {
  78. if (likely(pinfo.size() < PARALLEL_THRESHOLD))
  79. return find_template<false>(pinfo,logBlockSize);
  80. else
  81. return find_template<true>(pinfo,logBlockSize);
  82. }
  83. template<bool parallel>
  84. __forceinline const Split find_template(const PrimInfoRange& pinfo, const size_t logBlockSize)
  85. {
  86. Binner binner(empty);
  87. const BinMapping<BINS> mapping(pinfo);
  88. bin_serial_or_parallel<parallel>(binner,prims,pinfo.begin(),pinfo.end(),PARALLEL_FIND_BLOCK_SIZE,mapping);
  89. return binner.best(mapping,logBlockSize);
  90. }
  91. /*! finds the best split */
  92. __noinline const Split find_block_size(const PrimInfoRange& pinfo, const size_t blockSize)
  93. {
  94. if (likely(pinfo.size() < PARALLEL_THRESHOLD))
  95. return find_block_size_template<false>(pinfo,blockSize);
  96. else
  97. return find_block_size_template<true>(pinfo,blockSize);
  98. }
  99. template<bool parallel>
  100. __forceinline const Split find_block_size_template(const PrimInfoRange& pinfo, const size_t blockSize)
  101. {
  102. Binner binner(empty);
  103. const BinMapping<BINS> mapping(pinfo);
  104. bin_serial_or_parallel<parallel>(binner,prims,pinfo.begin(),pinfo.end(),PARALLEL_FIND_BLOCK_SIZE,mapping);
  105. return binner.best_block_size(mapping,blockSize);
  106. }
  107. /*! array partitioning */
  108. __forceinline void split(const Split& split, const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo)
  109. {
  110. if (likely(pinfo.size() < PARALLEL_THRESHOLD))
  111. split_template<false>(split,pinfo,linfo,rinfo);
  112. else
  113. split_template<true>(split,pinfo,linfo,rinfo);
  114. }
  115. template<bool parallel>
  116. __forceinline void split_template(const Split& split, const PrimInfoRange& set, PrimInfoRange& lset, PrimInfoRange& rset)
  117. {
  118. if (!split.valid()) {
  119. deterministic_order(set);
  120. return splitFallback(set,lset,rset);
  121. }
  122. const size_t begin = set.begin();
  123. const size_t end = set.end();
  124. CentGeomBBox3fa local_left(empty);
  125. CentGeomBBox3fa local_right(empty);
  126. const unsigned int splitPos = split.pos;
  127. const unsigned int splitDim = split.dim;
  128. const unsigned int splitDimMask = (unsigned int)1 << splitDim;
  129. const typename Binner::vint vSplitPos(splitPos);
  130. const typename Binner::vbool vSplitMask(splitDimMask);
  131. auto isLeft = [&] (const PrimRef &ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); };
  132. size_t center = 0;
  133. if (!parallel)
  134. center = serial_partitioning(prims,begin,end,local_left,local_right,isLeft,
  135. [] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); });
  136. else
  137. center = parallel_partitioning(
  138. prims,begin,end,EmptyTy(),local_left,local_right,isLeft,
  139. [] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); },
  140. [] (CentGeomBBox3fa& pinfo0,const CentGeomBBox3fa& pinfo1) { pinfo0.merge(pinfo1); },
  141. PARALLEL_PARTITION_BLOCK_SIZE);
  142. new (&lset) PrimInfoRange(begin,center,local_left);
  143. new (&rset) PrimInfoRange(center,end,local_right);
  144. assert(area(lset.geomBounds) >= 0.0f);
  145. assert(area(rset.geomBounds) >= 0.0f);
  146. }
  147. void deterministic_order(const PrimInfoRange& pinfo)
  148. {
  149. /* required as parallel partition destroys original primitive order */
  150. std::sort(&prims[pinfo.begin()],&prims[pinfo.end()]);
  151. }
  152. void splitFallback(const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo) {
  153. performFallbackSplit(prims,pinfo,linfo,rinfo);
  154. }
  155. void splitByGeometry(const range<size_t>& range, PrimInfoRange& linfo, PrimInfoRange& rinfo)
  156. {
  157. assert(range.size() > 1);
  158. CentGeomBBox3fa left(empty);
  159. CentGeomBBox3fa right(empty);
  160. unsigned int geomID = prims[range.begin()].geomID();
  161. size_t center = serial_partitioning(prims,range.begin(),range.end(),left,right,
  162. [&] ( const PrimRef& prim ) { return prim.geomID() == geomID; },
  163. [ ] ( CentGeomBBox3fa& a, const PrimRef& ref ) { a.extend_center2(ref); });
  164. new (&linfo) PrimInfoRange(range.begin(),center,left);
  165. new (&rinfo) PrimInfoRange(center,range.end(),right);
  166. }
  167. private:
  168. PrimRef* const prims;
  169. };
  170. #if !defined(RTHWIF_STANDALONE)
  171. /*! Performs standard object binning */
  172. template<typename PrimRefMB, size_t BINS>
  173. struct HeuristicArrayBinningMB
  174. {
  175. typedef BinSplit<BINS> Split;
  176. typedef typename PrimRefMB::BBox BBox;
  177. typedef BinInfoT<BINS,PrimRefMB,BBox> ObjectBinner;
  178. static const size_t PARALLEL_THRESHOLD = 3 * 1024;
  179. static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
  180. static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
  181. /*! finds the best split */
  182. const Split find(const SetMB& set, const size_t logBlockSize)
  183. {
  184. ObjectBinner binner(empty);
  185. const BinMapping<BINS> mapping(set.size(),set.centBounds);
  186. bin_parallel(binner,set.prims->data(),set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,mapping);
  187. Split osplit = binner.best(mapping,logBlockSize);
  188. osplit.sah *= set.time_range.size();
  189. if (!osplit.valid()) osplit.data = Split::SPLIT_FALLBACK; // use fallback split
  190. return osplit;
  191. }
  192. /*! array partitioning */
  193. __forceinline void split(const Split& split, const SetMB& set, SetMB& lset, SetMB& rset)
  194. {
  195. const size_t begin = set.begin();
  196. const size_t end = set.end();
  197. PrimInfoMB left = empty;
  198. PrimInfoMB right = empty;
  199. const vint4 vSplitPos(split.pos);
  200. const vbool4 vSplitMask(1 << split.dim);
  201. auto isLeft = [&] (const PrimRefMB &ref) { return any(((vint4)split.mapping.bin_unsafe(ref) < vSplitPos) & vSplitMask); };
  202. auto reduction = [] (PrimInfoMB& pinfo, const PrimRefMB& ref) { pinfo.add_primref(ref); };
  203. auto reduction2 = [] (PrimInfoMB& pinfo0,const PrimInfoMB& pinfo1) { pinfo0.merge(pinfo1); };
  204. size_t center = parallel_partitioning(set.prims->data(),begin,end,EmptyTy(),left,right,isLeft,reduction,reduction2,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD);
  205. new (&lset) SetMB(left, set.prims,range<size_t>(begin,center),set.time_range);
  206. new (&rset) SetMB(right,set.prims,range<size_t>(center,end ),set.time_range);
  207. }
  208. };
  209. #endif
  210. }
  211. }