heuristic_binning_array_aligned.h 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168
  1. // ======================================================================== //
  2. // Copyright 2009-2017 Intel Corporation //
  3. // //
  4. // Licensed under the Apache License, Version 2.0 (the "License"); //
  5. // you may not use this file except in compliance with the License. //
  6. // You may obtain a copy of the License at //
  7. // //
  8. // http://www.apache.org/licenses/LICENSE-2.0 //
  9. // //
  10. // Unless required by applicable law or agreed to in writing, software //
  11. // distributed under the License is distributed on an "AS IS" BASIS, //
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
  13. // See the License for the specific language governing permissions and //
  14. // limitations under the License. //
  15. // ======================================================================== //
  16. #pragma once
  17. #include "heuristic_binning.h"
  18. namespace embree
  19. {
  20. namespace isa
  21. {
  22. struct PrimInfoRange : public CentGeomBBox3fa, public range<size_t>
  23. {
  24. __forceinline PrimInfoRange () {
  25. }
  26. __forceinline PrimInfoRange(const PrimInfo& pinfo)
  27. : CentGeomBBox3fa(pinfo), range<size_t>(pinfo.begin,pinfo.end) {}
  28. __forceinline PrimInfoRange(EmptyTy)
  29. : CentGeomBBox3fa(EmptyTy()), range<size_t>(0,0) {}
  30. __forceinline PrimInfoRange (size_t begin, size_t end, const BBox3fa& geomBounds, const BBox3fa& centBounds)
  31. : CentGeomBBox3fa(geomBounds,centBounds), range<size_t>(begin,end) {}
  32. __forceinline float leafSAH() const {
  33. return expectedApproxHalfArea(geomBounds)*float(size());
  34. }
  35. __forceinline float leafSAH(size_t block_shift) const {
  36. return expectedApproxHalfArea(geomBounds)*float((size()+(size_t(1)<<block_shift)-1) >> block_shift);
  37. }
  38. };
  39. /*! Performs standard object binning */
  40. template<typename PrimRef, size_t BINS>
  41. struct HeuristicArrayBinningSAH
  42. {
  43. typedef BinSplit<BINS> Split;
  44. typedef BinInfoT<BINS,PrimRef,BBox3fa> Binner;
  45. typedef range<size_t> Set;
  46. #if defined(__AVX512ER__) // KNL
  47. static const size_t PARALLEL_THRESHOLD = 4*768;
  48. static const size_t PARALLEL_FIND_BLOCK_SIZE = 768;
  49. static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 768;
  50. #else
  51. static const size_t PARALLEL_THRESHOLD = 3 * 1024;
  52. static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
  53. static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
  54. #endif
  55. __forceinline HeuristicArrayBinningSAH ()
  56. : prims(nullptr) {}
  57. /*! remember prim array */
  58. __forceinline HeuristicArrayBinningSAH (PrimRef* prims)
  59. : prims(prims) {}
  60. /*! finds the best split */
  61. __noinline const Split find(const PrimInfoRange& pinfo, const size_t logBlockSize)
  62. {
  63. if (likely(pinfo.size() < PARALLEL_THRESHOLD))
  64. return find_template<false>(pinfo,logBlockSize);
  65. else
  66. return find_template<true>(pinfo,logBlockSize);
  67. }
  68. template<bool parallel>
  69. __forceinline const Split find_template(const PrimInfoRange& pinfo, const size_t logBlockSize)
  70. {
  71. Binner binner(empty);
  72. const BinMapping<BINS> mapping(pinfo);
  73. bin_serial_or_parallel<parallel>(binner,prims,pinfo.begin(),pinfo.end(),PARALLEL_FIND_BLOCK_SIZE,mapping);
  74. return binner.best(mapping,logBlockSize);
  75. }
  76. /*! array partitioning */
  77. __forceinline void split(const Split& split, const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo)
  78. {
  79. if (likely(pinfo.size() < PARALLEL_THRESHOLD))
  80. split_template<false>(split,pinfo,linfo,rinfo);
  81. else
  82. split_template<true>(split,pinfo,linfo,rinfo);
  83. }
  84. template<bool parallel>
  85. __forceinline void split_template(const Split& split, const PrimInfoRange& set, PrimInfoRange& lset, PrimInfoRange& rset)
  86. {
  87. if (!split.valid()) {
  88. deterministic_order(set);
  89. return splitFallback(set,lset,rset);
  90. }
  91. const size_t begin = set.begin();
  92. const size_t end = set.end();
  93. CentGeomBBox3fa local_left(empty);
  94. CentGeomBBox3fa local_right(empty);
  95. const unsigned int splitPos = split.pos;
  96. const unsigned int splitDim = split.dim;
  97. const unsigned int splitDimMask = (unsigned int)1 << splitDim;
  98. #if defined(__AVX512F__)
  99. const vint16 vSplitPos(splitPos);
  100. const vbool16 vSplitMask( splitDimMask );
  101. #else
  102. const vint4 vSplitPos(splitPos);
  103. const vbool4 vSplitMask( (int)splitDimMask );
  104. #endif
  105. auto isLeft = [&] (const PrimRef &ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); };
  106. size_t center = 0;
  107. if (!parallel)
  108. center = serial_partitioning(prims,begin,end,local_left,local_right,isLeft,
  109. [] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend(ref.bounds()); });
  110. else
  111. center = parallel_partitioning(
  112. prims,begin,end,EmptyTy(),local_left,local_right,isLeft,
  113. [] (CentGeomBBox3fa& pinfo,const PrimRef &ref) { pinfo.extend(ref.bounds()); },
  114. [] (CentGeomBBox3fa& pinfo0,const CentGeomBBox3fa &pinfo1) { pinfo0.merge(pinfo1); },
  115. PARALLEL_PARTITION_BLOCK_SIZE);
  116. new (&lset) PrimInfoRange(begin,center,local_left.geomBounds,local_left.centBounds);
  117. new (&rset) PrimInfoRange(center,end,local_right.geomBounds,local_right.centBounds);
  118. assert(area(lset.geomBounds) >= 0.0f);
  119. assert(area(rset.geomBounds) >= 0.0f);
  120. }
  121. void deterministic_order(const PrimInfoRange& pinfo)
  122. {
  123. /* required as parallel partition destroys original primitive order */
  124. std::sort(&prims[pinfo.begin()],&prims[pinfo.end()]);
  125. }
  126. void splitFallback(const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo)
  127. {
  128. const size_t begin = pinfo.begin();
  129. const size_t end = pinfo.end();
  130. const size_t center = (begin + end)/2;
  131. CentGeomBBox3fa left; left.reset();
  132. for (size_t i=begin; i<center; i++)
  133. left.extend(prims[i].bounds());
  134. new (&linfo) PrimInfoRange(begin,center,left.geomBounds,left.centBounds);
  135. CentGeomBBox3fa right; right.reset();
  136. for (size_t i=center; i<end; i++)
  137. right.extend(prims[i].bounds());
  138. new (&rinfo) PrimInfoRange(center,end,right.geomBounds,right.centBounds);
  139. }
  140. private:
  141. PrimRef* const prims;
  142. };
  143. }
  144. }