heuristic_spatial.h 14 KB


  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "priminfo.h"
  5. namespace embree
  6. {
  /*! Number of most-significant bits of a 32-bit geomID that the spatial
   *  binning code reads as the per-primitive split count
   *  (geomID >> (32 - RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)). */
  static const unsigned RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS = 5u;
  8. namespace isa
  9. {
  10. /*! mapping into bins */
  11. template<size_t BINS>
  12. struct SpatialBinMapping
  13. {
  14. public:
  15. __forceinline SpatialBinMapping() {}
  16. /*! calculates the mapping */
  17. __forceinline SpatialBinMapping(const CentGeomBBox3fa& pinfo)
  18. {
  19. const vfloat4 lower = (vfloat4) pinfo.geomBounds.lower;
  20. const vfloat4 upper = (vfloat4) pinfo.geomBounds.upper;
  21. const vfloat4 eps = 128.0f*vfloat4(ulp)*max(abs(lower),abs(upper));
  22. const vfloat4 diag = max(eps,(vfloat4) pinfo.geomBounds.size());
  23. scale = select(upper-lower <= eps,vfloat4(0.0f),vfloat4(BINS)/diag);
  24. ofs = (vfloat4) pinfo.geomBounds.lower;
  25. inv_scale = 1.0f / scale;
  26. }
  27. /*! slower but safe binning */
  28. __forceinline vint4 bin(const Vec3fa& p) const
  29. {
  30. const vint4 i = floori((vfloat4(p)-ofs)*scale);
  31. return clamp(i,vint4(0),vint4(BINS-1));
  32. }
  33. __forceinline std::pair<vint4,vint4> bin(const BBox3fa& b) const
  34. {
  35. #if defined(__AVX__)
  36. const vfloat8 ofs8(ofs);
  37. const vfloat8 scale8(scale);
  38. const vint8 lu = floori((vfloat8::loadu(&b)-ofs8)*scale8);
  39. const vint8 c_lu = clamp(lu,vint8(zero),vint8(BINS-1));
  40. return std::pair<vint4,vint4>(extract4<0>(c_lu),extract4<1>(c_lu));
  41. #else
  42. const vint4 lower = floori((vfloat4(b.lower)-ofs)*scale);
  43. const vint4 upper = floori((vfloat4(b.upper)-ofs)*scale);
  44. const vint4 c_lower = clamp(lower,vint4(0),vint4(BINS-1));
  45. const vint4 c_upper = clamp(upper,vint4(0),vint4(BINS-1));
  46. return std::pair<vint4,vint4>(c_lower,c_upper);
  47. #endif
  48. }
  49. /*! calculates left spatial position of bin */
  50. __forceinline float pos(const size_t bin, const size_t dim) const {
  51. return madd(float(bin),inv_scale[dim],ofs[dim]);
  52. }
  53. /*! calculates left spatial position of bin */
  54. template<size_t N>
  55. __forceinline vfloat<N> posN(const vfloat<N> bin, const size_t dim) const {
  56. return madd(bin,vfloat<N>(inv_scale[dim]),vfloat<N>(ofs[dim]));
  57. }
  58. /*! returns true if the mapping is invalid in some dimension */
  59. __forceinline bool invalid(const size_t dim) const {
  60. return scale[dim] == 0.0f;
  61. }
  62. public:
  63. vfloat4 ofs,scale,inv_scale; //!< linear function that maps to bin ID
  64. };
  65. /*! stores all information required to perform some split */
  66. template<size_t BINS>
  67. struct SpatialBinSplit
  68. {
  69. /*! construct an invalid split by default */
  70. __forceinline SpatialBinSplit()
  71. : sah(inf), dim(-1), pos(0), left(-1), right(-1), factor(1.0f) {}
  72. /*! constructs specified split */
  73. __forceinline SpatialBinSplit(float sah, int dim, int pos, const SpatialBinMapping<BINS>& mapping)
  74. : sah(sah), dim(dim), pos(pos), left(-1), right(-1), factor(1.0f), mapping(mapping) {}
  75. /*! constructs specified split */
  76. __forceinline SpatialBinSplit(float sah, int dim, int pos, int left, int right, float factor, const SpatialBinMapping<BINS>& mapping)
  77. : sah(sah), dim(dim), pos(pos), left(left), right(right), factor(factor), mapping(mapping) {}
  78. /*! tests if this split is valid */
  79. __forceinline bool valid() const { return dim != -1; }
  80. /*! calculates surface area heuristic for performing the split */
  81. __forceinline float splitSAH() const { return sah; }
  82. /*! stream output */
  83. friend embree_ostream operator<<(embree_ostream cout, const SpatialBinSplit& split) {
  84. return cout << "SpatialBinSplit { sah = " << split.sah << ", dim = " << split.dim << ", pos = " << split.pos << ", left = " << split.left << ", right = " << split.right << ", factor = " << split.factor << "}";
  85. }
  86. public:
  87. float sah; //!< SAH cost of the split
  88. int dim; //!< split dimension
  89. int pos; //!< split position
  90. int left; //!< number of elements on the left side
  91. int right; //!< number of elements on the right side
  92. float factor; //!< factor splitting the extended range
  93. SpatialBinMapping<BINS> mapping; //!< mapping into bins
  94. };
  95. /*! stores all binning information */
  96. template<size_t BINS, typename PrimRef>
  97. struct __aligned(64) SpatialBinInfo
  98. {
  99. SpatialBinInfo() {
  100. }
  101. __forceinline SpatialBinInfo(EmptyTy) {
  102. clear();
  103. }
  104. /*! clears the bin info */
  105. __forceinline void clear()
  106. {
  107. for (size_t i=0; i<BINS; i++) {
  108. bounds[i][0] = bounds[i][1] = bounds[i][2] = empty;
  109. numBegin[i] = numEnd[i] = 0;
  110. }
  111. }
  112. /*! adds binning data */
  113. __forceinline void add(const size_t dim,
  114. const size_t beginID,
  115. const size_t endID,
  116. const size_t binID,
  117. const BBox3fa &b,
  118. const size_t n = 1)
  119. {
  120. assert(beginID < BINS);
  121. assert(endID < BINS);
  122. assert(binID < BINS);
  123. numBegin[beginID][dim]+=(unsigned int)n;
  124. numEnd [endID][dim]+=(unsigned int)n;
  125. bounds [binID][dim].extend(b);
  126. }
  127. /*! extends binning bounds */
  128. __forceinline void extend(const size_t dim,
  129. const size_t binID,
  130. const BBox3fa &b)
  131. {
  132. assert(binID < BINS);
  133. bounds [binID][dim].extend(b);
  134. }
  135. /*! bins an array of primitives */
  136. template<typename PrimitiveSplitterFactory>
  137. __forceinline void bin2(const PrimitiveSplitterFactory& splitterFactory, const PrimRef* source, size_t begin, size_t end, const SpatialBinMapping<BINS>& mapping)
  138. {
  139. for (size_t i=begin; i<end; i++)
  140. {
  141. const PrimRef& prim = source[i];
  142. unsigned splits = prim.geomID() >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS);
  143. if (unlikely(splits <= 1))
  144. {
  145. const vint4 bin = mapping.bin(center(prim.bounds()));
  146. for (size_t dim=0; dim<3; dim++)
  147. {
  148. assert(bin[dim] >= (int)0 && bin[dim] < (int)BINS);
  149. add(dim,bin[dim],bin[dim],bin[dim],prim.bounds());
  150. }
  151. }
  152. else
  153. {
  154. const vint4 bin0 = mapping.bin(prim.bounds().lower);
  155. const vint4 bin1 = mapping.bin(prim.bounds().upper);
  156. for (size_t dim=0; dim<3; dim++)
  157. {
  158. if (unlikely(mapping.invalid(dim)))
  159. continue;
  160. size_t bin;
  161. size_t l = bin0[dim];
  162. size_t r = bin1[dim];
  163. // same bin optimization
  164. if (likely(l == r))
  165. {
  166. add(dim,l,l,l,prim.bounds());
  167. continue;
  168. }
  169. size_t bin_start = bin0[dim];
  170. size_t bin_end = bin1[dim];
  171. BBox3fa rest = prim.bounds();
  172. /* assure that split position always overlaps the primitive bounds */
  173. while (bin_start < bin_end && mapping.pos(bin_start+1,dim) <= rest.lower[dim]) bin_start++;
  174. while (bin_start < bin_end && mapping.pos(bin_end ,dim) >= rest.upper[dim]) bin_end--;
  175. const auto splitter = splitterFactory(prim);
  176. for (bin=bin_start; bin<bin_end; bin++)
  177. {
  178. const float pos = mapping.pos(bin+1,dim);
  179. BBox3fa left,right;
  180. splitter(rest,dim,pos,left,right);
  181. if (unlikely(left.empty())) l++;
  182. extend(dim,bin,left);
  183. rest = right;
  184. }
  185. if (unlikely(rest.empty())) r--;
  186. add(dim,l,r,bin,rest);
  187. }
  188. }
  189. }
  190. }
  191. /*! bins an array of primitives */
  192. __forceinline void binSubTreeRefs(const PrimRef* source, size_t begin, size_t end, const SpatialBinMapping<BINS>& mapping)
  193. {
  194. for (size_t i=begin; i<end; i++)
  195. {
  196. const PrimRef &prim = source[i];
  197. const vint4 bin0 = mapping.bin(prim.bounds().lower);
  198. const vint4 bin1 = mapping.bin(prim.bounds().upper);
  199. for (size_t dim=0; dim<3; dim++)
  200. {
  201. if (unlikely(mapping.invalid(dim)))
  202. continue;
  203. const size_t l = bin0[dim];
  204. const size_t r = bin1[dim];
  205. const unsigned int n = prim.primID();
  206. // same bin optimization
  207. if (likely(l == r))
  208. {
  209. add(dim,l,l,l,prim.bounds(),n);
  210. continue;
  211. }
  212. const size_t bin_start = bin0[dim];
  213. const size_t bin_end = bin1[dim];
  214. for (size_t bin=bin_start; bin<bin_end; bin++)
  215. add(dim,l,r,bin,prim.bounds(),n);
  216. }
  217. }
  218. }
  219. /*! merges in other binning information */
  220. void merge (const SpatialBinInfo& other)
  221. {
  222. for (size_t i=0; i<BINS; i++)
  223. {
  224. numBegin[i] += other.numBegin[i];
  225. numEnd [i] += other.numEnd [i];
  226. bounds[i][0].extend(other.bounds[i][0]);
  227. bounds[i][1].extend(other.bounds[i][1]);
  228. bounds[i][2].extend(other.bounds[i][2]);
  229. }
  230. }
  231. /*! merges in other binning information */
  232. static __forceinline const SpatialBinInfo reduce (const SpatialBinInfo& a, const SpatialBinInfo& b)
  233. {
  234. SpatialBinInfo c(empty);
  235. for (size_t i=0; i<BINS; i++)
  236. {
  237. c.numBegin[i] += a.numBegin[i]+b.numBegin[i];
  238. c.numEnd [i] += a.numEnd [i]+b.numEnd [i];
  239. c.bounds[i][0] = embree::merge(a.bounds[i][0],b.bounds[i][0]);
  240. c.bounds[i][1] = embree::merge(a.bounds[i][1],b.bounds[i][1]);
  241. c.bounds[i][2] = embree::merge(a.bounds[i][2],b.bounds[i][2]);
  242. }
  243. return c;
  244. }
  245. /*! finds the best split by scanning binning information */
  246. SpatialBinSplit<BINS> best(const SpatialBinMapping<BINS>& mapping, const size_t blocks_shift) const
  247. {
  248. /* sweep from right to left and compute parallel prefix of merged bounds */
  249. vfloat4 rAreas[BINS];
  250. vuint4 rCounts[BINS];
  251. vuint4 count = 0; BBox3fa bx = empty; BBox3fa by = empty; BBox3fa bz = empty;
  252. for (size_t i=BINS-1; i>0; i--)
  253. {
  254. count += numEnd[i];
  255. rCounts[i] = count;
  256. bx.extend(bounds[i][0]); rAreas[i][0] = halfArea(bx);
  257. by.extend(bounds[i][1]); rAreas[i][1] = halfArea(by);
  258. bz.extend(bounds[i][2]); rAreas[i][2] = halfArea(bz);
  259. rAreas[i][3] = 0.0f;
  260. }
  261. /* sweep from left to right and compute SAH */
  262. vuint4 blocks_add = (1 << blocks_shift)-1;
  263. vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0; vuint4 vbestlCount = 0; vuint4 vbestrCount = 0;
  264. count = 0; bx = empty; by = empty; bz = empty;
  265. for (size_t i=1; i<BINS; i++, ii+=1)
  266. {
  267. count += numBegin[i-1];
  268. bx.extend(bounds[i-1][0]); float Ax = halfArea(bx);
  269. by.extend(bounds[i-1][1]); float Ay = halfArea(by);
  270. bz.extend(bounds[i-1][2]); float Az = halfArea(bz);
  271. const vfloat4 lArea = vfloat4(Ax,Ay,Az,Az);
  272. const vfloat4 rArea = rAreas[i];
  273. const vuint4 lCount = (count +blocks_add) >> (unsigned int)(blocks_shift);
  274. const vuint4 rCount = (rCounts[i]+blocks_add) >> (unsigned int)(blocks_shift);
  275. const vfloat4 sah = madd(lArea,vfloat4(lCount),rArea*vfloat4(rCount));
  276. // const vfloat4 sah = madd(lArea,vfloat4(vint4(lCount)),rArea*vfloat4(vint4(rCount)));
  277. const vbool4 mask = sah < vbestSAH;
  278. vbestPos = select(mask,ii ,vbestPos);
  279. vbestSAH = select(mask,sah,vbestSAH);
  280. vbestlCount = select(mask,count,vbestlCount);
  281. vbestrCount = select(mask,rCounts[i],vbestrCount);
  282. }
  283. /* find best dimension */
  284. float bestSAH = inf;
  285. int bestDim = -1;
  286. int bestPos = 0;
  287. unsigned int bestlCount = 0;
  288. unsigned int bestrCount = 0;
  289. for (int dim=0; dim<3; dim++)
  290. {
  291. /* ignore zero sized dimensions */
  292. if (unlikely(mapping.invalid(dim)))
  293. continue;
  294. /* test if this is a better dimension */
  295. if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) {
  296. bestDim = dim;
  297. bestPos = vbestPos[dim];
  298. bestSAH = vbestSAH[dim];
  299. bestlCount = vbestlCount[dim];
  300. bestrCount = vbestrCount[dim];
  301. }
  302. }
  303. assert(bestSAH >= 0.0f);
  304. /* return invalid split if no split found */
  305. if (bestDim == -1)
  306. return SpatialBinSplit<BINS>(inf,-1,0,mapping);
  307. /* return best found split */
  308. return SpatialBinSplit<BINS>(bestSAH,bestDim,bestPos,bestlCount,bestrCount,1.0f,mapping);
  309. }
  310. private:
  311. BBox3fa bounds[BINS][3]; //!< geometry bounds for each bin in each dimension
  312. vuint4 numBegin[BINS]; //!< number of primitives starting in bin
  313. vuint4 numEnd[BINS]; //!< number of primitives ending in bin
  314. };
  315. }
  316. }