2
0

heuristic_spatial.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "../common/scene.h"
  5. #include "priminfo.h"
  6. namespace embree
  7. {
  8. static const unsigned int RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS = 5;
  9. namespace isa
  10. {
  11. /*! mapping into bins */
  12. template<size_t BINS>
  13. struct SpatialBinMapping
  14. {
  15. public:
  16. __forceinline SpatialBinMapping() {}
  17. /*! calculates the mapping */
  18. __forceinline SpatialBinMapping(const CentGeomBBox3fa& pinfo)
  19. {
  20. const vfloat4 lower = (vfloat4) pinfo.geomBounds.lower;
  21. const vfloat4 upper = (vfloat4) pinfo.geomBounds.upper;
  22. const vfloat4 eps = 128.0f*vfloat4(ulp)*max(abs(lower),abs(upper));
  23. const vfloat4 diag = max(eps,(vfloat4) pinfo.geomBounds.size());
  24. scale = select(upper-lower <= eps,vfloat4(0.0f),vfloat4(BINS)/diag);
  25. ofs = (vfloat4) pinfo.geomBounds.lower;
  26. inv_scale = 1.0f / scale;
  27. }
  28. /*! slower but safe binning */
  29. __forceinline vint4 bin(const Vec3fa& p) const
  30. {
  31. const vint4 i = floori((vfloat4(p)-ofs)*scale);
  32. return clamp(i,vint4(0),vint4(BINS-1));
  33. }
  34. __forceinline std::pair<vint4,vint4> bin(const BBox3fa& b) const
  35. {
  36. #if defined(__AVX__)
  37. const vfloat8 ofs8(ofs);
  38. const vfloat8 scale8(scale);
  39. const vint8 lu = floori((vfloat8::loadu(&b)-ofs8)*scale8);
  40. const vint8 c_lu = clamp(lu,vint8(zero),vint8(BINS-1));
  41. return std::pair<vint4,vint4>(extract4<0>(c_lu),extract4<1>(c_lu));
  42. #else
  43. const vint4 lower = floori((vfloat4(b.lower)-ofs)*scale);
  44. const vint4 upper = floori((vfloat4(b.upper)-ofs)*scale);
  45. const vint4 c_lower = clamp(lower,vint4(0),vint4(BINS-1));
  46. const vint4 c_upper = clamp(upper,vint4(0),vint4(BINS-1));
  47. return std::pair<vint4,vint4>(c_lower,c_upper);
  48. #endif
  49. }
  50. /*! calculates left spatial position of bin */
  51. __forceinline float pos(const size_t bin, const size_t dim) const {
  52. return madd(float(bin),inv_scale[dim],ofs[dim]);
  53. }
  54. /*! calculates left spatial position of bin */
  55. template<size_t N>
  56. __forceinline vfloat<N> posN(const vfloat<N> bin, const size_t dim) const {
  57. return madd(bin,vfloat<N>(inv_scale[dim]),vfloat<N>(ofs[dim]));
  58. }
  59. /*! returns true if the mapping is invalid in some dimension */
  60. __forceinline bool invalid(const size_t dim) const {
  61. return scale[dim] == 0.0f;
  62. }
  63. public:
  64. vfloat4 ofs,scale,inv_scale; //!< linear function that maps to bin ID
  65. };
  66. /*! stores all information required to perform some split */
  67. template<size_t BINS>
  68. struct SpatialBinSplit
  69. {
  70. /*! construct an invalid split by default */
  71. __forceinline SpatialBinSplit()
  72. : sah(inf), dim(-1), pos(0), left(-1), right(-1), factor(1.0f) {}
  73. /*! constructs specified split */
  74. __forceinline SpatialBinSplit(float sah, int dim, int pos, const SpatialBinMapping<BINS>& mapping)
  75. : sah(sah), dim(dim), pos(pos), left(-1), right(-1), factor(1.0f), mapping(mapping) {}
  76. /*! constructs specified split */
  77. __forceinline SpatialBinSplit(float sah, int dim, int pos, int left, int right, float factor, const SpatialBinMapping<BINS>& mapping)
  78. : sah(sah), dim(dim), pos(pos), left(left), right(right), factor(factor), mapping(mapping) {}
  79. /*! tests if this split is valid */
  80. __forceinline bool valid() const { return dim != -1; }
  81. /*! calculates surface area heuristic for performing the split */
  82. __forceinline float splitSAH() const { return sah; }
  83. /*! stream output */
  84. friend embree_ostream operator<<(embree_ostream cout, const SpatialBinSplit& split) {
  85. return cout << "SpatialBinSplit { sah = " << split.sah << ", dim = " << split.dim << ", pos = " << split.pos << ", left = " << split.left << ", right = " << split.right << ", factor = " << split.factor << "}";
  86. }
  87. public:
  88. float sah; //!< SAH cost of the split
  89. int dim; //!< split dimension
  90. int pos; //!< split position
  91. int left; //!< number of elements on the left side
  92. int right; //!< number of elements on the right side
  93. float factor; //!< factor splitting the extended range
  94. SpatialBinMapping<BINS> mapping; //!< mapping into bins
  95. };
  96. /*! stores all binning information */
  97. template<size_t BINS, typename PrimRef>
  98. struct __aligned(64) SpatialBinInfo
  99. {
  100. SpatialBinInfo() {
  101. }
  102. __forceinline SpatialBinInfo(EmptyTy) {
  103. clear();
  104. }
  105. /*! clears the bin info */
  106. __forceinline void clear()
  107. {
  108. for (size_t i=0; i<BINS; i++) {
  109. bounds[i][0] = bounds[i][1] = bounds[i][2] = empty;
  110. numBegin[i] = numEnd[i] = 0;
  111. }
  112. }
  113. /*! adds binning data */
  114. __forceinline void add(const size_t dim,
  115. const size_t beginID,
  116. const size_t endID,
  117. const size_t binID,
  118. const BBox3fa &b,
  119. const size_t n = 1)
  120. {
  121. assert(beginID < BINS);
  122. assert(endID < BINS);
  123. assert(binID < BINS);
  124. numBegin[beginID][dim]+=(unsigned int)n;
  125. numEnd [endID][dim]+=(unsigned int)n;
  126. bounds [binID][dim].extend(b);
  127. }
  128. /*! extends binning bounds */
  129. __forceinline void extend(const size_t dim,
  130. const size_t binID,
  131. const BBox3fa &b)
  132. {
  133. assert(binID < BINS);
  134. bounds [binID][dim].extend(b);
  135. }
  136. /*! bins an array of triangles */
  137. template<typename SplitPrimitive>
  138. __forceinline void bin(const SplitPrimitive& splitPrimitive, const PrimRef* prims, size_t N, const SpatialBinMapping<BINS>& mapping)
  139. {
  140. for (size_t i=0; i<N; i++)
  141. {
  142. const PrimRef prim = prims[i];
  143. unsigned splits = prim.geomID() >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS);
  144. if (unlikely(splits == 1))
  145. {
  146. const vint4 bin = mapping.bin(center(prim.bounds()));
  147. for (size_t dim=0; dim<3; dim++)
  148. {
  149. assert(bin[dim] >= (int)0 && bin[dim] < (int)BINS);
  150. numBegin[bin[dim]][dim]++;
  151. numEnd [bin[dim]][dim]++;
  152. bounds [bin[dim]][dim].extend(prim.bounds());
  153. }
  154. }
  155. else
  156. {
  157. const vint4 bin0 = mapping.bin(prim.bounds().lower);
  158. const vint4 bin1 = mapping.bin(prim.bounds().upper);
  159. for (size_t dim=0; dim<3; dim++)
  160. {
  161. size_t bin;
  162. PrimRef rest = prim;
  163. size_t l = bin0[dim];
  164. size_t r = bin1[dim];
  165. // same bin optimization
  166. if (likely(l == r))
  167. {
  168. numBegin[l][dim]++;
  169. numEnd [l][dim]++;
  170. bounds [l][dim].extend(prim.bounds());
  171. continue;
  172. }
  173. for (bin=(size_t)bin0[dim]; bin<(size_t)bin1[dim]; bin++)
  174. {
  175. const float pos = mapping.pos(bin+1,dim);
  176. PrimRef left,right;
  177. splitPrimitive(rest,(int)dim,pos,left,right);
  178. if (unlikely(left.bounds().empty())) l++;
  179. bounds[bin][dim].extend(left.bounds());
  180. rest = right;
  181. }
  182. if (unlikely(rest.bounds().empty())) r--;
  183. numBegin[l][dim]++;
  184. numEnd [r][dim]++;
  185. bounds [bin][dim].extend(rest.bounds());
  186. }
  187. }
  188. }
  189. }
  190. /*! bins a range of primitives inside an array */
  191. template<typename SplitPrimitive>
  192. void bin(const SplitPrimitive& splitPrimitive, const PrimRef* prims, size_t begin, size_t end, const SpatialBinMapping<BINS>& mapping) {
  193. bin(splitPrimitive,prims+begin,end-begin,mapping);
  194. }
  195. /*! bins an array of primitives */
  196. template<typename PrimitiveSplitterFactory>
  197. __forceinline void bin2(const PrimitiveSplitterFactory& splitterFactory, const PrimRef* source, size_t begin, size_t end, const SpatialBinMapping<BINS>& mapping)
  198. {
  199. for (size_t i=begin; i<end; i++)
  200. {
  201. const PrimRef &prim = source[i];
  202. const vint4 bin0 = mapping.bin(prim.bounds().lower);
  203. const vint4 bin1 = mapping.bin(prim.bounds().upper);
  204. for (size_t dim=0; dim<3; dim++)
  205. {
  206. if (unlikely(mapping.invalid(dim)))
  207. continue;
  208. size_t bin;
  209. size_t l = bin0[dim];
  210. size_t r = bin1[dim];
  211. // same bin optimization
  212. if (likely(l == r))
  213. {
  214. add(dim,l,l,l,prim.bounds());
  215. continue;
  216. }
  217. const size_t bin_start = bin0[dim];
  218. const size_t bin_end = bin1[dim];
  219. BBox3fa rest = prim.bounds();
  220. const auto splitter = splitterFactory(prim);
  221. for (bin=bin_start; bin<bin_end; bin++)
  222. {
  223. const float pos = mapping.pos(bin+1,dim);
  224. BBox3fa left,right;
  225. splitter(rest,dim,pos,left,right);
  226. if (unlikely(left.empty())) l++;
  227. extend(dim,bin,left);
  228. rest = right;
  229. }
  230. if (unlikely(rest.empty())) r--;
  231. add(dim,l,r,bin,rest);
  232. }
  233. }
  234. }
  235. /*! bins an array of primitives */
  236. __forceinline void binSubTreeRefs(const PrimRef* source, size_t begin, size_t end, const SpatialBinMapping<BINS>& mapping)
  237. {
  238. for (size_t i=begin; i<end; i++)
  239. {
  240. const PrimRef &prim = source[i];
  241. const vint4 bin0 = mapping.bin(prim.bounds().lower);
  242. const vint4 bin1 = mapping.bin(prim.bounds().upper);
  243. for (size_t dim=0; dim<3; dim++)
  244. {
  245. if (unlikely(mapping.invalid(dim)))
  246. continue;
  247. const size_t l = bin0[dim];
  248. const size_t r = bin1[dim];
  249. const unsigned int n = prim.primID();
  250. // same bin optimization
  251. if (likely(l == r))
  252. {
  253. add(dim,l,l,l,prim.bounds(),n);
  254. continue;
  255. }
  256. const size_t bin_start = bin0[dim];
  257. const size_t bin_end = bin1[dim];
  258. for (size_t bin=bin_start; bin<bin_end; bin++)
  259. add(dim,l,r,bin,prim.bounds(),n);
  260. }
  261. }
  262. }
  263. /*! merges in other binning information */
  264. void merge (const SpatialBinInfo& other)
  265. {
  266. for (size_t i=0; i<BINS; i++)
  267. {
  268. numBegin[i] += other.numBegin[i];
  269. numEnd [i] += other.numEnd [i];
  270. bounds[i][0].extend(other.bounds[i][0]);
  271. bounds[i][1].extend(other.bounds[i][1]);
  272. bounds[i][2].extend(other.bounds[i][2]);
  273. }
  274. }
  275. /*! merges in other binning information */
  276. static __forceinline const SpatialBinInfo reduce (const SpatialBinInfo& a, const SpatialBinInfo& b)
  277. {
  278. SpatialBinInfo c(empty);
  279. for (size_t i=0; i<BINS; i++)
  280. {
  281. c.numBegin[i] += a.numBegin[i]+b.numBegin[i];
  282. c.numEnd [i] += a.numEnd [i]+b.numEnd [i];
  283. c.bounds[i][0] = embree::merge(a.bounds[i][0],b.bounds[i][0]);
  284. c.bounds[i][1] = embree::merge(a.bounds[i][1],b.bounds[i][1]);
  285. c.bounds[i][2] = embree::merge(a.bounds[i][2],b.bounds[i][2]);
  286. }
  287. return c;
  288. }
  289. /*! finds the best split by scanning binning information */
  290. SpatialBinSplit<BINS> best(const SpatialBinMapping<BINS>& mapping, const size_t blocks_shift) const
  291. {
  292. /* sweep from right to left and compute parallel prefix of merged bounds */
  293. vfloat4 rAreas[BINS];
  294. vuint4 rCounts[BINS];
  295. vuint4 count = 0; BBox3fa bx = empty; BBox3fa by = empty; BBox3fa bz = empty;
  296. for (size_t i=BINS-1; i>0; i--)
  297. {
  298. count += numEnd[i];
  299. rCounts[i] = count;
  300. bx.extend(bounds[i][0]); rAreas[i][0] = halfArea(bx);
  301. by.extend(bounds[i][1]); rAreas[i][1] = halfArea(by);
  302. bz.extend(bounds[i][2]); rAreas[i][2] = halfArea(bz);
  303. rAreas[i][3] = 0.0f;
  304. }
  305. /* sweep from left to right and compute SAH */
  306. vuint4 blocks_add = (1 << blocks_shift)-1;
  307. vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0; vuint4 vbestlCount = 0; vuint4 vbestrCount = 0;
  308. count = 0; bx = empty; by = empty; bz = empty;
  309. for (size_t i=1; i<BINS; i++, ii+=1)
  310. {
  311. count += numBegin[i-1];
  312. bx.extend(bounds[i-1][0]); float Ax = halfArea(bx);
  313. by.extend(bounds[i-1][1]); float Ay = halfArea(by);
  314. bz.extend(bounds[i-1][2]); float Az = halfArea(bz);
  315. const vfloat4 lArea = vfloat4(Ax,Ay,Az,Az);
  316. const vfloat4 rArea = rAreas[i];
  317. const vuint4 lCount = (count +blocks_add) >> (unsigned int)(blocks_shift);
  318. const vuint4 rCount = (rCounts[i]+blocks_add) >> (unsigned int)(blocks_shift);
  319. const vfloat4 sah = madd(lArea,vfloat4(lCount),rArea*vfloat4(rCount));
  320. // const vfloat4 sah = madd(lArea,vfloat4(vint4(lCount)),rArea*vfloat4(vint4(rCount)));
  321. const vbool4 mask = sah < vbestSAH;
  322. vbestPos = select(mask,ii ,vbestPos);
  323. vbestSAH = select(mask,sah,vbestSAH);
  324. vbestlCount = select(mask,count,vbestlCount);
  325. vbestrCount = select(mask,rCounts[i],vbestrCount);
  326. }
  327. /* find best dimension */
  328. float bestSAH = inf;
  329. int bestDim = -1;
  330. int bestPos = 0;
  331. unsigned int bestlCount = 0;
  332. unsigned int bestrCount = 0;
  333. for (int dim=0; dim<3; dim++)
  334. {
  335. /* ignore zero sized dimensions */
  336. if (unlikely(mapping.invalid(dim)))
  337. continue;
  338. /* test if this is a better dimension */
  339. if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) {
  340. bestDim = dim;
  341. bestPos = vbestPos[dim];
  342. bestSAH = vbestSAH[dim];
  343. bestlCount = vbestlCount[dim];
  344. bestrCount = vbestrCount[dim];
  345. }
  346. }
  347. assert(bestSAH >= 0.0f);
  348. /* return invalid split if no split found */
  349. if (bestDim == -1)
  350. return SpatialBinSplit<BINS>(inf,-1,0,mapping);
  351. /* return best found split */
  352. return SpatialBinSplit<BINS>(bestSAH,bestDim,bestPos,bestlCount,bestrCount,1.0f,mapping);
  353. }
  354. private:
  355. BBox3fa bounds[BINS][3]; //!< geometry bounds for each bin in each dimension
  356. vuint4 numBegin[BINS]; //!< number of primitives starting in bin
  357. vuint4 numEnd[BINS]; //!< number of primitives ending in bin
  358. };
  359. }
  360. }