heuristic_strand_array.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327
  1. // ======================================================================== //
  2. // Copyright 2009-2017 Intel Corporation //
  3. // //
  4. // Licensed under the Apache License, Version 2.0 (the "License"); //
  5. // you may not use this file except in compliance with the License. //
  6. // You may obtain a copy of the License at //
  7. // //
  8. // http://www.apache.org/licenses/LICENSE-2.0 //
  9. // //
  10. // Unless required by applicable law or agreed to in writing, software //
  11. // distributed under the License is distributed on an "AS IS" BASIS, //
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
  13. // See the License for the specific language governing permissions and //
  14. // limitations under the License. //
  15. // ======================================================================== //
  16. #pragma once
  17. #include "priminfo.h"
  18. #include "../geometry/bezier1v.h"
  19. #include "../../common/algorithms/parallel_reduce.h"
  20. #include "../../common/algorithms/parallel_partition.h"
  21. namespace embree
  22. {
  23. namespace isa
  24. {
  25. /*! Performs standard object binning */
  26. struct HeuristicStrandSplit
  27. {
  28. typedef range<size_t> Set;
  29. static const size_t PARALLEL_THRESHOLD = 10000;
  30. static const size_t PARALLEL_FIND_BLOCK_SIZE = 4096;
  31. static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 64;
  32. /*! stores all information to perform some split */
  33. struct Split
  34. {
  35. /*! construct an invalid split by default */
  36. __forceinline Split()
  37. : sah(inf), axis0(zero), axis1(zero) {}
  38. /*! constructs specified split */
  39. __forceinline Split(const float sah, const Vec3fa& axis0, const Vec3fa& axis1)
  40. : sah(sah), axis0(axis0), axis1(axis1) {}
  41. /*! calculates standard surface area heuristic for the split */
  42. __forceinline float splitSAH() const { return sah; }
  43. /*! test if this split is valid */
  44. __forceinline bool valid() const { return sah != float(inf); }
  45. public:
  46. float sah; //!< SAH cost of the split
  47. Vec3fa axis0, axis1; //!< axis the two strands are aligned into
  48. };
  49. __forceinline HeuristicStrandSplit () // FIXME: required?
  50. : scene(nullptr), prims(nullptr) {}
  51. /*! remember prim array */
  52. __forceinline HeuristicStrandSplit (Scene* scene, PrimRef* prims)
  53. : scene(scene), prims(prims) {}
  54. /*! finds the best split */
  55. const Split find(const range<size_t>& set)
  56. {
  57. if (likely(set.size() < PARALLEL_THRESHOLD)) return sequential_find(set);
  58. else return parallel_find(set);
  59. }
  60. __forceinline const Vec3fa direction(const PrimRef& prim)
  61. {
  62. const Curve3fa curve = scene->get<NativeCurves>(prim.geomID())->getCurve(prim.primID());
  63. return curve.end()-curve.begin();
  64. }
  65. __forceinline const BBox3fa bounds(const PrimRef& prim)
  66. {
  67. NativeCurves* curves = scene->get<NativeCurves>(prim.geomID());
  68. return curves->bounds(prim.primID());
  69. }
  70. __forceinline const BBox3fa bounds(const LinearSpace3fa& space, const PrimRef& prim)
  71. {
  72. NativeCurves* curves = scene->get<NativeCurves>(prim.geomID());
  73. return curves->bounds(space,prim.primID());
  74. }
  75. /*! finds the best split */
  76. const Split sequential_find(const range<size_t>& set)
  77. {
  78. /* first curve determines first axis */
  79. Vec3fa axis0 = normalize(direction(prims[set.begin()]));
  80. /* find 2nd axis that is most misaligned with first axis */
  81. float bestCos = 1.0f;
  82. Vec3fa axis1 = axis0;
  83. for (size_t i=set.begin(); i<set.end(); i++)
  84. {
  85. Vec3fa axisi = direction(prims[i]);
  86. float leni = length(axisi);
  87. if (leni == 0.0f) continue;
  88. axisi /= leni;
  89. float cos = abs(dot(axisi,axis0));
  90. if (cos < bestCos) { bestCos = cos; axis1 = axisi; }
  91. }
  92. /* partition the two strands */
  93. size_t lnum = 0, rnum = 0;
  94. BBox3fa lbounds = empty, rbounds = empty;
  95. const LinearSpace3fa space0 = frame(axis0).transposed();
  96. const LinearSpace3fa space1 = frame(axis1).transposed();
  97. for (size_t i=set.begin(); i<set.end(); i++)
  98. {
  99. PrimRef& prim = prims[i];
  100. const Vec3fa axisi = normalize(direction(prim));
  101. const float cos0 = abs(dot(axisi,axis0));
  102. const float cos1 = abs(dot(axisi,axis1));
  103. if (cos0 > cos1) { lnum++; lbounds.extend(bounds(space0,prim)); }
  104. else { rnum++; rbounds.extend(bounds(space1,prim)); }
  105. }
  106. /*! return an invalid split if we do not partition */
  107. if (lnum == 0 || rnum == 0)
  108. return Split(inf,axis0,axis1);
  109. /*! calculate sah for the split */
  110. const float sah = madd(float(lnum),halfArea(lbounds),float(rnum)*halfArea(rbounds));
  111. return Split(sah,axis0,axis1);
  112. }
  113. /*! finds the best split */
  114. const Split parallel_find(const range<size_t>& set)
  115. {
  116. /* first curve determines first axis */
  117. const Vec3fa axis0 = normalize(direction(prims[set.begin()]));
  118. /* find 2nd axis that is most misaligned with first axis */
  119. struct __aligned(16) BestAxis
  120. {
  121. __forceinline BestAxis ()
  122. : cos(inf), axis(Vec3fa(1.0f,0.0f,0.0f)) {}
  123. __forceinline BestAxis (float otherCos, const Vec3fa& otherAxis)
  124. : cos(otherCos), axis(otherAxis) {}
  125. public:
  126. float cos;
  127. Vec3fa axis;
  128. };
  129. const BestAxis best = parallel_reduce
  130. (set.begin(),set.end(),size_t(1024),BestAxis(inf,axis0),
  131. [&] (const range<size_t>& r) -> BestAxis
  132. {
  133. BestAxis best(inf,axis0);
  134. for (size_t i=r.begin(); i<r.end(); i++)
  135. {
  136. Vec3fa axisi = direction(prims[i]);
  137. float leni = length(axisi);
  138. if (leni == 0.0f) continue;
  139. axisi /= leni;
  140. float cos = abs(dot(axisi,axis0));
  141. if (cos < best.cos) { best.cos = cos; best.axis = axisi; }
  142. }
  143. return best;
  144. }, [] (const BestAxis& axis0, const BestAxis& axis1) -> BestAxis {
  145. if (axis0.cos < axis1.cos) return axis0; else return axis1;
  146. });
  147. const Vec3fa axis1 = best.axis;
  148. /* partition the two strands */
  149. struct __aligned(16) Info
  150. {
  151. __forceinline Info()
  152. : lnum(0), rnum(0), lbounds(empty), rbounds(empty) {}
  153. __forceinline static Info merge(const Info& a, const Info& b)
  154. {
  155. Info c;
  156. c.lnum = a.lnum+b.lnum;
  157. c.rnum = a.rnum+b.rnum;
  158. c.lbounds = embree::merge(a.lbounds,b.lbounds);
  159. c.rbounds = embree::merge(a.rbounds,b.rbounds);
  160. return c;
  161. }
  162. size_t lnum, rnum;
  163. BBox3fa lbounds, rbounds;
  164. };
  165. const LinearSpace3fa space0 = frame(axis0).transposed();
  166. const LinearSpace3fa space1 = frame(axis1).transposed();
  167. Info info = parallel_reduce
  168. (set.begin(), set.end(), size_t(1024), Info(),
  169. [&] (const range<size_t>& r) -> Info
  170. {
  171. Info info;
  172. for (size_t i=r.begin(); i<r.end(); i++)
  173. {
  174. PrimRef& prim = prims[i];
  175. const Vec3fa axisi = normalize(direction(prim));
  176. const float cos0 = abs(dot(axisi,axis0));
  177. const float cos1 = abs(dot(axisi,axis1));
  178. if (cos0 > cos1) { info.lnum++; info.lbounds.extend(bounds(space0,prim)); }
  179. else { info.rnum++; info.rbounds.extend(bounds(space1,prim)); }
  180. }
  181. return info;
  182. },
  183. [] (const Info& info0, const Info& info1) {
  184. return Info::merge(info0,info1);
  185. });
  186. /*! return an invalid split if we do not partition */
  187. if (info.lnum == 0 || info.rnum == 0)
  188. return Split(inf,axis0,axis1);
  189. /*! calculate sah for the split */
  190. const float sah = madd(float(info.lnum),halfArea(info.lbounds),float(info.rnum)*halfArea(info.rbounds));
  191. return Split(sah,axis0,axis1);
  192. }
  193. /*! array partitioning */
  194. void split(const Split& spliti, const PrimInfoRange& set, PrimInfoRange& lset, PrimInfoRange& rset)
  195. {
  196. if (likely(set.size() < PARALLEL_THRESHOLD))
  197. sequential_split(spliti,set,lset,rset);
  198. else
  199. parallel_split(spliti,set,lset,rset);
  200. }
  201. /*! array partitioning */
  202. void sequential_split(const Split& split, const PrimInfoRange& set, PrimInfoRange& lset, PrimInfoRange& rset)
  203. {
  204. if (!split.valid()) {
  205. deterministic_order(set);
  206. return splitFallback(set,lset,rset);
  207. }
  208. const size_t begin = set.begin();
  209. const size_t end = set.end();
  210. CentGeomBBox3fa local_left(empty);
  211. CentGeomBBox3fa local_right(empty);
  212. auto primOnLeftSide = [&] (const PrimRef& prim) -> bool {
  213. const Vec3fa axisi = normalize(direction(prim));
  214. const float cos0 = abs(dot(axisi,split.axis0));
  215. const float cos1 = abs(dot(axisi,split.axis1));
  216. return cos0 > cos1;
  217. };
  218. auto mergePrimBounds = [this] (CentGeomBBox3fa& pinfo,const PrimRef& ref) {
  219. pinfo.extend(bounds(ref));
  220. };
  221. size_t center = serial_partitioning(prims,begin,end,local_left,local_right,primOnLeftSide,mergePrimBounds);
  222. new (&lset) PrimInfoRange(begin,center,local_left.geomBounds,local_left.centBounds);
  223. new (&rset) PrimInfoRange(center,end,local_right.geomBounds,local_right.centBounds);
  224. assert(area(lset.geomBounds) >= 0.0f);
  225. assert(area(rset.geomBounds) >= 0.0f);
  226. }
  227. /*! array partitioning */
  228. void parallel_split(const Split& split, const PrimInfoRange& set, PrimInfoRange& lset, PrimInfoRange& rset)
  229. {
  230. if (!split.valid()) {
  231. deterministic_order(set);
  232. return splitFallback(set,lset,rset);
  233. }
  234. const size_t begin = set.begin();
  235. const size_t end = set.end();
  236. auto primOnLeftSide = [&] (const PrimRef& prim) -> bool {
  237. const Vec3fa axisi = normalize(direction(prim));
  238. const float cos0 = abs(dot(axisi,split.axis0));
  239. const float cos1 = abs(dot(axisi,split.axis1));
  240. return cos0 > cos1;
  241. };
  242. PrimInfo linfo; linfo.reset(); // FIXME: use CentGeomBBox3fa
  243. PrimInfo rinfo; rinfo.reset();
  244. const size_t center = parallel_partitioning(
  245. prims,begin,end,EmptyTy(),linfo,rinfo,primOnLeftSide,
  246. [this] (PrimInfo &pinfo, const PrimRef& ref) { pinfo.add(bounds(ref)); },
  247. [] (PrimInfo &pinfo0,const PrimInfo& pinfo1) { pinfo0.merge(pinfo1); },
  248. PARALLEL_PARTITION_BLOCK_SIZE);
  249. new (&lset) PrimInfoRange(begin,center,linfo.geomBounds,linfo.centBounds);
  250. new (&rset) PrimInfoRange(center,end,rinfo.geomBounds,rinfo.centBounds);
  251. }
  252. void deterministic_order(const Set& set)
  253. {
  254. /* required as parallel partition destroys original primitive order */
  255. std::sort(&prims[set.begin()],&prims[set.end()]);
  256. }
  257. void splitFallback(const Set& set, PrimInfoRange& lset, PrimInfoRange& rset)
  258. {
  259. const size_t begin = set.begin();
  260. const size_t end = set.end();
  261. const size_t center = (begin + end)/2;
  262. CentGeomBBox3fa left; left.reset();
  263. for (size_t i=begin; i<center; i++)
  264. left.extend(bounds(prims[i]));
  265. new (&lset) PrimInfoRange(begin,center,left.geomBounds,left.centBounds);
  266. CentGeomBBox3fa right; right.reset();
  267. for (size_t i=center; i<end; i++)
  268. right.extend(bounds(prims[i]));
  269. new (&rset) PrimInfoRange(center,end,right.geomBounds,right.centBounds);
  270. }
  271. private:
  272. Scene* const scene;
  273. PrimRef* const prims;
  274. };
  275. }
  276. }