bezier_curve.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336
  1. // ======================================================================== //
  2. // Copyright 2009-2017 Intel Corporation //
  3. // //
  4. // Licensed under the Apache License, Version 2.0 (the "License"); //
  5. // you may not use this file except in compliance with the License. //
  6. // You may obtain a copy of the License at //
  7. // //
  8. // http://www.apache.org/licenses/LICENSE-2.0 //
  9. // //
  10. // Unless required by applicable law or agreed to in writing, software //
  11. // distributed under the License is distributed on an "AS IS" BASIS, //
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
  13. // See the License for the specific language governing permissions and //
  14. // limitations under the License. //
  15. // ======================================================================== //
  16. #pragma once
  17. #include "../common/default.h"
  18. namespace embree
  19. {
  20. class BezierBasis
  21. {
  22. public:
  23. template<typename T>
  24. static __forceinline Vec4<T> eval(const T& u)
  25. {
  26. const T t1 = u;
  27. const T t0 = 1.0f-t1;
  28. const T B0 = t0 * t0 * t0;
  29. const T B1 = 3.0f * t1 * (t0 * t0);
  30. const T B2 = 3.0f * (t1 * t1) * t0;
  31. const T B3 = t1 * t1 * t1;
  32. return Vec4<T>(B0,B1,B2,B3);
  33. }
  34. template<typename T>
  35. static __forceinline Vec4<T> derivative(const T& u)
  36. {
  37. const T t1 = u;
  38. const T t0 = 1.0f-t1;
  39. const T B0 = -(t0*t0);
  40. const T B1 = madd(-2.0f,t0*t1,t0*t0);
  41. const T B2 = msub(+2.0f,t0*t1,t1*t1);
  42. const T B3 = +(t1*t1);
  43. return T(3.0f)*Vec4<T>(B0,B1,B2,B3);
  44. }
  45. template<typename T>
  46. static __forceinline Vec4<T> derivative2(const T& u)
  47. {
  48. const T t1 = u;
  49. const T t0 = 1.0f-t1;
  50. const T B0 = t0;
  51. const T B1 = madd(-2.0f,t0,t1);
  52. const T B2 = madd(-2.0f,t1,t0);
  53. const T B3 = t1;
  54. return T(6.0f)*Vec4<T>(B0,B1,B2,B3);
  55. }
  56. };
  57. struct PrecomputedBezierBasis
  58. {
  59. enum { N = 16 };
  60. public:
  61. PrecomputedBezierBasis() {}
  62. PrecomputedBezierBasis(int shift);
  63. template<typename T>
  64. __forceinline Vec4<T> eval(const int u, const int size)
  65. {
  66. assert(size <= N);
  67. assert(u <= size);
  68. return Vec4<T>(T::loadu(&c0[size][u]),
  69. T::loadu(&c1[size][u]),
  70. T::loadu(&c2[size][u]),
  71. T::loadu(&c3[size][u]));
  72. }
  73. template<typename T>
  74. __forceinline Vec4<T> derivative(const int u, const int size)
  75. {
  76. assert(size <= N);
  77. assert(u <= size);
  78. return Vec4<T>(T::loadu(&d0[size][u]),
  79. T::loadu(&d1[size][u]),
  80. T::loadu(&d2[size][u]),
  81. T::loadu(&d3[size][u]));
  82. }
  83. /* basis for bezier evaluation */
  84. public:
  85. float c0[N+1][N+1];
  86. float c1[N+1][N+1];
  87. float c2[N+1][N+1];
  88. float c3[N+1][N+1];
  89. /* basis for bezier derivative evaluation */
  90. public:
  91. float d0[N+1][N+1];
  92. float d1[N+1][N+1];
  93. float d2[N+1][N+1];
  94. float d3[N+1][N+1];
  95. };
  96. extern PrecomputedBezierBasis bezier_basis0;
  97. extern PrecomputedBezierBasis bezier_basis1;
  98. template<typename Vertex>
  99. struct BezierCurveT
  100. {
  101. Vertex v0,v1,v2,v3;
  102. __forceinline BezierCurveT() {}
  103. __forceinline BezierCurveT(const Vertex& v0, const Vertex& v1, const Vertex& v2, const Vertex& v3)
  104. : v0(v0), v1(v1), v2(v2), v3(v3) {}
  105. __forceinline Vertex begin() const {
  106. return v0;
  107. }
  108. __forceinline Vertex end() const {
  109. return v3;
  110. }
  111. __forceinline Vertex eval(const float t) const
  112. {
  113. const Vec4<float> b = BezierBasis::eval(t);
  114. return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3)));
  115. }
  116. __forceinline Vertex eval_du(const float t) const
  117. {
  118. const Vec4<float> b = BezierBasis::derivative(t);
  119. return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3)));
  120. }
  121. __forceinline Vertex eval_dudu(const float t) const
  122. {
  123. const Vec4<float> b = BezierBasis::derivative2(t);
  124. return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3)));
  125. }
  126. __forceinline void eval(const float t, Vertex& p, Vertex& dp, Vertex& ddp) const
  127. {
  128. const Vertex p00 = v0;
  129. const Vertex p01 = v1;
  130. const Vertex p02 = v2;
  131. const Vertex p03 = v3;
  132. const Vertex p10 = lerp(p00,p01,t);
  133. const Vertex p11 = lerp(p01,p02,t);
  134. const Vertex p12 = lerp(p02,p03,t);
  135. const Vertex p20 = lerp(p10,p11,t);
  136. const Vertex p21 = lerp(p11,p12,t);
  137. const Vertex p30 = lerp(p20,p21,t);
  138. p = p30;
  139. dp = 3.0f*(p21-p20);
  140. ddp = eval_dudu(t);
  141. }
  142. friend inline std::ostream& operator<<(std::ostream& cout, const BezierCurveT& curve) {
  143. return cout << "BezierCurve { v0 = " << curve.v0 << ", v1 = " << curve.v1 << ", v2 = " << curve.v2 << ", v3 = " << curve.v3 << " }";
  144. }
  145. };
  146. struct BezierCurve3fa : public BezierCurveT<Vec3fa>
  147. {
  148. //using BezierCurveT<Vec3fa>::BezierCurveT; // FIXME: not supported by VS2010
  149. __forceinline BezierCurve3fa() {}
  150. __forceinline BezierCurve3fa(const Vec3fa& v0, const Vec3fa& v1, const Vec3fa& v2, const Vec3fa& v3)
  151. : BezierCurveT<Vec3fa>(v0,v1,v2,v3) {}
  152. __forceinline void evalN(const vfloatx& t, Vec4vfx& p, Vec4vfx& dp) const
  153. {
  154. const Vec4vfx p00 = v0;
  155. const Vec4vfx p01 = v1;
  156. const Vec4vfx p02 = v2;
  157. const Vec4vfx p03 = v3;
  158. const Vec4vfx p10 = lerp(p00,p01,t);
  159. const Vec4vfx p11 = lerp(p01,p02,t);
  160. const Vec4vfx p12 = lerp(p02,p03,t);
  161. const Vec4vfx p20 = lerp(p10,p11,t);
  162. const Vec4vfx p21 = lerp(p11,p12,t);
  163. const Vec4vfx p30 = lerp(p20,p21,t);
  164. p = p30;
  165. dp = vfloatx(3.0f)*(p21-p20);
  166. }
  167. #if 0
  168. template<int M>
  169. __forceinline Vec4<vfloat<M>> eval0(const int ofs, const int size) const
  170. {
  171. const Vec4<vfloat<M>> b = bezier_basis0.eval<vfloat<M>>(ofs,size);
  172. return madd(b.x, Vec4<vfloat<M>>(v0), madd(b.y, Vec4<vfloat<M>>(v1), madd(b.z, Vec4<vfloat<M>>(v2), b.w * Vec4<vfloat<M>>(v3))));
  173. }
  174. template<int M>
  175. __forceinline Vec4<vfloat<M>> eval1(const int ofs, const int size) const
  176. {
  177. const Vec4<vfloat<M>> b = bezier_basis1.eval<vfloat<M>>(ofs,size);
  178. return madd(b.x, Vec4<vfloat<M>>(v0), madd(b.y, Vec4<vfloat<M>>(v1), madd(b.z, Vec4<vfloat<M>>(v2), b.w * Vec4<vfloat<M>>(v3))));
  179. }
  180. template<int M>
  181. __forceinline Vec4<vfloat<M>> derivative0(const int ofs, const int size) const
  182. {
  183. const Vec4<vfloat<M>> b = bezier_basis0.derivative<vfloat<M>>(ofs,size);
  184. return madd(b.x, Vec4<vfloat<M>>(v0), madd(b.y, Vec4<vfloat<M>>(v1), madd(b.z, Vec4<vfloat<M>>(v2), b.w * Vec4<vfloat<M>>(v3))));
  185. }
  186. template<int M>
  187. __forceinline Vec4<vfloat<M>> derivative1(const int ofs, const int size) const
  188. {
  189. const Vec4<vfloat<M>> b = bezier_basis1.derivative<vfloat<M>>(ofs,size);
  190. return madd(b.x, Vec4<vfloat<M>>(v0), madd(b.y, Vec4<vfloat<M>>(v1), madd(b.z, Vec4<vfloat<M>>(v2), b.w * Vec4<vfloat<M>>(v3))));
  191. }
  192. #else
  193. template<int M>
  194. __forceinline Vec4<vfloat<M>> eval0(const int ofs, const int size) const
  195. {
  196. assert(size <= PrecomputedBezierBasis::N);
  197. assert(ofs <= size);
  198. return madd(vfloat<M>::loadu(&bezier_basis0.c0[size][ofs]), Vec4<vfloat<M>>(v0),
  199. madd(vfloat<M>::loadu(&bezier_basis0.c1[size][ofs]), Vec4<vfloat<M>>(v1),
  200. madd(vfloat<M>::loadu(&bezier_basis0.c2[size][ofs]), Vec4<vfloat<M>>(v2),
  201. vfloat<M>::loadu(&bezier_basis0.c3[size][ofs]) * Vec4<vfloat<M>>(v3))));
  202. }
  203. template<int M>
  204. __forceinline Vec4<vfloat<M>> eval1(const int ofs, const int size) const
  205. {
  206. assert(size <= PrecomputedBezierBasis::N);
  207. assert(ofs <= size);
  208. return madd(vfloat<M>::loadu(&bezier_basis1.c0[size][ofs]), Vec4<vfloat<M>>(v0),
  209. madd(vfloat<M>::loadu(&bezier_basis1.c1[size][ofs]), Vec4<vfloat<M>>(v1),
  210. madd(vfloat<M>::loadu(&bezier_basis1.c2[size][ofs]), Vec4<vfloat<M>>(v2),
  211. vfloat<M>::loadu(&bezier_basis1.c3[size][ofs]) * Vec4<vfloat<M>>(v3))));
  212. }
  213. template<int M>
  214. __forceinline Vec4<vfloat<M>> derivative0(const int ofs, const int size) const
  215. {
  216. assert(size <= PrecomputedBezierBasis::N);
  217. assert(ofs <= size);
  218. return madd(vfloat<M>::loadu(&bezier_basis0.d0[size][ofs]), Vec4<vfloat<M>>(v0),
  219. madd(vfloat<M>::loadu(&bezier_basis0.d1[size][ofs]), Vec4<vfloat<M>>(v1),
  220. madd(vfloat<M>::loadu(&bezier_basis0.d2[size][ofs]), Vec4<vfloat<M>>(v2),
  221. vfloat<M>::loadu(&bezier_basis0.d3[size][ofs]) * Vec4<vfloat<M>>(v3))));
  222. }
  223. template<int M>
  224. __forceinline Vec4<vfloat<M>> derivative1(const int ofs, const int size) const
  225. {
  226. assert(size <= PrecomputedBezierBasis::N);
  227. assert(ofs <= size);
  228. return madd(vfloat<M>::loadu(&bezier_basis1.d0[size][ofs]), Vec4<vfloat<M>>(v0),
  229. madd(vfloat<M>::loadu(&bezier_basis1.d1[size][ofs]), Vec4<vfloat<M>>(v1),
  230. madd(vfloat<M>::loadu(&bezier_basis1.d2[size][ofs]), Vec4<vfloat<M>>(v2),
  231. vfloat<M>::loadu(&bezier_basis1.d3[size][ofs]) * Vec4<vfloat<M>>(v3))));
  232. }
  233. #endif
  234. /* calculates bounds of bezier curve geometry */
  235. __forceinline BBox3fa accurateBounds() const
  236. {
  237. const int N = 7;
  238. const float scale = 1.0f/(3.0f*(N-1));
  239. Vec4vfx pl(pos_inf), pu(neg_inf);
  240. for (int i=0; i<=N; i+=VSIZEX)
  241. {
  242. vintx vi = vintx(i)+vintx(step);
  243. vboolx valid = vi <= vintx(N);
  244. const Vec4vfx p = eval0<VSIZEX>(i,N);
  245. const Vec4vfx dp = derivative0<VSIZEX>(i,N);
  246. const Vec4vfx pm = p-Vec4vfx(scale)*select(vi!=vintx(0),dp,Vec4vfx(zero));
  247. const Vec4vfx pp = p+Vec4vfx(scale)*select(vi!=vintx(N),dp,Vec4vfx(zero));
  248. pl = select(valid,min(pl,p,pm,pp),pl); // FIXME: use masked min
  249. pu = select(valid,max(pu,p,pm,pp),pu); // FIXME: use masked min
  250. }
  251. const Vec3fa lower(reduce_min(pl.x),reduce_min(pl.y),reduce_min(pl.z));
  252. const Vec3fa upper(reduce_max(pu.x),reduce_max(pu.y),reduce_max(pu.z));
  253. const Vec3fa upper_r = Vec3fa(reduce_max(max(abs(pl.w),abs(pu.w))));
  254. return enlarge(BBox3fa(lower,upper),upper_r);
  255. }
  256. /* calculates bounds when tessellated into N line segments */
  257. __forceinline BBox3fa tessellatedBounds(int N) const
  258. {
  259. if (likely(N == 4))
  260. {
  261. const Vec4vf4 pi = eval0<4>(0,4);
  262. const Vec3fa lower(reduce_min(pi.x),reduce_min(pi.y),reduce_min(pi.z));
  263. const Vec3fa upper(reduce_max(pi.x),reduce_max(pi.y),reduce_max(pi.z));
  264. const Vec3fa upper_r = Vec3fa(reduce_max(abs(pi.w)));
  265. return enlarge(BBox3fa(min(lower,v3),max(upper,v3)),max(upper_r,Vec3fa(abs(v3.w))));
  266. }
  267. else
  268. {
  269. Vec3vfx pl(pos_inf), pu(neg_inf); vfloatx ru(0.0f);
  270. for (int i=0; i<N; i+=VSIZEX)
  271. {
  272. vboolx valid = vintx(i)+vintx(step) < vintx(N);
  273. const Vec4vfx pi = eval0<VSIZEX>(i,N);
  274. pl.x = select(valid,min(pl.x,pi.x),pl.x); // FIXME: use masked min
  275. pl.y = select(valid,min(pl.y,pi.y),pl.y);
  276. pl.z = select(valid,min(pl.z,pi.z),pl.z);
  277. pu.x = select(valid,max(pu.x,pi.x),pu.x); // FIXME: use masked min
  278. pu.y = select(valid,max(pu.y,pi.y),pu.y);
  279. pu.z = select(valid,max(pu.z,pi.z),pu.z);
  280. ru = select(valid,max(ru,abs(pi.w)),ru);
  281. }
  282. const Vec3fa lower(reduce_min(pl.x),reduce_min(pl.y),reduce_min(pl.z));
  283. const Vec3fa upper(reduce_max(pu.x),reduce_max(pu.y),reduce_max(pu.z));
  284. const Vec3fa upper_r(reduce_max(ru));
  285. return enlarge(BBox3fa(min(lower,v3),max(upper,v3)),max(upper_r,Vec3fa(abs(v3.w))));
  286. }
  287. }
  288. };
  289. #if !defined(EMBREE_NATIVE_CURVE_BSPLINE)
  290. #define CurveT BezierCurveT
  291. typedef BezierCurve3fa Curve3fa;
  292. #endif
  293. }