// bezier_curve.h
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "../common/default.h"
  5. //#include "../common/scene_curves.h"
  6. #include "../common/context.h"
  7. namespace embree
  8. {
  9. class BezierBasis
  10. {
  11. public:
  12. template<typename T>
  13. static __forceinline Vec4<T> eval(const T& u)
  14. {
  15. const T t1 = u;
  16. const T t0 = 1.0f-t1;
  17. const T B0 = t0 * t0 * t0;
  18. const T B1 = 3.0f * t1 * (t0 * t0);
  19. const T B2 = 3.0f * (t1 * t1) * t0;
  20. const T B3 = t1 * t1 * t1;
  21. return Vec4<T>(B0,B1,B2,B3);
  22. }
  23. template<typename T>
  24. static __forceinline Vec4<T> derivative(const T& u)
  25. {
  26. const T t1 = u;
  27. const T t0 = 1.0f-t1;
  28. const T B0 = -(t0*t0);
  29. const T B1 = madd(-2.0f,t0*t1,t0*t0);
  30. const T B2 = msub(+2.0f,t0*t1,t1*t1);
  31. const T B3 = +(t1*t1);
  32. return T(3.0f)*Vec4<T>(B0,B1,B2,B3);
  33. }
  34. template<typename T>
  35. static __forceinline Vec4<T> derivative2(const T& u)
  36. {
  37. const T t1 = u;
  38. const T t0 = 1.0f-t1;
  39. const T B0 = t0;
  40. const T B1 = madd(-2.0f,t0,t1);
  41. const T B2 = madd(-2.0f,t1,t0);
  42. const T B3 = t1;
  43. return T(6.0f)*Vec4<T>(B0,B1,B2,B3);
  44. }
  45. };
  /*! Lookup tables of Bezier basis weights. Each table holds
   *  (N+1) x (N+1) floats; CubicBezierCurve::eval0/eval1 and
   *  derivative0/derivative1 read them as table[size][ofs].
   *  The constructor taking 'shift' is only declared here.
   *  NOTE(review): presumably it fills the tables for the sample
   *  positions (ofs+shift)/size -- confirm against its definition. */
  struct PrecomputedBezierBasis
  {
    enum { N = 16 };

    /* leaves all tables uninitialized */
    PrecomputedBezierBasis() {}

    PrecomputedBezierBasis(int shift);

    /* basis for bezier evaluation, one table per control point */
    float c0[N+1][N+1];
    float c1[N+1][N+1];
    float c2[N+1][N+1];
    float c3[N+1][N+1];

    /* basis for bezier derivative evaluation, one table per control point */
    float d0[N+1][N+1];
    float d1[N+1][N+1];
    float d2[N+1][N+1];
    float d3[N+1][N+1];
  };
  65. extern PrecomputedBezierBasis bezier_basis0;
  66. extern PrecomputedBezierBasis bezier_basis1;
  67. template<typename V>
  68. struct LinearBezierCurve
  69. {
  70. V v0,v1;
  71. __forceinline LinearBezierCurve () {}
  72. __forceinline LinearBezierCurve (const LinearBezierCurve& other)
  73. : v0(other.v0), v1(other.v1) {}
  74. __forceinline LinearBezierCurve& operator= (const LinearBezierCurve& other) {
  75. v0 = other.v0; v1 = other.v1; return *this;
  76. }
  77. __forceinline LinearBezierCurve (const V& v0, const V& v1)
  78. : v0(v0), v1(v1) {}
  79. __forceinline V begin() const { return v0; }
  80. __forceinline V end () const { return v1; }
  81. bool hasRoot() const;
  82. friend embree_ostream operator<<(embree_ostream cout, const LinearBezierCurve& a) {
  83. return cout << "LinearBezierCurve (" << a.v0 << ", " << a.v1 << ")";
  84. }
  85. };
  86. template<> __forceinline bool LinearBezierCurve<Interval1f>::hasRoot() const {
  87. return numRoots(v0,v1);
  88. }
  89. template<typename V>
  90. struct QuadraticBezierCurve
  91. {
  92. V v0,v1,v2;
  93. __forceinline QuadraticBezierCurve () {}
  94. __forceinline QuadraticBezierCurve (const QuadraticBezierCurve& other)
  95. : v0(other.v0), v1(other.v1), v2(other.v2) {}
  96. __forceinline QuadraticBezierCurve& operator= (const QuadraticBezierCurve& other) {
  97. v0 = other.v0; v1 = other.v1; v2 = other.v2; return *this;
  98. }
  99. __forceinline QuadraticBezierCurve (const V& v0, const V& v1, const V& v2)
  100. : v0(v0), v1(v1), v2(v2) {}
  101. __forceinline V begin() const { return v0; }
  102. __forceinline V end () const { return v2; }
  103. __forceinline V interval() const {
  104. return merge(v0,v1,v2);
  105. }
  106. __forceinline BBox<V> bounds() const {
  107. return merge(BBox<V>(v0),BBox<V>(v1),BBox<V>(v2));
  108. }
  109. friend embree_ostream operator<<(embree_ostream cout, const QuadraticBezierCurve& a) {
  110. return cout << "QuadraticBezierCurve (" << a.v0 << ", " << a.v1 << ", " << a.v2 << ")";
  111. }
  112. };
  113. typedef QuadraticBezierCurve<float> QuadraticBezierCurve1f;
  114. typedef QuadraticBezierCurve<Vec2fa> QuadraticBezierCurve2fa;
  115. typedef QuadraticBezierCurve<Vec3fa> QuadraticBezierCurve3fa;
  116. template<typename Vertex>
  117. struct CubicBezierCurve
  118. {
  119. Vertex v0,v1,v2,v3;
  120. __forceinline CubicBezierCurve() {}
  121. template<typename T1>
  122. __forceinline CubicBezierCurve (const CubicBezierCurve<T1>& other)
  123. : v0(other.v0), v1(other.v1), v2(other.v2), v3(other.v3) {}
  124. __forceinline CubicBezierCurve& operator= (const CubicBezierCurve& other) {
  125. v0 = other.v0; v1 = other.v1; v2 = other.v2; v3 = other.v3; return *this;
  126. }
  127. __forceinline CubicBezierCurve(const Vertex& v0, const Vertex& v1, const Vertex& v2, const Vertex& v3)
  128. : v0(v0), v1(v1), v2(v2), v3(v3) {}
  129. __forceinline Vertex begin() const {
  130. return v0;
  131. }
  132. __forceinline Vertex end() const {
  133. return v3;
  134. }
  135. __forceinline Vertex center() const {
  136. return 0.25f*(v0+v1+v2+v3);
  137. }
  138. __forceinline Vertex begin_direction() const {
  139. return v1-v0;
  140. }
  141. __forceinline Vertex end_direction() const {
  142. return v3-v2;
  143. }
  144. __forceinline CubicBezierCurve<float> xfm(const Vertex& dx) const {
  145. return CubicBezierCurve<float>(dot(v0,dx),dot(v1,dx),dot(v2,dx),dot(v3,dx));
  146. }
  147. template<int W>
  148. __forceinline CubicBezierCurve<vfloat<W>> vxfm(const Vertex& dx) const {
  149. return CubicBezierCurve<vfloat<W>>(dot(v0,dx),dot(v1,dx),dot(v2,dx),dot(v3,dx));
  150. }
  151. __forceinline CubicBezierCurve<float> xfm(const Vertex& dx, const Vertex& p) const {
  152. return CubicBezierCurve<float>(dot(v0-p,dx),dot(v1-p,dx),dot(v2-p,dx),dot(v3-p,dx));
  153. }
  154. __forceinline CubicBezierCurve<Vec3fa> xfm(const LinearSpace3fa& space) const
  155. {
  156. const Vec3fa q0 = xfmVector(space,v0);
  157. const Vec3fa q1 = xfmVector(space,v1);
  158. const Vec3fa q2 = xfmVector(space,v2);
  159. const Vec3fa q3 = xfmVector(space,v3);
  160. return CubicBezierCurve<Vec3fa>(q0,q1,q2,q3);
  161. }
  162. __forceinline CubicBezierCurve<Vec3fa> xfm(const LinearSpace3fa& space, const Vec3fa& p) const
  163. {
  164. const Vec3fa q0 = xfmVector(space,v0-p);
  165. const Vec3fa q1 = xfmVector(space,v1-p);
  166. const Vec3fa q2 = xfmVector(space,v2-p);
  167. const Vec3fa q3 = xfmVector(space,v3-p);
  168. return CubicBezierCurve<Vec3fa>(q0,q1,q2,q3);
  169. }
  170. __forceinline CubicBezierCurve<Vec3ff> xfm_pr(const LinearSpace3fa& space, const Vec3fa& p) const
  171. {
  172. const Vec3ff q0(xfmVector(space,(Vec3fa)v0-p), v0.w);
  173. const Vec3ff q1(xfmVector(space,(Vec3fa)v1-p), v1.w);
  174. const Vec3ff q2(xfmVector(space,(Vec3fa)v2-p), v2.w);
  175. const Vec3ff q3(xfmVector(space,(Vec3fa)v3-p), v3.w);
  176. return CubicBezierCurve<Vec3ff>(q0,q1,q2,q3);
  177. }
  178. __forceinline CubicBezierCurve<Vec3fa> xfm(const LinearSpace3fa& space, const Vec3fa& p, const float s) const
  179. {
  180. const Vec3fa q0 = xfmVector(space,s*(v0-p));
  181. const Vec3fa q1 = xfmVector(space,s*(v1-p));
  182. const Vec3fa q2 = xfmVector(space,s*(v2-p));
  183. const Vec3fa q3 = xfmVector(space,s*(v3-p));
  184. return CubicBezierCurve<Vec3fa>(q0,q1,q2,q3);
  185. }
  186. __forceinline int maxRoots() const;
  187. __forceinline BBox<Vertex> bounds() const {
  188. return merge(BBox<Vertex>(v0),BBox<Vertex>(v1),BBox<Vertex>(v2),BBox<Vertex>(v3));
  189. }
  190. __forceinline friend CubicBezierCurve operator +( const CubicBezierCurve& a, const CubicBezierCurve& b ) {
  191. return CubicBezierCurve(a.v0+b.v0,a.v1+b.v1,a.v2+b.v2,a.v3+b.v3);
  192. }
  193. __forceinline friend CubicBezierCurve operator -( const CubicBezierCurve& a, const CubicBezierCurve& b ) {
  194. return CubicBezierCurve(a.v0-b.v0,a.v1-b.v1,a.v2-b.v2,a.v3-b.v3);
  195. }
  196. __forceinline friend CubicBezierCurve operator -( const CubicBezierCurve& a, const Vertex& b ) {
  197. return CubicBezierCurve(a.v0-b,a.v1-b,a.v2-b,a.v3-b);
  198. }
  199. __forceinline friend CubicBezierCurve operator *( const Vertex& a, const CubicBezierCurve& b ) {
  200. return CubicBezierCurve(a*b.v0,a*b.v1,a*b.v2,a*b.v3);
  201. }
  202. __forceinline friend CubicBezierCurve cmadd( const Vertex& a, const CubicBezierCurve& b, const CubicBezierCurve& c) {
  203. return CubicBezierCurve(madd(a,b.v0,c.v0),madd(a,b.v1,c.v1),madd(a,b.v2,c.v2),madd(a,b.v3,c.v3));
  204. }
  205. __forceinline friend CubicBezierCurve clerp ( const CubicBezierCurve& a, const CubicBezierCurve& b, const Vertex& t ) {
  206. return cmadd((Vertex(1.0f)-t),a,t*b);
  207. }
  208. __forceinline friend CubicBezierCurve merge ( const CubicBezierCurve& a, const CubicBezierCurve& b ) {
  209. return CubicBezierCurve(merge(a.v0,b.v0),merge(a.v1,b.v1),merge(a.v2,b.v2),merge(a.v3,b.v3));
  210. }
  211. __forceinline void split(CubicBezierCurve& left, CubicBezierCurve& right, const float t = 0.5f) const
  212. {
  213. const Vertex p00 = v0;
  214. const Vertex p01 = v1;
  215. const Vertex p02 = v2;
  216. const Vertex p03 = v3;
  217. const Vertex p10 = lerp(p00,p01,t);
  218. const Vertex p11 = lerp(p01,p02,t);
  219. const Vertex p12 = lerp(p02,p03,t);
  220. const Vertex p20 = lerp(p10,p11,t);
  221. const Vertex p21 = lerp(p11,p12,t);
  222. const Vertex p30 = lerp(p20,p21,t);
  223. new (&left ) CubicBezierCurve(p00,p10,p20,p30);
  224. new (&right) CubicBezierCurve(p30,p21,p12,p03);
  225. }
  226. __forceinline CubicBezierCurve<Vec2vfx> split() const
  227. {
  228. const float u0 = 0.0f, u1 = 1.0f;
  229. const float dscale = (u1-u0)*(1.0f/(3.0f*(VSIZEX-1)));
  230. const vfloatx vu0 = lerp(u0,u1,vfloatx(StepTy())*(1.0f/(VSIZEX-1)));
  231. Vec2vfx P0, dP0du; evalN(vu0,P0,dP0du); dP0du = dP0du * Vec2vfx(dscale);
  232. const Vec2vfx P3 = shift_right_1(P0);
  233. const Vec2vfx dP3du = shift_right_1(dP0du);
  234. const Vec2vfx P1 = P0 + dP0du;
  235. const Vec2vfx P2 = P3 - dP3du;
  236. return CubicBezierCurve<Vec2vfx>(P0,P1,P2,P3);
  237. }
  238. __forceinline CubicBezierCurve<Vec2vfx> split(const BBox1f& u) const
  239. {
  240. const float u0 = u.lower, u1 = u.upper;
  241. const float dscale = (u1-u0)*(1.0f/(3.0f*(VSIZEX-1)));
  242. const vfloatx vu0 = lerp(u0,u1,vfloatx(StepTy())*(1.0f/(VSIZEX-1)));
  243. Vec2vfx P0, dP0du; evalN(vu0,P0,dP0du); dP0du = dP0du * Vec2vfx(dscale);
  244. const Vec2vfx P3 = shift_right_1(P0);
  245. const Vec2vfx dP3du = shift_right_1(dP0du);
  246. const Vec2vfx P1 = P0 + dP0du;
  247. const Vec2vfx P2 = P3 - dP3du;
  248. return CubicBezierCurve<Vec2vfx>(P0,P1,P2,P3);
  249. }
  250. template<int W>
  251. __forceinline CubicBezierCurve<Vec2vf<W>> split(const BBox1f& u, int i, int N) const
  252. {
  253. const float u0 = u.lower, u1 = u.upper;
  254. const float dscale = (u1-u0)*(1.0f/(3.0f*N));
  255. const vfloat<W> vu0 = lerp(u0,u1,(vfloat<W>(i)+vfloat<W>(StepTy()))*(1.0f/N));
  256. Vec2vf<W> P0, dP0du; evalN(vu0,P0,dP0du); dP0du = dP0du * Vec2vf<W>(dscale);
  257. const Vec2vf<W> P3 = shift_right_1(P0);
  258. const Vec2vf<W> dP3du = shift_right_1(dP0du);
  259. const Vec2vf<W> P1 = P0 + dP0du;
  260. const Vec2vf<W> P2 = P3 - dP3du;
  261. return CubicBezierCurve<Vec2vf<W>>(P0,P1,P2,P3);
  262. }
  263. __forceinline CubicBezierCurve<Vec2f> split1(const BBox1f& u, int i, int N) const
  264. {
  265. const float u0 = u.lower, u1 = u.upper;
  266. const float dscale = (u1-u0)*(1.0f/(3.0f*N));
  267. const float vu0 = lerp(u0,u1,(float(i)+0)*(1.0f/N));
  268. const float vu1 = lerp(u0,u1,(float(i)+1)*(1.0f/N));
  269. Vec2fa P0, dP0du; eval(vu0,P0,dP0du); dP0du = dP0du * Vec2fa(dscale);
  270. Vec2fa P3, dP3du; eval(vu1,P3,dP3du); dP3du = dP3du * Vec2fa(dscale);
  271. const Vec2fa P1 = P0 + dP0du;
  272. const Vec2fa P2 = P3 - dP3du;
  273. return CubicBezierCurve<Vec2f>(P0,P1,P2,P3);
  274. }
  275. __forceinline void eval(float t, Vertex& p, Vertex& dp) const
  276. {
  277. const Vertex p00 = v0;
  278. const Vertex p01 = v1;
  279. const Vertex p02 = v2;
  280. const Vertex p03 = v3;
  281. const Vertex p10 = lerp(p00,p01,t);
  282. const Vertex p11 = lerp(p01,p02,t);
  283. const Vertex p12 = lerp(p02,p03,t);
  284. const Vertex p20 = lerp(p10,p11,t);
  285. const Vertex p21 = lerp(p11,p12,t);
  286. const Vertex p30 = lerp(p20,p21,t);
  287. p = p30;
  288. dp = Vertex(3.0f)*(p21-p20);
  289. }
  290. #if 0
  291. __forceinline Vertex eval(float t) const
  292. {
  293. const Vertex p00 = v0;
  294. const Vertex p01 = v1;
  295. const Vertex p02 = v2;
  296. const Vertex p03 = v3;
  297. const Vertex p10 = lerp(p00,p01,t);
  298. const Vertex p11 = lerp(p01,p02,t);
  299. const Vertex p12 = lerp(p02,p03,t);
  300. const Vertex p20 = lerp(p10,p11,t);
  301. const Vertex p21 = lerp(p11,p12,t);
  302. const Vertex p30 = lerp(p20,p21,t);
  303. return p30;
  304. }
  305. #else
  306. __forceinline Vertex eval(const float t) const
  307. {
  308. const Vec4<float> b = BezierBasis::eval(t);
  309. return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3)));
  310. }
  311. #endif
  312. __forceinline Vertex eval_dt(float t) const
  313. {
  314. const Vertex p00 = v1-v0;
  315. const Vertex p01 = v2-v1;
  316. const Vertex p02 = v3-v2;
  317. const Vertex p10 = lerp(p00,p01,t);
  318. const Vertex p11 = lerp(p01,p02,t);
  319. const Vertex p20 = lerp(p10,p11,t);
  320. return Vertex(3.0f)*p20;
  321. }
  322. __forceinline Vertex eval_du(const float t) const
  323. {
  324. const Vec4<float> b = BezierBasis::derivative(t);
  325. return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3)));
  326. }
  327. __forceinline Vertex eval_dudu(const float t) const
  328. {
  329. const Vec4<float> b = BezierBasis::derivative2(t);
  330. return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3)));
  331. }
  332. __forceinline void evalN(const vfloatx& t, Vec2vfx& p, Vec2vfx& dp) const
  333. {
  334. const Vec2vfx p00 = v0;
  335. const Vec2vfx p01 = v1;
  336. const Vec2vfx p02 = v2;
  337. const Vec2vfx p03 = v3;
  338. const Vec2vfx p10 = lerp(p00,p01,t);
  339. const Vec2vfx p11 = lerp(p01,p02,t);
  340. const Vec2vfx p12 = lerp(p02,p03,t);
  341. const Vec2vfx p20 = lerp(p10,p11,t);
  342. const Vec2vfx p21 = lerp(p11,p12,t);
  343. const Vec2vfx p30 = lerp(p20,p21,t);
  344. p = p30;
  345. dp = vfloatx(3.0f)*(p21-p20);
  346. }
  347. __forceinline void eval(const float t, Vertex& p, Vertex& dp, Vertex& ddp) const
  348. {
  349. const Vertex p00 = v0;
  350. const Vertex p01 = v1;
  351. const Vertex p02 = v2;
  352. const Vertex p03 = v3;
  353. const Vertex p10 = lerp(p00,p01,t);
  354. const Vertex p11 = lerp(p01,p02,t);
  355. const Vertex p12 = lerp(p02,p03,t);
  356. const Vertex p20 = lerp(p10,p11,t);
  357. const Vertex p21 = lerp(p11,p12,t);
  358. const Vertex p30 = lerp(p20,p21,t);
  359. p = p30;
  360. dp = 3.0f*(p21-p20);
  361. ddp = eval_dudu(t);
  362. }
  363. __forceinline CubicBezierCurve clip(const Interval1f& u1) const
  364. {
  365. Vertex f0,df0; eval(u1.lower,f0,df0);
  366. Vertex f1,df1; eval(u1.upper,f1,df1);
  367. float s = u1.upper-u1.lower;
  368. return CubicBezierCurve(f0,f0+s*(1.0f/3.0f)*df0,f1-s*(1.0f/3.0f)*df1,f1);
  369. }
  370. __forceinline QuadraticBezierCurve<Vertex> derivative() const
  371. {
  372. const Vertex q0 = 3.0f*(v1-v0);
  373. const Vertex q1 = 3.0f*(v2-v1);
  374. const Vertex q2 = 3.0f*(v3-v2);
  375. return QuadraticBezierCurve<Vertex>(q0,q1,q2);
  376. }
  377. __forceinline BBox<Vertex> derivative_bounds(const Interval1f& u1) const
  378. {
  379. Vertex f0,df0; eval(u1.lower,f0,df0);
  380. Vertex f3,df3; eval(u1.upper,f3,df3);
  381. const float s = u1.upper-u1.lower;
  382. const Vertex f1 = f0+s*(1.0f/3.0f)*df0;
  383. const Vertex f2 = f3-s*(1.0f/3.0f)*df3;
  384. const Vertex q0 = s*df0;
  385. const Vertex q1 = 3.0f*(f2-f1);
  386. const Vertex q2 = s*df3;
  387. return merge(BBox<Vertex>(q0),BBox<Vertex>(q1),BBox<Vertex>(q2));
  388. }
  389. template<int M>
  390. __forceinline Vec4vf<M> veval(const vfloat<M>& t) const
  391. {
  392. const Vec4vf<M> b = BezierBasis::eval(t);
  393. return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3))));
  394. }
  395. template<int M>
  396. __forceinline Vec4vf<M> veval_du(const vfloat<M>& t) const
  397. {
  398. const Vec4vf<M> b = BezierBasis::derivative(t);
  399. return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3))));
  400. }
  401. template<int M>
  402. __forceinline Vec4vf<M> veval_dudu(const vfloat<M>& t) const
  403. {
  404. const Vec4vf<M> b = BezierBasis::derivative2(t);
  405. return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3))));
  406. }
  407. template<int M, typename Vec>
  408. __forceinline void veval(const vfloat<M>& t, Vec& p, Vec& dp) const
  409. {
  410. const Vec p00 = v0;
  411. const Vec p01 = v1;
  412. const Vec p02 = v2;
  413. const Vec p03 = v3;
  414. const Vec p10 = lerp(p00,p01,t);
  415. const Vec p11 = lerp(p01,p02,t);
  416. const Vec p12 = lerp(p02,p03,t);
  417. const Vec p20 = lerp(p10,p11,t);
  418. const Vec p21 = lerp(p11,p12,t);
  419. const Vec p30 = lerp(p20,p21,t);
  420. p = p30;
  421. dp = vfloat<M>(3.0f)*(p21-p20);
  422. }
  423. template<int M, typename Vec = Vec4vf<M>>
  424. __forceinline Vec eval0(const int ofs, const int size) const
  425. {
  426. assert(size <= PrecomputedBezierBasis::N);
  427. assert(ofs <= size);
  428. #if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
  429. assert(size > 0);
  430. const vfloat<M> t = (vfloat<M>(step) + vfloat<M>(ofs+0))*rcp(float(size));
  431. Vec p,dp; veval<M>(t,p,dp);
  432. return p;
  433. #else
  434. return madd(vfloat<M>::loadu(&bezier_basis0.c0[size][ofs]), Vec(v0),
  435. madd(vfloat<M>::loadu(&bezier_basis0.c1[size][ofs]), Vec(v1),
  436. madd(vfloat<M>::loadu(&bezier_basis0.c2[size][ofs]), Vec(v2),
  437. vfloat<M>::loadu(&bezier_basis0.c3[size][ofs]) * Vec(v3))));
  438. #endif
  439. }
  440. template<int M, typename Vec = Vec4vf<M>>
  441. __forceinline Vec eval1(const int ofs, const int size) const
  442. {
  443. assert(size <= PrecomputedBezierBasis::N);
  444. assert(ofs <= size);
  445. #if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
  446. assert(size > 0);
  447. const vfloat<M> t = (vfloat<M>(step) + vfloat<M>(ofs+1))*rcp(float(size));
  448. Vec p,dp; veval<M>(t,p,dp);
  449. return p;
  450. #else
  451. return madd(vfloat<M>::loadu(&bezier_basis1.c0[size][ofs]), Vec(v0),
  452. madd(vfloat<M>::loadu(&bezier_basis1.c1[size][ofs]), Vec(v1),
  453. madd(vfloat<M>::loadu(&bezier_basis1.c2[size][ofs]), Vec(v2),
  454. vfloat<M>::loadu(&bezier_basis1.c3[size][ofs]) * Vec(v3))));
  455. #endif
  456. }
  457. template<int M, typename Vec = Vec4vf<M>>
  458. __forceinline Vec derivative0(const int ofs, const int size) const
  459. {
  460. assert(size <= PrecomputedBezierBasis::N);
  461. assert(ofs <= size);
  462. #if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
  463. assert(size > 0);
  464. const vfloat<M> t = (vfloat<M>(step) + vfloat<M>(ofs+0))*rcp(float(size));
  465. Vec p,dp; veval<M>(t,p,dp);
  466. return dp;
  467. #else
  468. return madd(vfloat<M>::loadu(&bezier_basis0.d0[size][ofs]), Vec(v0),
  469. madd(vfloat<M>::loadu(&bezier_basis0.d1[size][ofs]), Vec(v1),
  470. madd(vfloat<M>::loadu(&bezier_basis0.d2[size][ofs]), Vec(v2),
  471. vfloat<M>::loadu(&bezier_basis0.d3[size][ofs]) * Vec(v3))));
  472. #endif
  473. }
  474. template<int M, typename Vec = Vec4vf<M>>
  475. __forceinline Vec derivative1(const int ofs, const int size) const
  476. {
  477. assert(size <= PrecomputedBezierBasis::N);
  478. assert(ofs <= size);
  479. #if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
  480. assert(size > 0);
  481. const vfloat<M> t = (vfloat<M>(step) + vfloat<M>(ofs+1))*rcp(float(size));
  482. Vec p,dp; veval<M>(t,p,dp);
  483. return dp;
  484. #else
  485. return madd(vfloat<M>::loadu(&bezier_basis1.d0[size][ofs]), Vec(v0),
  486. madd(vfloat<M>::loadu(&bezier_basis1.d1[size][ofs]), Vec(v1),
  487. madd(vfloat<M>::loadu(&bezier_basis1.d2[size][ofs]), Vec(v2),
  488. vfloat<M>::loadu(&bezier_basis1.d3[size][ofs]) * Vec(v3))));
  489. #endif
  490. }
  491. /* calculates bounds of bezier curve geometry */
  492. __forceinline BBox3fa accurateBounds() const
  493. {
  494. const int N = 7;
  495. const float scale = 1.0f/(3.0f*(N-1));
  496. Vec3vfx pl(pos_inf), pu(neg_inf);
  497. for (int i=0; i<=N; i+=VSIZEX)
  498. {
  499. vintx vi = vintx(i)+vintx(StepTy());
  500. vboolx valid = vi <= vintx(N);
  501. const Vec3vfx p = eval0<VSIZEX,Vec3vf<VSIZEX>>(i,N);
  502. const Vec3vfx dp = derivative0<VSIZEX,Vec3vf<VSIZEX>>(i,N);
  503. const Vec3vfx pm = p-Vec3vfx(scale)*select(vi!=vintx(0),dp,Vec3vfx(zero));
  504. const Vec3vfx pp = p+Vec3vfx(scale)*select(vi!=vintx(N),dp,Vec3vfx(zero));
  505. pl = select(valid,min(pl,p,pm,pp),pl); // FIXME: use masked min
  506. pu = select(valid,max(pu,p,pm,pp),pu); // FIXME: use masked min
  507. }
  508. const Vec3fa lower(reduce_min(pl.x),reduce_min(pl.y),reduce_min(pl.z));
  509. const Vec3fa upper(reduce_max(pu.x),reduce_max(pu.y),reduce_max(pu.z));
  510. return BBox3fa(lower,upper);
  511. }
  512. /* calculates bounds of bezier curve geometry */
  513. __forceinline BBox3fa accurateRoundBounds() const
  514. {
  515. const int N = 7;
  516. const float scale = 1.0f/(3.0f*(N-1));
  517. Vec4vfx pl(pos_inf), pu(neg_inf);
  518. for (int i=0; i<=N; i+=VSIZEX)
  519. {
  520. vintx vi = vintx(i)+vintx(StepTy());
  521. vboolx valid = vi <= vintx(N);
  522. const Vec4vfx p = eval0<VSIZEX>(i,N);
  523. const Vec4vfx dp = derivative0<VSIZEX>(i,N);
  524. const Vec4vfx pm = p-Vec4vfx(scale)*select(vi!=vintx(0),dp,Vec4vfx(zero));
  525. const Vec4vfx pp = p+Vec4vfx(scale)*select(vi!=vintx(N),dp,Vec4vfx(zero));
  526. pl = select(valid,min(pl,p,pm,pp),pl); // FIXME: use masked min
  527. pu = select(valid,max(pu,p,pm,pp),pu); // FIXME: use masked min
  528. }
  529. const Vec3fa lower(reduce_min(pl.x),reduce_min(pl.y),reduce_min(pl.z));
  530. const Vec3fa upper(reduce_max(pu.x),reduce_max(pu.y),reduce_max(pu.z));
  531. const float r_min = reduce_min(pl.w);
  532. const float r_max = reduce_max(pu.w);
  533. const Vec3fa upper_r = Vec3fa(max(abs(r_min),abs(r_max)));
  534. return enlarge(BBox3fa(lower,upper),upper_r);
  535. }
  536. /* calculates bounds when tessellated into N line segments */
  537. __forceinline BBox3fa accurateFlatBounds(int N) const
  538. {
  539. if (likely(N == 4))
  540. {
  541. const Vec4vf4 pi = eval0<4>(0,4);
  542. const Vec3fa lower(reduce_min(pi.x),reduce_min(pi.y),reduce_min(pi.z));
  543. const Vec3fa upper(reduce_max(pi.x),reduce_max(pi.y),reduce_max(pi.z));
  544. const Vec3fa upper_r = Vec3fa(reduce_max(abs(pi.w)));
  545. return enlarge(BBox3fa(min(lower,v3),max(upper,v3)),max(upper_r,Vec3fa(abs(v3.w))));
  546. }
  547. else
  548. {
  549. Vec3vfx pl(pos_inf), pu(neg_inf); vfloatx ru(0.0f);
  550. for (int i=0; i<N; i+=VSIZEX)
  551. {
  552. vboolx valid = vintx(i)+vintx(StepTy()) < vintx(N);
  553. const Vec4vfx pi = eval0<VSIZEX>(i,N);
  554. pl.x = select(valid,min(pl.x,pi.x),pl.x); // FIXME: use masked min
  555. pl.y = select(valid,min(pl.y,pi.y),pl.y);
  556. pl.z = select(valid,min(pl.z,pi.z),pl.z);
  557. pu.x = select(valid,max(pu.x,pi.x),pu.x); // FIXME: use masked min
  558. pu.y = select(valid,max(pu.y,pi.y),pu.y);
  559. pu.z = select(valid,max(pu.z,pi.z),pu.z);
  560. ru = select(valid,max(ru,abs(pi.w)),ru);
  561. }
  562. const Vec3fa lower(reduce_min(pl.x),reduce_min(pl.y),reduce_min(pl.z));
  563. const Vec3fa upper(reduce_max(pu.x),reduce_max(pu.y),reduce_max(pu.z));
  564. const Vec3fa upper_r(reduce_max(ru));
  565. return enlarge(BBox3fa(min(lower,v3),max(upper,v3)),max(upper_r,Vec3fa(abs(v3.w))));
  566. }
  567. }
  568. friend __forceinline embree_ostream operator<<(embree_ostream cout, const CubicBezierCurve& curve) {
  569. return cout << "CubicBezierCurve { v0 = " << curve.v0 << ", v1 = " << curve.v1 << ", v2 = " << curve.v2 << ", v3 = " << curve.v3 << " }";
  570. }
  571. };
  #if defined(__AVX__)
  /*! AVX specialization of clip() for vfloat4 control points. One
   *  8-wide de Casteljau pass evaluates both ends of the range:
   *  the parameter vector is built from u1.lower and u1.upper, and
   *  the two results are pulled apart with extract4<0>/extract4<1>.
   *  NOTE(review): vfloat8(vfloat4) presumably replicates the
   *  control point into both halves -- confirm. */
  template<>
    __forceinline CubicBezierCurve<vfloat4> CubicBezierCurve<vfloat4>::clip(const Interval1f& u1) const
  {
    const vfloat8 c0 = vfloat8(v0);
    const vfloat8 c1 = vfloat8(v1);
    const vfloat8 c2 = vfloat8(v2);
    const vfloat8 c3 = vfloat8(v3);

    const vfloat8 t(vfloat4(u1.lower),vfloat4(u1.upper));

    const vfloat8 l0 = lerp(c0,c1,t);
    const vfloat8 l1 = lerp(c1,c2,t);
    const vfloat8 l2 = lerp(c2,c3,t);

    const vfloat8 m0 = lerp(l0,l1,t);
    const vfloat8 m1 = lerp(l1,l2,t);

    /* positions and first derivatives for both range ends */
    const vfloat8 pos = lerp(m0,m1,t);
    const vfloat8 der = vfloat8(3.0f)*(m1-m0);

    const vfloat4 pLo = extract4<0>(pos);
    const vfloat4 pHi = extract4<1>(pos);
    const vfloat4 dLo = extract4<0>(der);
    const vfloat4 dHi = extract4<1>(der);

    const float s = u1.upper-u1.lower;
    return CubicBezierCurve(pLo,
                            pLo+s*(1.0f/3.0f)*dLo,
                            pHi-s*(1.0f/3.0f)*dHi,
                            pHi);
  }
  #endif
  595. template<typename Vertex> using BezierCurveT = CubicBezierCurve<Vertex>;
  596. typedef CubicBezierCurve<float> CubicBezierCurve1f;
  597. typedef CubicBezierCurve<Vec2fa> CubicBezierCurve2fa;
  598. typedef CubicBezierCurve<Vec3fa> CubicBezierCurve3fa;
  599. typedef CubicBezierCurve<Vec3fa> BezierCurve3fa;
  600. typedef CubicBezierCurve<Vec3ff> BezierCurve3ff;
  601. template<> __forceinline int CubicBezierCurve<float>::maxRoots() const
  602. {
  603. float eps = 1E-4f;
  604. bool neg0 = v0 <= 0.0f; bool zero0 = fabs(v0) < eps;
  605. bool neg1 = v1 <= 0.0f; bool zero1 = fabs(v1) < eps;
  606. bool neg2 = v2 <= 0.0f; bool zero2 = fabs(v2) < eps;
  607. bool neg3 = v3 <= 0.0f; bool zero3 = fabs(v3) < eps;
  608. return (neg0 != neg1 || zero0) + (neg1 != neg2 || zero1) + (neg2 != neg3 || zero2 || zero3);
  609. }
  610. template<> __forceinline int CubicBezierCurve<Interval1f>::maxRoots() const {
  611. return numRoots(v0,v1) + numRoots(v1,v2) + numRoots(v2,v3);
  612. }
  613. struct CurveGeometry; // FIXME: this code should move !
  614. template<typename CurveGeometry>
  615. __forceinline CubicBezierCurve<Vec3ff> enlargeRadiusToMinWidth(const RayQueryContext* context, const CurveGeometry* geom, const Vec3fa& ray_org, const CubicBezierCurve<Vec3ff>& curve)
  616. {
  617. return CubicBezierCurve<Vec3ff>(enlargeRadiusToMinWidth(context,geom,ray_org,curve.v0),
  618. enlargeRadiusToMinWidth(context,geom,ray_org,curve.v1),
  619. enlargeRadiusToMinWidth(context,geom,ray_org,curve.v2),
  620. enlargeRadiusToMinWidth(context,geom,ray_org,curve.v3));
  621. }
  622. }