curve_intersector_oriented.h 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "../common/ray.h"
  5. #include "curve_intersector_precalculations.h"
  6. #include "curve_intersector_sweep.h"
  7. #include "../subdiv/linear_bezier_patch.h"
  8. #define DBG(x)
  9. namespace embree
  10. {
  11. namespace isa
  12. {
  13. template<typename Ray, typename Epilog, int N = VSIZEX-1, int V = VSIZEX>
  14. struct TensorLinearCubicBezierSurfaceIntersector
  15. {
  16. const LinearSpace3fa& ray_space;
  17. Ray& ray;
  18. TensorLinearCubicBezierSurface3fa curve3d;
  19. TensorLinearCubicBezierSurface2fa curve2d;
  20. float eps;
  21. const Epilog& epilog;
  22. bool isHit;
  23. __forceinline TensorLinearCubicBezierSurfaceIntersector (const LinearSpace3fa& ray_space, Ray& ray, const TensorLinearCubicBezierSurface3fa& curve3d, const Epilog& epilog)
  24. : ray_space(ray_space), ray(ray), curve3d(curve3d), epilog(epilog), isHit(false)
  25. {
  26. const TensorLinearCubicBezierSurface3fa curve3dray = curve3d.xfm(ray_space,ray.org);
  27. curve2d = TensorLinearCubicBezierSurface2fa(CubicBezierCurve2fa(curve3dray.L),CubicBezierCurve2fa(curve3dray.R));
  28. const BBox2fa b2 = curve2d.bounds();
  29. eps = 8.0f*float(ulp)*reduce_max(max(abs(b2.lower),abs(b2.upper)));
  30. }
  31. __forceinline Interval1f solve_linear(const float u0, const float u1, const float& p0, const float& p1)
  32. {
  33. if (p1 == p0) {
  34. if (p0 == 0.0f) return Interval1f(u0,u1);
  35. else return Interval1f(empty);
  36. }
  37. const float t = -p0/(p1-p0);
  38. const float tt = lerp(u0,u1,t);
  39. return Interval1f(tt);
  40. }
  41. __forceinline void solve_linear(const float u0, const float u1, const Interval1f& p0, const Interval1f& p1, Interval1f& u)
  42. {
  43. if (sign(p0.lower) != sign(p0.upper)) u.extend(u0);
  44. if (sign(p0.lower) != sign(p1.lower)) u.extend(solve_linear(u0,u1,p0.lower,p1.lower));
  45. if (sign(p0.upper) != sign(p1.upper)) u.extend(solve_linear(u0,u1,p0.upper,p1.upper));
  46. if (sign(p1.lower) != sign(p1.upper)) u.extend(u1);
  47. }
  48. __forceinline Interval1f bezier_clipping(const CubicBezierCurve<Interval1f>& curve)
  49. {
  50. Interval1f u = empty;
  51. solve_linear(0.0f/3.0f,1.0f/3.0f,curve.v0,curve.v1,u);
  52. solve_linear(0.0f/3.0f,2.0f/3.0f,curve.v0,curve.v2,u);
  53. solve_linear(0.0f/3.0f,3.0f/3.0f,curve.v0,curve.v3,u);
  54. solve_linear(1.0f/3.0f,2.0f/3.0f,curve.v1,curve.v2,u);
  55. solve_linear(1.0f/3.0f,3.0f/3.0f,curve.v1,curve.v3,u);
  56. solve_linear(2.0f/3.0f,3.0f/3.0f,curve.v2,curve.v3,u);
  57. return intersect(u,Interval1f(0.0f,1.0f));
  58. }
  59. __forceinline Interval1f bezier_clipping(const LinearBezierCurve<Interval1f>& curve)
  60. {
  61. Interval1f v = empty;
  62. solve_linear(0.0f,1.0f,curve.v0,curve.v1,v);
  63. return intersect(v,Interval1f(0.0f,1.0f));
  64. }
  65. __forceinline void solve_bezier_clipping(BBox1f cu, BBox1f cv, const TensorLinearCubicBezierSurface2fa& curve2)
  66. {
  67. BBox2fa bounds = curve2.bounds();
  68. if (bounds.upper.x < 0.0f) return;
  69. if (bounds.upper.y < 0.0f) return;
  70. if (bounds.lower.x > 0.0f) return;
  71. if (bounds.lower.y > 0.0f) return;
  72. if (max(cu.size(),cv.size()) < 1E-4f)
  73. {
  74. const float u = cu.center();
  75. const float v = cv.center();
  76. TensorLinearCubicBezierSurface1f curve_z = curve3d.xfm(ray_space.row2(),ray.org);
  77. const float t = curve_z.eval(u,v);
  78. if (ray.tnear() <= t && t <= ray.tfar) {
  79. const Vec3fa Ng = cross(curve3d.eval_du(u,v),curve3d.eval_dv(u,v));
  80. BezierCurveHit hit(t,u,v,Ng);
  81. isHit |= epilog(hit);
  82. }
  83. return;
  84. }
  85. const Vec2fa dv = curve2.axis_v();
  86. const TensorLinearCubicBezierSurface1f curve1v = curve2.xfm(dv);
  87. LinearBezierCurve<Interval1f> curve0v = curve1v.reduce_u();
  88. if (!curve0v.hasRoot()) return;
  89. const Interval1f v = bezier_clipping(curve0v);
  90. if (isEmpty(v)) return;
  91. TensorLinearCubicBezierSurface2fa curve2a = curve2.clip_v(v);
  92. cv = BBox1f(lerp(cv.lower,cv.upper,v.lower),lerp(cv.lower,cv.upper,v.upper));
  93. const Vec2fa du = curve2.axis_u();
  94. const TensorLinearCubicBezierSurface1f curve1u = curve2a.xfm(du);
  95. CubicBezierCurve<Interval1f> curve0u = curve1u.reduce_v();
  96. int roots = curve0u.maxRoots();
  97. if (roots == 0) return;
  98. if (roots == 1)
  99. {
  100. const Interval1f u = bezier_clipping(curve0u);
  101. if (isEmpty(u)) return;
  102. TensorLinearCubicBezierSurface2fa curve2b = curve2a.clip_u(u);
  103. cu = BBox1f(lerp(cu.lower,cu.upper,u.lower),lerp(cu.lower,cu.upper,u.upper));
  104. solve_bezier_clipping(cu,cv,curve2b);
  105. return;
  106. }
  107. TensorLinearCubicBezierSurface2fa curve2l, curve2r;
  108. curve2a.split_u(curve2l,curve2r);
  109. solve_bezier_clipping(BBox1f(cu.lower,cu.center()),cv,curve2l);
  110. solve_bezier_clipping(BBox1f(cu.center(),cu.upper),cv,curve2r);
  111. }
  112. __forceinline bool solve_bezier_clipping()
  113. {
  114. solve_bezier_clipping(BBox1f(0.0f,1.0f),BBox1f(0.0f,1.0f),curve2d);
  115. return isHit;
  116. }
  117. __forceinline void solve_newton_raphson(BBox1f cu, BBox1f cv)
  118. {
  119. Vec2fa uv(cu.center(),cv.center());
  120. const Vec2fa dfdu = curve2d.eval_du(uv.x,uv.y);
  121. const Vec2fa dfdv = curve2d.eval_dv(uv.x,uv.y);
  122. const LinearSpace2fa rcp_J = rcp(LinearSpace2fa(dfdu,dfdv));
  123. solve_newton_raphson_loop(cu,cv,uv,dfdu,dfdv,rcp_J);
  124. }
  125. __forceinline void solve_newton_raphson_loop(BBox1f cu, BBox1f cv, const Vec2fa& uv_in, const Vec2fa& dfdu, const Vec2fa& dfdv, const LinearSpace2fa& rcp_J)
  126. {
  127. Vec2fa uv = uv_in;
  128. for (size_t i=0; i<200; i++)
  129. {
  130. const Vec2fa f = curve2d.eval(uv.x,uv.y);
  131. const Vec2fa duv = rcp_J*f;
  132. uv -= duv;
  133. if (max(abs(f.x),abs(f.y)) < eps)
  134. {
  135. const float u = uv.x;
  136. const float v = uv.y;
  137. if (!(u >= 0.0f && u <= 1.0f)) return; // rejects NaNs
  138. if (!(v >= 0.0f && v <= 1.0f)) return; // rejects NaNs
  139. const TensorLinearCubicBezierSurface1f curve_z = curve3d.xfm(ray_space.row2(),ray.org);
  140. const float t = curve_z.eval(u,v);
  141. if (!(ray.tnear() <= t && t <= ray.tfar)) return; // rejects NaNs
  142. const Vec3fa Ng = cross(curve3d.eval_du(u,v),curve3d.eval_dv(u,v));
  143. BezierCurveHit hit(t,u,v,Ng);
  144. isHit |= epilog(hit);
  145. return;
  146. }
  147. }
  148. }
  149. __forceinline bool clip_v(BBox1f& cu, BBox1f& cv)
  150. {
  151. const Vec2fa dv = curve2d.eval_dv(cu.lower,cv.lower);
  152. const TensorLinearCubicBezierSurface1f curve1v = curve2d.xfm(dv).clip(cu,cv);
  153. LinearBezierCurve<Interval1f> curve0v = curve1v.reduce_u();
  154. if (!curve0v.hasRoot()) return false;
  155. Interval1f v = bezier_clipping(curve0v);
  156. if (isEmpty(v)) return false;
  157. v = intersect(v + Interval1f(-0.1f,+0.1f),Interval1f(0.0f,1.0f));
  158. cv = BBox1f(lerp(cv.lower,cv.upper,v.lower),lerp(cv.lower,cv.upper,v.upper));
  159. return true;
  160. }
  161. __forceinline bool solve_krawczyk(bool very_small, BBox1f& cu, BBox1f& cv)
  162. {
  163. /* perform bezier clipping in v-direction to get tight v-bounds */
  164. TensorLinearCubicBezierSurface2fa curve2 = curve2d.clip(cu,cv);
  165. const Vec2fa dv = curve2.axis_v();
  166. const TensorLinearCubicBezierSurface1f curve1v = curve2.xfm(dv);
  167. LinearBezierCurve<Interval1f> curve0v = curve1v.reduce_u();
  168. if (unlikely(!curve0v.hasRoot())) return true;
  169. Interval1f v = bezier_clipping(curve0v);
  170. if (unlikely(isEmpty(v))) return true;
  171. v = intersect(v + Interval1f(-0.1f,+0.1f),Interval1f(0.0f,1.0f));
  172. curve2 = curve2.clip_v(v);
  173. cv = BBox1f(lerp(cv.lower,cv.upper,v.lower),lerp(cv.lower,cv.upper,v.upper));
  174. /* perform one newton raphson iteration */
  175. Vec2fa c(cu.center(),cv.center());
  176. Vec2fa f,dfdu,dfdv; curve2d.eval(c.x,c.y,f,dfdu,dfdv);
  177. const LinearSpace2fa rcp_J = rcp(LinearSpace2fa(dfdu,dfdv));
  178. const Vec2fa c1 = c - rcp_J*f;
  179. /* calculate bounds of derivatives */
  180. const BBox2fa bounds_du = (1.0f/cu.size())*curve2.derivative_u().bounds();
  181. const BBox2fa bounds_dv = (1.0f/cv.size())*curve2.derivative_v().bounds();
  182. /* calculate krawczyk test */
  183. LinearSpace2<Vec2<Interval1f>> I(Interval1f(1.0f), Interval1f(0.0f),
  184. Interval1f(0.0f), Interval1f(1.0f));
  185. LinearSpace2<Vec2<Interval1f>> G(Interval1f(bounds_du.lower.x,bounds_du.upper.x), Interval1f(bounds_dv.lower.x,bounds_dv.upper.x),
  186. Interval1f(bounds_du.lower.y,bounds_du.upper.y), Interval1f(bounds_dv.lower.y,bounds_dv.upper.y));
  187. const LinearSpace2<Vec2f> rcp_J2(rcp_J);
  188. const LinearSpace2<Vec2<Interval1f>> rcp_Ji(rcp_J2);
  189. const Vec2<Interval1f> x(cu,cv);
  190. const Vec2<Interval1f> K = Vec2<Interval1f>(Vec2f(c1)) + (I - rcp_Ji*G)*(x-Vec2<Interval1f>(Vec2f(c)));
  191. /* test if there is no solution */
  192. const Vec2<Interval1f> KK = intersect(K,x);
  193. if (unlikely(isEmpty(KK.x) || isEmpty(KK.y))) return true;
  194. /* exit if convergence cannot get proven, but terminate if we are very small */
  195. if (unlikely(!subset(K,x) && !very_small)) return false;
  196. /* solve using newton raphson iteration of convergence is guaranteed */
  197. solve_newton_raphson_loop(cu,cv,c1,dfdu,dfdv,rcp_J);
  198. return true;
  199. }
  200. __forceinline void solve_newton_raphson_no_recursion(BBox1f cu, BBox1f cv)
  201. {
  202. if (!clip_v(cu,cv)) return;
  203. return solve_newton_raphson(cu,cv);
  204. }
  205. __forceinline void solve_newton_raphson_recursion(BBox1f cu, BBox1f cv)
  206. {
  207. unsigned int sptr = 0;
  208. const unsigned int stack_size = 4;
  209. unsigned int mask_stack[stack_size];
  210. BBox1f cu_stack[stack_size];
  211. BBox1f cv_stack[stack_size];
  212. goto entry;
  213. /* terminate if stack is empty */
  214. while (sptr)
  215. {
  216. /* pop from stack */
  217. {
  218. sptr--;
  219. size_t mask = mask_stack[sptr];
  220. cu = cu_stack[sptr];
  221. cv = cv_stack[sptr];
  222. const size_t i = bscf(mask);
  223. mask_stack[sptr] = mask;
  224. if (mask) sptr++; // there are still items on the stack
  225. /* process next element recurse into each hit curve segment */
  226. const float u0 = float(i+0)*(1.0f/(N));
  227. const float u1 = float(i+1)*(1.0f/(N));
  228. const BBox1f cui(lerp(cu.lower,cu.upper,u0),lerp(cu.lower,cu.upper,u1));
  229. cu = cui;
  230. }
  231. #if 0
  232. solve_newton_raphson_no_recursion(cu,cv);
  233. continue;
  234. #else
  235. /* we assume convergence for small u ranges and verify using krawczyk */
  236. if (cu.size() < 1.0f/6.0f) {
  237. const bool very_small = cu.size() < 0.001f || sptr >= stack_size;
  238. if (solve_krawczyk(very_small,cu,cv)) {
  239. continue;
  240. }
  241. }
  242. #endif
  243. entry:
  244. /* split the curve into N segments in u-direction */
  245. unsigned int mask = 0;
  246. for (int i=0; i<N;)
  247. {
  248. int i0 = i;
  249. vbool<V> valid = true;
  250. TensorLinearCubicBezierSurface<Vec2vf<V>> subcurves = curve2d.clip_v(cv).template vsplit_u<V>(valid,cu,i,N);
  251. /* slabs test in u-direction */
  252. Vec2vf<V> ndv = cross(subcurves.axis_v());
  253. BBox<vfloat<V>> boundsv = subcurves.template vxfm<V>(ndv).bounds();
  254. valid &= boundsv.lower <= eps;
  255. valid &= boundsv.upper >= -eps;
  256. if (none(valid)) continue;
  257. /* slabs test in v-direction */
  258. Vec2vf<V> ndu = cross(subcurves.axis_u());
  259. BBox<vfloat<V>> boundsu = subcurves.template vxfm<V>(ndu).bounds();
  260. valid &= boundsu.lower <= eps;
  261. valid &= boundsu.upper >= -eps;
  262. if (none(valid)) continue;
  263. mask |= movemask(valid) << i0;
  264. }
  265. if (!mask) continue;
  266. /* push valid segments to stack */
  267. assert(sptr < stack_size);
  268. mask_stack [sptr] = mask;
  269. cu_stack [sptr] = cu;
  270. cv_stack [sptr] = cv;
  271. sptr++;
  272. }
  273. }
  274. __forceinline bool solve_newton_raphson_main()
  275. {
  276. BBox1f vu(0.0f,1.0f);
  277. BBox1f vv(0.0f,1.0f);
  278. solve_newton_raphson_recursion(vu,vv);
  279. return isHit;
  280. }
  281. };
  282. template<template<typename Ty> class SourceCurve, int N = VSIZEX-1, int V = VSIZEX>
  283. struct OrientedCurve1Intersector1
  284. {
  285. //template<typename Ty> using Curve = SourceCurve<Ty>;
  286. typedef SourceCurve<Vec3ff> SourceCurve3ff;
  287. typedef SourceCurve<Vec3fa> SourceCurve3fa;
  288. __forceinline OrientedCurve1Intersector1() {}
  289. __forceinline OrientedCurve1Intersector1(const Ray& ray, const void* ptr) {}
  290. template<typename Ray, typename Epilog>
  291. __forceinline bool intersect(const CurvePrecalculations1& pre, Ray& ray,
  292. RayQueryContext* context,
  293. const CurveGeometry* geom, const unsigned int primID,
  294. const Vec3ff& v0i, const Vec3ff& v1i, const Vec3ff& v2i, const Vec3ff& v3i,
  295. const Vec3fa& n0i, const Vec3fa& n1i, const Vec3fa& n2i, const Vec3fa& n3i,
  296. const Epilog& epilog) const
  297. {
  298. STAT3(normal.trav_prims,1,1,1);
  299. SourceCurve3ff ccurve(v0i,v1i,v2i,v3i);
  300. SourceCurve3fa ncurve(n0i,n1i,n2i,n3i);
  301. ccurve = enlargeRadiusToMinWidth(context,geom,ray.org,ccurve);
  302. TensorLinearCubicBezierSurface3fa curve = TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(ccurve,ncurve);
  303. //return TensorLinearCubicBezierSurfaceIntersector<Ray,Epilog>(pre.ray_space,ray,curve,epilog).solve_bezier_clipping();
  304. return TensorLinearCubicBezierSurfaceIntersector<Ray,Epilog,N,V>(pre.ray_space,ray,curve,epilog).solve_newton_raphson_main();
  305. }
  306. template<typename Ray, typename Epilog>
  307. __forceinline bool intersect(const CurvePrecalculations1& pre, Ray& ray,
  308. RayQueryContext* context,
  309. const CurveGeometry* geom, const unsigned int primID,
  310. const TensorLinearCubicBezierSurface3fa& curve, const Epilog& epilog) const
  311. {
  312. STAT3(normal.trav_prims,1,1,1);
  313. //return TensorLinearCubicBezierSurfaceIntersector<Ray,Epilog>(pre.ray_space,ray,curve,epilog).solve_bezier_clipping();
  314. return TensorLinearCubicBezierSurfaceIntersector<Ray,Epilog,N,V>(pre.ray_space,ray,curve,epilog).solve_newton_raphson_main();
  315. }
  316. };
  317. template<template<typename Ty> class SourceCurve, int K>
  318. struct OrientedCurve1IntersectorK
  319. {
  320. //template<typename Ty> using Curve = SourceCurve<Ty>;
  321. typedef SourceCurve<Vec3ff> SourceCurve3ff;
  322. typedef SourceCurve<Vec3fa> SourceCurve3fa;
  323. struct Ray1
  324. {
  325. __forceinline Ray1(RayK<K>& ray, size_t k)
  326. : org(ray.org.x[k],ray.org.y[k],ray.org.z[k]), dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]), _tnear(ray.tnear()[k]), tfar(ray.tfar[k]) {}
  327. Vec3fa org;
  328. Vec3fa dir;
  329. float _tnear;
  330. float& tfar;
  331. __forceinline float& tnear() { return _tnear; }
  332. //__forceinline float& tfar() { return _tfar; }
  333. __forceinline const float& tnear() const { return _tnear; }
  334. //__forceinline const float& tfar() const { return _tfar; }
  335. };
  336. template<typename Epilog>
  337. __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& vray, size_t k,
  338. RayQueryContext* context,
  339. const CurveGeometry* geom, const unsigned int primID,
  340. const Vec3ff& v0i, const Vec3ff& v1i, const Vec3ff& v2i, const Vec3ff& v3i,
  341. const Vec3fa& n0i, const Vec3fa& n1i, const Vec3fa& n2i, const Vec3fa& n3i,
  342. const Epilog& epilog)
  343. {
  344. STAT3(normal.trav_prims,1,1,1);
  345. Ray1 ray(vray,k);
  346. SourceCurve3ff ccurve(v0i,v1i,v2i,v3i);
  347. SourceCurve3fa ncurve(n0i,n1i,n2i,n3i);
  348. ccurve = enlargeRadiusToMinWidth(context,geom,ray.org,ccurve);
  349. TensorLinearCubicBezierSurface3fa curve = TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(ccurve,ncurve);
  350. //return TensorLinearCubicBezierSurfaceIntersector<Ray1,Epilog>(pre.ray_space[k],ray,curve,epilog).solve_bezier_clipping();
  351. return TensorLinearCubicBezierSurfaceIntersector<Ray1,Epilog>(pre.ray_space[k],ray,curve,epilog).solve_newton_raphson_main();
  352. }
  353. template<typename Epilog>
  354. __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& vray, size_t k,
  355. RayQueryContext* context,
  356. const CurveGeometry* geom, const unsigned int primID,
  357. const TensorLinearCubicBezierSurface3fa& curve,
  358. const Epilog& epilog)
  359. {
  360. STAT3(normal.trav_prims,1,1,1);
  361. Ray1 ray(vray,k);
  362. //return TensorLinearCubicBezierSurfaceIntersector<Ray1,Epilog>(pre.ray_space[k],ray,curve,epilog).solve_bezier_clipping();
  363. return TensorLinearCubicBezierSurfaceIntersector<Ray1,Epilog>(pre.ray_space[k],ray,curve,epilog).solve_newton_raphson_main();
  364. }
  365. };
  366. }
  367. }