curve_intersector_sweep.h 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "../common/ray.h"
  5. #include "cylinder.h"
  6. #include "plane.h"
  7. #include "line_intersector.h"
  8. #include "curve_intersector_precalculations.h"
  9. namespace embree
  10. {
  11. namespace isa
  12. {
  13. static const size_t numJacobianIterations = 5;
  14. #if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
  15. static const size_t numBezierSubdivisions = 2;
  16. #elif defined(__AVX__)
  17. static const size_t numBezierSubdivisions = 2;
  18. #else
  19. static const size_t numBezierSubdivisions = 3;
  20. #endif
  21. struct BezierCurveHit
  22. {
  23. __forceinline BezierCurveHit() {}
  24. __forceinline BezierCurveHit(const float t, const float u, const Vec3fa& Ng)
  25. : t(t), u(u), v(0.0f), Ng(Ng) {}
  26. __forceinline BezierCurveHit(const float t, const float u, const float v, const Vec3fa& Ng)
  27. : t(t), u(u), v(v), Ng(Ng) {}
  28. __forceinline void finalize() {}
  29. public:
  30. float t;
  31. float u;
  32. float v;
  33. Vec3fa Ng;
  34. };
  35. template<typename NativeCurve3ff, typename Ray, typename Epilog>
  36. __forceinline bool intersect_bezier_iterative_debug(const Ray& ray, const float dt, const NativeCurve3ff& curve, size_t i,
  37. const vfloatx& u, const BBox<vfloatx>& tp, const BBox<vfloatx>& h0, const BBox<vfloatx>& h1,
  38. const Vec3vfx& Ng, const Vec4vfx& dP0du, const Vec4vfx& dP3du,
  39. const Epilog& epilog)
  40. {
  41. if (tp.lower[i]+dt > ray.tfar) return false;
  42. Vec3fa Ng_o = Vec3fa(Ng.x[i],Ng.y[i],Ng.z[i]);
  43. if (h0.lower[i] == tp.lower[i]) Ng_o = -Vec3fa(dP0du.x[i],dP0du.y[i],dP0du.z[i]);
  44. if (h1.lower[i] == tp.lower[i]) Ng_o = +Vec3fa(dP3du.x[i],dP3du.y[i],dP3du.z[i]);
  45. BezierCurveHit hit(tp.lower[i]+dt,u[i],Ng_o);
  46. return epilog(hit);
  47. }
  48. template<typename NativeCurve3ff, typename Ray, typename Epilog>
  49. __forceinline bool intersect_bezier_iterative_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve, float u, float t, const Epilog& epilog)
  50. {
  51. const Vec3fa org = zero;
  52. const Vec3fa dir = ray.dir;
  53. const float length_ray_dir = length(dir);
  54. /* error of curve evaluations is proportional to largest coordinate */
  55. const BBox3ff box = curve.bounds();
  56. const float P_err = 16.0f*float(ulp)*reduce_max(max(abs(box.lower),abs(box.upper)));
  57. for (size_t i=0; i<numJacobianIterations; i++)
  58. {
  59. const Vec3fa Q = madd(Vec3fa(t),dir,org);
  60. //const Vec3fa dQdu = zero;
  61. const Vec3fa dQdt = dir;
  62. const float Q_err = 16.0f*float(ulp)*length_ray_dir*t; // works as org=zero here
  63. Vec3ff P,dPdu,ddPdu; curve.eval(u,P,dPdu,ddPdu);
  64. //const Vec3fa dPdt = zero;
  65. const Vec3fa R = Q-P;
  66. const float len_R = length(R); //reduce_max(abs(R));
  67. const float R_err = max(Q_err,P_err);
  68. const Vec3fa dRdu = /*dQdu*/-dPdu;
  69. const Vec3fa dRdt = dQdt;//-dPdt;
  70. const Vec3fa T = normalize(dPdu);
  71. const Vec3fa dTdu = dnormalize(dPdu,ddPdu);
  72. //const Vec3fa dTdt = zero;
  73. const float cos_err = P_err/length(dPdu);
  74. /* Error estimate for dot(R,T):
  75. dot(R,T) = cos(R,T) |R| |T|
  76. = (cos(R,T) +- cos_error) * (|R| +- |R|_err) * (|T| +- |T|_err)
  77. = cos(R,T)*|R|*|T|
  78. +- cos(R,T)*(|R|*|T|_err + |T|*|R|_err)
  79. +- cos_error*(|R| + |T|)
  80. +- lower order terms
  81. with cos(R,T) being in [0,1] and |T| = 1 we get:
  82. dot(R,T)_err = |R|*|T|_err + |R|_err = cos_error*(|R|+1)
  83. */
  84. const float f = dot(R,T);
  85. const float f_err = len_R*P_err + R_err + cos_err*(1.0f+len_R);
  86. const float dfdu = dot(dRdu,T) + dot(R,dTdu);
  87. const float dfdt = dot(dRdt,T);// + dot(R,dTdt);
  88. const float K = dot(R,R)-sqr(f);
  89. const float dKdu = /*2.0f*/(dot(R,dRdu)-f*dfdu);
  90. const float dKdt = /*2.0f*/(dot(R,dRdt)-f*dfdt);
  91. const float rsqrt_K = rsqrt(K);
  92. const float g = sqrt(K)-P.w;
  93. const float g_err = R_err + f_err + 16.0f*float(ulp)*box.upper.w;
  94. const float dgdu = /*0.5f*/dKdu*rsqrt_K-dPdu.w;
  95. const float dgdt = /*0.5f*/dKdt*rsqrt_K;//-dPdt.w;
  96. const LinearSpace2f J = LinearSpace2f(dfdu,dfdt,dgdu,dgdt);
  97. const Vec2f dut = rcp(J)*Vec2f(f,g);
  98. const Vec2f ut = Vec2f(u,t) - dut;
  99. u = ut.x; t = ut.y;
  100. if (abs(f) < f_err && abs(g) < g_err)
  101. {
  102. t+=dt;
  103. if (!(ray.tnear() <= t && t <= ray.tfar)) return false; // rejects NaNs
  104. if (!(u >= 0.0f && u <= 1.0f)) return false; // rejects NaNs
  105. const Vec3fa R = normalize(Q-P);
  106. const Vec3fa U = madd(Vec3fa(dPdu.w),R,dPdu);
  107. const Vec3fa V = cross(dPdu,R);
  108. BezierCurveHit hit(t,u,cross(V,U));
  109. return epilog(hit);
  110. }
  111. }
  112. return false;
  113. }
  114. #if !defined(__SYCL_DEVICE_ONLY__)
  115. template<typename NativeCurve3ff, typename Ray, typename Epilog>
  116. __forceinline bool intersect_bezier_recursive_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve, const Epilog& epilog)
  117. {
  118. float u0 = 0.0f;
  119. float u1 = 1.0f;
  120. unsigned int depth = 1;
  121. #if defined(__AVX__)
  122. enum { VSIZEX_ = 8 };
  123. typedef vbool8 vboolx; // maximally 8-wide to work around KNL issues
  124. typedef vint8 vintx;
  125. typedef vfloat8 vfloatx;
  126. #else
  127. enum { VSIZEX_ = 4 };
  128. typedef vbool4 vboolx;
  129. typedef vint4 vintx;
  130. typedef vfloat4 vfloatx;
  131. #endif
  132. unsigned int maxDepth = numBezierSubdivisions;
  133. bool found = false;
  134. const Vec3fa org = zero;
  135. const Vec3fa dir = ray.dir;
  136. unsigned int sptr = 0;
  137. const unsigned int stack_size = numBezierSubdivisions+1; // +1 because of unstable workaround below
  138. struct StackEntry {
  139. vboolx valid;
  140. vfloatx tlower;
  141. float u0;
  142. float u1;
  143. unsigned int depth;
  144. };
  145. StackEntry stack[stack_size];
  146. goto entry;
  147. /* terminate if stack is empty */
  148. while (sptr)
  149. {
  150. /* pop from stack */
  151. {
  152. sptr--;
  153. vboolx valid = stack[sptr].valid;
  154. const vfloatx tlower = stack[sptr].tlower;
  155. valid &= tlower+dt <= ray.tfar;
  156. if (none(valid)) continue;
  157. u0 = stack[sptr].u0;
  158. u1 = stack[sptr].u1;
  159. depth = stack[sptr].depth;
  160. const size_t i = select_min(valid,tlower); clear(valid,i);
  161. stack[sptr].valid = valid;
  162. if (any(valid)) sptr++; // there are still items on the stack
  163. /* process next segment */
  164. const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(vfloatx::size-1)));
  165. u0 = vu0[i+0];
  166. u1 = vu0[i+1];
  167. }
  168. entry:
  169. /* subdivide curve */
  170. const float dscale = (u1-u0)*(1.0f/(3.0f*(vfloatx::size-1)));
  171. const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(vfloatx::size-1)));
  172. Vec4vfx P0, dP0du; curve.template veval<VSIZEX_>(vu0,P0,dP0du); dP0du = dP0du * Vec4vfx(dscale);
  173. const Vec4vfx P3 = shift_right_1(P0);
  174. const Vec4vfx dP3du = shift_right_1(dP0du);
  175. const Vec4vfx P1 = P0 + dP0du;
  176. const Vec4vfx P2 = P3 - dP3du;
  177. /* calculate bounding cylinders */
  178. const vfloatx rr1 = sqr_point_to_line_distance(Vec3vfx(dP0du),Vec3vfx(P3-P0));
  179. const vfloatx rr2 = sqr_point_to_line_distance(Vec3vfx(dP3du),Vec3vfx(P3-P0));
  180. const vfloatx maxr12 = sqrt(max(rr1,rr2));
  181. const vfloatx one_plus_ulp = 1.0f+2.0f*float(ulp);
  182. const vfloatx one_minus_ulp = 1.0f-2.0f*float(ulp);
  183. vfloatx r_outer = max(P0.w,P1.w,P2.w,P3.w)+maxr12;
  184. vfloatx r_inner = min(P0.w,P1.w,P2.w,P3.w)-maxr12;
  185. r_outer = one_plus_ulp*r_outer;
  186. r_inner = max(0.0f,one_minus_ulp*r_inner);
  187. const CylinderN<vfloatx::size> cylinder_outer(Vec3vfx(P0),Vec3vfx(P3),r_outer);
  188. const CylinderN<vfloatx::size> cylinder_inner(Vec3vfx(P0),Vec3vfx(P3),r_inner);
  189. vboolx valid = true; clear(valid,vfloatx::size-1);
  190. /* intersect with outer cylinder */
  191. BBox<vfloatx> tc_outer; vfloatx u_outer0; Vec3vfx Ng_outer0; vfloatx u_outer1; Vec3vfx Ng_outer1;
  192. valid &= cylinder_outer.intersect(org,dir,tc_outer,u_outer0,Ng_outer0,u_outer1,Ng_outer1);
  193. if (none(valid)) continue;
  194. /* intersect with cap-planes */
  195. BBox<vfloatx> tp(ray.tnear()-dt,ray.tfar-dt);
  196. tp = embree::intersect(tp,tc_outer);
  197. BBox<vfloatx> h0 = HalfPlaneN<vfloatx::size>(Vec3vfx(P0),+Vec3vfx(dP0du)).intersect(org,dir);
  198. tp = embree::intersect(tp,h0);
  199. BBox<vfloatx> h1 = HalfPlaneN<vfloatx::size>(Vec3vfx(P3),-Vec3vfx(dP3du)).intersect(org,dir);
  200. tp = embree::intersect(tp,h1);
  201. valid &= tp.lower <= tp.upper;
  202. if (none(valid)) continue;
  203. /* clamp and correct u parameter */
  204. u_outer0 = clamp(u_outer0,vfloatx(0.0f),vfloatx(1.0f));
  205. u_outer1 = clamp(u_outer1,vfloatx(0.0f),vfloatx(1.0f));
  206. u_outer0 = lerp(u0,u1,(vfloatx(step)+u_outer0)*(1.0f/float(vfloatx::size)));
  207. u_outer1 = lerp(u0,u1,(vfloatx(step)+u_outer1)*(1.0f/float(vfloatx::size)));
  208. /* intersect with inner cylinder */
  209. BBox<vfloatx> tc_inner;
  210. vfloatx u_inner0 = zero; Vec3vfx Ng_inner0 = zero; vfloatx u_inner1 = zero; Vec3vfx Ng_inner1 = zero;
  211. const vboolx valid_inner = cylinder_inner.intersect(org,dir,tc_inner,u_inner0,Ng_inner0,u_inner1,Ng_inner1);
  212. /* at the unstable area we subdivide deeper */
  213. const vboolx unstable0 = (!valid_inner) | (abs(dot(Vec3vfx(Vec3fa(ray.dir)),Ng_inner0)) < 0.3f);
  214. const vboolx unstable1 = (!valid_inner) | (abs(dot(Vec3vfx(Vec3fa(ray.dir)),Ng_inner1)) < 0.3f);
  215. /* subtract the inner interval from the current hit interval */
  216. BBox<vfloatx> tp0, tp1;
  217. subtract(tp,tc_inner,tp0,tp1);
  218. vboolx valid0 = valid & (tp0.lower <= tp0.upper);
  219. vboolx valid1 = valid & (tp1.lower <= tp1.upper);
  220. if (none(valid0 | valid1)) continue;
  221. /* iterate over all first hits front to back */
  222. const vintx termDepth0 = select(unstable0,vintx(maxDepth+1),vintx(maxDepth));
  223. vboolx recursion_valid0 = valid0 & (depth < termDepth0);
  224. valid0 &= depth >= termDepth0;
  225. while (any(valid0))
  226. {
  227. const size_t i = select_min(valid0,tp0.lower); clear(valid0,i);
  228. found = found | intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer0[i],tp0.lower[i],epilog);
  229. //found = found | intersect_bezier_iterative_debug (ray,dt,curve,i,u_outer0,tp0,h0,h1,Ng_outer0,dP0du,dP3du,epilog);
  230. valid0 &= tp0.lower+dt <= ray.tfar;
  231. }
  232. valid1 &= tp1.lower+dt <= ray.tfar;
  233. /* iterate over all second hits front to back */
  234. const vintx termDepth1 = select(unstable1,vintx(maxDepth+1),vintx(maxDepth));
  235. vboolx recursion_valid1 = valid1 & (depth < termDepth1);
  236. valid1 &= depth >= termDepth1;
  237. while (any(valid1))
  238. {
  239. const size_t i = select_min(valid1,tp1.lower); clear(valid1,i);
  240. found = found | intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer1[i],tp1.upper[i],epilog);
  241. //found = found | intersect_bezier_iterative_debug (ray,dt,curve,i,u_outer1,tp1,h0,h1,Ng_outer1,dP0du,dP3du,epilog);
  242. valid1 &= tp1.lower+dt <= ray.tfar;
  243. }
  244. /* push valid segments to stack */
  245. recursion_valid0 &= tp0.lower+dt <= ray.tfar;
  246. recursion_valid1 &= tp1.lower+dt <= ray.tfar;
  247. const vboolx recursion_valid = recursion_valid0 | recursion_valid1;
  248. if (any(recursion_valid))
  249. {
  250. assert(sptr < stack_size);
  251. stack[sptr].valid = recursion_valid;
  252. stack[sptr].tlower = select(recursion_valid0,tp0.lower,tp1.lower);
  253. stack[sptr].u0 = u0;
  254. stack[sptr].u1 = u1;
  255. stack[sptr].depth = depth+1;
  256. sptr++;
  257. }
  258. }
  259. return found;
  260. }
  261. #else
  262. template<typename NativeCurve3ff, typename Ray, typename Epilog>
  263. __forceinline bool intersect_bezier_recursive_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve, const Epilog& epilog)
  264. {
  265. const Vec3fa org = zero;
  266. const Vec3fa dir = ray.dir;
  267. const unsigned int max_depth = 7;
  268. bool found = false;
  269. struct ShortStack
  270. {
  271. /* pushes both children */
  272. __forceinline void push() {
  273. depth++;
  274. }
  275. /* pops next node */
  276. __forceinline void pop() {
  277. short_stack += (1<<(31-depth));
  278. depth = 31-bsf(short_stack);
  279. }
  280. unsigned int depth = 0;
  281. unsigned int short_stack = 0;
  282. };
  283. ShortStack stack;
  284. do
  285. {
  286. const float u0 = (stack.short_stack+0*(1<<(31-stack.depth)))/float(0x80000000);
  287. const float u1 = (stack.short_stack+1*(1<<(31-stack.depth)))/float(0x80000000);
  288. /* subdivide bezier curve */
  289. Vec3ff P0, dP0du; curve.eval(u0,P0,dP0du); dP0du = dP0du * (u1-u0);
  290. Vec3ff P3, dP3du; curve.eval(u1,P3,dP3du); dP3du = dP3du * (u1-u0);
  291. const Vec3ff P1 = P0 + dP0du*(1.0f/3.0f);
  292. const Vec3ff P2 = P3 - dP3du*(1.0f/3.0f);
  293. /* check if curve is well behaved, by checking deviation of tangents from straight line */
  294. const Vec3ff W = Vec3ff(P3-P0,0.0f);
  295. const Vec3ff dQ0 = abs(3.0f*(P1-P0) - W);
  296. const Vec3ff dQ1 = abs(3.0f*(P2-P1) - W);
  297. const Vec3ff dQ2 = abs(3.0f*(P3-P2) - W);
  298. const Vec3ff max_dQ = max(dQ0,dQ1,dQ2);
  299. const float m = max(max_dQ.x,max_dQ.y,max_dQ.z); //,max_dQ.w);
  300. const float l = length(Vec3f(W));
  301. const bool well_behaved = m < 0.2f*l;
  302. if (!well_behaved && stack.depth < max_depth) {
  303. stack.push();
  304. continue;
  305. }
  306. /* calculate bounding cylinders */
  307. const float rr1 = sqr_point_to_line_distance(Vec3f(dP0du),Vec3f(P3-P0));
  308. const float rr2 = sqr_point_to_line_distance(Vec3f(dP3du),Vec3f(P3-P0));
  309. const float maxr12 = sqrt(max(rr1,rr2));
  310. const float one_plus_ulp = 1.0f+2.0f*float(ulp);
  311. const float one_minus_ulp = 1.0f-2.0f*float(ulp);
  312. float r_outer = max(P0.w,P1.w,P2.w,P3.w)+maxr12;
  313. float r_inner = min(P0.w,P1.w,P2.w,P3.w)-maxr12;
  314. r_outer = one_plus_ulp*r_outer;
  315. r_inner = max(0.0f,one_minus_ulp*r_inner);
  316. const Cylinder cylinder_outer(Vec3f(P0),Vec3f(P3),r_outer);
  317. const Cylinder cylinder_inner(Vec3f(P0),Vec3f(P3),r_inner);
  318. /* intersect with outer cylinder */
  319. BBox<float> tc_outer; float u_outer0; Vec3fa Ng_outer0; float u_outer1; Vec3fa Ng_outer1;
  320. if (!cylinder_outer.intersect(org,dir,tc_outer,u_outer0,Ng_outer0,u_outer1,Ng_outer1))
  321. {
  322. stack.pop();
  323. continue;
  324. }
  325. /* intersect with cap-planes */
  326. BBox<float> tp(ray.tnear()-dt,ray.tfar-dt);
  327. tp = embree::intersect(tp,tc_outer);
  328. BBox<float> h0 = HalfPlane(Vec3f(P0),+Vec3f(dP0du)).intersect(org,dir);
  329. tp = embree::intersect(tp,h0);
  330. BBox<float> h1 = HalfPlane(Vec3f(P3),-Vec3f(dP3du)).intersect(org,dir);
  331. tp = embree::intersect(tp,h1);
  332. if (tp.lower > tp.upper)
  333. {
  334. stack.pop();
  335. continue;
  336. }
  337. bool valid = true;
  338. /* clamp and correct u parameter */
  339. u_outer0 = clamp(u_outer0,float(0.0f),float(1.0f));
  340. u_outer1 = clamp(u_outer1,float(0.0f),float(1.0f));
  341. u_outer0 = lerp(u0,u1,u_outer0);
  342. u_outer1 = lerp(u0,u1,u_outer1);
  343. /* intersect with inner cylinder */
  344. BBox<float> tc_inner;
  345. float u_inner0 = zero; Vec3fa Ng_inner0 = zero; float u_inner1 = zero; Vec3fa Ng_inner1 = zero;
  346. const bool valid_inner = cylinder_inner.intersect(org,dir,tc_inner,u_inner0,Ng_inner0,u_inner1,Ng_inner1);
  347. /* subtract the inner interval from the current hit interval */
  348. BBox<float> tp0, tp1;
  349. subtract(tp,tc_inner,tp0,tp1);
  350. bool valid0 = valid & (tp0.lower <= tp0.upper);
  351. bool valid1 = valid & (tp1.lower <= tp1.upper);
  352. if (!(valid0 | valid1))
  353. {
  354. stack.pop();
  355. continue;
  356. }
  357. /* at the unstable area we subdivide deeper */
  358. const bool unstable0 = valid0 && ((!valid_inner) | (abs(dot(Vec3fa(ray.dir),Ng_inner0)) < 0.3f));
  359. const bool unstable1 = valid1 && ((!valid_inner) | (abs(dot(Vec3fa(ray.dir),Ng_inner1)) < 0.3f));
  360. if ((unstable0 | unstable1) && (stack.depth < max_depth)) {
  361. stack.push();
  362. continue;
  363. }
  364. if (valid0)
  365. found |= intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer0,tp0.lower,epilog);
  366. /* the far hit cannot be closer, thus skip if we hit entry already */
  367. valid1 &= tp1.lower+dt <= ray.tfar;
  368. /* iterate over second hit */
  369. if (valid1)
  370. found |= intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer1,tp1.upper,epilog);
  371. stack.pop();
  372. } while (stack.short_stack != 0x80000000);
  373. return found;
  374. }
  375. #endif
  376. template<template<typename Ty> class NativeCurve>
  377. struct SweepCurve1Intersector1
  378. {
  379. typedef NativeCurve<Vec3ff> NativeCurve3ff;
  380. template<typename Ray, typename Epilog>
  381. __forceinline bool intersect(const CurvePrecalculations1& pre, Ray& ray,
  382. RayQueryContext* context,
  383. const CurveGeometry* geom, const unsigned int primID,
  384. const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3,
  385. const Epilog& epilog)
  386. {
  387. STAT3(normal.trav_prims,1,1,1);
  388. /* move ray closer to make intersection stable */
  389. NativeCurve3ff curve0(v0,v1,v2,v3);
  390. curve0 = enlargeRadiusToMinWidth(context,geom,ray.org,curve0);
  391. const float dt = dot(curve0.center()-ray.org,ray.dir)*rcp(dot(ray.dir,ray.dir));
  392. const Vec3ff ref(madd(Vec3fa(dt),ray.dir,ray.org),0.0f);
  393. const NativeCurve3ff curve1 = curve0-ref;
  394. return intersect_bezier_recursive_jacobian(ray,dt,curve1,epilog);
  395. }
  396. };
  397. template<template<typename Ty> class NativeCurve, int K>
  398. struct SweepCurve1IntersectorK
  399. {
  400. typedef NativeCurve<Vec3ff> NativeCurve3ff;
  401. struct Ray1
  402. {
  403. __forceinline Ray1(RayK<K>& ray, size_t k)
  404. : org(ray.org.x[k],ray.org.y[k],ray.org.z[k]), dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]), _tnear(ray.tnear()[k]), tfar(ray.tfar[k]) {}
  405. Vec3fa org;
  406. Vec3fa dir;
  407. float _tnear;
  408. float& tfar;
  409. __forceinline float& tnear() { return _tnear; }
  410. //__forceinline float& tfar() { return _tfar; }
  411. __forceinline const float& tnear() const { return _tnear; }
  412. //__forceinline const float& tfar() const { return _tfar; }
  413. };
  414. template<typename Epilog>
  415. __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& vray, size_t k,
  416. RayQueryContext* context,
  417. const CurveGeometry* geom, const unsigned int primID,
  418. const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3,
  419. const Epilog& epilog)
  420. {
  421. STAT3(normal.trav_prims,1,1,1);
  422. Ray1 ray(vray,k);
  423. /* move ray closer to make intersection stable */
  424. NativeCurve3ff curve0(v0,v1,v2,v3);
  425. curve0 = enlargeRadiusToMinWidth(context,geom,ray.org,curve0);
  426. const float dt = dot(curve0.center()-ray.org,ray.dir)*rcp(dot(ray.dir,ray.dir));
  427. const Vec3ff ref(madd(Vec3fa(dt),ray.dir,ray.org),0.0f);
  428. const NativeCurve3ff curve1 = curve0-ref;
  429. return intersect_bezier_recursive_jacobian(ray,dt,curve1,epilog);
  430. }
  431. };
  432. }
  433. }