bezier_curve_intersector.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286
  1. // ======================================================================== //
  2. // Copyright 2009-2017 Intel Corporation //
  3. // //
  4. // Licensed under the Apache License, Version 2.0 (the "License"); //
  5. // you may not use this file except in compliance with the License. //
  6. // You may obtain a copy of the License at //
  7. // //
  8. // http://www.apache.org/licenses/LICENSE-2.0 //
  9. // //
  10. // Unless required by applicable law or agreed to in writing, software //
  11. // distributed under the License is distributed on an "AS IS" BASIS, //
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
  13. // See the License for the specific language governing permissions and //
  14. // limitations under the License. //
  15. // ======================================================================== //
  16. #pragma once
  17. #include "../common/ray.h"
  18. #include "filter.h"
  19. #include "cylinder.h"
  20. #include "plane.h"
  21. #include "line_intersector.h"
  22. namespace embree
  23. {
  24. namespace isa
  25. {
  26. static const size_t numJacobianIterations = 5;
  27. #if defined(__AVX512F__)
  28. static const size_t numBezierSubdivisions = 2;
  29. #elif defined(__AVX__)
  30. static const size_t numBezierSubdivisions = 2;
  31. #else
  32. static const size_t numBezierSubdivisions = 3;
  33. #endif
  34. template<typename NativeCurve3fa>
  35. struct BezierCurveHit
  36. {
  37. __forceinline BezierCurveHit() {}
  38. __forceinline BezierCurveHit(const float t, const float u, const Vec3fa& Ng)
  39. : t(t), u(u), v(0.0f), Ng(Ng) {}
  40. __forceinline void finalize() {}
  41. public:
  42. float t;
  43. float u;
  44. float v;
  45. Vec3fa Ng;
  46. };
  47. template<typename NativeCurve3fa, typename Ray, typename Epilog>
  48. __forceinline bool intersect_bezier_iterative_debug(const Ray& ray, const float dt, const NativeCurve3fa& curve, size_t i,
  49. const vfloatx& u, const BBox<vfloatx>& tp, const BBox<vfloatx>& h0, const BBox<vfloatx>& h1,
  50. const Vec3vfx& Ng, const Vec4vfx& dP0du, const Vec4vfx& dP3du,
  51. const Epilog& epilog)
  52. {
  53. if (tp.lower[i]+dt >= ray.tfar) return false;
  54. Vec3fa Ng_o = Vec3fa(Ng.x[i],Ng.y[i],Ng.z[i]);
  55. if (h0.lower[i] == tp.lower[i]) Ng_o = -Vec3fa(dP0du.x[i],dP0du.y[i],dP0du.z[i]);
  56. if (h1.lower[i] == tp.lower[i]) Ng_o = +Vec3fa(dP3du.x[i],dP3du.y[i],dP3du.z[i]);
  57. BezierCurveHit<NativeCurve3fa> hit(tp.lower[i]+dt,u[i],Ng_o);
  58. return epilog(hit);
  59. }
  60. template<typename NativeCurve3fa, typename Ray, typename Epilog>
  61. __forceinline bool intersect_bezier_iterative_jacobian(const Ray& ray, const float dt, const NativeCurve3fa& curve, float u, float t, const Epilog& epilog)
  62. {
  63. const Vec3fa org = zero;
  64. const Vec3fa dir = ray.dir;
  65. const float length_ray_dir = length(dir);
  66. for (size_t i=0; i<numJacobianIterations; i++)
  67. {
  68. const Vec3fa Q = madd(Vec3fa(t),dir,org);
  69. //const Vec3fa dQdu = zero;
  70. const Vec3fa dQdt = dir;
  71. Vec3fa P,dPdu,ddPdu; curve.eval(u,P,dPdu,ddPdu);
  72. //const Vec3fa dPdt = zero;
  73. const Vec3fa R = Q-P;
  74. const Vec3fa dRdu = /*dQdu*/-dPdu;
  75. const Vec3fa dRdt = dQdt;//-dPdt;
  76. const Vec3fa T = normalize(dPdu);
  77. const Vec3fa dTdu = dnormalize(dPdu,ddPdu);
  78. //const Vec3fa dTdt = zero;
  79. const float f = dot(R,T);
  80. const float dfdu = dot(dRdu,T) + dot(R,dTdu);
  81. const float dfdt = dot(dRdt,T);// + dot(R,dTdt);
  82. const float K = dot(R,R)-sqr(f);
  83. const float dKdu = /*2.0f*/(dot(R,dRdu)-f*dfdu);
  84. const float dKdt = /*2.0f*/(dot(R,dRdt)-f*dfdt);
  85. const float rsqrt_K = rsqrt(K);
  86. const float g = sqrt(K)-P.w;
  87. const float dgdu = /*0.5f*/dKdu*rsqrt_K-dPdu.w;
  88. const float dgdt = /*0.5f*/dKdt*rsqrt_K;//-dPdt.w;
  89. const LinearSpace2f J = LinearSpace2f(dfdu,dfdt,dgdu,dgdt);
  90. const Vec2f dut = rcp(J)*Vec2f(f,g);
  91. const Vec2f ut = Vec2f(u,t) - dut;
  92. u = ut.x; t = ut.y;
  93. const bool converged_u = abs(f) < 16.0f*float(ulp)*reduce_max(abs(dPdu));
  94. const bool converged_t = abs(g) < 16.0f*float(ulp)*length_ray_dir;
  95. if (converged_u && converged_t)
  96. {
  97. t+=dt;
  98. if (t < ray.tnear || t > ray.tfar) return false;
  99. if (u < 0.0f || u > 1.0f) return false;
  100. const Vec3fa R = normalize(Q-P);
  101. const Vec3fa U = madd(Vec3fa(dPdu.w),R,dPdu);
  102. const Vec3fa V = cross(dPdu,R);
  103. BezierCurveHit<NativeCurve3fa> hit(t,u,cross(V,U));
  104. return epilog(hit);
  105. }
  106. }
  107. return false;
  108. }
  109. template<typename NativeCurve3fa, typename Ray, typename Epilog>
  110. bool intersect_bezier_recursive_jacobian(const Ray& ray, const float dt, const NativeCurve3fa& curve,
  111. const float u0, const float u1, const size_t depth, const Epilog& epilog)
  112. {
  113. int maxDepth = numBezierSubdivisions;
  114. //int maxDepth = Device::debug_int1+1;
  115. const Vec3fa org = zero;
  116. const Vec3fa dir = ray.dir;
  117. /* subdivide curve */
  118. const float dscale = (u1-u0)*(1.0f/(3.0f*(VSIZEX-1)));
  119. const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(VSIZEX-1)));
  120. Vec4vfx P0, dP0du; curve.evalN(vu0,P0,dP0du); dP0du = dP0du * Vec4vfx(dscale);
  121. const Vec4vfx P3 = shift_right_1(P0);
  122. const Vec4vfx dP3du = shift_right_1(dP0du);
  123. const Vec4vfx P1 = P0 + dP0du;
  124. const Vec4vfx P2 = P3 - dP3du;
  125. /* calculate bounding cylinders */
  126. const Vec3vfx nP3P0 = normalize(P3-P0);
  127. const vfloatx rr1 = sqr_point_to_line_distance(Vec3vfx(dP0du),Vec3vfx(P3-P0));
  128. const vfloatx rr2 = sqr_point_to_line_distance(Vec3vfx(dP3du),Vec3vfx(P3-P0));
  129. const vfloatx maxr12 = sqrt(max(rr1,rr2));
  130. const vfloatx r_outer = max(P0.w,P1.w,P2.w,P3.w)+maxr12;
  131. const vfloatx sr = min(abs(dot(nP3P0,normalize(Vec3vfx(dP0du)))),abs(dot(nP3P0,normalize(Vec3vfx(dP3du)))));
  132. const vfloatx r_inner = max(0.0f,min(P0.w,P1.w,P2.w,P3.w)-maxr12)*sr;
  133. const CylinderN<VSIZEX> cylinder_outer(Vec3vfx(P0),Vec3vfx(P3),r_outer);
  134. const CylinderN<VSIZEX> cylinder_inner(Vec3vfx(P0),Vec3vfx(P3),r_inner);
  135. vboolx valid = true; clear(valid,VSIZEX-1);
  136. /* intersect with outer cylinder */
  137. BBox<vfloatx> tc_outer; vfloatx u_outer0; Vec3vfx Ng_outer0; vfloatx u_outer1; Vec3vfx Ng_outer1;
  138. valid &= cylinder_outer.intersect(org,dir,tc_outer,u_outer0,Ng_outer0,u_outer1,Ng_outer1);
  139. if (none(valid)) return false;
  140. /* intersect with cap-planes */
  141. BBox<vfloatx> tp(ray.tnear-dt,ray.tfar-dt);
  142. tp = embree::intersect(tp,tc_outer);
  143. BBox<vfloatx> h0 = HalfPlaneN<VSIZEX>(Vec3vfx(P0),+Vec3vfx(dP0du)).intersect(org,dir);
  144. tp = embree::intersect(tp,h0);
  145. BBox<vfloatx> h1 = HalfPlaneN<VSIZEX>(Vec3vfx(P3),-Vec3vfx(dP3du)).intersect(org,dir);
  146. tp = embree::intersect(tp,h1);
  147. valid &= tp.lower <= tp.upper;
  148. if (none(valid)) return false;
  149. /* clamp and correct u parameter */
  150. u_outer0 = clamp(u_outer0,vfloatx(0.0f),vfloatx(1.0f));
  151. u_outer1 = clamp(u_outer1,vfloatx(0.0f),vfloatx(1.0f));
  152. u_outer0 = lerp(u0,u1,(vfloatx(step)+u_outer0)*(1.0f/float(VSIZEX)));
  153. u_outer1 = lerp(u0,u1,(vfloatx(step)+u_outer1)*(1.0f/float(VSIZEX)));
  154. /* intersect with inner cylinder */
  155. BBox<vfloatx> tc_inner;
  156. vfloatx u_inner0; Vec3vfx Ng_inner0; vfloatx u_inner1; Vec3vfx Ng_inner1;
  157. const vboolx valid_inner = cylinder_inner.intersect(org,dir,tc_inner,u_inner0,Ng_inner0,u_inner1,Ng_inner1);
  158. /* at the unstable area we subdivide deeper */
  159. const vboolx unstable0 = (!valid_inner) | (abs(dot(Vec3vfx(normalize(ray.dir)),normalize(Ng_inner0))) < 0.3f);
  160. const vboolx unstable1 = (!valid_inner) | (abs(dot(Vec3vfx(normalize(ray.dir)),normalize(Ng_inner1))) < 0.3f);
  161. /* subtract the inner interval from the current hit interval */
  162. BBox<vfloatx> tp0, tp1;
  163. subtract(tp,tc_inner,tp0,tp1);
  164. vboolx valid0 = valid & (tp0.lower <= tp0.upper);
  165. vboolx valid1 = valid & (tp1.lower <= tp1.upper);
  166. if (none(valid0 | valid1)) return false;
  167. /* iterate over all first hits front to back */
  168. bool found = false;
  169. while (any(valid0))
  170. {
  171. const size_t i = select_min(valid0,tp0.lower); clear(valid0,i);
  172. const size_t termDepth = unstable0[i] ? maxDepth+1 : maxDepth;
  173. if (depth >= termDepth) found = found | intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer0[i],tp0.lower[i],epilog);
  174. //if (depth >= maxDepth) found = found | intersect_bezier_iterative_debug (ray,dt,curve,i,u_outer0,tp0,h0,h1,Ng_outer0,dP0du,dP3du,epilog);
  175. else found = found | intersect_bezier_recursive_jacobian(ray,dt,curve,vu0[i+0],vu0[i+1],depth+1,epilog);
  176. valid0 &= tp0.lower+dt < ray.tfar;
  177. }
  178. valid1 &= tp1.lower+dt < ray.tfar;
  179. /* iterate over all second hits front to back */
  180. while (any(valid1))
  181. {
  182. const size_t i = select_min(valid1,tp1.lower); clear(valid1,i);
  183. const size_t termDepth = unstable1[i] ? maxDepth+1 : maxDepth;
  184. if (depth >= termDepth) found = found | intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer1[i],tp1.upper[i],epilog);
  185. //if (depth >= maxDepth) found = found | intersect_bezier_iterative_debug (ray,dt,curve,i,u_outer1,tp1,h0,h1,Ng_outer1,dP0du,dP3du,epilog);
  186. else found = found | intersect_bezier_recursive_jacobian(ray,dt,curve,vu0[i+0],vu0[i+1],depth+1,epilog);
  187. valid1 &= tp1.lower+dt < ray.tfar;
  188. }
  189. return found;
  190. }
  191. template<typename NativeCurve3fa>
  192. struct BezierCurve1Intersector1
  193. {
  194. __forceinline BezierCurve1Intersector1() {}
  195. __forceinline BezierCurve1Intersector1(const Ray& ray, const void* ptr) {}
  196. template<typename Epilog>
  197. __noinline bool intersect(Ray& ray,
  198. const Vec3fa& v0, const Vec3fa& v1, const Vec3fa& v2, const Vec3fa& v3,
  199. const Epilog& epilog) const
  200. {
  201. STAT3(normal.trav_prims,1,1,1);
  202. /* move ray closer to make intersection stable */
  203. const float dt = dot(0.25f*(v0+v1+v2+v3)-ray.org,ray.dir)*rcp(dot(ray.dir,ray.dir));
  204. const Vec3fa ref(madd(Vec3fa(dt),ray.dir,ray.org),0.0f);
  205. const Vec3fa p0 = v0-ref;
  206. const Vec3fa p1 = v1-ref;
  207. const Vec3fa p2 = v2-ref;
  208. const Vec3fa p3 = v3-ref;
  209. const NativeCurve3fa curve(p0,p1,p2,p3);
  210. return intersect_bezier_recursive_jacobian(ray,dt,curve,0.0f,1.0f,1,epilog);
  211. }
  212. };
  213. template<typename NativeCurve3fa, int K>
  214. struct BezierCurve1IntersectorK
  215. {
  216. struct Ray1
  217. {
  218. __forceinline Ray1(RayK<K>& ray, size_t k)
  219. : org(ray.org.x[k],ray.org.y[k],ray.org.z[k]), dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]), tnear(ray.tnear[k]), tfar(ray.tfar[k]) {}
  220. Vec3fa org;
  221. Vec3fa dir;
  222. float tnear;
  223. float& tfar;
  224. };
  225. __forceinline BezierCurve1IntersectorK(const vbool<K>& valid, const RayK<K>& ray) {
  226. }
  227. template<typename Epilog>
  228. __forceinline bool intersect(RayK<K>& vray, size_t k,
  229. const Vec3fa& v0, const Vec3fa& v1, const Vec3fa& v2, const Vec3fa& v3,
  230. const Epilog& epilog) const
  231. {
  232. STAT3(normal.trav_prims,1,1,1);
  233. Ray1 ray(vray,k);
  234. /* move ray closer to make intersection stable */
  235. const float dt = dot(0.25f*(v0+v1+v2+v3)-ray.org,ray.dir)*rcp(dot(ray.dir,ray.dir));
  236. const Vec3fa ref(madd(Vec3fa(dt),ray.dir,ray.org),0.0f);
  237. const Vec3fa p0 = v0-ref;
  238. const Vec3fa p1 = v1-ref;
  239. const Vec3fa p2 = v2-ref;
  240. const Vec3fa p3 = v3-ref;
  241. const NativeCurve3fa curve(p0,p1,p2,p3);
  242. return intersect_bezier_recursive_jacobian(ray,dt,curve,0.0f,1.0f,1,epilog);
  243. }
  244. };
  245. }
  246. }