// bezier_hair_intersector.h
  1. // ======================================================================== //
  2. // Copyright 2009-2017 Intel Corporation //
  3. // //
  4. // Licensed under the Apache License, Version 2.0 (the "License"); //
  5. // you may not use this file except in compliance with the License. //
  6. // You may obtain a copy of the License at //
  7. // //
  8. // http://www.apache.org/licenses/LICENSE-2.0 //
  9. // //
  10. // Unless required by applicable law or agreed to in writing, software //
  11. // distributed under the License is distributed on an "AS IS" BASIS, //
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
  13. // See the License for the specific language governing permissions and //
  14. // limitations under the License. //
  15. // ======================================================================== //
  16. #pragma once
  17. #include "../common/ray.h"
  18. #include "filter.h"
  19. // FIXME: remove this file later
  20. //#define Bezier1Intersector1 Hair1Intersector1
  21. //#define Bezier1IntersectorK Hair1IntersectorK
  22. namespace embree
  23. {
  24. namespace isa
  25. {
  26. template<typename NativeCurve3fa, int M>
  27. struct HairHit
  28. {
  29. __forceinline HairHit() {}
  30. __forceinline HairHit(const vbool<M>& valid, const vfloat<M>& U, const vfloat<M>& V, const vfloat<M>& T, const int i, const int N,
  31. const Vec3fa& p0, const Vec3fa& p1, const Vec3fa& p2, const Vec3fa& p3)
  32. : U(U), V(V), T(T), i(i), N(N), p0(p0), p1(p1), p2(p2), p3(p3), valid(valid) {}
  33. __forceinline void finalize()
  34. {
  35. vu = (vfloat<M>(step)+U+vfloat<M>(float(i)))*(1.0f/float(N));
  36. vv = V;
  37. vt = T;
  38. }
  39. __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); }
  40. __forceinline float t (const size_t i) const { return vt[i]; }
  41. __forceinline Vec3fa Ng(const size_t i) const
  42. {
  43. Vec3fa T = NativeCurve3fa(p0,p1,p2,p3).eval_du(vu[i]);
  44. return T == Vec3fa(zero) ? Vec3fa(one) : T;
  45. }
  46. public:
  47. vfloat<M> U;
  48. vfloat<M> V;
  49. vfloat<M> T;
  50. int i, N;
  51. Vec3fa p0,p1,p2,p3;
  52. public:
  53. vbool<M> valid;
  54. vfloat<M> vu;
  55. vfloat<M> vv;
  56. vfloat<M> vt;
  57. };
  58. template<typename NativeCurve3fa>
  59. struct Hair1Intersector1
  60. {
  61. float depth_scale;
  62. LinearSpace3fa ray_space;
  63. __forceinline Hair1Intersector1() {}
  64. __forceinline Hair1Intersector1(const Ray& ray, const void* ptr)
  65. : depth_scale(rsqrt(dot(ray.dir,ray.dir))), ray_space(frame(depth_scale*ray.dir).transposed()) {}
  66. template<typename Epilog>
  67. __forceinline bool intersect(Ray& ray,
  68. const Vec3fa& v0, const Vec3fa& v1, const Vec3fa& v2, const Vec3fa& v3, const int N,
  69. const Epilog& epilog) const
  70. {
  71. /* transform control points into ray space */
  72. Vec3fa w0 = xfmVector(ray_space,v0-ray.org); w0.w = v0.w;
  73. Vec3fa w1 = xfmVector(ray_space,v1-ray.org); w1.w = v1.w;
  74. Vec3fa w2 = xfmVector(ray_space,v2-ray.org); w2.w = v2.w;
  75. Vec3fa w3 = xfmVector(ray_space,v3-ray.org); w3.w = v3.w;
  76. NativeCurve3fa curve2D(w0,w1,w2,w3);
  77. /* evaluate the bezier curve */
  78. vboolx valid = vfloatx(step) < vfloatx(float(N));
  79. const Vec4vfx p0 = curve2D.template eval0<VSIZEX>(0,N);
  80. const Vec4vfx p1 = curve2D.template eval1<VSIZEX>(0,N);
  81. /* approximative intersection with cone */
  82. const Vec4vfx v = p1-p0;
  83. const Vec4vfx w = -p0;
  84. const vfloatx d0 = madd(w.x,v.x,w.y*v.y);
  85. const vfloatx d1 = madd(v.x,v.x,v.y*v.y);
  86. const vfloatx u = clamp(d0*rcp(d1),vfloatx(zero),vfloatx(one));
  87. const Vec4vfx p = madd(u,v,p0);
  88. const vfloatx t = p.z*depth_scale;
  89. const vfloatx d2 = madd(p.x,p.x,p.y*p.y);
  90. const vfloatx r = p.w;
  91. const vfloatx r2 = r*r;
  92. valid &= (d2 <= r2) & (vfloatx(ray.tnear) < t) & (t < vfloatx(ray.tfar));
  93. /* update hit information */
  94. bool ishit = false;
  95. if (unlikely(any(valid))) {
  96. HairHit<NativeCurve3fa,VSIZEX> hit(valid,u,0.0f,t,0,N,v0,v1,v2,v3);
  97. ishit = ishit | epilog(valid,hit);
  98. }
  99. if (unlikely(VSIZEX < N))
  100. {
  101. /* process SIMD-size many segments per iteration */
  102. for (int i=VSIZEX; i<N; i+=VSIZEX)
  103. {
  104. /* evaluate the bezier curve */
  105. vboolx valid = vintx(i)+vintx(step) < vintx(N);
  106. const Vec4vfx p0 = curve2D.template eval0<VSIZEX>(i,N);
  107. const Vec4vfx p1 = curve2D.template eval1<VSIZEX>(i,N);
  108. /* approximative intersection with cone */
  109. const Vec4vfx v = p1-p0;
  110. const Vec4vfx w = -p0;
  111. const vfloatx d0 = madd(w.x,v.x,w.y*v.y);
  112. const vfloatx d1 = madd(v.x,v.x,v.y*v.y);
  113. const vfloatx u = clamp(d0*rcp(d1),vfloatx(zero),vfloatx(one));
  114. const Vec4vfx p = madd(u,v,p0);
  115. const vfloatx t = p.z*depth_scale;
  116. const vfloatx d2 = madd(p.x,p.x,p.y*p.y);
  117. const vfloatx r = p.w;
  118. const vfloatx r2 = r*r;
  119. valid &= (d2 <= r2) & (vfloatx(ray.tnear) < t) & (t < vfloatx(ray.tfar));
  120. /* update hit information */
  121. if (unlikely(any(valid))) {
  122. HairHit<NativeCurve3fa,VSIZEX> hit(valid,u,0.0f,t,i,N,v0,v1,v2,v3);
  123. ishit = ishit | epilog(valid,hit);
  124. }
  125. }
  126. }
  127. return ishit;
  128. }
  129. };
  130. template<typename NativeCurve3fa, int K>
  131. struct Hair1IntersectorK
  132. {
  133. vfloat<K> depth_scale;
  134. LinearSpace3fa ray_space[K];
  135. __forceinline Hair1IntersectorK(const vbool<K>& valid, const RayK<K>& ray)
  136. {
  137. size_t mask = movemask(valid);
  138. depth_scale = rsqrt(dot(ray.dir,ray.dir));
  139. while (mask) {
  140. size_t k = __bscf(mask);
  141. ray_space[k] = frame(depth_scale[k]*Vec3fa(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k])).transposed();
  142. }
  143. }
  144. __forceinline Hair1IntersectorK (const RayK<K>& ray, size_t k)
  145. {
  146. Vec3fa ray_dir = Vec3fa(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]);
  147. depth_scale[k] = rsqrt(dot(ray_dir,ray_dir));
  148. ray_space [k] = frame(depth_scale[k]*ray_dir).transposed();
  149. }
  150. template<typename Epilog>
  151. __forceinline bool intersect(RayK<K>& ray, size_t k,
  152. const Vec3fa& v0, const Vec3fa& v1, const Vec3fa& v2, const Vec3fa& v3, const int N,
  153. const Epilog& epilog) const
  154. {
  155. /* load ray */
  156. const Vec3fa ray_org(ray.org.x[k],ray.org.y[k],ray.org.z[k]);
  157. const float ray_tnear = ray.tnear[k];
  158. const float ray_tfar = ray.tfar [k];
  159. /* transform control points into ray space */
  160. Vec3fa w0 = xfmVector(ray_space[k],v0-ray_org); w0.w = v0.w;
  161. Vec3fa w1 = xfmVector(ray_space[k],v1-ray_org); w1.w = v1.w;
  162. Vec3fa w2 = xfmVector(ray_space[k],v2-ray_org); w2.w = v2.w;
  163. Vec3fa w3 = xfmVector(ray_space[k],v3-ray_org); w3.w = v3.w;
  164. NativeCurve3fa curve2D(w0,w1,w2,w3);
  165. /* process SIMD-size many segments per iteration */
  166. bool ishit = false;
  167. for (int i=0; i<N; i+=VSIZEX)
  168. {
  169. /* evaluate the bezier curve */
  170. vboolx valid = vintx(i)+vintx(step) < vintx(N);
  171. const Vec4vfx p0 = curve2D.template eval0<VSIZEX>(i,N);
  172. const Vec4vfx p1 = curve2D.template eval1<VSIZEX>(i,N);
  173. /* approximative intersection with cone */
  174. const Vec4vfx v = p1-p0;
  175. const Vec4vfx w = -p0;
  176. const vfloatx d0 = madd(w.x,v.x,w.y*v.y);
  177. const vfloatx d1 = madd(v.x,v.x,v.y*v.y);
  178. const vfloatx u = clamp(d0*rcp(d1),vfloatx(zero),vfloatx(one));
  179. const Vec4vfx p = madd(u,v,p0);
  180. const vfloatx t = p.z*depth_scale[k];
  181. const vfloatx d2 = madd(p.x,p.x,p.y*p.y);
  182. const vfloatx r = p.w;
  183. const vfloatx r2 = r*r;
  184. valid &= (d2 <= r2) & (vfloatx(ray_tnear) < t) & (t < vfloatx(ray_tfar));
  185. if (likely(none(valid))) continue;
  186. /* update hit information */
  187. HairHit<NativeCurve3fa,VSIZEX> hit(valid,u,0.0f,t,i,N,v0,v1,v2,v3);
  188. ishit = ishit | epilog(valid,hit);
  189. }
  190. return ishit;
  191. }
  192. };
  193. }
  194. }