node_intersector_frustum.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "node_intersector.h"
  5. namespace embree
  6. {
  7. namespace isa
  8. {
  9. //////////////////////////////////////////////////////////////////////////////////////
  10. // Frustum structure used in hybrid and stream traversal
  11. //////////////////////////////////////////////////////////////////////////////////////
  /*
     Optimized frustum test. The ray/box intersection computes
     t = (p - org) / dir. The rays are assumed to be split by octant, so in
     each dimension the direction interval is either entirely non-negative
     or entirely negative.

     Case 1: dir.min >= 0 && dir.max >= 0:
       t_min = (p_min - org_max) / dir_max = (p_min - org_max)*rdir_min = p_min*rdir_min - org_max*rdir_min
       t_max = (p_max - org_min) / dir_min = (p_max - org_min)*rdir_max = p_max*rdir_max - org_min*rdir_max

     Case 2: dir.min < 0 && dir.max < 0:
       t_min = (p_max - org_min) / dir_min = (p_max - org_min)*rdir_max = p_max*rdir_max - org_min*rdir_max
       t_max = (p_min - org_max) / dir_max = (p_min - org_max)*rdir_min = p_min*rdir_min - org_max*rdir_min
  */
  24. template<bool robust>
  25. struct Frustum;
  26. /* Fast variant */
  27. template<>
  28. struct Frustum<false>
  29. {
  30. __forceinline Frustum() {}
  31. template<int K>
  32. __forceinline void init(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
  33. {
  34. const Vec3fa reduced_min_org(reduce_min(select(valid, org.x, pos_inf)),
  35. reduce_min(select(valid, org.y, pos_inf)),
  36. reduce_min(select(valid, org.z, pos_inf)));
  37. const Vec3fa reduced_max_org(reduce_max(select(valid, org.x, neg_inf)),
  38. reduce_max(select(valid, org.y, neg_inf)),
  39. reduce_max(select(valid, org.z, neg_inf)));
  40. const Vec3fa reduced_min_rdir(reduce_min(select(valid, rdir.x, pos_inf)),
  41. reduce_min(select(valid, rdir.y, pos_inf)),
  42. reduce_min(select(valid, rdir.z, pos_inf)));
  43. const Vec3fa reduced_max_rdir(reduce_max(select(valid, rdir.x, neg_inf)),
  44. reduce_max(select(valid, rdir.y, neg_inf)),
  45. reduce_max(select(valid, rdir.z, neg_inf)));
  46. const float reduced_min_dist = reduce_min(select(valid, ray_tnear, vfloat<K>(pos_inf)));
  47. const float reduced_max_dist = reduce_max(select(valid, ray_tfar , vfloat<K>(neg_inf)));
  48. init(reduced_min_org, reduced_max_org, reduced_min_rdir, reduced_max_rdir, reduced_min_dist, reduced_max_dist, N);
  49. }
  50. __forceinline void init(const Vec3fa& reduced_min_org,
  51. const Vec3fa& reduced_max_org,
  52. const Vec3fa& reduced_min_rdir,
  53. const Vec3fa& reduced_max_rdir,
  54. float reduced_min_dist,
  55. float reduced_max_dist,
  56. int N)
  57. {
  58. const Vec3ba pos_rdir = ge_mask(reduced_min_rdir, Vec3fa(zero));
  59. min_rdir = select(pos_rdir, reduced_min_rdir, reduced_max_rdir);
  60. max_rdir = select(pos_rdir, reduced_max_rdir, reduced_min_rdir);
  61. #if defined (__aarch64__)
  62. neg_min_org_rdir = -(min_rdir * select(pos_rdir, reduced_max_org, reduced_min_org));
  63. neg_max_org_rdir = -(max_rdir * select(pos_rdir, reduced_min_org, reduced_max_org));
  64. #else
  65. min_org_rdir = min_rdir * select(pos_rdir, reduced_max_org, reduced_min_org);
  66. max_org_rdir = max_rdir * select(pos_rdir, reduced_min_org, reduced_max_org);
  67. #endif
  68. min_dist = reduced_min_dist;
  69. max_dist = reduced_max_dist;
  70. nf = NearFarPrecalculations(min_rdir, N);
  71. }
  72. template<int K>
  73. __forceinline void updateMaxDist(const vfloat<K>& ray_tfar)
  74. {
  75. max_dist = reduce_max(ray_tfar);
  76. }
  77. NearFarPrecalculations nf;
  78. Vec3fa min_rdir;
  79. Vec3fa max_rdir;
  80. #if defined (__aarch64__)
  81. Vec3fa neg_min_org_rdir;
  82. Vec3fa neg_max_org_rdir;
  83. #else
  84. Vec3fa min_org_rdir;
  85. Vec3fa max_org_rdir;
  86. #endif
  87. float min_dist;
  88. float max_dist;
  89. };
  90. typedef Frustum<false> FrustumFast;
  91. /* Robust variant */
  92. template<>
  93. struct Frustum<true>
  94. {
  95. __forceinline Frustum() {}
  96. template<int K>
  97. __forceinline void init(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
  98. {
  99. const Vec3fa reduced_min_org(reduce_min(select(valid, org.x, pos_inf)),
  100. reduce_min(select(valid, org.y, pos_inf)),
  101. reduce_min(select(valid, org.z, pos_inf)));
  102. const Vec3fa reduced_max_org(reduce_max(select(valid, org.x, neg_inf)),
  103. reduce_max(select(valid, org.y, neg_inf)),
  104. reduce_max(select(valid, org.z, neg_inf)));
  105. const Vec3fa reduced_min_rdir(reduce_min(select(valid, rdir.x, pos_inf)),
  106. reduce_min(select(valid, rdir.y, pos_inf)),
  107. reduce_min(select(valid, rdir.z, pos_inf)));
  108. const Vec3fa reduced_max_rdir(reduce_max(select(valid, rdir.x, neg_inf)),
  109. reduce_max(select(valid, rdir.y, neg_inf)),
  110. reduce_max(select(valid, rdir.z, neg_inf)));
  111. const float reduced_min_dist = reduce_min(select(valid, ray_tnear, vfloat<K>(pos_inf)));
  112. const float reduced_max_dist = reduce_max(select(valid, ray_tfar , vfloat<K>(neg_inf)));
  113. init(reduced_min_org, reduced_max_org, reduced_min_rdir, reduced_max_rdir, reduced_min_dist, reduced_max_dist, N);
  114. }
  115. __forceinline void init(const Vec3fa& reduced_min_org,
  116. const Vec3fa& reduced_max_org,
  117. const Vec3fa& reduced_min_rdir,
  118. const Vec3fa& reduced_max_rdir,
  119. float reduced_min_dist,
  120. float reduced_max_dist,
  121. int N)
  122. {
  123. const Vec3ba pos_rdir = ge_mask(reduced_min_rdir, Vec3fa(zero));
  124. min_rdir = select(pos_rdir, reduced_min_rdir, reduced_max_rdir);
  125. max_rdir = select(pos_rdir, reduced_max_rdir, reduced_min_rdir);
  126. min_org = select(pos_rdir, reduced_max_org, reduced_min_org);
  127. max_org = select(pos_rdir, reduced_min_org, reduced_max_org);
  128. min_dist = reduced_min_dist;
  129. max_dist = reduced_max_dist;
  130. nf = NearFarPrecalculations(min_rdir, N);
  131. }
  132. template<int K>
  133. __forceinline void updateMaxDist(const vfloat<K>& ray_tfar)
  134. {
  135. max_dist = reduce_max(ray_tfar);
  136. }
  137. NearFarPrecalculations nf;
  138. Vec3fa min_rdir;
  139. Vec3fa max_rdir;
  140. Vec3fa min_org;
  141. Vec3fa max_org;
  142. float min_dist;
  143. float max_dist;
  144. };
  145. typedef Frustum<true> FrustumRobust;
  146. //////////////////////////////////////////////////////////////////////////////////////
  147. // Fast AABBNode intersection
  148. //////////////////////////////////////////////////////////////////////////////////////
  149. template<int N>
  150. __forceinline size_t intersectNodeFrustum(const typename BVHN<N>::AABBNode* __restrict__ node,
  151. const FrustumFast& frustum, vfloat<N>& dist)
  152. {
  153. const vfloat<N> bminX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearX);
  154. const vfloat<N> bminY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearY);
  155. const vfloat<N> bminZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearZ);
  156. const vfloat<N> bmaxX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farX);
  157. const vfloat<N> bmaxY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farY);
  158. const vfloat<N> bmaxZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farZ);
  159. #if defined (__aarch64__)
  160. const vfloat<N> fminX = madd(bminX, vfloat<N>(frustum.min_rdir.x), vfloat<N>(frustum.neg_min_org_rdir.x));
  161. const vfloat<N> fminY = madd(bminY, vfloat<N>(frustum.min_rdir.y), vfloat<N>(frustum.neg_min_org_rdir.y));
  162. const vfloat<N> fminZ = madd(bminZ, vfloat<N>(frustum.min_rdir.z), vfloat<N>(frustum.neg_min_org_rdir.z));
  163. const vfloat<N> fmaxX = madd(bmaxX, vfloat<N>(frustum.max_rdir.x), vfloat<N>(frustum.neg_max_org_rdir.x));
  164. const vfloat<N> fmaxY = madd(bmaxY, vfloat<N>(frustum.max_rdir.y), vfloat<N>(frustum.neg_max_org_rdir.y));
  165. const vfloat<N> fmaxZ = madd(bmaxZ, vfloat<N>(frustum.max_rdir.z), vfloat<N>(frustum.neg_max_org_rdir.z));
  166. #else
  167. const vfloat<N> fminX = msub(bminX, vfloat<N>(frustum.min_rdir.x), vfloat<N>(frustum.min_org_rdir.x));
  168. const vfloat<N> fminY = msub(bminY, vfloat<N>(frustum.min_rdir.y), vfloat<N>(frustum.min_org_rdir.y));
  169. const vfloat<N> fminZ = msub(bminZ, vfloat<N>(frustum.min_rdir.z), vfloat<N>(frustum.min_org_rdir.z));
  170. const vfloat<N> fmaxX = msub(bmaxX, vfloat<N>(frustum.max_rdir.x), vfloat<N>(frustum.max_org_rdir.x));
  171. const vfloat<N> fmaxY = msub(bmaxY, vfloat<N>(frustum.max_rdir.y), vfloat<N>(frustum.max_org_rdir.y));
  172. const vfloat<N> fmaxZ = msub(bmaxZ, vfloat<N>(frustum.max_rdir.z), vfloat<N>(frustum.max_org_rdir.z));
  173. #endif
  174. const vfloat<N> fmin = maxi(fminX, fminY, fminZ, vfloat<N>(frustum.min_dist));
  175. dist = fmin;
  176. const vfloat<N> fmax = mini(fmaxX, fmaxY, fmaxZ, vfloat<N>(frustum.max_dist));
  177. const vbool<N> vmask_node_hit = fmin <= fmax;
  178. size_t m_node = movemask(vmask_node_hit) & (((size_t)1 << N)-1);
  179. return m_node;
  180. }
  181. //////////////////////////////////////////////////////////////////////////////////////
  182. // Robust AABBNode intersection
  183. //////////////////////////////////////////////////////////////////////////////////////
  184. template<int N>
  185. __forceinline size_t intersectNodeFrustum(const typename BVHN<N>::AABBNode* __restrict__ node,
  186. const FrustumRobust& frustum, vfloat<N>& dist)
  187. {
  188. const vfloat<N> bminX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearX);
  189. const vfloat<N> bminY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearY);
  190. const vfloat<N> bminZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearZ);
  191. const vfloat<N> bmaxX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farX);
  192. const vfloat<N> bmaxY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farY);
  193. const vfloat<N> bmaxZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farZ);
  194. const vfloat<N> fminX = (bminX - vfloat<N>(frustum.min_org.x)) * vfloat<N>(frustum.min_rdir.x);
  195. const vfloat<N> fminY = (bminY - vfloat<N>(frustum.min_org.y)) * vfloat<N>(frustum.min_rdir.y);
  196. const vfloat<N> fminZ = (bminZ - vfloat<N>(frustum.min_org.z)) * vfloat<N>(frustum.min_rdir.z);
  197. const vfloat<N> fmaxX = (bmaxX - vfloat<N>(frustum.max_org.x)) * vfloat<N>(frustum.max_rdir.x);
  198. const vfloat<N> fmaxY = (bmaxY - vfloat<N>(frustum.max_org.y)) * vfloat<N>(frustum.max_rdir.y);
  199. const vfloat<N> fmaxZ = (bmaxZ - vfloat<N>(frustum.max_org.z)) * vfloat<N>(frustum.max_rdir.z);
  200. const float round_down = 1.0f-2.0f*float(ulp); // FIXME: use per instruction rounding for AVX512
  201. const float round_up = 1.0f+2.0f*float(ulp);
  202. const vfloat<N> fmin = max(fminX, fminY, fminZ, vfloat<N>(frustum.min_dist));
  203. dist = fmin;
  204. const vfloat<N> fmax = min(fmaxX, fmaxY, fmaxZ, vfloat<N>(frustum.max_dist));
  205. const vbool<N> vmask_node_hit = (round_down*fmin <= round_up*fmax);
  206. size_t m_node = movemask(vmask_node_hit) & (((size_t)1 << N)-1);
  207. return m_node;
  208. }
  209. }
  210. }