2
0

node_intersector_packet_stream.h 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "node_intersector.h"
  5. namespace embree
  6. {
  7. namespace isa
  8. {
  9. //////////////////////////////////////////////////////////////////////////////////////
  10. // Ray packet structure used in stream traversal
  11. //////////////////////////////////////////////////////////////////////////////////////
  /*! Ray packet data used by stream traversal. K is the packet width; the
   *  boolean selects one of the two specializations of this template
   *  (false: fast variant, true: robust variant). */
  template<int K, bool robust> struct TravRayKStream;
  14. /* Fast variant */
  15. template<int K>
  16. struct TravRayKStream<K, false>
  17. {
  18. __forceinline TravRayKStream() {}
  19. __forceinline TravRayKStream(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar)
  20. {
  21. init(ray_org, ray_dir);
  22. tnear = ray_tnear;
  23. tfar = ray_tfar;
  24. }
  25. __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir)
  26. {
  27. rdir = rcp_safe(ray_dir);
  28. org_rdir = ray_org * rdir;
  29. }
  30. Vec3vf<K> rdir;
  31. Vec3vf<K> org_rdir;
  32. vfloat<K> tnear;
  33. vfloat<K> tfar;
  34. };
  35. template<int K>
  36. using TravRayKStreamFast = TravRayKStream<K, false>;
  37. /* Robust variant */
  38. template<int K>
  39. struct TravRayKStream<K, true>
  40. {
  41. __forceinline TravRayKStream() {}
  42. __forceinline TravRayKStream(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar)
  43. {
  44. init(ray_org, ray_dir);
  45. tnear = ray_tnear;
  46. tfar = ray_tfar;
  47. }
  48. __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir)
  49. {
  50. rdir = vfloat<K>(1.0f)/(zero_fix(ray_dir));
  51. org = ray_org;
  52. }
  53. Vec3vf<K> rdir;
  54. Vec3vf<K> org;
  55. vfloat<K> tnear;
  56. vfloat<K> tfar;
  57. };
  58. template<int K>
  59. using TravRayKStreamRobust = TravRayKStream<K, true>;
  60. //////////////////////////////////////////////////////////////////////////////////////
  61. // Fast AABBNode intersection
  62. //////////////////////////////////////////////////////////////////////////////////////
  63. template<int N, int K>
  64. __forceinline size_t intersectNode1(const typename BVHN<N>::AABBNode* __restrict__ node,
  65. const TravRayKStreamFast<K>& ray, size_t k, const NearFarPrecalculations& nf)
  66. {
  67. const vfloat<N> bminX = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearX));
  68. const vfloat<N> bminY = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearY));
  69. const vfloat<N> bminZ = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearZ));
  70. const vfloat<N> bmaxX = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farX));
  71. const vfloat<N> bmaxY = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farY));
  72. const vfloat<N> bmaxZ = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farZ));
  73. const vfloat<N> rminX = msub(bminX, vfloat<N>(ray.rdir.x[k]), vfloat<N>(ray.org_rdir.x[k]));
  74. const vfloat<N> rminY = msub(bminY, vfloat<N>(ray.rdir.y[k]), vfloat<N>(ray.org_rdir.y[k]));
  75. const vfloat<N> rminZ = msub(bminZ, vfloat<N>(ray.rdir.z[k]), vfloat<N>(ray.org_rdir.z[k]));
  76. const vfloat<N> rmaxX = msub(bmaxX, vfloat<N>(ray.rdir.x[k]), vfloat<N>(ray.org_rdir.x[k]));
  77. const vfloat<N> rmaxY = msub(bmaxY, vfloat<N>(ray.rdir.y[k]), vfloat<N>(ray.org_rdir.y[k]));
  78. const vfloat<N> rmaxZ = msub(bmaxZ, vfloat<N>(ray.rdir.z[k]), vfloat<N>(ray.org_rdir.z[k]));
  79. const vfloat<N> rmin = maxi(rminX, rminY, rminZ, vfloat<N>(ray.tnear[k]));
  80. const vfloat<N> rmax = mini(rmaxX, rmaxY, rmaxZ, vfloat<N>(ray.tfar[k]));
  81. const vbool<N> vmask_first_hit = rmin <= rmax;
  82. return movemask(vmask_first_hit) & (((size_t)1 << N)-1);
  83. }
  84. template<int N, int K>
  85. __forceinline size_t intersectNodeK(const typename BVHN<N>::AABBNode* __restrict__ node, size_t i,
  86. const TravRayKStreamFast<K>& ray, const NearFarPrecalculations& nf)
  87. {
  88. char* ptr = (char*)&node->lower_x + i*sizeof(float);
  89. const vfloat<K> bminX = *(const float*)(ptr + nf.nearX);
  90. const vfloat<K> bminY = *(const float*)(ptr + nf.nearY);
  91. const vfloat<K> bminZ = *(const float*)(ptr + nf.nearZ);
  92. const vfloat<K> bmaxX = *(const float*)(ptr + nf.farX);
  93. const vfloat<K> bmaxY = *(const float*)(ptr + nf.farY);
  94. const vfloat<K> bmaxZ = *(const float*)(ptr + nf.farZ);
  95. const vfloat<K> rminX = msub(bminX, ray.rdir.x, ray.org_rdir.x);
  96. const vfloat<K> rminY = msub(bminY, ray.rdir.y, ray.org_rdir.y);
  97. const vfloat<K> rminZ = msub(bminZ, ray.rdir.z, ray.org_rdir.z);
  98. const vfloat<K> rmaxX = msub(bmaxX, ray.rdir.x, ray.org_rdir.x);
  99. const vfloat<K> rmaxY = msub(bmaxY, ray.rdir.y, ray.org_rdir.y);
  100. const vfloat<K> rmaxZ = msub(bmaxZ, ray.rdir.z, ray.org_rdir.z);
  101. const vfloat<K> rmin = maxi(rminX, rminY, rminZ, ray.tnear);
  102. const vfloat<K> rmax = mini(rmaxX, rmaxY, rmaxZ, ray.tfar);
  103. const vbool<K> vmask_first_hit = rmin <= rmax;
  104. return movemask(vmask_first_hit);
  105. }
  106. //////////////////////////////////////////////////////////////////////////////////////
  107. // Robust AABBNode intersection
  108. //////////////////////////////////////////////////////////////////////////////////////
  109. template<int N, int K>
  110. __forceinline size_t intersectNode1(const typename BVHN<N>::AABBNode* __restrict__ node,
  111. const TravRayKStreamRobust<K>& ray, size_t k, const NearFarPrecalculations& nf)
  112. {
  113. const vfloat<N> bminX = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearX));
  114. const vfloat<N> bminY = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearY));
  115. const vfloat<N> bminZ = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearZ));
  116. const vfloat<N> bmaxX = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farX));
  117. const vfloat<N> bmaxY = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farY));
  118. const vfloat<N> bmaxZ = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farZ));
  119. const vfloat<N> rminX = (bminX - vfloat<N>(ray.org.x[k])) * vfloat<N>(ray.rdir.x[k]);
  120. const vfloat<N> rminY = (bminY - vfloat<N>(ray.org.y[k])) * vfloat<N>(ray.rdir.y[k]);
  121. const vfloat<N> rminZ = (bminZ - vfloat<N>(ray.org.z[k])) * vfloat<N>(ray.rdir.z[k]);
  122. const vfloat<N> rmaxX = (bmaxX - vfloat<N>(ray.org.x[k])) * vfloat<N>(ray.rdir.x[k]);
  123. const vfloat<N> rmaxY = (bmaxY - vfloat<N>(ray.org.y[k])) * vfloat<N>(ray.rdir.y[k]);
  124. const vfloat<N> rmaxZ = (bmaxZ - vfloat<N>(ray.org.z[k])) * vfloat<N>(ray.rdir.z[k]);
  125. const float round_up = 1.0f+3.0f*float(ulp); // FIXME: use per instruction rounding for AVX512
  126. const vfloat<N> rmin = max(rminX, rminY, rminZ, vfloat<N>(ray.tnear[k]));
  127. const vfloat<N> rmax = round_up *min(rmaxX, rmaxY, rmaxZ, vfloat<N>(ray.tfar[k]));
  128. const vbool<N> vmask_first_hit = rmin <= rmax;
  129. return movemask(vmask_first_hit) & (((size_t)1 << N)-1);
  130. }
  131. template<int N, int K>
  132. __forceinline size_t intersectNodeK(const typename BVHN<N>::AABBNode* __restrict__ node, size_t i,
  133. const TravRayKStreamRobust<K>& ray, const NearFarPrecalculations& nf)
  134. {
  135. char *ptr = (char*)&node->lower_x + i*sizeof(float);
  136. const vfloat<K> bminX = *(const float*)(ptr + nf.nearX);
  137. const vfloat<K> bminY = *(const float*)(ptr + nf.nearY);
  138. const vfloat<K> bminZ = *(const float*)(ptr + nf.nearZ);
  139. const vfloat<K> bmaxX = *(const float*)(ptr + nf.farX);
  140. const vfloat<K> bmaxY = *(const float*)(ptr + nf.farY);
  141. const vfloat<K> bmaxZ = *(const float*)(ptr + nf.farZ);
  142. const vfloat<K> rminX = (bminX - ray.org.x) * ray.rdir.x;
  143. const vfloat<K> rminY = (bminY - ray.org.y) * ray.rdir.y;
  144. const vfloat<K> rminZ = (bminZ - ray.org.z) * ray.rdir.z;
  145. const vfloat<K> rmaxX = (bmaxX - ray.org.x) * ray.rdir.x;
  146. const vfloat<K> rmaxY = (bmaxY - ray.org.y) * ray.rdir.y;
  147. const vfloat<K> rmaxZ = (bmaxZ - ray.org.z) * ray.rdir.z;
  148. const float round_up = 1.0f+3.0f*float(ulp);
  149. const vfloat<K> rmin = max(rminX, rminY, rminZ, vfloat<K>(ray.tnear));
  150. const vfloat<K> rmax = round_up * min(rmaxX, rmaxY, rmaxZ, vfloat<K>(ray.tfar));
  151. const vbool<K> vmask_first_hit = rmin <= rmax;
  152. return movemask(vmask_first_hit);
  153. }
  154. }
  155. }