// node_intersector1.h
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "node_intersector.h"
  5. namespace embree
  6. {
  7. namespace isa
  8. {
  //////////////////////////////////////////////////////////////////////////////////////
  // Ray structure used in single-ray traversal
  //////////////////////////////////////////////////////////////////////////////////////

  /* Primary template: declared only. The two partial specializations on the
     `robust` flag (false / true) provide the actual definitions. */
  template<int N, bool robust> struct TravRayBase;
  14. /* Base (without tnear and tfar) */
  15. template<int N>
  16. struct TravRayBase<N,false>
  17. {
  18. __forceinline TravRayBase() {}
  19. __forceinline TravRayBase(const Vec3fa& ray_org, const Vec3fa& ray_dir)
  20. : org_xyz(ray_org), dir_xyz(ray_dir)
  21. {
  22. const Vec3fa ray_rdir = rcp_safe(ray_dir);
  23. org = Vec3vf<N>(ray_org.x,ray_org.y,ray_org.z);
  24. dir = Vec3vf<N>(ray_dir.x,ray_dir.y,ray_dir.z);
  25. rdir = Vec3vf<N>(ray_rdir.x,ray_rdir.y,ray_rdir.z);
  26. #if defined(__AVX2__) || defined(__ARM_NEON)
  27. const Vec3fa ray_org_rdir = ray_org*ray_rdir;
  28. org_rdir = Vec3vf<N>(ray_org_rdir.x,ray_org_rdir.y,ray_org_rdir.z);
  29. #endif
  30. nearX = ray_rdir.x >= 0.0f ? 0*sizeof(vfloat<N>) : 1*sizeof(vfloat<N>);
  31. nearY = ray_rdir.y >= 0.0f ? 2*sizeof(vfloat<N>) : 3*sizeof(vfloat<N>);
  32. nearZ = ray_rdir.z >= 0.0f ? 4*sizeof(vfloat<N>) : 5*sizeof(vfloat<N>);
  33. farX = nearX ^ sizeof(vfloat<N>);
  34. farY = nearY ^ sizeof(vfloat<N>);
  35. farZ = nearZ ^ sizeof(vfloat<N>);
  36. }
  37. template<int K>
  38. __forceinline void init(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir,
  39. const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ,
  40. size_t flip = sizeof(vfloat<N>))
  41. {
  42. org = Vec3vf<N>(ray_org.x[k], ray_org.y[k], ray_org.z[k]);
  43. dir = Vec3vf<N>(ray_dir.x[k], ray_dir.y[k], ray_dir.z[k]);
  44. rdir = Vec3vf<N>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]);
  45. #if defined(__AVX2__) || defined(__ARM_NEON)
  46. org_rdir = org*rdir;
  47. #endif
  48. nearX = nearXYZ.x[k];
  49. nearY = nearXYZ.y[k];
  50. nearZ = nearXYZ.z[k];
  51. farX = nearX ^ flip;
  52. farY = nearY ^ flip;
  53. farZ = nearZ ^ flip;
  54. }
  55. Vec3fa org_xyz, dir_xyz;
  56. Vec3vf<N> org, dir, rdir;
  57. #if defined(__AVX2__) || defined(__ARM_NEON)
  58. Vec3vf<N> org_rdir;
  59. #endif
  60. size_t nearX, nearY, nearZ;
  61. size_t farX, farY, farZ;
  62. };
  63. /* Base (without tnear and tfar) */
  64. template<int N>
  65. struct TravRayBase<N,true>
  66. {
  67. __forceinline TravRayBase() {}
  68. __forceinline TravRayBase(const Vec3fa& ray_org, const Vec3fa& ray_dir)
  69. : org_xyz(ray_org), dir_xyz(ray_dir)
  70. {
  71. const float round_down = 1.0f-3.0f*float(ulp);
  72. const float round_up = 1.0f+3.0f*float(ulp);
  73. const Vec3fa ray_rdir = 1.0f/zero_fix(ray_dir);
  74. const Vec3fa ray_rdir_near = round_down*ray_rdir;
  75. const Vec3fa ray_rdir_far = round_up *ray_rdir;
  76. org = Vec3vf<N>(ray_org.x,ray_org.y,ray_org.z);
  77. dir = Vec3vf<N>(ray_dir.x,ray_dir.y,ray_dir.z);
  78. rdir_near = Vec3vf<N>(ray_rdir_near.x,ray_rdir_near.y,ray_rdir_near.z);
  79. rdir_far = Vec3vf<N>(ray_rdir_far .x,ray_rdir_far .y,ray_rdir_far .z);
  80. nearX = ray_rdir_near.x >= 0.0f ? 0*sizeof(vfloat<N>) : 1*sizeof(vfloat<N>);
  81. nearY = ray_rdir_near.y >= 0.0f ? 2*sizeof(vfloat<N>) : 3*sizeof(vfloat<N>);
  82. nearZ = ray_rdir_near.z >= 0.0f ? 4*sizeof(vfloat<N>) : 5*sizeof(vfloat<N>);
  83. farX = nearX ^ sizeof(vfloat<N>);
  84. farY = nearY ^ sizeof(vfloat<N>);
  85. farZ = nearZ ^ sizeof(vfloat<N>);
  86. }
  87. template<int K>
  88. __forceinline void init(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir,
  89. const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ,
  90. size_t flip = sizeof(vfloat<N>))
  91. {
  92. const vfloat<N> round_down = 1.0f-3.0f*float(ulp);
  93. const vfloat<N> round_up = 1.0f+3.0f*float(ulp);
  94. org = Vec3vf<N>(ray_org.x[k], ray_org.y[k], ray_org.z[k]);
  95. dir = Vec3vf<N>(ray_dir.x[k], ray_dir.y[k], ray_dir.z[k]);
  96. rdir_near = round_down*Vec3vf<N>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]);
  97. rdir_far = round_up *Vec3vf<N>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]);
  98. nearX = nearXYZ.x[k];
  99. nearY = nearXYZ.y[k];
  100. nearZ = nearXYZ.z[k];
  101. farX = nearX ^ flip;
  102. farY = nearY ^ flip;
  103. farZ = nearZ ^ flip;
  104. }
  105. Vec3fa org_xyz, dir_xyz;
  106. Vec3vf<N> org, dir, rdir_near, rdir_far;
  107. size_t nearX, nearY, nearZ;
  108. size_t farX, farY, farZ;
  109. };
  110. /* Full (with tnear and tfar) */
  111. template<int N, bool robust>
  112. struct TravRay : TravRayBase<N,robust>
  113. {
  114. __forceinline TravRay() {}
  115. __forceinline TravRay(const Vec3fa& ray_org, const Vec3fa& ray_dir, float ray_tnear, float ray_tfar)
  116. : TravRayBase<N,robust>(ray_org, ray_dir),
  117. tnear(ray_tnear), tfar(ray_tfar) {}
  118. template<int K>
  119. __forceinline void init(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir,
  120. const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ,
  121. float ray_tnear, float ray_tfar,
  122. size_t flip = sizeof(vfloat<N>))
  123. {
  124. TravRayBase<N,robust>::template init<K>(k, ray_org, ray_dir, ray_rdir, nearXYZ, flip);
  125. tnear = ray_tnear; tfar = ray_tfar;
  126. }
  127. vfloat<N> tnear;
  128. vfloat<N> tfar;
  129. };
  130. //////////////////////////////////////////////////////////////////////////////////////
  131. // Point Query structure used in single-ray traversal
  132. //////////////////////////////////////////////////////////////////////////////////////
  133. template<int N>
  134. struct TravPointQuery
  135. {
  136. __forceinline TravPointQuery() {}
  137. __forceinline TravPointQuery(const Vec3fa& query_org, const Vec3fa& query_rad)
  138. {
  139. org = Vec3vf<N>(query_org.x, query_org.y, query_org.z);
  140. rad = Vec3vf<N>(query_rad.x, query_rad.y, query_rad.z);
  141. }
  142. __forceinline vfloat<N> const& tfar() const {
  143. return rad.x;
  144. }
  145. Vec3vf<N> org, rad;
  146. };
  147. //////////////////////////////////////////////////////////////////////////////////////
  148. // point query
  149. //////////////////////////////////////////////////////////////////////////////////////
  150. template<int N>
  151. __forceinline size_t pointQuerySphereDistAndMask(
  152. const TravPointQuery<N>& query, vfloat<N>& dist, vfloat<N> const& minX, vfloat<N> const& maxX,
  153. vfloat<N> const& minY, vfloat<N> const& maxY, vfloat<N> const& minZ, vfloat<N> const& maxZ)
  154. {
  155. const vfloat<N> vX = min(max(query.org.x, minX), maxX) - query.org.x;
  156. const vfloat<N> vY = min(max(query.org.y, minY), maxY) - query.org.y;
  157. const vfloat<N> vZ = min(max(query.org.z, minZ), maxZ) - query.org.z;
  158. dist = vX * vX + vY * vY + vZ * vZ;
  159. const vbool<N> vmask = dist <= query.tfar()*query.tfar();
  160. const vbool<N> valid = minX <= maxX;
  161. return movemask(vmask) & movemask(valid);
  162. }
  163. template<int N>
  164. __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::AABBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
  165. {
  166. const vfloat<N> minX = vfloat<N>::load((float*)((const char*)&node->lower_x));
  167. const vfloat<N> minY = vfloat<N>::load((float*)((const char*)&node->lower_y));
  168. const vfloat<N> minZ = vfloat<N>::load((float*)((const char*)&node->lower_z));
  169. const vfloat<N> maxX = vfloat<N>::load((float*)((const char*)&node->upper_x));
  170. const vfloat<N> maxY = vfloat<N>::load((float*)((const char*)&node->upper_y));
  171. const vfloat<N> maxZ = vfloat<N>::load((float*)((const char*)&node->upper_z));
  172. return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ);
  173. }
  174. template<int N>
  175. __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::AABBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
  176. {
  177. const vfloat<N>* pMinX = (const vfloat<N>*)((const char*)&node->lower_x);
  178. const vfloat<N>* pMinY = (const vfloat<N>*)((const char*)&node->lower_y);
  179. const vfloat<N>* pMinZ = (const vfloat<N>*)((const char*)&node->lower_z);
  180. const vfloat<N>* pMaxX = (const vfloat<N>*)((const char*)&node->upper_x);
  181. const vfloat<N>* pMaxY = (const vfloat<N>*)((const char*)&node->upper_y);
  182. const vfloat<N>* pMaxZ = (const vfloat<N>*)((const char*)&node->upper_z);
  183. const vfloat<N> minX = madd(time,pMinX[6],vfloat<N>(pMinX[0]));
  184. const vfloat<N> minY = madd(time,pMinY[6],vfloat<N>(pMinY[0]));
  185. const vfloat<N> minZ = madd(time,pMinZ[6],vfloat<N>(pMinZ[0]));
  186. const vfloat<N> maxX = madd(time,pMaxX[6],vfloat<N>(pMaxX[0]));
  187. const vfloat<N> maxY = madd(time,pMaxY[6],vfloat<N>(pMaxY[0]));
  188. const vfloat<N> maxZ = madd(time,pMaxZ[6],vfloat<N>(pMaxZ[0]));
  189. return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ);
  190. }
  191. template<int N>
  192. __forceinline size_t pointQueryNodeSphereMB4D(const typename BVHN<N>::NodeRef ref, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
  193. {
  194. const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
  195. size_t mask = pointQueryNodeSphere(node, query, time, dist);
  196. if (unlikely(ref.isAABBNodeMB4D())) {
  197. const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
  198. const vbool<N> vmask = (node1->lower_t <= time) & (time < node1->upper_t);
  199. mask &= movemask(vmask);
  200. }
  201. return mask;
  202. }
  203. template<int N>
  204. __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
  205. {
  206. const vfloat<N> start_x(node->start.x);
  207. const vfloat<N> scale_x(node->scale.x);
  208. const vfloat<N> minX = madd(node->template dequantize<N>((0*sizeof(vfloat<N>)) >> 2),scale_x,start_x);
  209. const vfloat<N> maxX = madd(node->template dequantize<N>((1*sizeof(vfloat<N>)) >> 2),scale_x,start_x);
  210. const vfloat<N> start_y(node->start.y);
  211. const vfloat<N> scale_y(node->scale.y);
  212. const vfloat<N> minY = madd(node->template dequantize<N>((2*sizeof(vfloat<N>)) >> 2),scale_y,start_y);
  213. const vfloat<N> maxY = madd(node->template dequantize<N>((3*sizeof(vfloat<N>)) >> 2),scale_y,start_y);
  214. const vfloat<N> start_z(node->start.z);
  215. const vfloat<N> scale_z(node->scale.z);
  216. const vfloat<N> minZ = madd(node->template dequantize<N>((4*sizeof(vfloat<N>)) >> 2),scale_z,start_z);
  217. const vfloat<N> maxZ = madd(node->template dequantize<N>((5*sizeof(vfloat<N>)) >> 2),scale_z,start_z);
  218. return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & movemask(node->validMask());
  219. }
  220. template<int N>
  221. __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
  222. {
  223. const vfloat<N> minX = node->dequantizeLowerX(time);
  224. const vfloat<N> maxX = node->dequantizeUpperX(time);
  225. const vfloat<N> minY = node->dequantizeLowerY(time);
  226. const vfloat<N> maxY = node->dequantizeUpperY(time);
  227. const vfloat<N> minZ = node->dequantizeLowerZ(time);
  228. const vfloat<N> maxZ = node->dequantizeUpperZ(time);
  229. return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & movemask(node->validMask());
  230. }
  231. template<int N>
  232. __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::OBBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
  233. {
  234. // TODO: point query - implement
  235. const vbool<N> vmask = vbool<N>(true);
  236. const size_t mask = movemask(vmask) & ((1<<N)-1);
  237. dist = vfloat<N>(0.0f);
  238. return mask;
  239. }
  240. template<int N>
  241. __forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::OBBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
  242. {
  243. // TODO: point query - implement
  244. const vbool<N> vmask = vbool<N>(true);
  245. const size_t mask = movemask(vmask) & ((1<<N)-1);
  246. dist = vfloat<N>(0.0f);
  247. return mask;
  248. }
  249. template<int N>
  250. __forceinline size_t pointQueryAABBDistAndMask(
  251. const TravPointQuery<N>& query, vfloat<N>& dist, vfloat<N> const& minX, vfloat<N> const& maxX,
  252. vfloat<N> const& minY, vfloat<N> const& maxY, vfloat<N> const& minZ, vfloat<N> const& maxZ)
  253. {
  254. const vfloat<N> vX = min(max(query.org.x, minX), maxX) - query.org.x;
  255. const vfloat<N> vY = min(max(query.org.y, minY), maxY) - query.org.y;
  256. const vfloat<N> vZ = min(max(query.org.z, minZ), maxZ) - query.org.z;
  257. dist = vX * vX + vY * vY + vZ * vZ;
  258. const vbool<N> valid = minX <= maxX;
  259. const vbool<N> vmask = !((maxX < query.org.x - query.rad.x) | (minX > query.org.x + query.rad.x) |
  260. (maxY < query.org.y - query.rad.y) | (minY > query.org.y + query.rad.y) |
  261. (maxZ < query.org.z - query.rad.z) | (minZ > query.org.z + query.rad.z));
  262. return movemask(vmask) & movemask(valid);
  263. }
  264. template<int N>
  265. __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::AABBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
  266. {
  267. const vfloat<N> minX = vfloat<N>::load((float*)((const char*)&node->lower_x));
  268. const vfloat<N> minY = vfloat<N>::load((float*)((const char*)&node->lower_y));
  269. const vfloat<N> minZ = vfloat<N>::load((float*)((const char*)&node->lower_z));
  270. const vfloat<N> maxX = vfloat<N>::load((float*)((const char*)&node->upper_x));
  271. const vfloat<N> maxY = vfloat<N>::load((float*)((const char*)&node->upper_y));
  272. const vfloat<N> maxZ = vfloat<N>::load((float*)((const char*)&node->upper_z));
  273. return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ);
  274. }
  275. template<int N>
  276. __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::AABBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
  277. {
  278. const vfloat<N>* pMinX = (const vfloat<N>*)((const char*)&node->lower_x);
  279. const vfloat<N>* pMinY = (const vfloat<N>*)((const char*)&node->lower_y);
  280. const vfloat<N>* pMinZ = (const vfloat<N>*)((const char*)&node->lower_z);
  281. const vfloat<N>* pMaxX = (const vfloat<N>*)((const char*)&node->upper_x);
  282. const vfloat<N>* pMaxY = (const vfloat<N>*)((const char*)&node->upper_y);
  283. const vfloat<N>* pMaxZ = (const vfloat<N>*)((const char*)&node->upper_z);
  284. const vfloat<N> minX = madd(time,pMinX[6],vfloat<N>(pMinX[0]));
  285. const vfloat<N> minY = madd(time,pMinY[6],vfloat<N>(pMinY[0]));
  286. const vfloat<N> minZ = madd(time,pMinZ[6],vfloat<N>(pMinZ[0]));
  287. const vfloat<N> maxX = madd(time,pMaxX[6],vfloat<N>(pMaxX[0]));
  288. const vfloat<N> maxY = madd(time,pMaxY[6],vfloat<N>(pMaxY[0]));
  289. const vfloat<N> maxZ = madd(time,pMaxZ[6],vfloat<N>(pMaxZ[0]));
  290. return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ);
  291. }
  292. template<int N>
  293. __forceinline size_t pointQueryNodeAABBMB4D(const typename BVHN<N>::NodeRef ref, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
  294. {
  295. const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
  296. size_t mask = pointQueryNodeAABB(node, query, time, dist);
  297. if (unlikely(ref.isAABBNodeMB4D())) {
  298. const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
  299. const vbool<N> vmask = (node1->lower_t <= time) & (time < node1->upper_t);
  300. mask &= movemask(vmask);
  301. }
  302. return mask;
  303. }
  304. template<int N>
  305. __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
  306. {
  307. const size_t mvalid = movemask(node->validMask());
  308. const vfloat<N> start_x(node->start.x);
  309. const vfloat<N> scale_x(node->scale.x);
  310. const vfloat<N> minX = madd(node->template dequantize<N>((0*sizeof(vfloat<N>)) >> 2),scale_x,start_x);
  311. const vfloat<N> maxX = madd(node->template dequantize<N>((1*sizeof(vfloat<N>)) >> 2),scale_x,start_x);
  312. const vfloat<N> start_y(node->start.y);
  313. const vfloat<N> scale_y(node->scale.y);
  314. const vfloat<N> minY = madd(node->template dequantize<N>((2*sizeof(vfloat<N>)) >> 2),scale_y,start_y);
  315. const vfloat<N> maxY = madd(node->template dequantize<N>((3*sizeof(vfloat<N>)) >> 2),scale_y,start_y);
  316. const vfloat<N> start_z(node->start.z);
  317. const vfloat<N> scale_z(node->scale.z);
  318. const vfloat<N> minZ = madd(node->template dequantize<N>((4*sizeof(vfloat<N>)) >> 2),scale_z,start_z);
  319. const vfloat<N> maxZ = madd(node->template dequantize<N>((5*sizeof(vfloat<N>)) >> 2),scale_z,start_z);
  320. return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & mvalid;
  321. }
  322. template<int N>
  323. __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
  324. {
  325. const size_t mvalid = movemask(node->validMask());
  326. const vfloat<N> minX = node->dequantizeLowerX(time);
  327. const vfloat<N> maxX = node->dequantizeUpperX(time);
  328. const vfloat<N> minY = node->dequantizeLowerY(time);
  329. const vfloat<N> maxY = node->dequantizeUpperY(time);
  330. const vfloat<N> minZ = node->dequantizeLowerZ(time);
  331. const vfloat<N> maxZ = node->dequantizeUpperZ(time);
  332. return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & mvalid;
  333. }
  334. template<int N>
  335. __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::OBBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
  336. {
  337. // TODO: point query - implement
  338. const vbool<N> vmask = vbool<N>(true);
  339. const size_t mask = movemask(vmask) & ((1<<N)-1);
  340. dist = vfloat<N>(0.0f);
  341. return mask;
  342. }
  343. template<int N>
  344. __forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::OBBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
  345. {
  346. // TODO: point query - implement
  347. const vbool<N> vmask = vbool<N>(true);
  348. const size_t mask = movemask(vmask) & ((1<<N)-1);
  349. dist = vfloat<N>(0.0f);
  350. return mask;
  351. }
  352. //////////////////////////////////////////////////////////////////////////////////////
  353. // Fast AABBNode intersection
  354. //////////////////////////////////////////////////////////////////////////////////////
  355. template<int N, bool robust>
  356. __forceinline size_t intersectNode(const typename BVHN<N>::AABBNode* node, const TravRay<N,robust>& ray, vfloat<N>& dist);
  357. template<>
  358. __forceinline size_t intersectNode<4>(const typename BVH4::AABBNode* node, const TravRay<4,false>& ray, vfloat4& dist)
  359. {
  360. #if defined(__AVX2__) || defined(__ARM_NEON)
  361. const vfloat4 tNearX = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.org_rdir.x);
  362. const vfloat4 tNearY = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.org_rdir.y);
  363. const vfloat4 tNearZ = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.org_rdir.z);
  364. const vfloat4 tFarX = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.org_rdir.x);
  365. const vfloat4 tFarY = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.org_rdir.y);
  366. const vfloat4 tFarZ = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.org_rdir.z);
  367. #else
  368. const vfloat4 tNearX = (vfloat4::load((float*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir.x;
  369. const vfloat4 tNearY = (vfloat4::load((float*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir.y;
  370. const vfloat4 tNearZ = (vfloat4::load((float*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir.z;
  371. const vfloat4 tFarX = (vfloat4::load((float*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir.x;
  372. const vfloat4 tFarY = (vfloat4::load((float*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir.y;
  373. const vfloat4 tFarZ = (vfloat4::load((float*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir.z;
  374. #endif
  375. #if defined(__aarch64__)
  376. const vfloat4 tNear = maxi(tNearX, tNearY, tNearZ, ray.tnear);
  377. const vfloat4 tFar = mini(tFarX, tFarY, tFarZ, ray.tfar);
  378. const vbool4 vmask = asInt(tNear) <= asInt(tFar);
  379. const size_t mask = movemask(vmask);
  380. #elif defined(__SSE4_1__) && !defined(__AVX512F__) // up to HSW
  381. const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
  382. const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
  383. const vbool4 vmask = asInt(tNear) > asInt(tFar);
  384. const size_t mask = movemask(vmask) ^ ((1<<4)-1);
  385. #elif defined(__AVX512F__) // SKX
  386. const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
  387. const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
  388. const vbool4 vmask = asInt(tNear) <= asInt(tFar);
  389. const size_t mask = movemask(vmask);
  390. #else
  391. const vfloat4 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
  392. const vfloat4 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
  393. const vbool4 vmask = tNear <= tFar;
  394. const size_t mask = movemask(vmask);
  395. #endif
  396. dist = tNear;
  397. return mask;
  398. }
  /* 8-wide fast AABBNode intersection, only compiled when __AVX__ is defined. */
#if defined(__AVX__)

  /*
   * 8-wide fast (non-robust) ray/AABBNode test.
   *
   * The six bound vectors are fetched at byte offsets ray.near* / ray.far* from
   * node->lower_x and turned into slab distances. dist receives the entry
   * distance tNear of every lane; the returned mask has a bit set for every lane
   * with tNear <= tFar.
   */
  template<>
  __forceinline size_t intersectNode<8>(const typename BVH8::AABBNode* node, const TravRay<8,false>& ray, vfloat8& dist)
  {
    const char* const bounds = (const char*)&node->lower_x;
    const vfloat8 nearBoundX = vfloat8::load((float*)(bounds+ray.nearX));
    const vfloat8 nearBoundY = vfloat8::load((float*)(bounds+ray.nearY));
    const vfloat8 nearBoundZ = vfloat8::load((float*)(bounds+ray.nearZ));
    const vfloat8 farBoundX  = vfloat8::load((float*)(bounds+ray.farX));
    const vfloat8 farBoundY  = vfloat8::load((float*)(bounds+ray.farY));
    const vfloat8 farBoundZ  = vfloat8::load((float*)(bounds+ray.farZ));

#if defined(__AVX2__) || defined(__ARM_NEON)
    /* bound*rdir - org*rdir through msub() */
    const vfloat8 tNearX = msub(nearBoundX, ray.rdir.x, ray.org_rdir.x);
    const vfloat8 tNearY = msub(nearBoundY, ray.rdir.y, ray.org_rdir.y);
    const vfloat8 tNearZ = msub(nearBoundZ, ray.rdir.z, ray.org_rdir.z);
    const vfloat8 tFarX  = msub(farBoundX,  ray.rdir.x, ray.org_rdir.x);
    const vfloat8 tFarY  = msub(farBoundY,  ray.rdir.y, ray.org_rdir.y);
    const vfloat8 tFarZ  = msub(farBoundZ,  ray.rdir.z, ray.org_rdir.z);
#else
    /* (bound - org)*rdir */
    const vfloat8 tNearX = (nearBoundX - ray.org.x) * ray.rdir.x;
    const vfloat8 tNearY = (nearBoundY - ray.org.y) * ray.rdir.y;
    const vfloat8 tNearZ = (nearBoundZ - ray.org.z) * ray.rdir.z;
    const vfloat8 tFarX  = (farBoundX  - ray.org.x) * ray.rdir.x;
    const vfloat8 tFarY  = (farBoundY  - ray.org.y) * ray.rdir.y;
    const vfloat8 tFarZ  = (farBoundZ  - ray.org.z) * ray.rdir.z;
#endif

    /* NOTE(review): the maxi/mini + asInt() paths compare the distances through
       their integer representation; the helpers are defined outside this file. */
#if defined(__AVX512F__) // SKX
    const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
    const vfloat8 tFar  = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
    const vbool8 hit = asInt(tNear) <= asInt(tFar);
    const size_t mask = movemask(hit);
#elif defined(__AVX2__) // HSW
    const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
    const vfloat8 tFar  = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
    /* the miss mask is computed and its low eight bits inverted */
    const vbool8 miss = asInt(tNear) > asInt(tFar);
    const size_t mask = movemask(miss) ^ ((1<<8)-1);
#else
    const vfloat8 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
    const vfloat8 tFar  = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
    const vbool8 hit = tNear <= tFar;
    const size_t mask = movemask(hit);
#endif

    dist = tNear;
    return mask;
  }

#endif
  438. //////////////////////////////////////////////////////////////////////////////////////
  439. // Robust AABBNode intersection
  440. //////////////////////////////////////////////////////////////////////////////////////
  441. template<int N>
  442. __forceinline size_t intersectNodeRobust(const typename BVHN<N>::AABBNode* node, const TravRay<N,true>& ray, vfloat<N>& dist)
  443. {
  444. const vfloat<N> tNearX = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir_near.x;
  445. const vfloat<N> tNearY = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir_near.y;
  446. const vfloat<N> tNearZ = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir_near.z;
  447. const vfloat<N> tFarX = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir_far.x;
  448. const vfloat<N> tFarY = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir_far.y;
  449. const vfloat<N> tFarZ = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir_far.z;
  450. const vfloat<N> tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
  451. const vfloat<N> tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
  452. const vbool<N> vmask = tNear <= tFar;
  453. const size_t mask = movemask(vmask);
  454. dist = tNear;
  455. return mask;
  456. }
  457. //////////////////////////////////////////////////////////////////////////////////////
  458. // Fast AABBNodeMB intersection
  459. //////////////////////////////////////////////////////////////////////////////////////
  460. template<int N>
  461. __forceinline size_t intersectNode(const typename BVHN<N>::AABBNodeMB* node, const TravRay<N,false>& ray, const float time, vfloat<N>& dist)
  462. {
  463. const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX);
  464. const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY);
  465. const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ);
  466. const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX);
  467. const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY);
  468. const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ);
  469. #if defined(__AVX2__) || defined(__ARM_NEON)
  470. const vfloat<N> tNearX = msub(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.org_rdir.x);
  471. const vfloat<N> tNearY = msub(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.org_rdir.y);
  472. const vfloat<N> tNearZ = msub(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.org_rdir.z);
  473. const vfloat<N> tFarX = msub(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.org_rdir.x);
  474. const vfloat<N> tFarY = msub(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.org_rdir.y);
  475. const vfloat<N> tFarZ = msub(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.org_rdir.z);
  476. #else
  477. const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir.x;
  478. const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir.y;
  479. const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir.z;
  480. const vfloat<N> tFarX = (madd(time,pFarX [6],vfloat<N>(pFarX [0])) - ray.org.x) * ray.rdir.x;
  481. const vfloat<N> tFarY = (madd(time,pFarY [6],vfloat<N>(pFarY [0])) - ray.org.y) * ray.rdir.y;
  482. const vfloat<N> tFarZ = (madd(time,pFarZ [6],vfloat<N>(pFarZ [0])) - ray.org.z) * ray.rdir.z;
  483. #endif
  484. #if defined(__AVX2__) && !defined(__AVX512F__) // HSW
  485. const vfloat<N> tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
  486. const vfloat<N> tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
  487. const vbool<N> vmask = asInt(tNear) > asInt(tFar);
  488. const size_t mask = movemask(vmask) ^ ((1<<N)-1);
  489. #elif defined(__AVX512F__) // SKX
  490. const vfloat<N> tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
  491. const vfloat<N> tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
  492. const vbool<N> vmask = asInt(tNear) <= asInt(tFar);
  493. const size_t mask = movemask(vmask);
  494. #else
  495. const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ);
  496. const vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ );
  497. const vbool<N> vmask = tNear <= tFar;
  498. const size_t mask = movemask(vmask);
  499. #endif
  500. dist = tNear;
  501. return mask;
  502. }
  503. //////////////////////////////////////////////////////////////////////////////////////
  504. // Robust AABBNodeMB intersection
  505. //////////////////////////////////////////////////////////////////////////////////////
  506. template<int N>
  507. __forceinline size_t intersectNodeRobust(const typename BVHN<N>::AABBNodeMB* node, const TravRay<N,true>& ray, const float time, vfloat<N>& dist)
  508. {
  509. const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX);
  510. const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY);
  511. const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ);
  512. const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir_near.x;
  513. const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir_near.y;
  514. const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir_near.z;
  515. const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ);
  516. const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX);
  517. const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY);
  518. const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ);
  519. const vfloat<N> tFarX = (madd(time,pFarX[6],vfloat<N>(pFarX[0])) - ray.org.x) * ray.rdir_far.x;
  520. const vfloat<N> tFarY = (madd(time,pFarY[6],vfloat<N>(pFarY[0])) - ray.org.y) * ray.rdir_far.y;
  521. const vfloat<N> tFarZ = (madd(time,pFarZ[6],vfloat<N>(pFarZ[0])) - ray.org.z) * ray.rdir_far.z;
  522. const vfloat<N> tFar = min(ray.tfar,tFarX,tFarY,tFarZ);
  523. const size_t mask = movemask(tNear <= tFar);
  524. dist = tNear;
  525. return mask;
  526. }
  527. //////////////////////////////////////////////////////////////////////////////////////
  528. // Fast AABBNodeMB4D intersection
  529. //////////////////////////////////////////////////////////////////////////////////////
  530. template<int N>
  531. __forceinline size_t intersectNodeMB4D(const typename BVHN<N>::NodeRef ref, const TravRay<N,false>& ray, const float time, vfloat<N>& dist)
  532. {
  533. const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
  534. const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX);
  535. const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY);
  536. const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ);
  537. const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX);
  538. const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY);
  539. const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ);
  540. #if defined (__AVX2__) || defined(__ARM_NEON)
  541. const vfloat<N> tNearX = msub(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.org_rdir.x);
  542. const vfloat<N> tNearY = msub(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.org_rdir.y);
  543. const vfloat<N> tNearZ = msub(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.org_rdir.z);
  544. const vfloat<N> tFarX = msub(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.org_rdir.x);
  545. const vfloat<N> tFarY = msub(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.org_rdir.y);
  546. const vfloat<N> tFarZ = msub(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.org_rdir.z);
  547. #else
  548. const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir.x;
  549. const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir.y;
  550. const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir.z;
  551. const vfloat<N> tFarX = (madd(time,pFarX [6],vfloat<N>(pFarX [0])) - ray.org.x) * ray.rdir.x;
  552. const vfloat<N> tFarY = (madd(time,pFarY [6],vfloat<N>(pFarY [0])) - ray.org.y) * ray.rdir.y;
  553. const vfloat<N> tFarZ = (madd(time,pFarZ [6],vfloat<N>(pFarZ [0])) - ray.org.z) * ray.rdir.z;
  554. #endif
  555. #if defined(__AVX2__) && !defined(__AVX512F__)
  556. const vfloat<N> tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,ray.tnear));
  557. const vfloat<N> tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,ray.tfar ));
  558. #else
  559. const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ);
  560. const vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ );
  561. #endif
  562. vbool<N> vmask = tNear <= tFar;
  563. if (unlikely(ref.isAABBNodeMB4D())) {
  564. const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
  565. vmask &= (node1->lower_t <= time) & (time < node1->upper_t);
  566. }
  567. const size_t mask = movemask(vmask);
  568. dist = tNear;
  569. return mask;
  570. }
  571. //////////////////////////////////////////////////////////////////////////////////////
  572. // Robust AABBNodeMB4D intersection
  573. //////////////////////////////////////////////////////////////////////////////////////
  574. template<int N>
  575. __forceinline size_t intersectNodeMB4DRobust(const typename BVHN<N>::NodeRef ref, const TravRay<N,true>& ray, const float time, vfloat<N>& dist)
  576. {
  577. const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
  578. const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX);
  579. const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY);
  580. const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ);
  581. const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir_near.x;
  582. const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir_near.y;
  583. const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir_near.z;
  584. const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ);
  585. const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX);
  586. const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY);
  587. const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ);
  588. const vfloat<N> tFarX = (madd(time,pFarX[6],vfloat<N>(pFarX[0])) - ray.org.x) * ray.rdir_far.x;
  589. const vfloat<N> tFarY = (madd(time,pFarY[6],vfloat<N>(pFarY[0])) - ray.org.y) * ray.rdir_far.y;
  590. const vfloat<N> tFarZ = (madd(time,pFarZ[6],vfloat<N>(pFarZ[0])) - ray.org.z) * ray.rdir_far.z;
  591. const vfloat<N> tFar = min(ray.tfar,tFarX,tFarY,tFarZ);
  592. vbool<N> vmask = tNear <= tFar;
  593. if (unlikely(ref.isAABBNodeMB4D())) {
  594. const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
  595. vmask &= (node1->lower_t <= time) & (time < node1->upper_t);
  596. }
  597. const size_t mask = movemask(vmask);
  598. dist = tNear;
  599. return mask;
  600. }
  601. //////////////////////////////////////////////////////////////////////////////////////
  602. // Fast QuantizedBaseNode intersection
  603. //////////////////////////////////////////////////////////////////////////////////////
  604. template<int N, bool robust>
  605. __forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,robust>& ray, vfloat<N>& dist);
  606. template<>
  607. __forceinline size_t intersectNode<4>(const typename BVH4::QuantizedBaseNode* node, const TravRay<4,false>& ray, vfloat4& dist)
  608. {
  609. const size_t mvalid = movemask(node->validMask());
  610. const vfloat4 start_x(node->start.x);
  611. const vfloat4 scale_x(node->scale.x);
  612. const vfloat4 lower_x = madd(node->dequantize<4>(ray.nearX >> 2),scale_x,start_x);
  613. const vfloat4 upper_x = madd(node->dequantize<4>(ray.farX >> 2),scale_x,start_x);
  614. const vfloat4 start_y(node->start.y);
  615. const vfloat4 scale_y(node->scale.y);
  616. const vfloat4 lower_y = madd(node->dequantize<4>(ray.nearY >> 2),scale_y,start_y);
  617. const vfloat4 upper_y = madd(node->dequantize<4>(ray.farY >> 2),scale_y,start_y);
  618. const vfloat4 start_z(node->start.z);
  619. const vfloat4 scale_z(node->scale.z);
  620. const vfloat4 lower_z = madd(node->dequantize<4>(ray.nearZ >> 2),scale_z,start_z);
  621. const vfloat4 upper_z = madd(node->dequantize<4>(ray.farZ >> 2),scale_z,start_z);
  622. #if defined(__AVX2__) || defined(__ARM_NEON)
  623. const vfloat4 tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x);
  624. const vfloat4 tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
  625. const vfloat4 tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
  626. const vfloat4 tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
  627. const vfloat4 tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
  628. const vfloat4 tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
  629. #else
  630. const vfloat4 tNearX = (lower_x - ray.org.x) * ray.rdir.x;
  631. const vfloat4 tNearY = (lower_y - ray.org.y) * ray.rdir.y;
  632. const vfloat4 tNearZ = (lower_z - ray.org.z) * ray.rdir.z;
  633. const vfloat4 tFarX = (upper_x - ray.org.x) * ray.rdir.x;
  634. const vfloat4 tFarY = (upper_y - ray.org.y) * ray.rdir.y;
  635. const vfloat4 tFarZ = (upper_z - ray.org.z) * ray.rdir.z;
  636. #endif
  637. #if defined(__SSE4_1__) && !defined(__AVX512F__) // up to HSW
  638. const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
  639. const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
  640. const vbool4 vmask = asInt(tNear) > asInt(tFar);
  641. const size_t mask = movemask(vmask) ^ ((1<<4)-1);
  642. #elif defined(__AVX512F__) // SKX
  643. const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
  644. const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
  645. const vbool4 vmask = asInt(tNear) <= asInt(tFar);
  646. const size_t mask = movemask(vmask);
  647. #else
  648. const vfloat4 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
  649. const vfloat4 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
  650. const vbool4 vmask = tNear <= tFar;
  651. const size_t mask = movemask(vmask);
  652. #endif
  653. dist = tNear;
  654. return mask & mvalid;
  655. }
  656. template<>
  657. __forceinline size_t intersectNode<4>(const typename BVH4::QuantizedBaseNode* node, const TravRay<4,true>& ray, vfloat4& dist)
  658. {
  659. const size_t mvalid = movemask(node->validMask());
  660. const vfloat4 start_x(node->start.x);
  661. const vfloat4 scale_x(node->scale.x);
  662. const vfloat4 lower_x = madd(node->dequantize<4>(ray.nearX >> 2),scale_x,start_x);
  663. const vfloat4 upper_x = madd(node->dequantize<4>(ray.farX >> 2),scale_x,start_x);
  664. const vfloat4 start_y(node->start.y);
  665. const vfloat4 scale_y(node->scale.y);
  666. const vfloat4 lower_y = madd(node->dequantize<4>(ray.nearY >> 2),scale_y,start_y);
  667. const vfloat4 upper_y = madd(node->dequantize<4>(ray.farY >> 2),scale_y,start_y);
  668. const vfloat4 start_z(node->start.z);
  669. const vfloat4 scale_z(node->scale.z);
  670. const vfloat4 lower_z = madd(node->dequantize<4>(ray.nearZ >> 2),scale_z,start_z);
  671. const vfloat4 upper_z = madd(node->dequantize<4>(ray.farZ >> 2),scale_z,start_z);
  672. const vfloat4 tNearX = (lower_x - ray.org.x) * ray.rdir_near.x;
  673. const vfloat4 tNearY = (lower_y - ray.org.y) * ray.rdir_near.y;
  674. const vfloat4 tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z;
  675. const vfloat4 tFarX = (upper_x - ray.org.x) * ray.rdir_far.x;
  676. const vfloat4 tFarY = (upper_y - ray.org.y) * ray.rdir_far.y;
  677. const vfloat4 tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z;
  678. const vfloat4 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
  679. const vfloat4 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
  680. const vbool4 vmask = tNear <= tFar;
  681. const size_t mask = movemask(vmask);
  682. dist = tNear;
  683. return mask & mvalid;
  684. }
  685. #if defined(__AVX__)
  686. template<>
  687. __forceinline size_t intersectNode<8>(const typename BVH8::QuantizedBaseNode* node, const TravRay<8,false>& ray, vfloat8& dist)
  688. {
  689. const size_t mvalid = movemask(node->validMask());
  690. const vfloat8 start_x(node->start.x);
  691. const vfloat8 scale_x(node->scale.x);
  692. const vfloat8 lower_x = madd(node->dequantize<8>(ray.nearX >> 2),scale_x,start_x);
  693. const vfloat8 upper_x = madd(node->dequantize<8>(ray.farX >> 2),scale_x,start_x);
  694. const vfloat8 start_y(node->start.y);
  695. const vfloat8 scale_y(node->scale.y);
  696. const vfloat8 lower_y = madd(node->dequantize<8>(ray.nearY >> 2),scale_y,start_y);
  697. const vfloat8 upper_y = madd(node->dequantize<8>(ray.farY >> 2),scale_y,start_y);
  698. const vfloat8 start_z(node->start.z);
  699. const vfloat8 scale_z(node->scale.z);
  700. const vfloat8 lower_z = madd(node->dequantize<8>(ray.nearZ >> 2),scale_z,start_z);
  701. const vfloat8 upper_z = madd(node->dequantize<8>(ray.farZ >> 2),scale_z,start_z);
  702. #if defined(__AVX2__) || defined(__ARM_NEON)
  703. const vfloat8 tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x);
  704. const vfloat8 tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
  705. const vfloat8 tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
  706. const vfloat8 tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
  707. const vfloat8 tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
  708. const vfloat8 tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
  709. #else
  710. const vfloat8 tNearX = (lower_x - ray.org.x) * ray.rdir.x;
  711. const vfloat8 tNearY = (lower_y - ray.org.y) * ray.rdir.y;
  712. const vfloat8 tNearZ = (lower_z - ray.org.z) * ray.rdir.z;
  713. const vfloat8 tFarX = (upper_x - ray.org.x) * ray.rdir.x;
  714. const vfloat8 tFarY = (upper_y - ray.org.y) * ray.rdir.y;
  715. const vfloat8 tFarZ = (upper_z - ray.org.z) * ray.rdir.z;
  716. #endif
  717. #if defined(__AVX2__) && !defined(__AVX512F__) // HSW
  718. const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
  719. const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
  720. const vbool8 vmask = asInt(tNear) > asInt(tFar);
  721. const size_t mask = movemask(vmask) ^ ((1<<8)-1);
  722. #elif defined(__AVX512F__) // SKX
  723. const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
  724. const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
  725. const vbool8 vmask = asInt(tNear) <= asInt(tFar);
  726. const size_t mask = movemask(vmask);
  727. #else
  728. const vfloat8 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
  729. const vfloat8 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
  730. const vbool8 vmask = tNear <= tFar;
  731. const size_t mask = movemask(vmask);
  732. #endif
  733. dist = tNear;
  734. return mask & mvalid;
  735. }
  736. template<>
  737. __forceinline size_t intersectNode<8>(const typename BVH8::QuantizedBaseNode* node, const TravRay<8,true>& ray, vfloat8& dist)
  738. {
  739. const size_t mvalid = movemask(node->validMask());
  740. const vfloat8 start_x(node->start.x);
  741. const vfloat8 scale_x(node->scale.x);
  742. const vfloat8 lower_x = madd(node->dequantize<8>(ray.nearX >> 2),scale_x,start_x);
  743. const vfloat8 upper_x = madd(node->dequantize<8>(ray.farX >> 2),scale_x,start_x);
  744. const vfloat8 start_y(node->start.y);
  745. const vfloat8 scale_y(node->scale.y);
  746. const vfloat8 lower_y = madd(node->dequantize<8>(ray.nearY >> 2),scale_y,start_y);
  747. const vfloat8 upper_y = madd(node->dequantize<8>(ray.farY >> 2),scale_y,start_y);
  748. const vfloat8 start_z(node->start.z);
  749. const vfloat8 scale_z(node->scale.z);
  750. const vfloat8 lower_z = madd(node->dequantize<8>(ray.nearZ >> 2),scale_z,start_z);
  751. const vfloat8 upper_z = madd(node->dequantize<8>(ray.farZ >> 2),scale_z,start_z);
  752. const vfloat8 tNearX = (lower_x - ray.org.x) * ray.rdir_near.x;
  753. const vfloat8 tNearY = (lower_y - ray.org.y) * ray.rdir_near.y;
  754. const vfloat8 tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z;
  755. const vfloat8 tFarX = (upper_x - ray.org.x) * ray.rdir_far.x;
  756. const vfloat8 tFarY = (upper_y - ray.org.y) * ray.rdir_far.y;
  757. const vfloat8 tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z;
  758. const vfloat8 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
  759. const vfloat8 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
  760. const vbool8 vmask = tNear <= tFar;
  761. const size_t mask = movemask(vmask);
  762. dist = tNear;
  763. return mask & mvalid;
  764. }
  765. #endif
  766. template<int N>
  767. __forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,false>& ray, const float time, vfloat<N>& dist)
  768. {
  769. const vboolf<N> mvalid = node->validMask();
  770. const vfloat<N> lower_x = node->dequantizeLowerX(time);
  771. const vfloat<N> upper_x = node->dequantizeUpperX(time);
  772. const vfloat<N> lower_y = node->dequantizeLowerY(time);
  773. const vfloat<N> upper_y = node->dequantizeUpperY(time);
  774. const vfloat<N> lower_z = node->dequantizeLowerZ(time);
  775. const vfloat<N> upper_z = node->dequantizeUpperZ(time);
  776. #if defined(__AVX2__) || defined(__ARM_NEON)
  777. const vfloat<N> tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x);
  778. const vfloat<N> tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
  779. const vfloat<N> tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
  780. const vfloat<N> tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
  781. const vfloat<N> tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
  782. const vfloat<N> tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
  783. #else
  784. const vfloat<N> tNearX = (lower_x - ray.org.x) * ray.rdir.x;
  785. const vfloat<N> tNearY = (lower_y - ray.org.y) * ray.rdir.y;
  786. const vfloat<N> tNearZ = (lower_z - ray.org.z) * ray.rdir.z;
  787. const vfloat<N> tFarX = (upper_x - ray.org.x) * ray.rdir.x;
  788. const vfloat<N> tFarY = (upper_y - ray.org.y) * ray.rdir.y;
  789. const vfloat<N> tFarZ = (upper_z - ray.org.z) * ray.rdir.z;
  790. #endif
  791. const vfloat<N> tminX = mini(tNearX,tFarX);
  792. const vfloat<N> tmaxX = maxi(tNearX,tFarX);
  793. const vfloat<N> tminY = mini(tNearY,tFarY);
  794. const vfloat<N> tmaxY = maxi(tNearY,tFarY);
  795. const vfloat<N> tminZ = mini(tNearZ,tFarZ);
  796. const vfloat<N> tmaxZ = maxi(tNearZ,tFarZ);
  797. const vfloat<N> tNear = maxi(tminX,tminY,tminZ,ray.tnear);
  798. const vfloat<N> tFar = mini(tmaxX,tmaxY,tmaxZ,ray.tfar);
  799. #if defined(__AVX512F__) // SKX
  800. const vbool<N> vmask = le(mvalid,asInt(tNear),asInt(tFar));
  801. #else
  802. const vbool<N> vmask = (asInt(tNear) <= asInt(tFar)) & mvalid;
  803. #endif
  804. const size_t mask = movemask(vmask);
  805. dist = tNear;
  806. return mask;
  807. }
  808. template<int N>
  809. __forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,true>& ray, const float time, vfloat<N>& dist)
  810. {
  811. const vboolf<N> mvalid = node->validMask();
  812. const vfloat<N> lower_x = node->dequantizeLowerX(time);
  813. const vfloat<N> upper_x = node->dequantizeUpperX(time);
  814. const vfloat<N> lower_y = node->dequantizeLowerY(time);
  815. const vfloat<N> upper_y = node->dequantizeUpperY(time);
  816. const vfloat<N> lower_z = node->dequantizeLowerZ(time);
  817. const vfloat<N> upper_z = node->dequantizeUpperZ(time);
  818. const vfloat<N> tNearX = (lower_x - ray.org.x) * ray.rdir_near.x;
  819. const vfloat<N> tNearY = (lower_y - ray.org.y) * ray.rdir_near.y;
  820. const vfloat<N> tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z;
  821. const vfloat<N> tFarX = (upper_x - ray.org.x) * ray.rdir_far.x;
  822. const vfloat<N> tFarY = (upper_y - ray.org.y) * ray.rdir_far.y;
  823. const vfloat<N> tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z;
  824. const vfloat<N> tminX = mini(tNearX,tFarX);
  825. const vfloat<N> tmaxX = maxi(tNearX,tFarX);
  826. const vfloat<N> tminY = mini(tNearY,tFarY);
  827. const vfloat<N> tmaxY = maxi(tNearY,tFarY);
  828. const vfloat<N> tminZ = mini(tNearZ,tFarZ);
  829. const vfloat<N> tmaxZ = maxi(tNearZ,tFarZ);
  830. const vfloat<N> tNear = maxi(tminX,tminY,tminZ,ray.tnear);
  831. const vfloat<N> tFar = mini(tmaxX,tmaxY,tmaxZ,ray.tfar);
  832. #if defined(__AVX512F__) // SKX
  833. const vbool<N> vmask = le(mvalid,asInt(tNear),asInt(tFar));
  834. #else
  835. const vbool<N> vmask = (asInt(tNear) <= asInt(tFar)) & mvalid;
  836. #endif
  837. const size_t mask = movemask(vmask);
  838. dist = tNear;
  839. return mask;
  840. }
  841. //////////////////////////////////////////////////////////////////////////////////////
  842. // Fast OBBNode intersection
  843. //////////////////////////////////////////////////////////////////////////////////////
  844. template<int N, bool robust>
  845. __forceinline size_t intersectNode(const typename BVHN<N>::OBBNode* node, const TravRay<N,robust>& ray, vfloat<N>& dist)
  846. {
  847. const Vec3vf<N> dir = xfmVector(node->naabb,ray.dir);
  848. //const Vec3vf<N> nrdir = Vec3vf<N>(vfloat<N>(-1.0f))/dir;
  849. const Vec3vf<N> nrdir = Vec3vf<N>(vfloat<N>(-1.0f))*rcp_safe(dir);
  850. const Vec3vf<N> org = xfmPoint(node->naabb,ray.org);
  851. const Vec3vf<N> tLowerXYZ = org * nrdir; // (Vec3fa(zero) - org) * rdir;
  852. const Vec3vf<N> tUpperXYZ = tLowerXYZ - nrdir; // (Vec3fa(one ) - org) * rdir;
  853. const vfloat<N> tNearX = mini(tLowerXYZ.x,tUpperXYZ.x);
  854. const vfloat<N> tNearY = mini(tLowerXYZ.y,tUpperXYZ.y);
  855. const vfloat<N> tNearZ = mini(tLowerXYZ.z,tUpperXYZ.z);
  856. const vfloat<N> tFarX = maxi(tLowerXYZ.x,tUpperXYZ.x);
  857. const vfloat<N> tFarY = maxi(tLowerXYZ.y,tUpperXYZ.y);
  858. const vfloat<N> tFarZ = maxi(tLowerXYZ.z,tUpperXYZ.z);
  859. vfloat<N> tNear = max(ray.tnear, tNearX,tNearY,tNearZ);
  860. vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ );
  861. if (robust) {
  862. tNear = tNear*vfloat<N>(1.0f-3.0f*float(ulp));
  863. tFar = tFar *vfloat<N>(1.0f+3.0f*float(ulp));
  864. }
  865. const vbool<N> vmask = tNear <= tFar;
  866. dist = tNear;
  867. return movemask(vmask);
  868. }
  869. //////////////////////////////////////////////////////////////////////////////////////
  870. // Fast OBBNodeMB intersection
  871. //////////////////////////////////////////////////////////////////////////////////////
  872. template<int N, bool robust>
  873. __forceinline size_t intersectNode(const typename BVHN<N>::OBBNodeMB* node, const TravRay<N,robust>& ray, const float time, vfloat<N>& dist)
  874. {
  875. const AffineSpace3vf<N> xfm = node->space0;
  876. const Vec3vf<N> b0_lower = zero;
  877. const Vec3vf<N> b0_upper = one;
  878. const Vec3vf<N> lower = lerp(b0_lower,node->b1.lower,vfloat<N>(time));
  879. const Vec3vf<N> upper = lerp(b0_upper,node->b1.upper,vfloat<N>(time));
  880. const BBox3vf<N> bounds(lower,upper);
  881. const Vec3vf<N> dir = xfmVector(xfm,ray.dir);
  882. const Vec3vf<N> rdir = rcp_safe(dir);
  883. const Vec3vf<N> org = xfmPoint(xfm,ray.org);
  884. const Vec3vf<N> tLowerXYZ = (bounds.lower - org) * rdir;
  885. const Vec3vf<N> tUpperXYZ = (bounds.upper - org) * rdir;
  886. const vfloat<N> tNearX = mini(tLowerXYZ.x,tUpperXYZ.x);
  887. const vfloat<N> tNearY = mini(tLowerXYZ.y,tUpperXYZ.y);
  888. const vfloat<N> tNearZ = mini(tLowerXYZ.z,tUpperXYZ.z);
  889. const vfloat<N> tFarX = maxi(tLowerXYZ.x,tUpperXYZ.x);
  890. const vfloat<N> tFarY = maxi(tLowerXYZ.y,tUpperXYZ.y);
  891. const vfloat<N> tFarZ = maxi(tLowerXYZ.z,tUpperXYZ.z);
  892. vfloat<N> tNear = max(ray.tnear, tNearX,tNearY,tNearZ);
  893. vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ );
  894. if (robust) {
  895. tNear = tNear*vfloat<N>(1.0f-3.0f*float(ulp));
  896. tFar = tFar *vfloat<N>(1.0f+3.0f*float(ulp));
  897. }
  898. const vbool<N> vmask = tNear <= tFar;
  899. dist = tNear;
  900. return movemask(vmask);
  901. }
  //////////////////////////////////////////////////////////////////////////////////////
  // Node intersectors used in point query traversal
  //////////////////////////////////////////////////////////////////////////////////////
  905. /*! Computes traversal information for N nodes with 1 point query */
  906. template<int N, int types>
  907. struct BVHNNodePointQuerySphere1;
  908. template<int N>
  909. struct BVHNNodePointQuerySphere1<N, BVH_AN1>
  910. {
  911. static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
  912. {
  913. if (unlikely(node.isLeaf())) return false;
  914. mask = pointQueryNodeSphere(node.getAABBNode(), query, dist);
  915. return true;
  916. }
  917. };
  918. template<int N>
  919. struct BVHNNodePointQuerySphere1<N, BVH_AN2>
  920. {
  921. static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
  922. {
  923. if (unlikely(node.isLeaf())) return false;
  924. mask = pointQueryNodeSphere(node.getAABBNodeMB(), query, time, dist);
  925. return true;
  926. }
  927. };
  928. template<int N>
  929. struct BVHNNodePointQuerySphere1<N, BVH_AN2_AN4D>
  930. {
  931. static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
  932. {
  933. if (unlikely(node.isLeaf())) return false;
  934. mask = pointQueryNodeSphereMB4D<N>(node, query, time, dist);
  935. return true;
  936. }
  937. };
  938. template<int N>
  939. struct BVHNNodePointQuerySphere1<N, BVH_AN1_UN1>
  940. {
  941. static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
  942. {
  943. if (likely(node.isAABBNode())) mask = pointQueryNodeSphere(node.getAABBNode(), query, dist);
  944. else if (unlikely(node.isOBBNode())) mask = pointQueryNodeSphere(node.ungetAABBNode(), query, dist);
  945. else return false;
  946. return true;
  947. }
  948. };
  949. template<int N>
  950. struct BVHNNodePointQuerySphere1<N, BVH_AN2_UN2>
  951. {
  952. static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
  953. {
  954. if (likely(node.isAABBNodeMB())) mask = pointQueryNodeSphere(node.getAABBNodeMB(), query, time, dist);
  955. else if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeSphere(node.ungetAABBNodeMB(), query, time, dist);
  956. else return false;
  957. return true;
  958. }
  959. };
  960. template<int N>
  961. struct BVHNNodePointQuerySphere1<N, BVH_AN2_AN4D_UN2>
  962. {
  963. static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
  964. {
  965. if (unlikely(node.isLeaf())) return false;
  966. if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeSphere(node.ungetAABBNodeMB(), query, time, dist);
  967. else mask = pointQueryNodeSphereMB4D(node, query, time, dist);
  968. return true;
  969. }
  970. };
  971. template<int N>
  972. struct BVHNNodePointQuerySphere1<N, BVH_QN1>
  973. {
  974. static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
  975. {
  976. if (unlikely(node.isLeaf())) return false;
  977. mask = pointQueryNodeSphere((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), query, dist);
  978. return true;
  979. }
  980. };
  981. template<int N>
  982. struct BVHNQuantizedBaseNodePointQuerySphere1
  983. {
  984. static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
  985. {
  986. return pointQueryNodeSphere(node,query,dist);
  987. }
  988. static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
  989. {
  990. return pointQueryNodeSphere(node,query,time,dist);
  991. }
  992. };
  993. /*! Computes traversal information for N nodes with 1 point query */
  994. template<int N, int types>
  995. struct BVHNNodePointQueryAABB1;
  996. template<int N>
  997. struct BVHNNodePointQueryAABB1<N, BVH_AN1>
  998. {
  999. static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
  1000. {
  1001. if (unlikely(node.isLeaf())) return false;
  1002. mask = pointQueryNodeAABB(node.getAABBNode(), query, dist);
  1003. return true;
  1004. }
  1005. };
  1006. template<int N>
  1007. struct BVHNNodePointQueryAABB1<N, BVH_AN2>
  1008. {
  1009. static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
  1010. {
  1011. if (unlikely(node.isLeaf())) return false;
  1012. mask = pointQueryNodeAABB(node.getAABBNodeMB(), query, time, dist);
  1013. return true;
  1014. }
  1015. };
  1016. template<int N>
  1017. struct BVHNNodePointQueryAABB1<N, BVH_AN2_AN4D>
  1018. {
  1019. static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
  1020. {
  1021. if (unlikely(node.isLeaf())) return false;
  1022. mask = pointQueryNodeAABBMB4D<N>(node, query, time, dist);
  1023. return true;
  1024. }
  1025. };
  1026. template<int N>
  1027. struct BVHNNodePointQueryAABB1<N, BVH_AN1_UN1>
  1028. {
  1029. static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
  1030. {
  1031. if (likely(node.isAABBNode())) mask = pointQueryNodeAABB(node.getAABBNode(), query, dist);
  1032. else if (unlikely(node.isOBBNode())) mask = pointQueryNodeAABB(node.ungetAABBNode(), query, dist);
  1033. else return false;
  1034. return true;
  1035. }
  1036. };
  1037. template<int N>
  1038. struct BVHNNodePointQueryAABB1<N, BVH_AN2_UN2>
  1039. {
  1040. static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
  1041. {
  1042. if (likely(node.isAABBNodeMB())) mask = pointQueryNodeAABB(node.getAABBNodeMB(), query, time, dist);
  1043. else if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeAABB(node.ungetAABBNodeMB(), query, time, dist);
  1044. else return false;
  1045. return true;
  1046. }
  1047. };
  1048. template<int N>
  1049. struct BVHNNodePointQueryAABB1<N, BVH_AN2_AN4D_UN2>
  1050. {
  1051. static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
  1052. {
  1053. if (unlikely(node.isLeaf())) return false;
  1054. if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeAABB(node.ungetAABBNodeMB(), query, time, dist);
  1055. else mask = pointQueryNodeAABBMB4D(node, query, time, dist);
  1056. return true;
  1057. }
  1058. };
  1059. template<int N>
  1060. struct BVHNNodePointQueryAABB1<N, BVH_QN1>
  1061. {
  1062. static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
  1063. {
  1064. if (unlikely(node.isLeaf())) return false;
  1065. mask = pointQueryNodeAABB((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), query, dist);
  1066. return true;
  1067. }
  1068. };
  1069. template<int N>
  1070. struct BVHNQuantizedBaseNodePointQueryAABB1
  1071. {
  1072. static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
  1073. {
  1074. return pointQueryNodeAABB(node,query,dist);
  1075. }
  1076. static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
  1077. {
  1078. return pointQueryNodeAABB(node,query,time,dist);
  1079. }
  1080. };
  1081. //////////////////////////////////////////////////////////////////////////////////////
  1082. // Node intersectors used in ray traversal
  1083. //////////////////////////////////////////////////////////////////////////////////////
  /*! Intersects N nodes with 1 ray.
   *  Primary template, declared only: `types` selects the node-type set and
   *  `robust` the TravRay<N,robust> flavour handled by each specialization. */
  template<int N, int types, bool robust> struct BVHNNodeIntersector1;
  1087. template<int N>
  1088. struct BVHNNodeIntersector1<N, BVH_AN1, false>
  1089. {
  1090. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask)
  1091. {
  1092. if (unlikely(node.isLeaf())) return false;
  1093. mask = intersectNode(node.getAABBNode(), ray, dist);
  1094. return true;
  1095. }
  1096. };
  1097. template<int N>
  1098. struct BVHNNodeIntersector1<N, BVH_AN1, true>
  1099. {
  1100. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask)
  1101. {
  1102. if (unlikely(node.isLeaf())) return false;
  1103. mask = intersectNodeRobust(node.getAABBNode(), ray, dist);
  1104. return true;
  1105. }
  1106. };
  1107. template<int N>
  1108. struct BVHNNodeIntersector1<N, BVH_AN2, false>
  1109. {
  1110. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask)
  1111. {
  1112. if (unlikely(node.isLeaf())) return false;
  1113. mask = intersectNode(node.getAABBNodeMB(), ray, time, dist);
  1114. return true;
  1115. }
  1116. };
  1117. template<int N>
  1118. struct BVHNNodeIntersector1<N, BVH_AN2, true>
  1119. {
  1120. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask)
  1121. {
  1122. if (unlikely(node.isLeaf())) return false;
  1123. mask = intersectNodeRobust(node.getAABBNodeMB(), ray, time, dist);
  1124. return true;
  1125. }
  1126. };
  1127. template<int N>
  1128. struct BVHNNodeIntersector1<N, BVH_AN2_AN4D, false>
  1129. {
  1130. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask)
  1131. {
  1132. if (unlikely(node.isLeaf())) return false;
  1133. mask = intersectNodeMB4D<N>(node, ray, time, dist);
  1134. return true;
  1135. }
  1136. };
  1137. template<int N>
  1138. struct BVHNNodeIntersector1<N, BVH_AN2_AN4D, true>
  1139. {
  1140. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask)
  1141. {
  1142. if (unlikely(node.isLeaf())) return false;
  1143. mask = intersectNodeMB4DRobust<N>(node, ray, time, dist);
  1144. return true;
  1145. }
  1146. };
  1147. template<int N>
  1148. struct BVHNNodeIntersector1<N, BVH_AN1_UN1, false>
  1149. {
  1150. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask)
  1151. {
  1152. if (likely(node.isAABBNode())) mask = intersectNode(node.getAABBNode(), ray, dist);
  1153. else if (unlikely(node.isOBBNode())) mask = intersectNode(node.ungetAABBNode(), ray, dist);
  1154. else return false;
  1155. return true;
  1156. }
  1157. };
  1158. template<int N>
  1159. struct BVHNNodeIntersector1<N, BVH_AN1_UN1, true>
  1160. {
  1161. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask)
  1162. {
  1163. if (likely(node.isAABBNode())) mask = intersectNodeRobust(node.getAABBNode(), ray, dist);
  1164. else if (unlikely(node.isOBBNode())) mask = intersectNode(node.ungetAABBNode(), ray, dist);
  1165. else return false;
  1166. return true;
  1167. }
  1168. };
  1169. template<int N>
  1170. struct BVHNNodeIntersector1<N, BVH_AN2_UN2, false>
  1171. {
  1172. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask)
  1173. {
  1174. if (likely(node.isAABBNodeMB())) mask = intersectNode(node.getAABBNodeMB(), ray, time, dist);
  1175. else if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist);
  1176. else return false;
  1177. return true;
  1178. }
  1179. };
  1180. template<int N>
  1181. struct BVHNNodeIntersector1<N, BVH_AN2_UN2, true>
  1182. {
  1183. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask)
  1184. {
  1185. if (likely(node.isAABBNodeMB())) mask = intersectNodeRobust(node.getAABBNodeMB(), ray, time, dist);
  1186. else if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist);
  1187. else return false;
  1188. return true;
  1189. }
  1190. };
  1191. template<int N>
  1192. struct BVHNNodeIntersector1<N, BVH_AN2_AN4D_UN2, false>
  1193. {
  1194. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask)
  1195. {
  1196. if (unlikely(node.isLeaf())) return false;
  1197. if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist);
  1198. else mask = intersectNodeMB4D(node, ray, time, dist);
  1199. return true;
  1200. }
  1201. };
  1202. template<int N>
  1203. struct BVHNNodeIntersector1<N, BVH_AN2_AN4D_UN2, true>
  1204. {
  1205. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask)
  1206. {
  1207. if (unlikely(node.isLeaf())) return false;
  1208. if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist);
  1209. else mask = intersectNodeMB4DRobust(node, ray, time, dist);
  1210. return true;
  1211. }
  1212. };
  1213. template<int N>
  1214. struct BVHNNodeIntersector1<N, BVH_QN1, false>
  1215. {
  1216. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask)
  1217. {
  1218. if (unlikely(node.isLeaf())) return false;
  1219. mask = intersectNode((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), ray, dist);
  1220. return true;
  1221. }
  1222. };
  1223. template<int N>
  1224. struct BVHNNodeIntersector1<N, BVH_QN1, true>
  1225. {
  1226. static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask)
  1227. {
  1228. if (unlikely(node.isLeaf())) return false;
  1229. mask = intersectNodeRobust((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), ray, dist);
  1230. return true;
  1231. }
  1232. };
  /*! Intersects N quantized base nodes with 1 ray.
   *  Primary template, declared only: `robust` selects the
   *  TravRay<N,robust> flavour accepted by the matching specialization. */
  template<int N, bool robust>
  struct BVHNQuantizedBaseNodeIntersector1;
  1236. template<int N>
  1237. struct BVHNQuantizedBaseNodeIntersector1<N, false>
  1238. {
  1239. static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,false>& ray, vfloat<N>& dist)
  1240. {
  1241. return intersectNode(node,ray,dist);
  1242. }
  1243. static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,false>& ray, const float time, vfloat<N>& dist)
  1244. {
  1245. return intersectNode(node,ray,time,dist);
  1246. }
  1247. };
  1248. template<int N>
  1249. struct BVHNQuantizedBaseNodeIntersector1<N, true>
  1250. {
  1251. static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,true>& ray, vfloat<N>& dist)
  1252. {
  1253. return intersectNode(node,ray,dist);
  1254. }
  1255. static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,true>& ray, const float time, vfloat<N>& dist)
  1256. {
  1257. return intersectNode(node,ray,time,dist);
  1258. }
  1259. };
  1260. }
  1261. }