DVec3.inl
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT

#pragma once

#include <Jolt/Core/HashCombine.h>

// Create a std::hash/JPH::Hash for DVec3
JPH_MAKE_HASHABLE(JPH::DVec3, t.GetX(), t.GetY(), t.GetZ())

JPH_NAMESPACE_BEGIN

DVec3::DVec3(Vec3Arg inRHS)
{
#if defined(JPH_USE_AVX)
	mValue = _mm256_cvtps_pd(inRHS.mValue);
#elif defined(JPH_USE_SSE)
	mValue.mLow = _mm_cvtps_pd(inRHS.mValue);
	mValue.mHigh = _mm_cvtps_pd(_mm_shuffle_ps(inRHS.mValue, inRHS.mValue, _MM_SHUFFLE(2, 2, 2, 2)));
#elif defined(JPH_USE_NEON)
	mValue.val[0] = vcvt_f64_f32(vget_low_f32(inRHS.mValue));
	mValue.val[1] = vcvt_high_f64_f32(inRHS.mValue);
#else
	mF64[0] = (double)inRHS.GetX();
	mF64[1] = (double)inRHS.GetY();
	mF64[2] = (double)inRHS.GetZ();
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
	mF64[3] = mF64[2];
#endif
#endif
}

DVec3::DVec3(Vec4Arg inRHS) :
	DVec3(Vec3(inRHS))
{
}

DVec3::DVec3(double inX, double inY, double inZ)
{
#if defined(JPH_USE_AVX)
	mValue = _mm256_set_pd(inZ, inZ, inY, inX); // Assure Z and W are the same
#elif defined(JPH_USE_SSE)
	mValue.mLow = _mm_set_pd(inY, inX);
	mValue.mHigh = _mm_set1_pd(inZ);
#elif defined(JPH_USE_NEON)
	mValue.val[0] = vcombine_f64(vcreate_f64(BitCast<uint64>(inX)), vcreate_f64(BitCast<uint64>(inY)));
	mValue.val[1] = vdupq_n_f64(inZ);
#else
	mF64[0] = inX;
	mF64[1] = inY;
	mF64[2] = inZ;
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
	mF64[3] = mF64[2];
#endif
#endif
}

DVec3::DVec3(const Double3 &inV)
{
#if defined(JPH_USE_AVX)
	Type x = _mm256_castpd128_pd256(_mm_load_sd(&inV.x));
	Type y = _mm256_castpd128_pd256(_mm_load_sd(&inV.y));
	Type z = _mm256_broadcast_sd(&inV.z);
	Type xy = _mm256_unpacklo_pd(x, y);
	mValue = _mm256_blend_pd(xy, z, 0b1100); // Assure Z and W are the same
#elif defined(JPH_USE_SSE)
	mValue.mLow = _mm_loadu_pd(&inV.x);
	mValue.mHigh = _mm_set1_pd(inV.z);
#elif defined(JPH_USE_NEON)
	mValue.val[0] = vld1q_f64(&inV.x);
	mValue.val[1] = vdupq_n_f64(inV.z);
#else
	mF64[0] = inV.x;
	mF64[1] = inV.y;
	mF64[2] = inV.z;
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
	mF64[3] = mF64[2];
#endif
#endif
}

void DVec3::CheckW() const
{
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
	// Avoid asserts when both components are NaN
	JPH_ASSERT(reinterpret_cast<const uint64 *>(mF64)[2] == reinterpret_cast<const uint64 *>(mF64)[3]);
#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
}

/// Internal helper function that ensures that the Z component is replicated to the W component to prevent divisions by zero
DVec3::Type DVec3::sFixW(TypeArg inValue)
{
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
#if defined(JPH_USE_AVX)
	return _mm256_shuffle_pd(inValue, inValue, 2);
#elif defined(JPH_USE_SSE)
	Type value;
	value.mLow = inValue.mLow;
	value.mHigh = _mm_shuffle_pd(inValue.mHigh, inValue.mHigh, 0);
	return value;
#elif defined(JPH_USE_NEON)
	Type value;
	value.val[0] = inValue.val[0];
	value.val[1] = vdupq_laneq_f64(inValue.val[1], 0);
	return value;
#else
	Type value;
	value.mData[0] = inValue.mData[0];
	value.mData[1] = inValue.mData[1];
	value.mData[2] = inValue.mData[2];
	value.mData[3] = inValue.mData[2];
	return value;
#endif
#else
	return inValue;
#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
}

DVec3 DVec3::sZero()
{
#if defined(JPH_USE_AVX)
	return _mm256_setzero_pd();
#elif defined(JPH_USE_SSE)
	__m128d zero = _mm_setzero_pd();
	return DVec3({ zero, zero });
#elif defined(JPH_USE_NEON)
	float64x2_t zero = vdupq_n_f64(0.0);
	return DVec3({ zero, zero });
#else
	return DVec3(0, 0, 0);
#endif
}

DVec3 DVec3::sReplicate(double inV)
{
#if defined(JPH_USE_AVX)
	return _mm256_set1_pd(inV);
#elif defined(JPH_USE_SSE)
	__m128d value = _mm_set1_pd(inV);
	return DVec3({ value, value });
#elif defined(JPH_USE_NEON)
	float64x2_t value = vdupq_n_f64(inV);
	return DVec3({ value, value });
#else
	return DVec3(inV, inV, inV);
#endif
}

DVec3 DVec3::sOne()
{
	return sReplicate(1.0);
}

DVec3 DVec3::sNaN()
{
	return sReplicate(numeric_limits<double>::quiet_NaN());
}

DVec3 DVec3::sLoadDouble3Unsafe(const Double3 &inV)
{
	// "Unsafe" because the wide AVX/NEON loads below read 8 bytes past the end of inV
#if defined(JPH_USE_AVX)
	Type v = _mm256_loadu_pd(&inV.x);
#elif defined(JPH_USE_SSE)
	Type v;
	v.mLow = _mm_loadu_pd(&inV.x);
	v.mHigh = _mm_set1_pd(inV.z);
#elif defined(JPH_USE_NEON)
	Type v = vld1q_f64_x2(&inV.x);
#else
	Type v = { inV.x, inV.y, inV.z };
#endif
	return sFixW(v);
}

void DVec3::StoreDouble3(Double3 *outV) const
{
	outV->x = mF64[0];
	outV->y = mF64[1];
	outV->z = mF64[2];
}

DVec3::operator Vec3() const
{
#if defined(JPH_USE_AVX)
	return _mm256_cvtpd_ps(mValue);
#elif defined(JPH_USE_SSE)
	__m128 low = _mm_cvtpd_ps(mValue.mLow);
	__m128 high = _mm_cvtpd_ps(mValue.mHigh);
	return _mm_shuffle_ps(low, high, _MM_SHUFFLE(1, 0, 1, 0));
#elif defined(JPH_USE_NEON)
	return vcvt_high_f32_f64(vcvtx_f32_f64(mValue.val[0]), mValue.val[1]);
#else
	return Vec3((float)GetX(), (float)GetY(), (float)GetZ());
#endif
}

DVec3 DVec3::sMin(DVec3Arg inV1, DVec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	return _mm256_min_pd(inV1.mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_min_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_min_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vminq_f64(inV1.mValue.val[0], inV2.mValue.val[0]), vminq_f64(inV1.mValue.val[1], inV2.mValue.val[1]) });
#else
	return DVec3(min(inV1.mF64[0], inV2.mF64[0]),
				 min(inV1.mF64[1], inV2.mF64[1]),
				 min(inV1.mF64[2], inV2.mF64[2]));
#endif
}

DVec3 DVec3::sMax(DVec3Arg inV1, DVec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	return _mm256_max_pd(inV1.mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_max_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_max_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vmaxq_f64(inV1.mValue.val[0], inV2.mValue.val[0]), vmaxq_f64(inV1.mValue.val[1], inV2.mValue.val[1]) });
#else
	return DVec3(max(inV1.mF64[0], inV2.mF64[0]),
				 max(inV1.mF64[1], inV2.mF64[1]),
				 max(inV1.mF64[2], inV2.mF64[2]));
#endif
}

DVec3 DVec3::sClamp(DVec3Arg inV, DVec3Arg inMin, DVec3Arg inMax)
{
	return sMax(sMin(inV, inMax), inMin);
}

DVec3 DVec3::sEquals(DVec3Arg inV1, DVec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	return _mm256_cmp_pd(inV1.mValue, inV2.mValue, _CMP_EQ_OQ);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_cmpeq_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_cmpeq_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vreinterpretq_f64_u64(vceqq_f64(inV1.mValue.val[0], inV2.mValue.val[0])), vreinterpretq_f64_u64(vceqq_f64(inV1.mValue.val[1], inV2.mValue.val[1])) });
#else
	return DVec3(inV1.mF64[0] == inV2.mF64[0]? cTrue : cFalse,
				 inV1.mF64[1] == inV2.mF64[1]? cTrue : cFalse,
				 inV1.mF64[2] == inV2.mF64[2]? cTrue : cFalse);
#endif
}

DVec3 DVec3::sLess(DVec3Arg inV1, DVec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	return _mm256_cmp_pd(inV1.mValue, inV2.mValue, _CMP_LT_OQ);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_cmplt_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_cmplt_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vreinterpretq_f64_u64(vcltq_f64(inV1.mValue.val[0], inV2.mValue.val[0])), vreinterpretq_f64_u64(vcltq_f64(inV1.mValue.val[1], inV2.mValue.val[1])) });
#else
	return DVec3(inV1.mF64[0] < inV2.mF64[0]? cTrue : cFalse,
				 inV1.mF64[1] < inV2.mF64[1]? cTrue : cFalse,
				 inV1.mF64[2] < inV2.mF64[2]? cTrue : cFalse);
#endif
}

DVec3 DVec3::sLessOrEqual(DVec3Arg inV1, DVec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	return _mm256_cmp_pd(inV1.mValue, inV2.mValue, _CMP_LE_OQ);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_cmple_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_cmple_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vreinterpretq_f64_u64(vcleq_f64(inV1.mValue.val[0], inV2.mValue.val[0])), vreinterpretq_f64_u64(vcleq_f64(inV1.mValue.val[1], inV2.mValue.val[1])) });
#else
	return DVec3(inV1.mF64[0] <= inV2.mF64[0]? cTrue : cFalse,
				 inV1.mF64[1] <= inV2.mF64[1]? cTrue : cFalse,
				 inV1.mF64[2] <= inV2.mF64[2]? cTrue : cFalse);
#endif
}

DVec3 DVec3::sGreater(DVec3Arg inV1, DVec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	return _mm256_cmp_pd(inV1.mValue, inV2.mValue, _CMP_GT_OQ);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_cmpgt_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_cmpgt_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vreinterpretq_f64_u64(vcgtq_f64(inV1.mValue.val[0], inV2.mValue.val[0])), vreinterpretq_f64_u64(vcgtq_f64(inV1.mValue.val[1], inV2.mValue.val[1])) });
#else
	return DVec3(inV1.mF64[0] > inV2.mF64[0]? cTrue : cFalse,
				 inV1.mF64[1] > inV2.mF64[1]? cTrue : cFalse,
				 inV1.mF64[2] > inV2.mF64[2]? cTrue : cFalse);
#endif
}

DVec3 DVec3::sGreaterOrEqual(DVec3Arg inV1, DVec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	return _mm256_cmp_pd(inV1.mValue, inV2.mValue, _CMP_GE_OQ);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_cmpge_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_cmpge_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vreinterpretq_f64_u64(vcgeq_f64(inV1.mValue.val[0], inV2.mValue.val[0])), vreinterpretq_f64_u64(vcgeq_f64(inV1.mValue.val[1], inV2.mValue.val[1])) });
#else
	return DVec3(inV1.mF64[0] >= inV2.mF64[0]? cTrue : cFalse,
				 inV1.mF64[1] >= inV2.mF64[1]? cTrue : cFalse,
				 inV1.mF64[2] >= inV2.mF64[2]? cTrue : cFalse);
#endif
}

DVec3 DVec3::sFusedMultiplyAdd(DVec3Arg inMul1, DVec3Arg inMul2, DVec3Arg inAdd)
{
#if defined(JPH_USE_AVX)
#ifdef JPH_USE_FMADD
	return _mm256_fmadd_pd(inMul1.mValue, inMul2.mValue, inAdd.mValue);
#else
	return _mm256_add_pd(_mm256_mul_pd(inMul1.mValue, inMul2.mValue), inAdd.mValue);
#endif
#elif defined(JPH_USE_NEON)
	return DVec3({ vmlaq_f64(inAdd.mValue.val[0], inMul1.mValue.val[0], inMul2.mValue.val[0]), vmlaq_f64(inAdd.mValue.val[1], inMul1.mValue.val[1], inMul2.mValue.val[1]) });
#else
	return inMul1 * inMul2 + inAdd;
#endif
}

DVec3 DVec3::sSelect(DVec3Arg inNotSet, DVec3Arg inSet, DVec3Arg inControl)
{
#if defined(JPH_USE_AVX)
	return _mm256_blendv_pd(inNotSet.mValue, inSet.mValue, inControl.mValue);
#elif defined(JPH_USE_SSE4_1)
	Type v = { _mm_blendv_pd(inNotSet.mValue.mLow, inSet.mValue.mLow, inControl.mValue.mLow), _mm_blendv_pd(inNotSet.mValue.mHigh, inSet.mValue.mHigh, inControl.mValue.mHigh) };
	return sFixW(v);
#elif defined(JPH_USE_NEON)
	Type v = { vbslq_f64(vreinterpretq_u64_s64(vshrq_n_s64(vreinterpretq_s64_f64(inControl.mValue.val[0]), 63)), inSet.mValue.val[0], inNotSet.mValue.val[0]),
			   vbslq_f64(vreinterpretq_u64_s64(vshrq_n_s64(vreinterpretq_s64_f64(inControl.mValue.val[1]), 63)), inSet.mValue.val[1], inNotSet.mValue.val[1]) };
	return sFixW(v);
#else
	DVec3 result;
	for (int i = 0; i < 3; i++)
		result.mF64[i] = (BitCast<uint64>(inControl.mF64[i]) & (uint64(1) << 63))? inSet.mF64[i] : inNotSet.mF64[i];
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
	result.mF64[3] = result.mF64[2];
#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
	return result;
#endif
}

DVec3 DVec3::sOr(DVec3Arg inV1, DVec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	return _mm256_or_pd(inV1.mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_or_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_or_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(inV1.mValue.val[0]), vreinterpretq_u64_f64(inV2.mValue.val[0]))),
				   vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(inV1.mValue.val[1]), vreinterpretq_u64_f64(inV2.mValue.val[1]))) });
#else
	return DVec3(BitCast<double>(BitCast<uint64>(inV1.mF64[0]) | BitCast<uint64>(inV2.mF64[0])),
				 BitCast<double>(BitCast<uint64>(inV1.mF64[1]) | BitCast<uint64>(inV2.mF64[1])),
				 BitCast<double>(BitCast<uint64>(inV1.mF64[2]) | BitCast<uint64>(inV2.mF64[2])));
#endif
}

DVec3 DVec3::sXor(DVec3Arg inV1, DVec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	return _mm256_xor_pd(inV1.mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_xor_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_xor_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(inV1.mValue.val[0]), vreinterpretq_u64_f64(inV2.mValue.val[0]))),
				   vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(inV1.mValue.val[1]), vreinterpretq_u64_f64(inV2.mValue.val[1]))) });
#else
	return DVec3(BitCast<double>(BitCast<uint64>(inV1.mF64[0]) ^ BitCast<uint64>(inV2.mF64[0])),
				 BitCast<double>(BitCast<uint64>(inV1.mF64[1]) ^ BitCast<uint64>(inV2.mF64[1])),
				 BitCast<double>(BitCast<uint64>(inV1.mF64[2]) ^ BitCast<uint64>(inV2.mF64[2])));
#endif
}

DVec3 DVec3::sAnd(DVec3Arg inV1, DVec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	return _mm256_and_pd(inV1.mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_and_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_and_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(inV1.mValue.val[0]), vreinterpretq_u64_f64(inV2.mValue.val[0]))),
				   vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(inV1.mValue.val[1]), vreinterpretq_u64_f64(inV2.mValue.val[1]))) });
#else
	return DVec3(BitCast<double>(BitCast<uint64>(inV1.mF64[0]) & BitCast<uint64>(inV2.mF64[0])),
				 BitCast<double>(BitCast<uint64>(inV1.mF64[1]) & BitCast<uint64>(inV2.mF64[1])),
				 BitCast<double>(BitCast<uint64>(inV1.mF64[2]) & BitCast<uint64>(inV2.mF64[2])));
#endif
}

int DVec3::GetTrues() const
{
	// Collect the sign bits of X, Y and Z into bits 0, 1 and 2 of the result
#if defined(JPH_USE_AVX)
	return _mm256_movemask_pd(mValue) & 0x7;
#elif defined(JPH_USE_SSE)
	return (_mm_movemask_pd(mValue.mLow) + (_mm_movemask_pd(mValue.mHigh) << 2)) & 0x7;
#else
	return int((BitCast<uint64>(mF64[0]) >> 63) | ((BitCast<uint64>(mF64[1]) >> 63) << 1) | ((BitCast<uint64>(mF64[2]) >> 63) << 2));
#endif
}

bool DVec3::TestAnyTrue() const
{
	return GetTrues() != 0;
}

bool DVec3::TestAllTrue() const
{
	return GetTrues() == 0x7;
}

bool DVec3::operator == (DVec3Arg inV2) const
{
	return sEquals(*this, inV2).TestAllTrue();
}

bool DVec3::IsClose(DVec3Arg inV2, double inMaxDistSq) const
{
	return (inV2 - *this).LengthSq() <= inMaxDistSq;
}

bool DVec3::IsNearZero(double inMaxDistSq) const
{
	return LengthSq() <= inMaxDistSq;
}

DVec3 DVec3::operator * (DVec3Arg inV2) const
{
#if defined(JPH_USE_AVX)
	return _mm256_mul_pd(mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_mul_pd(mValue.mLow, inV2.mValue.mLow), _mm_mul_pd(mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vmulq_f64(mValue.val[0], inV2.mValue.val[0]), vmulq_f64(mValue.val[1], inV2.mValue.val[1]) });
#else
	return DVec3(mF64[0] * inV2.mF64[0], mF64[1] * inV2.mF64[1], mF64[2] * inV2.mF64[2]);
#endif
}

DVec3 DVec3::operator * (double inV2) const
{
#if defined(JPH_USE_AVX)
	return _mm256_mul_pd(mValue, _mm256_set1_pd(inV2));
#elif defined(JPH_USE_SSE)
	__m128d v = _mm_set1_pd(inV2);
	return DVec3({ _mm_mul_pd(mValue.mLow, v), _mm_mul_pd(mValue.mHigh, v) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vmulq_n_f64(mValue.val[0], inV2), vmulq_n_f64(mValue.val[1], inV2) });
#else
	return DVec3(mF64[0] * inV2, mF64[1] * inV2, mF64[2] * inV2);
#endif
}

DVec3 operator * (double inV1, DVec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	return _mm256_mul_pd(_mm256_set1_pd(inV1), inV2.mValue);
#elif defined(JPH_USE_SSE)
	__m128d v = _mm_set1_pd(inV1);
	return DVec3({ _mm_mul_pd(v, inV2.mValue.mLow), _mm_mul_pd(v, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vmulq_n_f64(inV2.mValue.val[0], inV1), vmulq_n_f64(inV2.mValue.val[1], inV1) });
#else
	return DVec3(inV1 * inV2.mF64[0], inV1 * inV2.mF64[1], inV1 * inV2.mF64[2]);
#endif
}

DVec3 DVec3::operator / (double inV2) const
{
#if defined(JPH_USE_AVX)
	return _mm256_div_pd(mValue, _mm256_set1_pd(inV2));
#elif defined(JPH_USE_SSE)
	__m128d v = _mm_set1_pd(inV2);
	return DVec3({ _mm_div_pd(mValue.mLow, v), _mm_div_pd(mValue.mHigh, v) });
#elif defined(JPH_USE_NEON)
	float64x2_t v = vdupq_n_f64(inV2);
	return DVec3({ vdivq_f64(mValue.val[0], v), vdivq_f64(mValue.val[1], v) });
#else
	return DVec3(mF64[0] / inV2, mF64[1] / inV2, mF64[2] / inV2);
#endif
}

DVec3 &DVec3::operator *= (double inV2)
{
#if defined(JPH_USE_AVX)
	mValue = _mm256_mul_pd(mValue, _mm256_set1_pd(inV2));
#elif defined(JPH_USE_SSE)
	__m128d v = _mm_set1_pd(inV2);
	mValue.mLow = _mm_mul_pd(mValue.mLow, v);
	mValue.mHigh = _mm_mul_pd(mValue.mHigh, v);
#elif defined(JPH_USE_NEON)
	mValue.val[0] = vmulq_n_f64(mValue.val[0], inV2);
	mValue.val[1] = vmulq_n_f64(mValue.val[1], inV2);
#else
	for (int i = 0; i < 3; ++i)
		mF64[i] *= inV2;
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
	mF64[3] = mF64[2];
#endif
#endif
	return *this;
}

DVec3 &DVec3::operator *= (DVec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	mValue = _mm256_mul_pd(mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	mValue.mLow = _mm_mul_pd(mValue.mLow, inV2.mValue.mLow);
	mValue.mHigh = _mm_mul_pd(mValue.mHigh, inV2.mValue.mHigh);
#elif defined(JPH_USE_NEON)
	mValue.val[0] = vmulq_f64(mValue.val[0], inV2.mValue.val[0]);
	mValue.val[1] = vmulq_f64(mValue.val[1], inV2.mValue.val[1]);
#else
	for (int i = 0; i < 3; ++i)
		mF64[i] *= inV2.mF64[i];
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
	mF64[3] = mF64[2];
#endif
#endif
	return *this;
}

DVec3 &DVec3::operator /= (double inV2)
{
#if defined(JPH_USE_AVX)
	mValue = _mm256_div_pd(mValue, _mm256_set1_pd(inV2));
#elif defined(JPH_USE_SSE)
	__m128d v = _mm_set1_pd(inV2);
	mValue.mLow = _mm_div_pd(mValue.mLow, v);
	mValue.mHigh = _mm_div_pd(mValue.mHigh, v);
#elif defined(JPH_USE_NEON)
	float64x2_t v = vdupq_n_f64(inV2);
	mValue.val[0] = vdivq_f64(mValue.val[0], v);
	mValue.val[1] = vdivq_f64(mValue.val[1], v);
#else
	for (int i = 0; i < 3; ++i)
		mF64[i] /= inV2;
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
	mF64[3] = mF64[2];
#endif
#endif
	return *this;
}

DVec3 DVec3::operator + (Vec3Arg inV2) const
{
#if defined(JPH_USE_AVX)
	return _mm256_add_pd(mValue, _mm256_cvtps_pd(inV2.mValue));
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_add_pd(mValue.mLow, _mm_cvtps_pd(inV2.mValue)), _mm_add_pd(mValue.mHigh, _mm_cvtps_pd(_mm_shuffle_ps(inV2.mValue, inV2.mValue, _MM_SHUFFLE(2, 2, 2, 2)))) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vaddq_f64(mValue.val[0], vcvt_f64_f32(vget_low_f32(inV2.mValue))), vaddq_f64(mValue.val[1], vcvt_high_f64_f32(inV2.mValue)) });
#else
	return DVec3(mF64[0] + inV2.mF32[0], mF64[1] + inV2.mF32[1], mF64[2] + inV2.mF32[2]);
#endif
}

DVec3 DVec3::operator + (DVec3Arg inV2) const
{
#if defined(JPH_USE_AVX)
	return _mm256_add_pd(mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_add_pd(mValue.mLow, inV2.mValue.mLow), _mm_add_pd(mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vaddq_f64(mValue.val[0], inV2.mValue.val[0]), vaddq_f64(mValue.val[1], inV2.mValue.val[1]) });
#else
	return DVec3(mF64[0] + inV2.mF64[0], mF64[1] + inV2.mF64[1], mF64[2] + inV2.mF64[2]);
#endif
}

DVec3 &DVec3::operator += (Vec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	mValue = _mm256_add_pd(mValue, _mm256_cvtps_pd(inV2.mValue));
#elif defined(JPH_USE_SSE)
	mValue.mLow = _mm_add_pd(mValue.mLow, _mm_cvtps_pd(inV2.mValue));
	mValue.mHigh = _mm_add_pd(mValue.mHigh, _mm_cvtps_pd(_mm_shuffle_ps(inV2.mValue, inV2.mValue, _MM_SHUFFLE(2, 2, 2, 2))));
#elif defined(JPH_USE_NEON)
	mValue.val[0] = vaddq_f64(mValue.val[0], vcvt_f64_f32(vget_low_f32(inV2.mValue)));
	mValue.val[1] = vaddq_f64(mValue.val[1], vcvt_high_f64_f32(inV2.mValue));
#else
	for (int i = 0; i < 3; ++i)
		mF64[i] += inV2.mF32[i];
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
	mF64[3] = mF64[2];
#endif
#endif
	return *this;
}

DVec3 &DVec3::operator += (DVec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	mValue = _mm256_add_pd(mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	mValue.mLow = _mm_add_pd(mValue.mLow, inV2.mValue.mLow);
	mValue.mHigh = _mm_add_pd(mValue.mHigh, inV2.mValue.mHigh);
#elif defined(JPH_USE_NEON)
	mValue.val[0] = vaddq_f64(mValue.val[0], inV2.mValue.val[0]);
	mValue.val[1] = vaddq_f64(mValue.val[1], inV2.mValue.val[1]);
#else
	for (int i = 0; i < 3; ++i)
		mF64[i] += inV2.mF64[i];
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
	mF64[3] = mF64[2];
#endif
#endif
	return *this;
}

DVec3 DVec3::operator - () const
{
#if defined(JPH_USE_AVX)
	return _mm256_sub_pd(_mm256_setzero_pd(), mValue);
#elif defined(JPH_USE_SSE)
	__m128d zero = _mm_setzero_pd();
	return DVec3({ _mm_sub_pd(zero, mValue.mLow), _mm_sub_pd(zero, mValue.mHigh) });
#elif defined(JPH_USE_NEON)
#ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
	float64x2_t zero = vdupq_n_f64(0);
	return DVec3({ vsubq_f64(zero, mValue.val[0]), vsubq_f64(zero, mValue.val[1]) });
#else
	return DVec3({ vnegq_f64(mValue.val[0]), vnegq_f64(mValue.val[1]) });
#endif
#else
#ifdef JPH_CROSS_PLATFORM_DETERMINISTIC
	return DVec3(0.0 - mF64[0], 0.0 - mF64[1], 0.0 - mF64[2]);
#else
	return DVec3(-mF64[0], -mF64[1], -mF64[2]);
#endif
#endif
}

DVec3 DVec3::operator - (Vec3Arg inV2) const
{
#if defined(JPH_USE_AVX)
	return _mm256_sub_pd(mValue, _mm256_cvtps_pd(inV2.mValue));
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_sub_pd(mValue.mLow, _mm_cvtps_pd(inV2.mValue)), _mm_sub_pd(mValue.mHigh, _mm_cvtps_pd(_mm_shuffle_ps(inV2.mValue, inV2.mValue, _MM_SHUFFLE(2, 2, 2, 2)))) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vsubq_f64(mValue.val[0], vcvt_f64_f32(vget_low_f32(inV2.mValue))), vsubq_f64(mValue.val[1], vcvt_high_f64_f32(inV2.mValue)) });
#else
	return DVec3(mF64[0] - inV2.mF32[0], mF64[1] - inV2.mF32[1], mF64[2] - inV2.mF32[2]);
#endif
}

DVec3 DVec3::operator - (DVec3Arg inV2) const
{
#if defined(JPH_USE_AVX)
	return _mm256_sub_pd(mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_sub_pd(mValue.mLow, inV2.mValue.mLow), _mm_sub_pd(mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vsubq_f64(mValue.val[0], inV2.mValue.val[0]), vsubq_f64(mValue.val[1], inV2.mValue.val[1]) });
#else
	return DVec3(mF64[0] - inV2.mF64[0], mF64[1] - inV2.mF64[1], mF64[2] - inV2.mF64[2]);
#endif
}

DVec3 &DVec3::operator -= (Vec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	mValue = _mm256_sub_pd(mValue, _mm256_cvtps_pd(inV2.mValue));
#elif defined(JPH_USE_SSE)
	mValue.mLow = _mm_sub_pd(mValue.mLow, _mm_cvtps_pd(inV2.mValue));
	mValue.mHigh = _mm_sub_pd(mValue.mHigh, _mm_cvtps_pd(_mm_shuffle_ps(inV2.mValue, inV2.mValue, _MM_SHUFFLE(2, 2, 2, 2))));
#elif defined(JPH_USE_NEON)
	mValue.val[0] = vsubq_f64(mValue.val[0], vcvt_f64_f32(vget_low_f32(inV2.mValue)));
	mValue.val[1] = vsubq_f64(mValue.val[1], vcvt_high_f64_f32(inV2.mValue));
#else
	for (int i = 0; i < 3; ++i)
		mF64[i] -= inV2.mF32[i];
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
	mF64[3] = mF64[2];
#endif
#endif
	return *this;
}

DVec3 &DVec3::operator -= (DVec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	mValue = _mm256_sub_pd(mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	mValue.mLow = _mm_sub_pd(mValue.mLow, inV2.mValue.mLow);
	mValue.mHigh = _mm_sub_pd(mValue.mHigh, inV2.mValue.mHigh);
#elif defined(JPH_USE_NEON)
	mValue.val[0] = vsubq_f64(mValue.val[0], inV2.mValue.val[0]);
	mValue.val[1] = vsubq_f64(mValue.val[1], inV2.mValue.val[1]);
#else
	for (int i = 0; i < 3; ++i)
		mF64[i] -= inV2.mF64[i];
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
	mF64[3] = mF64[2];
#endif
#endif
	return *this;
}

DVec3 DVec3::operator / (DVec3Arg inV2) const
{
	inV2.CheckW();
#if defined(JPH_USE_AVX)
	return _mm256_div_pd(mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_div_pd(mValue.mLow, inV2.mValue.mLow), _mm_div_pd(mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vdivq_f64(mValue.val[0], inV2.mValue.val[0]), vdivq_f64(mValue.val[1], inV2.mValue.val[1]) });
#else
	return DVec3(mF64[0] / inV2.mF64[0], mF64[1] / inV2.mF64[1], mF64[2] / inV2.mF64[2]);
#endif
}

DVec3 DVec3::Abs() const
{
#if defined(JPH_USE_AVX512)
	return _mm256_range_pd(mValue, mValue, 0b1000);
#elif defined(JPH_USE_AVX)
	return _mm256_max_pd(_mm256_sub_pd(_mm256_setzero_pd(), mValue), mValue);
#elif defined(JPH_USE_SSE)
	__m128d zero = _mm_setzero_pd();
	return DVec3({ _mm_max_pd(_mm_sub_pd(zero, mValue.mLow), mValue.mLow), _mm_max_pd(_mm_sub_pd(zero, mValue.mHigh), mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vabsq_f64(mValue.val[0]), vabsq_f64(mValue.val[1]) });
#else
	return DVec3(abs(mF64[0]), abs(mF64[1]), abs(mF64[2]));
#endif
}

DVec3 DVec3::Reciprocal() const
{
	return sOne() / mValue;
}

DVec3 DVec3::Cross(DVec3Arg inV2) const
{
#if defined(JPH_USE_AVX2)
	__m256d t1 = _mm256_permute4x64_pd(inV2.mValue, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
	t1 = _mm256_mul_pd(t1, mValue);
	__m256d t2 = _mm256_permute4x64_pd(mValue, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
	t2 = _mm256_mul_pd(t2, inV2.mValue);
	__m256d t3 = _mm256_sub_pd(t1, t2);
	return _mm256_permute4x64_pd(t3, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
#else
	return DVec3(mF64[1] * inV2.mF64[2] - mF64[2] * inV2.mF64[1],
				 mF64[2] * inV2.mF64[0] - mF64[0] * inV2.mF64[2],
				 mF64[0] * inV2.mF64[1] - mF64[1] * inV2.mF64[0]);
#endif
}

double DVec3::Dot(DVec3Arg inV2) const
{
#if defined(JPH_USE_AVX)
	__m256d mul = _mm256_mul_pd(mValue, inV2.mValue);
	__m128d xy = _mm256_castpd256_pd128(mul);
	__m128d yx = _mm_shuffle_pd(xy, xy, 1);
	__m128d sum = _mm_add_pd(xy, yx);
	__m128d zw = _mm256_extractf128_pd(mul, 1);
	sum = _mm_add_pd(sum, zw);
	return _mm_cvtsd_f64(sum);
#elif defined(JPH_USE_SSE)
	__m128d xy = _mm_mul_pd(mValue.mLow, inV2.mValue.mLow);
	__m128d yx = _mm_shuffle_pd(xy, xy, 1);
	__m128d sum = _mm_add_pd(xy, yx);
	__m128d z = _mm_mul_sd(mValue.mHigh, inV2.mValue.mHigh);
	sum = _mm_add_pd(sum, z);
	return _mm_cvtsd_f64(sum);
#elif defined(JPH_USE_NEON)
	float64x2_t mul_low = vmulq_f64(mValue.val[0], inV2.mValue.val[0]);
	float64x2_t mul_high = vmulq_f64(mValue.val[1], inV2.mValue.val[1]);
	return vaddvq_f64(mul_low) + vgetq_lane_f64(mul_high, 0);
#else
	double dot = 0.0;
	for (int i = 0; i < 3; i++)
		dot += mF64[i] * inV2.mF64[i];
	return dot;
#endif
}

double DVec3::LengthSq() const
{
	return Dot(*this);
}

DVec3 DVec3::Sqrt() const
{
#if defined(JPH_USE_AVX)
	return _mm256_sqrt_pd(mValue);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_sqrt_pd(mValue.mLow), _mm_sqrt_pd(mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vsqrtq_f64(mValue.val[0]), vsqrtq_f64(mValue.val[1]) });
#else
	return DVec3(sqrt(mF64[0]), sqrt(mF64[1]), sqrt(mF64[2]));
#endif
}

double DVec3::Length() const
{
	return sqrt(Dot(*this));
}

DVec3 DVec3::Normalized() const
{
	return *this / Length();
}

bool DVec3::IsNormalized(double inTolerance) const
{
	return abs(LengthSq() - 1.0) <= inTolerance;
}

bool DVec3::IsNaN() const
{
#if defined(JPH_USE_AVX512)
	return (_mm256_fpclass_pd_mask(mValue, 0b10000001) & 0x7) != 0;
#elif defined(JPH_USE_AVX)
	return (_mm256_movemask_pd(_mm256_cmp_pd(mValue, mValue, _CMP_UNORD_Q)) & 0x7) != 0;
#elif defined(JPH_USE_SSE)
	return ((_mm_movemask_pd(_mm_cmpunord_pd(mValue.mLow, mValue.mLow)) + (_mm_movemask_pd(_mm_cmpunord_pd(mValue.mHigh, mValue.mHigh)) << 2)) & 0x7) != 0;
#else
	return isnan(mF64[0]) || isnan(mF64[1]) || isnan(mF64[2]);
#endif
}

DVec3 DVec3::GetSign() const
{
#if defined(JPH_USE_AVX512)
	return _mm256_fixupimm_pd(mValue, mValue, _mm256_set1_epi32(0xA9A90A00), 0);
#elif defined(JPH_USE_AVX)
	__m256d minus_one = _mm256_set1_pd(-1.0);
	__m256d one = _mm256_set1_pd(1.0);
	return _mm256_or_pd(_mm256_and_pd(mValue, minus_one), one);
#elif defined(JPH_USE_SSE)
	__m128d minus_one = _mm_set1_pd(-1.0);
	__m128d one = _mm_set1_pd(1.0);
	return DVec3({ _mm_or_pd(_mm_and_pd(mValue.mLow, minus_one), one), _mm_or_pd(_mm_and_pd(mValue.mHigh, minus_one), one) });
#elif defined(JPH_USE_NEON)
	uint64x2_t minus_one = vreinterpretq_u64_f64(vdupq_n_f64(-1.0f));
	uint64x2_t one = vreinterpretq_u64_f64(vdupq_n_f64(1.0f));
	return DVec3({ vreinterpretq_f64_u64(vorrq_u64(vandq_u64(vreinterpretq_u64_f64(mValue.val[0]), minus_one), one)),
				   vreinterpretq_f64_u64(vorrq_u64(vandq_u64(vreinterpretq_u64_f64(mValue.val[1]), minus_one), one)) });
#else
	return DVec3(std::signbit(mF64[0])? -1.0 : 1.0,
				 std::signbit(mF64[1])? -1.0 : 1.0,
				 std::signbit(mF64[2])? -1.0 : 1.0);
#endif
}

DVec3 DVec3::PrepareRoundToZero() const
{
	// Float has 23 bit mantissa, double 52 bit mantissa => we lose 29 bits when converting from double to float
	constexpr uint64 cDoubleToFloatMantissaLoss = (1U << 29) - 1;
#if defined(JPH_USE_AVX)
	return _mm256_and_pd(mValue, _mm256_castsi256_pd(_mm256_set1_epi64x(int64_t(~cDoubleToFloatMantissaLoss))));
#elif defined(JPH_USE_SSE)
	__m128d mask = _mm_castsi128_pd(_mm_set1_epi64x(int64_t(~cDoubleToFloatMantissaLoss)));
	return DVec3({ _mm_and_pd(mValue.mLow, mask), _mm_and_pd(mValue.mHigh, mask) });
#elif defined(JPH_USE_NEON)
	uint64x2_t mask = vdupq_n_u64(~cDoubleToFloatMantissaLoss);
	return DVec3({ vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(mValue.val[0]), mask)),
				   vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(mValue.val[1]), mask)) });
#else
	double x = BitCast<double>(BitCast<uint64>(mF64[0]) & ~cDoubleToFloatMantissaLoss);
	double y = BitCast<double>(BitCast<uint64>(mF64[1]) & ~cDoubleToFloatMantissaLoss);
	double z = BitCast<double>(BitCast<uint64>(mF64[2]) & ~cDoubleToFloatMantissaLoss);
	return DVec3(x, y, z);
#endif
}

DVec3 DVec3::PrepareRoundToInf() const
{
	// Float has 23 bit mantissa, double 52 bit mantissa => we lose 29 bits when converting from double to float
	constexpr uint64 cDoubleToFloatMantissaLoss = (1U << 29) - 1;
#if defined(JPH_USE_AVX512)
	__m256i mantissa_loss = _mm256_set1_epi64x(cDoubleToFloatMantissaLoss);
	__mmask8 is_zero = _mm256_testn_epi64_mask(_mm256_castpd_si256(mValue), mantissa_loss);
	__m256d value_or_mantissa_loss = _mm256_or_pd(mValue, _mm256_castsi256_pd(mantissa_loss));
	return _mm256_mask_blend_pd(is_zero, value_or_mantissa_loss, mValue);
#elif defined(JPH_USE_AVX)
	__m256i mantissa_loss = _mm256_set1_epi64x(cDoubleToFloatMantissaLoss);
	__m256d value_and_mantissa_loss = _mm256_and_pd(mValue, _mm256_castsi256_pd(mantissa_loss));
	__m256d is_zero = _mm256_cmp_pd(value_and_mantissa_loss, _mm256_setzero_pd(), _CMP_EQ_OQ);
	__m256d value_or_mantissa_loss = _mm256_or_pd(mValue, _mm256_castsi256_pd(mantissa_loss));
	return _mm256_blendv_pd(value_or_mantissa_loss, mValue, is_zero);
#elif defined(JPH_USE_SSE4_1)
	__m128i mantissa_loss = _mm_set1_epi64x(cDoubleToFloatMantissaLoss);
	__m128d zero = _mm_setzero_pd();
	__m128d value_and_mantissa_loss_low = _mm_and_pd(mValue.mLow, _mm_castsi128_pd(mantissa_loss));
	__m128d is_zero_low = _mm_cmpeq_pd(value_and_mantissa_loss_low, zero);
	__m128d value_or_mantissa_loss_low = _mm_or_pd(mValue.mLow, _mm_castsi128_pd(mantissa_loss));
	__m128d value_and_mantissa_loss_high = _mm_and_pd(mValue.mHigh, _mm_castsi128_pd(mantissa_loss));
	__m128d is_zero_high = _mm_cmpeq_pd(value_and_mantissa_loss_high, zero);
	__m128d value_or_mantissa_loss_high = _mm_or_pd(mValue.mHigh, _mm_castsi128_pd(mantissa_loss));
	return DVec3({ _mm_blendv_pd(value_or_mantissa_loss_low, mValue.mLow, is_zero_low), _mm_blendv_pd(value_or_mantissa_loss_high, mValue.mHigh, is_zero_high) });
#elif defined(JPH_USE_NEON)
	uint64x2_t mantissa_loss = vdupq_n_u64(cDoubleToFloatMantissaLoss);
	float64x2_t zero = vdupq_n_f64(0.0);
	float64x2_t value_and_mantissa_loss_low = vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(mValue.val[0]), mantissa_loss));
	uint64x2_t is_zero_low = vceqq_f64(value_and_mantissa_loss_low, zero);
	float64x2_t value_or_mantissa_loss_low = vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(mValue.val[0]), mantissa_loss));
	float64x2_t value_and_mantissa_loss_high = vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(mValue.val[1]), mantissa_loss));
	float64x2_t value_low = vbslq_f64(is_zero_low, mValue.val[0], value_or_mantissa_loss_low);
	uint64x2_t is_zero_high = vceqq_f64(value_and_mantissa_loss_high, zero);
	float64x2_t value_or_mantissa_loss_high = vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(mValue.val[1]), mantissa_loss));
	float64x2_t value_high = vbslq_f64(is_zero_high, mValue.val[1], value_or_mantissa_loss_high);
	return DVec3({ value_low, value_high });
#else
	uint64 ux = BitCast<uint64>(mF64[0]);
	uint64 uy = BitCast<uint64>(mF64[1]);
	uint64 uz = BitCast<uint64>(mF64[2]);
	double x = BitCast<double>((ux & cDoubleToFloatMantissaLoss) == 0? ux : (ux | cDoubleToFloatMantissaLoss));
	double y = BitCast<double>((uy & cDoubleToFloatMantissaLoss) == 0? uy : (uy | cDoubleToFloatMantissaLoss));
	double z = BitCast<double>((uz & cDoubleToFloatMantissaLoss) == 0? uz : (uz | cDoubleToFloatMantissaLoss));
	return DVec3(x, y, z);
#endif
}

Vec3 DVec3::ToVec3RoundDown() const
{
	DVec3 to_zero = PrepareRoundToZero();
	DVec3 to_inf = PrepareRoundToInf();
	return Vec3(DVec3::sSelect(to_zero, to_inf, DVec3::sLess(*this, DVec3::sZero())));
}

Vec3 DVec3::ToVec3RoundUp() const
{
	DVec3 to_zero = PrepareRoundToZero();
	DVec3 to_inf = PrepareRoundToInf();
	return Vec3(DVec3::sSelect(to_inf, to_zero, DVec3::sLess(*this, DVec3::sZero())));
}

JPH_NAMESPACE_END