// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT

#include <Jolt/Math/Vec4.h>
#include <Jolt/Math/UVec4.h>
#include <Jolt/Core/HashCombine.h>

JPH_SUPPRESS_WARNINGS_STD_BEGIN
#include <random>
JPH_SUPPRESS_WARNINGS_STD_END

// Create a std::hash for Vec3
JPH_MAKE_HASHABLE(JPH::Vec3, t.GetX(), t.GetY(), t.GetZ())
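
// Example (illustrative): the hash specialization above lets Vec3 be used directly as a key
// in unordered containers, since Vec3 also provides operator ==, e.g.
//
//	std::unordered_map<JPH::Vec3, int> counts;
//	counts[JPH::Vec3(1, 2, 3)]++;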

JPH_NAMESPACE_BEGIN
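
// Assert that the W component mirrors Z, the invariant this class maintains when floating point
// exceptions are enabled so that the otherwise unused W lane never trips an exception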
void Vec3::CheckW() const
{
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
	// Avoid asserts when both components are NaN
	JPH_ASSERT(reinterpret_cast<const uint32 *>(mF32)[2] == reinterpret_cast<const uint32 *>(mF32)[3]);
#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
}
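
// Replicate the Z component into W when floating point exceptions are enabled, so the unused
// lane never holds a value that could raise one; otherwise pass the register through untouched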
JPH_INLINE Vec3::Type Vec3::sFixW(Type inValue)
{
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
	#if defined(JPH_USE_SSE)
		return _mm_shuffle_ps(inValue, inValue, _MM_SHUFFLE(2, 2, 1, 0));
	#elif defined(JPH_USE_NEON)
		return JPH_NEON_SHUFFLE_F32x4(inValue, inValue, 0, 1, 2, 2);
	#else
		Type value;
		value.mData[0] = inValue.mData[0];
		value.mData[1] = inValue.mData[1];
		value.mData[2] = inValue.mData[2];
		value.mData[3] = inValue.mData[2];
		return value;
	#endif
#else
	return inValue;
#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
}

Vec3::Vec3(Vec4Arg inRHS) :
	mValue(sFixW(inRHS.mValue))
{
}

Vec3::Vec3(const Float3 &inV)
{
#if defined(JPH_USE_SSE)
	Type x = _mm_load_ss(&inV.x);
	Type y = _mm_load_ss(&inV.y);
	Type z = _mm_load_ss(&inV.z);
	Type xy = _mm_unpacklo_ps(x, y);
	mValue = _mm_shuffle_ps(xy, z, _MM_SHUFFLE(0, 0, 1, 0)); // Assure Z and W are the same
#elif defined(JPH_USE_NEON)
	float32x2_t xy = vld1_f32(&inV.x);
	float32x2_t zz = vdup_n_f32(inV.z); // Assure Z and W are the same
	mValue = vcombine_f32(xy, zz);
#else
	mF32[0] = inV[0];
	mF32[1] = inV[1];
	mF32[2] = inV[2];
	#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
		mF32[3] = inV[2];
	#endif
#endif
}

Vec3::Vec3(float inX, float inY, float inZ)
{
#if defined(JPH_USE_SSE)
	mValue = _mm_set_ps(inZ, inZ, inY, inX);
#elif defined(JPH_USE_NEON)
	float32x2_t xy = vcreate_f32(static_cast<uint64>(*reinterpret_cast<uint32 *>(&inX)) | (static_cast<uint64>(*reinterpret_cast<uint32 *>(&inY)) << 32));
	float32x2_t zz = vcreate_f32(static_cast<uint64>(*reinterpret_cast<uint32 *>(&inZ)) | (static_cast<uint64>(*reinterpret_cast<uint32 *>(&inZ)) << 32)); // Assure Z and W are the same
	mValue = vcombine_f32(xy, zz);
#else
	mF32[0] = inX;
	mF32[1] = inY;
	mF32[2] = inZ;
	#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
		mF32[3] = inZ;
	#endif
#endif
}

template<uint32 SwizzleX, uint32 SwizzleY, uint32 SwizzleZ>
Vec3 Vec3::Swizzle() const
{
	static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range");
	static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range");
	static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range");

#if defined(JPH_USE_SSE)
	return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(SwizzleZ, SwizzleZ, SwizzleY, SwizzleX)); // Assure Z and W are the same
#elif defined(JPH_USE_NEON)
	return JPH_NEON_SHUFFLE_F32x4(mValue, mValue, SwizzleX, SwizzleY, SwizzleZ, SwizzleZ);
#else
	return Vec3(mF32[SwizzleX], mF32[SwizzleY], mF32[SwizzleZ]);
#endif
}

Vec3 Vec3::sZero()
{
#if defined(JPH_USE_SSE)
	return _mm_setzero_ps();
#elif defined(JPH_USE_NEON)
	return vdupq_n_f32(0);
#else
	return Vec3(0, 0, 0);
#endif
}

Vec3 Vec3::sReplicate(float inV)
{
#if defined(JPH_USE_SSE)
	return _mm_set1_ps(inV);
#elif defined(JPH_USE_NEON)
	return vdupq_n_f32(inV);
#else
	return Vec3(inV, inV, inV);
#endif
}

Vec3 Vec3::sNaN()
{
	return sReplicate(numeric_limits<float>::quiet_NaN());
}
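
// "Unsafe" because the SSE and NEON paths load four floats from a 12 byte Float3 and so read
// 4 bytes past the end of the struct; the caller must guarantee that memory is readable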
Vec3 Vec3::sLoadFloat3Unsafe(const Float3 &inV)
{
#if defined(JPH_USE_SSE)
	Type v = _mm_loadu_ps(&inV.x);
#elif defined(JPH_USE_NEON)
	Type v = vld1q_f32(&inV.x);
#else
	Type v = { inV.x, inV.y, inV.z };
#endif
	return sFixW(v);
}

Vec3 Vec3::sMin(Vec3Arg inV1, Vec3Arg inV2)
{
#if defined(JPH_USE_SSE)
	return _mm_min_ps(inV1.mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
	return vminq_f32(inV1.mValue, inV2.mValue);
#else
	return Vec3(min(inV1.mF32[0], inV2.mF32[0]),
				min(inV1.mF32[1], inV2.mF32[1]),
				min(inV1.mF32[2], inV2.mF32[2]));
#endif
}

Vec3 Vec3::sMax(Vec3Arg inV1, Vec3Arg inV2)
{
#if defined(JPH_USE_SSE)
	return _mm_max_ps(inV1.mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
	return vmaxq_f32(inV1.mValue, inV2.mValue);
#else
	return Vec3(max(inV1.mF32[0], inV2.mF32[0]),
				max(inV1.mF32[1], inV2.mF32[1]),
				max(inV1.mF32[2], inV2.mF32[2]));
#endif
}

Vec3 Vec3::sClamp(Vec3Arg inV, Vec3Arg inMin, Vec3Arg inMax)
{
	return sMax(sMin(inV, inMax), inMin);
}
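
// In the scalar fallbacks of the comparisons below the Z result is replicated into the W lane of
// the returned mask, matching what the SIMD paths produce when W mirrors Z in both inputs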
UVec4 Vec3::sEquals(Vec3Arg inV1, Vec3Arg inV2)
{
#if defined(JPH_USE_SSE)
	return _mm_castps_si128(_mm_cmpeq_ps(inV1.mValue, inV2.mValue));
#elif defined(JPH_USE_NEON)
	return vceqq_f32(inV1.mValue, inV2.mValue);
#else
	uint32 z = inV1.mF32[2] == inV2.mF32[2]? 0xffffffffu : 0;
	return UVec4(inV1.mF32[0] == inV2.mF32[0]? 0xffffffffu : 0,
				 inV1.mF32[1] == inV2.mF32[1]? 0xffffffffu : 0,
				 z,
				 z);
#endif
}

UVec4 Vec3::sLess(Vec3Arg inV1, Vec3Arg inV2)
{
#if defined(JPH_USE_SSE)
	return _mm_castps_si128(_mm_cmplt_ps(inV1.mValue, inV2.mValue));
#elif defined(JPH_USE_NEON)
	return vcltq_f32(inV1.mValue, inV2.mValue);
#else
	uint32 z = inV1.mF32[2] < inV2.mF32[2]? 0xffffffffu : 0;
	return UVec4(inV1.mF32[0] < inV2.mF32[0]? 0xffffffffu : 0,
				 inV1.mF32[1] < inV2.mF32[1]? 0xffffffffu : 0,
				 z,
				 z);
#endif
}

UVec4 Vec3::sLessOrEqual(Vec3Arg inV1, Vec3Arg inV2)
{
#if defined(JPH_USE_SSE)
	return _mm_castps_si128(_mm_cmple_ps(inV1.mValue, inV2.mValue));
#elif defined(JPH_USE_NEON)
	return vcleq_f32(inV1.mValue, inV2.mValue);
#else
	uint32 z = inV1.mF32[2] <= inV2.mF32[2]? 0xffffffffu : 0;
	return UVec4(inV1.mF32[0] <= inV2.mF32[0]? 0xffffffffu : 0,
				 inV1.mF32[1] <= inV2.mF32[1]? 0xffffffffu : 0,
				 z,
				 z);
#endif
}

UVec4 Vec3::sGreater(Vec3Arg inV1, Vec3Arg inV2)
{
#if defined(JPH_USE_SSE)
	return _mm_castps_si128(_mm_cmpgt_ps(inV1.mValue, inV2.mValue));
#elif defined(JPH_USE_NEON)
	return vcgtq_f32(inV1.mValue, inV2.mValue);
#else
	uint32 z = inV1.mF32[2] > inV2.mF32[2]? 0xffffffffu : 0;
	return UVec4(inV1.mF32[0] > inV2.mF32[0]? 0xffffffffu : 0,
				 inV1.mF32[1] > inV2.mF32[1]? 0xffffffffu : 0,
				 z,
				 z);
#endif
}

UVec4 Vec3::sGreaterOrEqual(Vec3Arg inV1, Vec3Arg inV2)
{
#if defined(JPH_USE_SSE)
	return _mm_castps_si128(_mm_cmpge_ps(inV1.mValue, inV2.mValue));
#elif defined(JPH_USE_NEON)
	return vcgeq_f32(inV1.mValue, inV2.mValue);
#else
	uint32 z = inV1.mF32[2] >= inV2.mF32[2]? 0xffffffffu : 0;
	return UVec4(inV1.mF32[0] >= inV2.mF32[0]? 0xffffffffu : 0,
				 inV1.mF32[1] >= inV2.mF32[1]? 0xffffffffu : 0,
				 z,
				 z);
#endif
}
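
// Note: the non-FMA fallbacks round after the multiply and again after the add, so the result can
// differ from the fused path in the last bit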
Vec3 Vec3::sFusedMultiplyAdd(Vec3Arg inMul1, Vec3Arg inMul2, Vec3Arg inAdd)
{
#if defined(JPH_USE_SSE)
	#ifdef JPH_USE_FMADD
		return _mm_fmadd_ps(inMul1.mValue, inMul2.mValue, inAdd.mValue);
	#else
		return _mm_add_ps(_mm_mul_ps(inMul1.mValue, inMul2.mValue), inAdd.mValue);
	#endif
#elif defined(JPH_USE_NEON)
	return vmlaq_f32(inAdd.mValue, inMul1.mValue, inMul2.mValue);
#else
	return Vec3(inMul1.mF32[0] * inMul2.mF32[0] + inAdd.mF32[0],
				inMul1.mF32[1] * inMul2.mF32[1] + inAdd.mF32[1],
				inMul1.mF32[2] * inMul2.mF32[2] + inAdd.mF32[2]);
#endif
}
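
// The SIMD paths select on the highest bit of each lane of inControl (the NEON path smears that bit
// across the lane with an arithmetic shift by 31); the scalar fallback treats any non-zero value as true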
Vec3 Vec3::sSelect(Vec3Arg inV1, Vec3Arg inV2, UVec4Arg inControl)
{
#if defined(JPH_USE_SSE4_1)
	Type v = _mm_blendv_ps(inV1.mValue, inV2.mValue, _mm_castsi128_ps(inControl.mValue));
	return sFixW(v);
#elif defined(JPH_USE_NEON)
	Type v = vbslq_f32(vshrq_n_s32(inControl.mValue, 31), inV2.mValue, inV1.mValue);
	return sFixW(v);
#else
	Vec3 result;
	for (int i = 0; i < 3; i++)
		result.mF32[i] = inControl.mU32[i] ? inV2.mF32[i] : inV1.mF32[i];
	#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
		result.mF32[3] = result.mF32[2];
	#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
	return result;
#endif
}

Vec3 Vec3::sOr(Vec3Arg inV1, Vec3Arg inV2)
{
#if defined(JPH_USE_SSE)
	return _mm_or_ps(inV1.mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
	return vorrq_s32(inV1.mValue, inV2.mValue);
#else
	return Vec3(UVec4::sOr(inV1.ReinterpretAsInt(), inV2.ReinterpretAsInt()).ReinterpretAsFloat());
#endif
}

Vec3 Vec3::sXor(Vec3Arg inV1, Vec3Arg inV2)
{
#if defined(JPH_USE_SSE)
	return _mm_xor_ps(inV1.mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
	return veorq_s32(inV1.mValue, inV2.mValue);
#else
	return Vec3(UVec4::sXor(inV1.ReinterpretAsInt(), inV2.ReinterpretAsInt()).ReinterpretAsFloat());
#endif
}

Vec3 Vec3::sAnd(Vec3Arg inV1, Vec3Arg inV2)
{
#if defined(JPH_USE_SSE)
	return _mm_and_ps(inV1.mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
	return vandq_s32(inV1.mValue, inV2.mValue);
#else
	return Vec3(UVec4::sAnd(inV1.ReinterpretAsInt(), inV2.ReinterpretAsInt()).ReinterpretAsFloat());
#endif
}
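
// Spherical coordinates: inTheta is the polar angle measured from the +Z axis and inPhi the azimuth
// in the XY plane, giving (sin(theta) cos(phi), sin(theta) sin(phi), cos(theta))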
Vec3 Vec3::sUnitSpherical(float inTheta, float inPhi)
{
	Vec4 s, c;
	Vec4(inTheta, inPhi, 0, 0).SinCos(s, c);
	return Vec3(s.GetX() * c.GetY(), s.GetX() * s.GetY(), c.GetX());
}
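
// Works with any standard random engine, e.g. (illustrative):
//
//	std::mt19937 rng;
//	Vec3 v = Vec3::sRandom(rng);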
template <class Random>
Vec3 Vec3::sRandom(Random &inRandom)
{
	std::uniform_real_distribution<float> zero_to_one(0.0f, 1.0f);
	float theta = JPH_PI * zero_to_one(inRandom);
	float phi = 2.0f * JPH_PI * zero_to_one(inRandom);
	return sUnitSpherical(theta, phi);
}

bool Vec3::operator == (Vec3Arg inV2) const
{
	return sEquals(*this, inV2).TestAllXYZTrue();
}

bool Vec3::IsClose(Vec3Arg inV2, float inMaxDistSq) const
{
	return (inV2 - *this).LengthSq() <= inMaxDistSq;
}

bool Vec3::IsNearZero(float inMaxDistSq) const
{
	return LengthSq() <= inMaxDistSq;
}

Vec3 Vec3::operator * (Vec3Arg inV2) const
{
#if defined(JPH_USE_SSE)
	return _mm_mul_ps(mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
	return vmulq_f32(mValue, inV2.mValue);
#else
	return Vec3(mF32[0] * inV2.mF32[0], mF32[1] * inV2.mF32[1], mF32[2] * inV2.mF32[2]);
#endif
}

Vec3 Vec3::operator * (float inV2) const
{
#if defined(JPH_USE_SSE)
	return _mm_mul_ps(mValue, _mm_set1_ps(inV2));
#elif defined(JPH_USE_NEON)
	return vmulq_n_f32(mValue, inV2);
#else
	return Vec3(mF32[0] * inV2, mF32[1] * inV2, mF32[2] * inV2);
#endif
}

Vec3 operator * (float inV1, Vec3Arg inV2)
{
#if defined(JPH_USE_SSE)
	return _mm_mul_ps(_mm_set1_ps(inV1), inV2.mValue);
#elif defined(JPH_USE_NEON)
	return vmulq_n_f32(inV2.mValue, inV1);
#else
	return Vec3(inV1 * inV2.mF32[0], inV1 * inV2.mF32[1], inV1 * inV2.mF32[2]);
#endif
}

Vec3 Vec3::operator / (float inV2) const
{
#if defined(JPH_USE_SSE)
	return _mm_div_ps(mValue, _mm_set1_ps(inV2));
#elif defined(JPH_USE_NEON)
	return vdivq_f32(mValue, vdupq_n_f32(inV2));
#else
	return Vec3(mF32[0] / inV2, mF32[1] / inV2, mF32[2] / inV2);
#endif
}

Vec3 &Vec3::operator *= (float inV2)
{
#if defined(JPH_USE_SSE)
	mValue = _mm_mul_ps(mValue, _mm_set1_ps(inV2));
#elif defined(JPH_USE_NEON)
	mValue = vmulq_n_f32(mValue, inV2);
#else
	for (int i = 0; i < 3; ++i)
		mF32[i] *= inV2;
	#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
		mF32[3] = mF32[2];
	#endif
#endif
	return *this;
}

Vec3 &Vec3::operator *= (Vec3Arg inV2)
{
#if defined(JPH_USE_SSE)
	mValue = _mm_mul_ps(mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
	mValue = vmulq_f32(mValue, inV2.mValue);
#else
	for (int i = 0; i < 3; ++i)
		mF32[i] *= inV2.mF32[i];
	#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
		mF32[3] = mF32[2];
	#endif
#endif
	return *this;
}

Vec3 &Vec3::operator /= (float inV2)
{
#if defined(JPH_USE_SSE)
	mValue = _mm_div_ps(mValue, _mm_set1_ps(inV2));
#elif defined(JPH_USE_NEON)
	mValue = vdivq_f32(mValue, vdupq_n_f32(inV2));
#else
	for (int i = 0; i < 3; ++i)
		mF32[i] /= inV2;
	#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
		mF32[3] = mF32[2];
	#endif
#endif
	return *this;
}

Vec3 Vec3::operator + (Vec3Arg inV2) const
{
#if defined(JPH_USE_SSE)
	return _mm_add_ps(mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
	return vaddq_f32(mValue, inV2.mValue);
#else
	return Vec3(mF32[0] + inV2.mF32[0], mF32[1] + inV2.mF32[1], mF32[2] + inV2.mF32[2]);
#endif
}

Vec3 &Vec3::operator += (Vec3Arg inV2)
{
#if defined(JPH_USE_SSE)
	mValue = _mm_add_ps(mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
	mValue = vaddq_f32(mValue, inV2.mValue);
#else
	for (int i = 0; i < 3; ++i)
		mF32[i] += inV2.mF32[i];
	#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
		mF32[3] = mF32[2];
	#endif
#endif
	return *this;
}

Vec3 Vec3::operator - () const
{
#if defined(JPH_USE_SSE)
	return _mm_sub_ps(_mm_setzero_ps(), mValue);
#elif defined(JPH_USE_NEON)
	return vnegq_f32(mValue);
#else
	return Vec3(-mF32[0], -mF32[1], -mF32[2]);
#endif
}

Vec3 Vec3::operator - (Vec3Arg inV2) const
{
#if defined(JPH_USE_SSE)
	return _mm_sub_ps(mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
	return vsubq_f32(mValue, inV2.mValue);
#else
	return Vec3(mF32[0] - inV2.mF32[0], mF32[1] - inV2.mF32[1], mF32[2] - inV2.mF32[2]);
#endif
}

Vec3 &Vec3::operator -= (Vec3Arg inV2)
{
#if defined(JPH_USE_SSE)
	mValue = _mm_sub_ps(mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
	mValue = vsubq_f32(mValue, inV2.mValue);
#else
	for (int i = 0; i < 3; ++i)
		mF32[i] -= inV2.mF32[i];
	#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
		mF32[3] = mF32[2];
	#endif
#endif
	return *this;
}

Vec3 Vec3::operator / (Vec3Arg inV2) const
{
	inV2.CheckW(); // Check W equals Z to avoid div by zero
#if defined(JPH_USE_SSE)
	return _mm_div_ps(mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
	return vdivq_f32(mValue, inV2.mValue);
#else
	return Vec3(mF32[0] / inV2.mF32[0], mF32[1] / inV2.mF32[1], mF32[2] / inV2.mF32[2]);
#endif
}

Vec4 Vec3::SplatX() const
{
#if defined(JPH_USE_SSE)
	return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(0, 0, 0, 0));
#elif defined(JPH_USE_NEON)
	return vdupq_laneq_f32(mValue, 0);
#else
	return Vec4(mF32[0], mF32[0], mF32[0], mF32[0]);
#endif
}

Vec4 Vec3::SplatY() const
{
#if defined(JPH_USE_SSE)
	return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(1, 1, 1, 1));
#elif defined(JPH_USE_NEON)
	return vdupq_laneq_f32(mValue, 1);
#else
	return Vec4(mF32[1], mF32[1], mF32[1], mF32[1]);
#endif
}

Vec4 Vec3::SplatZ() const
{
#if defined(JPH_USE_SSE)
	return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(2, 2, 2, 2));
#elif defined(JPH_USE_NEON)
	return vdupq_laneq_f32(mValue, 2);
#else
	return Vec4(mF32[2], mF32[2], mF32[2], mF32[2]);
#endif
}

int Vec3::GetLowestComponentIndex() const
{
	return GetX() < GetY() ? (GetZ() < GetX() ? 2 : 0) : (GetZ() < GetY() ? 2 : 1);
}

int Vec3::GetHighestComponentIndex() const
{
	return GetX() > GetY() ? (GetZ() > GetX() ? 2 : 0) : (GetZ() > GetY() ? 2 : 1);
}
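
// The AVX-512 path uses VRANGEPS with immediate 0b1000 (select minimum, force the sign bit to zero);
// with identical operands this reduces to clearing the sign bit, i.e. an abs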
Vec3 Vec3::Abs() const
{
#if defined(JPH_USE_AVX512)
	return _mm_range_ps(mValue, mValue, 0b1000);
#elif defined(JPH_USE_SSE)
	return _mm_max_ps(_mm_sub_ps(_mm_setzero_ps(), mValue), mValue);
#elif defined(JPH_USE_NEON)
	return vabsq_f32(mValue);
#else
	return Vec3(abs(mF32[0]), abs(mF32[1]), abs(mF32[2]));
#endif
}

Vec3 Vec3::Reciprocal() const
{
	return sReplicate(1.0f) / mValue;
}
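
// Shuffle based cross product: with a = this and b = inV2, a x b = (a * b.yzx - a.yzx * b).yzx,
// costing three shuffles, two multiplies and a subtract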
Vec3 Vec3::Cross(Vec3Arg inV2) const
{
#if defined(JPH_USE_SSE)
	Type t1 = _mm_shuffle_ps(inV2.mValue, inV2.mValue, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
	t1 = _mm_mul_ps(t1, mValue);
	Type t2 = _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
	t2 = _mm_mul_ps(t2, inV2.mValue);
	Type t3 = _mm_sub_ps(t1, t2);
	return _mm_shuffle_ps(t3, t3, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
#elif defined(JPH_USE_NEON)
	Type t1 = JPH_NEON_SHUFFLE_F32x4(inV2.mValue, inV2.mValue, 1, 2, 0, 0); // Assure Z and W are the same
	t1 = vmulq_f32(t1, mValue);
	Type t2 = JPH_NEON_SHUFFLE_F32x4(mValue, mValue, 1, 2, 0, 0); // Assure Z and W are the same
	t2 = vmulq_f32(t2, inV2.mValue);
	Type t3 = vsubq_f32(t1, t2);
	return JPH_NEON_SHUFFLE_F32x4(t3, t3, 1, 2, 0, 0); // Assure Z and W are the same
#else
	return Vec3(mF32[1] * inV2.mF32[2] - mF32[2] * inV2.mF32[1],
				mF32[2] * inV2.mF32[0] - mF32[0] * inV2.mF32[2],
				mF32[0] * inV2.mF32[1] - mF32[1] * inV2.mF32[0]);
#endif
}
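
// In the SSE4.1 dot products below, the DPPS mask 0x7f multiplies and sums only the X, Y and Z
// lanes (W is excluded) and broadcasts the result to all four output lanes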
Vec3 Vec3::DotV(Vec3Arg inV2) const
{
#if defined(JPH_USE_SSE4_1)
	return _mm_dp_ps(mValue, inV2.mValue, 0x7f);
#elif defined(JPH_USE_NEON)
	float32x4_t mul = vmulq_f32(mValue, inV2.mValue);
	mul = vsetq_lane_f32(0, mul, 3);
	return vdupq_n_f32(vaddvq_f32(mul));
#else
	float dot = 0.0f;
	for (int i = 0; i < 3; i++)
		dot += mF32[i] * inV2.mF32[i];
	return Vec3::sReplicate(dot);
#endif
}

Vec4 Vec3::DotV4(Vec3Arg inV2) const
{
#if defined(JPH_USE_SSE4_1)
	return _mm_dp_ps(mValue, inV2.mValue, 0x7f);
#elif defined(JPH_USE_NEON)
	float32x4_t mul = vmulq_f32(mValue, inV2.mValue);
	mul = vsetq_lane_f32(0, mul, 3);
	return vdupq_n_f32(vaddvq_f32(mul));
#else
	float dot = 0.0f;
	for (int i = 0; i < 3; i++)
		dot += mF32[i] * inV2.mF32[i];
	return Vec4::sReplicate(dot);
#endif
}

float Vec3::Dot(Vec3Arg inV2) const
{
#if defined(JPH_USE_SSE4_1)
	return _mm_cvtss_f32(_mm_dp_ps(mValue, inV2.mValue, 0x7f));
#elif defined(JPH_USE_NEON)
	float32x4_t mul = vmulq_f32(mValue, inV2.mValue);
	mul = vsetq_lane_f32(0, mul, 3);
	return vaddvq_f32(mul);
#else
	float dot = 0.0f;
	for (int i = 0; i < 3; i++)
		dot += mF32[i] * inV2.mF32[i];
	return dot;
#endif
}

float Vec3::LengthSq() const
{
#if defined(JPH_USE_SSE4_1)
	return _mm_cvtss_f32(_mm_dp_ps(mValue, mValue, 0x7f));
#elif defined(JPH_USE_NEON)
	float32x4_t mul = vmulq_f32(mValue, mValue);
	mul = vsetq_lane_f32(0, mul, 3);
	return vaddvq_f32(mul);
#else
	float len_sq = 0.0f;
	for (int i = 0; i < 3; i++)
		len_sq += mF32[i] * mF32[i];
	return len_sq;
#endif
}

float Vec3::Length() const
{
#if defined(JPH_USE_SSE4_1)
	return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(mValue, mValue, 0x7f)));
#elif defined(JPH_USE_NEON)
	float32x4_t mul = vmulq_f32(mValue, mValue);
	mul = vsetq_lane_f32(0, mul, 3);
	float32x2_t sum = vdup_n_f32(vaddvq_f32(mul));
	return vget_lane_f32(vsqrt_f32(sum), 0);
#else
	return sqrt(LengthSq());
#endif
}

Vec3 Vec3::Sqrt() const
{
#if defined(JPH_USE_SSE)
	return _mm_sqrt_ps(mValue);
#elif defined(JPH_USE_NEON)
	return vsqrtq_f32(mValue);
#else
	return Vec3(sqrt(mF32[0]), sqrt(mF32[1]), sqrt(mF32[2]));
#endif
}

Vec3 Vec3::Normalized() const
{
#if defined(JPH_USE_SSE4_1)
	return _mm_div_ps(mValue, _mm_sqrt_ps(_mm_dp_ps(mValue, mValue, 0x7f)));
#elif defined(JPH_USE_NEON)
	float32x4_t mul = vmulq_f32(mValue, mValue);
	mul = vsetq_lane_f32(0, mul, 3);
	float32x4_t sum = vdupq_n_f32(vaddvq_f32(mul));
	return vdivq_f32(mValue, vsqrtq_f32(sum));
#else
	return *this / Length();
#endif
}
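
// When floating point exceptions are enabled the SSE4.1 path must branch on the zero test:
// the branchless blend would still evaluate the division and raise a divide by zero for a zero vector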
Vec3 Vec3::NormalizedOr(Vec3Arg inZeroValue) const
{
#if defined(JPH_USE_SSE4_1)
	Type len_sq = _mm_dp_ps(mValue, mValue, 0x7f);
	Type is_zero = _mm_cmpeq_ps(len_sq, _mm_setzero_ps());
	#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
		if (_mm_movemask_ps(is_zero) == 0xf)
			return inZeroValue;
		else
			return _mm_div_ps(mValue, _mm_sqrt_ps(len_sq));
	#else
		return _mm_blendv_ps(_mm_div_ps(mValue, _mm_sqrt_ps(len_sq)), inZeroValue.mValue, is_zero);
	#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
#elif defined(JPH_USE_NEON)
	float32x4_t mul = vmulq_f32(mValue, mValue);
	mul = vsetq_lane_f32(0, mul, 3);
	float32x4_t sum = vdupq_n_f32(vaddvq_f32(mul));
	float32x4_t len = vsqrtq_f32(sum);
	float32x4_t is_zero = vceqq_f32(len, vdupq_n_f32(0));
	return vbslq_f32(is_zero, inZeroValue.mValue, vdivq_f32(mValue, len));
#else
	float len_sq = LengthSq();
	if (len_sq == 0.0f)
		return inZeroValue;
	else
		return *this / sqrt(len_sq);
#endif
}

bool Vec3::IsNormalized(float inTolerance) const
{
	return abs(LengthSq() - 1.0f) <= inTolerance;
}

bool Vec3::IsNaN() const
{
#if defined(JPH_USE_AVX512)
	return (_mm_fpclass_ps_mask(mValue, 0b10000001) & 0x7) != 0;
#elif defined(JPH_USE_SSE)
	return (_mm_movemask_ps(_mm_cmpunord_ps(mValue, mValue)) & 0x7) != 0;
#elif defined(JPH_USE_NEON)
	uint32x4_t mask = JPH_NEON_UINT32x4(1, 1, 1, 0);
	uint32x4_t is_equal = vceqq_f32(mValue, mValue); // If a number is not equal to itself it's a NaN
	return vaddvq_u32(vandq_u32(is_equal, mask)) != 3;
#else
	return isnan(mF32[0]) || isnan(mF32[1]) || isnan(mF32[2]);
#endif
}

void Vec3::StoreFloat3(Float3 *outV) const
{
#if defined(JPH_USE_SSE)
	_mm_store_ss(&outV->x, mValue);
	Vec3 t = Swizzle<SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_UNUSED>();
	_mm_store_ss(&outV->y, t.mValue);
	t = t.Swizzle<SWIZZLE_Y, SWIZZLE_UNUSED, SWIZZLE_UNUSED>();
	_mm_store_ss(&outV->z, t.mValue);
#elif defined(JPH_USE_NEON)
	float32x2_t xy = vget_low_f32(mValue);
	vst1_f32(&outV->x, xy);
	vst1q_lane_f32(&outV->z, mValue, 2);
#else
	outV->x = mF32[0];
	outV->y = mF32[1];
	outV->z = mF32[2];
#endif
}
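
// Note: the SSE path truncates toward zero into signed integers, while vcvtq_u32_f32 converts to
// unsigned and saturates negative input to zero, so the paths can differ for negative components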
UVec4 Vec3::ToInt() const
{
#if defined(JPH_USE_SSE)
	return _mm_cvttps_epi32(mValue);
#elif defined(JPH_USE_NEON)
	return vcvtq_u32_f32(mValue);
#else
	return UVec4(uint32(mF32[0]), uint32(mF32[1]), uint32(mF32[2]), uint32(mF32[3]));
#endif
}

UVec4 Vec3::ReinterpretAsInt() const
{
#if defined(JPH_USE_SSE)
	return UVec4(_mm_castps_si128(mValue));
#elif defined(JPH_USE_NEON)
	return vreinterpretq_u32_f32(mValue);
#else
	return *reinterpret_cast<const UVec4 *>(this);
#endif
}

float Vec3::ReduceMin() const
{
	Vec3 v = sMin(mValue, Swizzle<SWIZZLE_Y, SWIZZLE_UNUSED, SWIZZLE_Z>());
	v = sMin(v, v.Swizzle<SWIZZLE_Z, SWIZZLE_UNUSED, SWIZZLE_UNUSED>());
	return v.GetX();
}

float Vec3::ReduceMax() const
{
	Vec3 v = sMax(mValue, Swizzle<SWIZZLE_Y, SWIZZLE_UNUSED, SWIZZLE_Z>());
	v = sMax(v, v.Swizzle<SWIZZLE_Z, SWIZZLE_UNUSED, SWIZZLE_UNUSED>());
	return v.GetX();
}
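
// Zero out one component, then swap and negate the other two: both branches return a vector whose
// dot product with this vector is exactly zero, and branching on the larger of |X| and |Y| keeps
// the divisor away from zero for any non-zero input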
Vec3 Vec3::GetNormalizedPerpendicular() const
{
	if (abs(mF32[0]) > abs(mF32[1]))
	{
		float len = sqrt(mF32[0] * mF32[0] + mF32[2] * mF32[2]);
		return Vec3(mF32[2], 0.0f, -mF32[0]) / len;
	}
	else
	{
		float len = sqrt(mF32[1] * mF32[1] + mF32[2] * mF32[2]);
		return Vec3(0.0f, mF32[2], -mF32[1]) / len;
	}
}
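
// Returns +/-1 per component; +0 maps to 1 and -0 to -1. The SSE fallback ANDs each component with
// -1.0f to keep its sign bit and ORs in 1.0f, stamping the component's sign onto 1.0f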
Vec3 Vec3::GetSign() const
{
#if defined(JPH_USE_AVX512)
	return _mm_fixupimm_ps(mValue, mValue, _mm_set1_epi32(0xA9A90A00), 0);
#elif defined(JPH_USE_SSE)
	Type minus_one = _mm_set1_ps(-1.0f);
	Type one = _mm_set1_ps(1.0f);
	return _mm_or_ps(_mm_and_ps(mValue, minus_one), one);
#elif defined(JPH_USE_NEON)
	Type minus_one = vdupq_n_f32(-1.0f);
	Type one = vdupq_n_f32(1.0f);
	return vorrq_s32(vandq_s32(mValue, minus_one), one);
#else
	return Vec3(signbit(mF32[0])? -1.0f : 1.0f,
				signbit(mF32[1])? -1.0f : 1.0f,
				signbit(mF32[2])? -1.0f : 1.0f);
#endif
}

JPH_NAMESPACE_END