Vec3.inl

// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
// SPDX-License-Identifier: MIT

#include <Jolt/Math/Vec4.h>
#include <Jolt/Math/UVec4.h>
#include <Jolt/Core/HashCombine.h>

JPH_SUPPRESS_WARNINGS_STD_BEGIN
#include <random>
JPH_SUPPRESS_WARNINGS_STD_END

// Create a std::hash for Vec3
JPH_MAKE_HASHABLE(JPH::Vec3, t.GetX(), t.GetY(), t.GetZ())

JPH_NAMESPACE_BEGIN

void Vec3::CheckW() const
{
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
    // Avoid asserts when both components are NaN
    JPH_ASSERT(reinterpret_cast<const uint32 *>(mF32)[2] == reinterpret_cast<const uint32 *>(mF32)[3]);
#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
}
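
// Replicate the Z component into W. With floating point exceptions enabled, a garbage W
// lane could otherwise raise spurious exceptions in full-width operations (e.g. division);
// CheckW() above asserts exactly this invariant.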
JPH_INLINE Vec3::Type Vec3::sFixW(Type inValue)
{
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
    #if defined(JPH_USE_SSE)
        return _mm_shuffle_ps(inValue, inValue, _MM_SHUFFLE(2, 2, 1, 0));
    #elif defined(JPH_USE_NEON)
        return JPH_NEON_SHUFFLE_F32x4(inValue, inValue, 0, 1, 2, 2);
    #else
        Type value;
        value.mData[0] = inValue.mData[0];
        value.mData[1] = inValue.mData[1];
        value.mData[2] = inValue.mData[2];
        value.mData[3] = inValue.mData[2];
        return value;
    #endif
#else
    return inValue;
#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
}

Vec3::Vec3(Vec4Arg inRHS) :
    mValue(sFixW(inRHS.mValue))
{
}

Vec3::Vec3(const Float3 &inV)
{
#if defined(JPH_USE_SSE)
    Type x = _mm_load_ss(&inV.x);
    Type y = _mm_load_ss(&inV.y);
    Type z = _mm_load_ss(&inV.z);
    Type xy = _mm_unpacklo_ps(x, y);
    mValue = _mm_shuffle_ps(xy, z, _MM_SHUFFLE(0, 0, 1, 0)); // Assure Z and W are the same
#elif defined(JPH_USE_NEON)
    float32x2_t xy = vld1_f32(&inV.x);
    float32x2_t zz = vdup_n_f32(inV.z); // Assure Z and W are the same
    mValue = vcombine_f32(xy, zz);
#else
    mF32[0] = inV[0];
    mF32[1] = inV[1];
    mF32[2] = inV[2];
    #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
        mF32[3] = inV[2];
    #endif
#endif
}

Vec3::Vec3(float inX, float inY, float inZ)
{
#if defined(JPH_USE_SSE)
    mValue = _mm_set_ps(inZ, inZ, inY, inX);
#elif defined(JPH_USE_NEON)
    // vcreate_f32 returns float32x2_t, so the halves must be declared as such
    float32x2_t xy = vcreate_f32(static_cast<uint64>(*reinterpret_cast<uint32 *>(&inX)) | (static_cast<uint64>(*reinterpret_cast<uint32 *>(&inY)) << 32));
    float32x2_t zz = vcreate_f32(static_cast<uint64>(*reinterpret_cast<uint32 *>(&inZ)) | (static_cast<uint64>(*reinterpret_cast<uint32 *>(&inZ)) << 32));
    mValue = vcombine_f32(xy, zz);
#else
    mF32[0] = inX;
    mF32[1] = inY;
    mF32[2] = inZ;
    #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
        mF32[3] = inZ;
    #endif
#endif
}

template<uint32 SwizzleX, uint32 SwizzleY, uint32 SwizzleZ>
Vec3 Vec3::Swizzle() const
{
    static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range");
    static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range");
    static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range");

#if defined(JPH_USE_SSE)
    return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(SwizzleZ, SwizzleZ, SwizzleY, SwizzleX)); // Assure Z and W are the same
#elif defined(JPH_USE_NEON)
    return JPH_NEON_SHUFFLE_F32x4(mValue, mValue, SwizzleX, SwizzleY, SwizzleZ, SwizzleZ);
#else
    return Vec3(mF32[SwizzleX], mF32[SwizzleY], mF32[SwizzleZ]);
#endif
}

Vec3 Vec3::sZero()
{
#if defined(JPH_USE_SSE)
    return _mm_setzero_ps();
#elif defined(JPH_USE_NEON)
    return vdupq_n_f32(0);
#else
    return Vec3(0, 0, 0);
#endif
}

Vec3 Vec3::sReplicate(float inV)
{
#if defined(JPH_USE_SSE)
    return _mm_set1_ps(inV);
#elif defined(JPH_USE_NEON)
    return vdupq_n_f32(inV);
#else
    return Vec3(inV, inV, inV);
#endif
}

Vec3 Vec3::sNaN()
{
    return sReplicate(numeric_limits<float>::quiet_NaN());
}
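
// 'Unsafe' because the SSE/NEON paths load 4 floats, i.e. read 32 bits past the end of
// the Float3; the caller must guarantee that this extra memory is readable.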
Vec3 Vec3::sLoadFloat3Unsafe(const Float3 &inV)
{
#if defined(JPH_USE_SSE)
    Type v = _mm_loadu_ps(&inV.x);
#elif defined(JPH_USE_NEON)
    Type v = vld1q_f32(&inV.x);
#else
    Type v = { inV.x, inV.y, inV.z };
#endif
    return sFixW(v);
}

Vec3 Vec3::sMin(Vec3Arg inV1, Vec3Arg inV2)
{
#if defined(JPH_USE_SSE)
    return _mm_min_ps(inV1.mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
    return vminq_f32(inV1.mValue, inV2.mValue);
#else
    return Vec3(min(inV1.mF32[0], inV2.mF32[0]),
                min(inV1.mF32[1], inV2.mF32[1]),
                min(inV1.mF32[2], inV2.mF32[2]));
#endif
}

Vec3 Vec3::sMax(Vec3Arg inV1, Vec3Arg inV2)
{
#if defined(JPH_USE_SSE)
    return _mm_max_ps(inV1.mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
    return vmaxq_f32(inV1.mValue, inV2.mValue);
#else
    return Vec3(max(inV1.mF32[0], inV2.mF32[0]),
                max(inV1.mF32[1], inV2.mF32[1]),
                max(inV1.mF32[2], inV2.mF32[2]));
#endif
}

Vec3 Vec3::sClamp(Vec3Arg inV, Vec3Arg inMin, Vec3Arg inMax)
{
    return sMax(sMin(inV, inMax), inMin);
}

UVec4 Vec3::sEquals(Vec3Arg inV1, Vec3Arg inV2)
{
#if defined(JPH_USE_SSE)
    return _mm_castps_si128(_mm_cmpeq_ps(inV1.mValue, inV2.mValue));
#elif defined(JPH_USE_NEON)
    return vceqq_f32(inV1.mValue, inV2.mValue);
#else
    uint32 z = inV1.mF32[2] == inV2.mF32[2]? 0xffffffffu : 0;
    return UVec4(inV1.mF32[0] == inV2.mF32[0]? 0xffffffffu : 0,
                 inV1.mF32[1] == inV2.mF32[1]? 0xffffffffu : 0,
                 z,
                 z);
#endif
}

UVec4 Vec3::sLess(Vec3Arg inV1, Vec3Arg inV2)
{
#if defined(JPH_USE_SSE)
    return _mm_castps_si128(_mm_cmplt_ps(inV1.mValue, inV2.mValue));
#elif defined(JPH_USE_NEON)
    return vcltq_f32(inV1.mValue, inV2.mValue);
#else
    uint32 z = inV1.mF32[2] < inV2.mF32[2]? 0xffffffffu : 0;
    return UVec4(inV1.mF32[0] < inV2.mF32[0]? 0xffffffffu : 0,
                 inV1.mF32[1] < inV2.mF32[1]? 0xffffffffu : 0,
                 z,
                 z);
#endif
}

UVec4 Vec3::sLessOrEqual(Vec3Arg inV1, Vec3Arg inV2)
{
#if defined(JPH_USE_SSE)
    return _mm_castps_si128(_mm_cmple_ps(inV1.mValue, inV2.mValue));
#elif defined(JPH_USE_NEON)
    return vcleq_f32(inV1.mValue, inV2.mValue);
#else
    uint32 z = inV1.mF32[2] <= inV2.mF32[2]? 0xffffffffu : 0;
    return UVec4(inV1.mF32[0] <= inV2.mF32[0]? 0xffffffffu : 0,
                 inV1.mF32[1] <= inV2.mF32[1]? 0xffffffffu : 0,
                 z,
                 z);
#endif
}

UVec4 Vec3::sGreater(Vec3Arg inV1, Vec3Arg inV2)
{
#if defined(JPH_USE_SSE)
    return _mm_castps_si128(_mm_cmpgt_ps(inV1.mValue, inV2.mValue));
#elif defined(JPH_USE_NEON)
    return vcgtq_f32(inV1.mValue, inV2.mValue);
#else
    uint32 z = inV1.mF32[2] > inV2.mF32[2]? 0xffffffffu : 0;
    return UVec4(inV1.mF32[0] > inV2.mF32[0]? 0xffffffffu : 0,
                 inV1.mF32[1] > inV2.mF32[1]? 0xffffffffu : 0,
                 z,
                 z);
#endif
}

UVec4 Vec3::sGreaterOrEqual(Vec3Arg inV1, Vec3Arg inV2)
{
#if defined(JPH_USE_SSE)
    return _mm_castps_si128(_mm_cmpge_ps(inV1.mValue, inV2.mValue));
#elif defined(JPH_USE_NEON)
    return vcgeq_f32(inV1.mValue, inV2.mValue);
#else
    uint32 z = inV1.mF32[2] >= inV2.mF32[2]? 0xffffffffu : 0;
    return UVec4(inV1.mF32[0] >= inV2.mF32[0]? 0xffffffffu : 0,
                 inV1.mF32[1] >= inV2.mF32[1]? 0xffffffffu : 0,
                 z,
                 z);
#endif
}
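
// Calculates inMul1 * inMul2 + inAdd. Note that only the JPH_USE_FMADD path is guaranteed
// to use a fused (single rounding) instruction; vmlaq_f32 and the scalar fallback may
// compile to a separate multiply and add.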
Vec3 Vec3::sFusedMultiplyAdd(Vec3Arg inMul1, Vec3Arg inMul2, Vec3Arg inAdd)
{
#if defined(JPH_USE_SSE)
    #ifdef JPH_USE_FMADD
        return _mm_fmadd_ps(inMul1.mValue, inMul2.mValue, inAdd.mValue);
    #else
        return _mm_add_ps(_mm_mul_ps(inMul1.mValue, inMul2.mValue), inAdd.mValue);
    #endif
#elif defined(JPH_USE_NEON)
    return vmlaq_f32(inAdd.mValue, inMul1.mValue, inMul2.mValue);
#else
    return Vec3(inMul1.mF32[0] * inMul2.mF32[0] + inAdd.mF32[0],
                inMul1.mF32[1] * inMul2.mF32[1] + inAdd.mF32[1],
                inMul1.mF32[2] * inMul2.mF32[2] + inAdd.mF32[2]);
#endif
}
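
// Component-wise select: inControl behaves as a per-lane mask. Only the sign bit of each
// control lane is honored (_mm_blendv_ps keys off the top bit; the NEON path broadcasts
// it with an arithmetic shift right by 31), and the scalar path below mirrors that.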
Vec3 Vec3::sSelect(Vec3Arg inV1, Vec3Arg inV2, UVec4Arg inControl)
{
#if defined(JPH_USE_SSE4_1)
    Type v = _mm_blendv_ps(inV1.mValue, inV2.mValue, _mm_castsi128_ps(inControl.mValue));
    return sFixW(v);
#elif defined(JPH_USE_NEON)
    Type v = vbslq_f32(vreinterpretq_u32_s32(vshrq_n_s32(vreinterpretq_s32_u32(inControl.mValue), 31)), inV2.mValue, inV1.mValue);
    return sFixW(v);
#else
    Vec3 result;
    for (int i = 0; i < 3; i++)
        result.mF32[i] = (inControl.mU32[i] & 0x80000000u) ? inV2.mF32[i] : inV1.mF32[i];
    #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
        result.mF32[3] = result.mF32[2];
    #endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
    return result;
#endif
}

Vec3 Vec3::sOr(Vec3Arg inV1, Vec3Arg inV2)
{
#if defined(JPH_USE_SSE)
    return _mm_or_ps(inV1.mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
    return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(inV1.mValue), vreinterpretq_u32_f32(inV2.mValue)));
#else
    return Vec3(UVec4::sOr(inV1.ReinterpretAsInt(), inV2.ReinterpretAsInt()).ReinterpretAsFloat());
#endif
}

Vec3 Vec3::sXor(Vec3Arg inV1, Vec3Arg inV2)
{
#if defined(JPH_USE_SSE)
    return _mm_xor_ps(inV1.mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
    return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(inV1.mValue), vreinterpretq_u32_f32(inV2.mValue)));
#else
    return Vec3(UVec4::sXor(inV1.ReinterpretAsInt(), inV2.ReinterpretAsInt()).ReinterpretAsFloat());
#endif
}

Vec3 Vec3::sAnd(Vec3Arg inV1, Vec3Arg inV2)
{
#if defined(JPH_USE_SSE)
    return _mm_and_ps(inV1.mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
    return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(inV1.mValue), vreinterpretq_u32_f32(inV2.mValue)));
#else
    return Vec3(UVec4::sAnd(inV1.ReinterpretAsInt(), inV2.ReinterpretAsInt()).ReinterpretAsFloat());
#endif
}
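
// Standard spherical-to-Cartesian mapping, with inTheta the polar angle measured from the
// Z axis and inPhi the azimuthal angle:
// (sin(theta) * cos(phi), sin(theta) * sin(phi), cos(theta))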
Vec3 Vec3::sUnitSpherical(float inTheta, float inPhi)
{
    Vec4 s, c;
    Vec4(inTheta, inPhi, 0, 0).SinCos(s, c);
    return Vec3(s.GetX() * c.GetY(), s.GetX() * s.GetY(), c.GetX());
}

template <class Random>
Vec3 Vec3::sRandom(Random &inRandom)
{
    std::uniform_real_distribution<float> zero_to_one(0.0f, 1.0f);
    float theta = JPH_PI * zero_to_one(inRandom);
    float phi = 2.0f * JPH_PI * zero_to_one(inRandom);
    return sUnitSpherical(theta, phi);
}

bool Vec3::operator == (Vec3Arg inV2) const
{
    return sEquals(*this, inV2).TestAllXYZTrue();
}

bool Vec3::IsClose(Vec3Arg inV2, float inMaxDistSq) const
{
    return (inV2 - *this).LengthSq() <= inMaxDistSq;
}

bool Vec3::IsNearZero(float inMaxDistSq) const
{
    return LengthSq() <= inMaxDistSq;
}

Vec3 Vec3::operator * (Vec3Arg inV2) const
{
#if defined(JPH_USE_SSE)
    return _mm_mul_ps(mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
    return vmulq_f32(mValue, inV2.mValue);
#else
    return Vec3(mF32[0] * inV2.mF32[0], mF32[1] * inV2.mF32[1], mF32[2] * inV2.mF32[2]);
#endif
}

Vec3 Vec3::operator * (float inV2) const
{
#if defined(JPH_USE_SSE)
    return _mm_mul_ps(mValue, _mm_set1_ps(inV2));
#elif defined(JPH_USE_NEON)
    return vmulq_n_f32(mValue, inV2);
#else
    return Vec3(mF32[0] * inV2, mF32[1] * inV2, mF32[2] * inV2);
#endif
}

Vec3 operator * (float inV1, Vec3Arg inV2)
{
#if defined(JPH_USE_SSE)
    return _mm_mul_ps(_mm_set1_ps(inV1), inV2.mValue);
#elif defined(JPH_USE_NEON)
    return vmulq_n_f32(inV2.mValue, inV1);
#else
    return Vec3(inV1 * inV2.mF32[0], inV1 * inV2.mF32[1], inV1 * inV2.mF32[2]);
#endif
}

Vec3 Vec3::operator / (float inV2) const
{
#if defined(JPH_USE_SSE)
    return _mm_div_ps(mValue, _mm_set1_ps(inV2));
#elif defined(JPH_USE_NEON)
    return vdivq_f32(mValue, vdupq_n_f32(inV2));
#else
    return Vec3(mF32[0] / inV2, mF32[1] / inV2, mF32[2] / inV2);
#endif
}

Vec3 &Vec3::operator *= (float inV2)
{
#if defined(JPH_USE_SSE)
    mValue = _mm_mul_ps(mValue, _mm_set1_ps(inV2));
#elif defined(JPH_USE_NEON)
    mValue = vmulq_n_f32(mValue, inV2);
#else
    for (int i = 0; i < 3; ++i)
        mF32[i] *= inV2;
    #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
        mF32[3] = mF32[2];
    #endif
#endif
    return *this;
}

Vec3 &Vec3::operator *= (Vec3Arg inV2)
{
#if defined(JPH_USE_SSE)
    mValue = _mm_mul_ps(mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
    mValue = vmulq_f32(mValue, inV2.mValue);
#else
    for (int i = 0; i < 3; ++i)
        mF32[i] *= inV2.mF32[i];
    #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
        mF32[3] = mF32[2];
    #endif
#endif
    return *this;
}

Vec3 &Vec3::operator /= (float inV2)
{
#if defined(JPH_USE_SSE)
    mValue = _mm_div_ps(mValue, _mm_set1_ps(inV2));
#elif defined(JPH_USE_NEON)
    mValue = vdivq_f32(mValue, vdupq_n_f32(inV2));
#else
    for (int i = 0; i < 3; ++i)
        mF32[i] /= inV2;
    #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
        mF32[3] = mF32[2];
    #endif
#endif
    return *this;
}

Vec3 Vec3::operator + (Vec3Arg inV2) const
{
#if defined(JPH_USE_SSE)
    return _mm_add_ps(mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
    return vaddq_f32(mValue, inV2.mValue);
#else
    return Vec3(mF32[0] + inV2.mF32[0], mF32[1] + inV2.mF32[1], mF32[2] + inV2.mF32[2]);
#endif
}

Vec3 &Vec3::operator += (Vec3Arg inV2)
{
#if defined(JPH_USE_SSE)
    mValue = _mm_add_ps(mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
    mValue = vaddq_f32(mValue, inV2.mValue);
#else
    for (int i = 0; i < 3; ++i)
        mF32[i] += inV2.mF32[i];
    #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
        mF32[3] = mF32[2];
    #endif
#endif
    return *this;
}

Vec3 Vec3::operator - () const
{
#if defined(JPH_USE_SSE)
    return _mm_sub_ps(_mm_setzero_ps(), mValue);
#elif defined(JPH_USE_NEON)
    return vnegq_f32(mValue);
#else
    return Vec3(-mF32[0], -mF32[1], -mF32[2]);
#endif
}

Vec3 Vec3::operator - (Vec3Arg inV2) const
{
#if defined(JPH_USE_SSE)
    return _mm_sub_ps(mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
    return vsubq_f32(mValue, inV2.mValue);
#else
    return Vec3(mF32[0] - inV2.mF32[0], mF32[1] - inV2.mF32[1], mF32[2] - inV2.mF32[2]);
#endif
}

Vec3 &Vec3::operator -= (Vec3Arg inV2)
{
#if defined(JPH_USE_SSE)
    mValue = _mm_sub_ps(mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
    mValue = vsubq_f32(mValue, inV2.mValue);
#else
    for (int i = 0; i < 3; ++i)
        mF32[i] -= inV2.mF32[i];
    #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
        mF32[3] = mF32[2];
    #endif
#endif
    return *this;
}

Vec3 Vec3::operator / (Vec3Arg inV2) const
{
    inV2.CheckW(); // Check W equals Z to avoid div by zero
#if defined(JPH_USE_SSE)
    return _mm_div_ps(mValue, inV2.mValue);
#elif defined(JPH_USE_NEON)
    return vdivq_f32(mValue, inV2.mValue);
#else
    return Vec3(mF32[0] / inV2.mF32[0], mF32[1] / inV2.mF32[1], mF32[2] / inV2.mF32[2]);
#endif
}

Vec4 Vec3::SplatX() const
{
#if defined(JPH_USE_SSE)
    return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(0, 0, 0, 0));
#elif defined(JPH_USE_NEON)
    return vdupq_laneq_f32(mValue, 0);
#else
    return Vec4(mF32[0], mF32[0], mF32[0], mF32[0]);
#endif
}

Vec4 Vec3::SplatY() const
{
#if defined(JPH_USE_SSE)
    return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(1, 1, 1, 1));
#elif defined(JPH_USE_NEON)
    return vdupq_laneq_f32(mValue, 1);
#else
    return Vec4(mF32[1], mF32[1], mF32[1], mF32[1]);
#endif
}

Vec4 Vec3::SplatZ() const
{
#if defined(JPH_USE_SSE)
    return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(2, 2, 2, 2));
#elif defined(JPH_USE_NEON)
    return vdupq_laneq_f32(mValue, 2);
#else
    return Vec4(mF32[2], mF32[2], mF32[2], mF32[2]);
#endif
}

int Vec3::GetLowestComponentIndex() const
{
    return GetX() < GetY() ? (GetZ() < GetX() ? 2 : 0) : (GetZ() < GetY() ? 2 : 1);
}

int Vec3::GetHighestComponentIndex() const
{
    return GetX() > GetY() ? (GetZ() > GetX() ? 2 : 0) : (GetZ() > GetY() ? 2 : 1);
}

Vec3 Vec3::Abs() const
{
#if defined(JPH_USE_AVX512)
    return _mm_range_ps(mValue, mValue, 0b1000);
#elif defined(JPH_USE_SSE)
    return _mm_max_ps(_mm_sub_ps(_mm_setzero_ps(), mValue), mValue);
#elif defined(JPH_USE_NEON)
    return vabsq_f32(mValue);
#else
    return Vec3(abs(mF32[0]), abs(mF32[1]), abs(mF32[2]));
#endif
}

Vec3 Vec3::Reciprocal() const
{
    return sReplicate(1.0f) / mValue;
}
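
// Uses the identity cross(a, b) = (a * b.yzx - a.yzx * b).yzx, which needs three shuffles
// instead of the four of the textbook formulation.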
Vec3 Vec3::Cross(Vec3Arg inV2) const
{
#if defined(JPH_USE_SSE)
    Type t1 = _mm_shuffle_ps(inV2.mValue, inV2.mValue, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
    t1 = _mm_mul_ps(t1, mValue);
    Type t2 = _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
    t2 = _mm_mul_ps(t2, inV2.mValue);
    Type t3 = _mm_sub_ps(t1, t2);
    return _mm_shuffle_ps(t3, t3, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
#elif defined(JPH_USE_NEON)
    Type t1 = JPH_NEON_SHUFFLE_F32x4(inV2.mValue, inV2.mValue, 1, 2, 0, 0); // Assure Z and W are the same
    t1 = vmulq_f32(t1, mValue);
    Type t2 = JPH_NEON_SHUFFLE_F32x4(mValue, mValue, 1, 2, 0, 0); // Assure Z and W are the same
    t2 = vmulq_f32(t2, inV2.mValue);
    Type t3 = vsubq_f32(t1, t2);
    return JPH_NEON_SHUFFLE_F32x4(t3, t3, 1, 2, 0, 0); // Assure Z and W are the same
#else
    return Vec3(mF32[1] * inV2.mF32[2] - mF32[2] * inV2.mF32[1],
                mF32[2] * inV2.mF32[0] - mF32[0] * inV2.mF32[2],
                mF32[0] * inV2.mF32[1] - mF32[1] * inV2.mF32[0]);
#endif
}
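
// The SSE4.1 dot products below use immediate 0x7f: multiply only the X, Y and Z lanes,
// then broadcast the sum to all four output lanes. The NEON paths get the same effect by
// zeroing the W lane before the horizontal add.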
Vec3 Vec3::DotV(Vec3Arg inV2) const
{
#if defined(JPH_USE_SSE4_1)
    return _mm_dp_ps(mValue, inV2.mValue, 0x7f);
#elif defined(JPH_USE_NEON)
    float32x4_t mul = vmulq_f32(mValue, inV2.mValue);
    mul = vsetq_lane_f32(0, mul, 3);
    return vdupq_n_f32(vaddvq_f32(mul));
#else
    float dot = 0.0f;
    for (int i = 0; i < 3; i++)
        dot += mF32[i] * inV2.mF32[i];
    return Vec3::sReplicate(dot);
#endif
}

Vec4 Vec3::DotV4(Vec3Arg inV2) const
{
#if defined(JPH_USE_SSE4_1)
    return _mm_dp_ps(mValue, inV2.mValue, 0x7f);
#elif defined(JPH_USE_NEON)
    float32x4_t mul = vmulq_f32(mValue, inV2.mValue);
    mul = vsetq_lane_f32(0, mul, 3);
    return vdupq_n_f32(vaddvq_f32(mul));
#else
    float dot = 0.0f;
    for (int i = 0; i < 3; i++)
        dot += mF32[i] * inV2.mF32[i];
    return Vec4::sReplicate(dot);
#endif
}

float Vec3::Dot(Vec3Arg inV2) const
{
#if defined(JPH_USE_SSE4_1)
    return _mm_cvtss_f32(_mm_dp_ps(mValue, inV2.mValue, 0x7f));
#elif defined(JPH_USE_NEON)
    float32x4_t mul = vmulq_f32(mValue, inV2.mValue);
    mul = vsetq_lane_f32(0, mul, 3);
    return vaddvq_f32(mul);
#else
    float dot = 0.0f;
    for (int i = 0; i < 3; i++)
        dot += mF32[i] * inV2.mF32[i];
    return dot;
#endif
}

float Vec3::LengthSq() const
{
#if defined(JPH_USE_SSE4_1)
    return _mm_cvtss_f32(_mm_dp_ps(mValue, mValue, 0x7f));
#elif defined(JPH_USE_NEON)
    float32x4_t mul = vmulq_f32(mValue, mValue);
    mul = vsetq_lane_f32(0, mul, 3);
    return vaddvq_f32(mul);
#else
    float len_sq = 0.0f;
    for (int i = 0; i < 3; i++)
        len_sq += mF32[i] * mF32[i];
    return len_sq;
#endif
}

float Vec3::Length() const
{
#if defined(JPH_USE_SSE4_1)
    return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(mValue, mValue, 0x7f)));
#elif defined(JPH_USE_NEON)
    float32x4_t mul = vmulq_f32(mValue, mValue);
    mul = vsetq_lane_f32(0, mul, 3);
    float32x2_t sum = vdup_n_f32(vaddvq_f32(mul));
    return vget_lane_f32(vsqrt_f32(sum), 0);
#else
    return sqrt(LengthSq());
#endif
}

Vec3 Vec3::Sqrt() const
{
#if defined(JPH_USE_SSE)
    return _mm_sqrt_ps(mValue);
#elif defined(JPH_USE_NEON)
    return vsqrtq_f32(mValue);
#else
    return Vec3(sqrt(mF32[0]), sqrt(mF32[1]), sqrt(mF32[2]));
#endif
}

Vec3 Vec3::Normalized() const
{
#if defined(JPH_USE_SSE4_1)
    return _mm_div_ps(mValue, _mm_sqrt_ps(_mm_dp_ps(mValue, mValue, 0x7f)));
#elif defined(JPH_USE_NEON)
    float32x4_t mul = vmulq_f32(mValue, mValue);
    mul = vsetq_lane_f32(0, mul, 3);
    float32x4_t sum = vdupq_n_f32(vaddvq_f32(mul));
    return vdivq_f32(mValue, vsqrtq_f32(sum));
#else
    return *this / Length();
#endif
}
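
// Normalizes, returning inZeroValue when the length is zero. With floating point
// exceptions enabled the SSE4.1 path must branch, because the blend would evaluate the
// division even for a zero length and raise a divide-by-zero.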
Vec3 Vec3::NormalizedOr(Vec3Arg inZeroValue) const
{
#if defined(JPH_USE_SSE4_1)
    Type len_sq = _mm_dp_ps(mValue, mValue, 0x7f);
    Type is_zero = _mm_cmpeq_ps(len_sq, _mm_setzero_ps());
    #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
        if (_mm_movemask_ps(is_zero) == 0xf)
            return inZeroValue;
        else
            return _mm_div_ps(mValue, _mm_sqrt_ps(len_sq));
    #else
        return _mm_blendv_ps(_mm_div_ps(mValue, _mm_sqrt_ps(len_sq)), inZeroValue.mValue, is_zero);
    #endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
#elif defined(JPH_USE_NEON)
    float32x4_t mul = vmulq_f32(mValue, mValue);
    mul = vsetq_lane_f32(0, mul, 3);
    float32x4_t sum = vdupq_n_f32(vaddvq_f32(mul));
    float32x4_t len = vsqrtq_f32(sum);
    float32x4_t is_zero = vceqq_f32(len, vdupq_n_f32(0));
    return vbslq_f32(is_zero, inZeroValue.mValue, vdivq_f32(mValue, len));
#else
    float len_sq = LengthSq();
    if (len_sq == 0.0f)
        return inZeroValue;
    else
        return *this / sqrt(len_sq);
#endif
}

bool Vec3::IsNormalized(float inTolerance) const
{
    return abs(LengthSq() - 1.0f) <= inTolerance;
}

bool Vec3::IsNaN() const
{
#if defined(JPH_USE_AVX512)
    return (_mm_fpclass_ps_mask(mValue, 0b10000001) & 0x7) != 0;
#elif defined(JPH_USE_SSE)
    return (_mm_movemask_ps(_mm_cmpunord_ps(mValue, mValue)) & 0x7) != 0;
#elif defined(JPH_USE_NEON)
    uint32x4_t mask = JPH_NEON_UINT32x4(1, 1, 1, 0);
    uint32x4_t is_equal = vceqq_f32(mValue, mValue); // If a number is not equal to itself it's a NaN
    return vaddvq_u32(vandq_u32(is_equal, mask)) != 3;
#else
    return isnan(mF32[0]) || isnan(mF32[1]) || isnan(mF32[2]);
#endif
}

void Vec3::StoreFloat3(Float3 *outV) const
{
#if defined(JPH_USE_SSE)
    _mm_store_ss(&outV->x, mValue);
    Vec3 t = Swizzle<SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_UNUSED>();
    _mm_store_ss(&outV->y, t.mValue);
    t = t.Swizzle<SWIZZLE_Y, SWIZZLE_UNUSED, SWIZZLE_UNUSED>();
    _mm_store_ss(&outV->z, t.mValue);
#elif defined(JPH_USE_NEON)
    float32x2_t xy = vget_low_f32(mValue);
    vst1_f32(&outV->x, xy);
    vst1q_lane_f32(&outV->z, mValue, 2);
#else
    outV->x = mF32[0];
    outV->y = mF32[1];
    outV->z = mF32[2];
#endif
}

UVec4 Vec3::ToInt() const
{
#if defined(JPH_USE_SSE)
    return _mm_cvttps_epi32(mValue);
#elif defined(JPH_USE_NEON)
    return vcvtq_u32_f32(mValue);
#else
    return UVec4(uint32(mF32[0]), uint32(mF32[1]), uint32(mF32[2]), uint32(mF32[3]));
#endif
}

UVec4 Vec3::ReinterpretAsInt() const
{
#if defined(JPH_USE_SSE)
    return UVec4(_mm_castps_si128(mValue));
#elif defined(JPH_USE_NEON)
    return vreinterpretq_u32_f32(mValue);
#else
    return *reinterpret_cast<const UVec4 *>(this);
#endif
}
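
// Horizontal min/max: fold Y into X, then Z into X, via swizzles; the SWIZZLE_UNUSED
// lanes cannot influence the X component of the result.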
float Vec3::ReduceMin() const
{
    Vec3 v = sMin(mValue, Swizzle<SWIZZLE_Y, SWIZZLE_UNUSED, SWIZZLE_Z>());
    v = sMin(v, v.Swizzle<SWIZZLE_Z, SWIZZLE_UNUSED, SWIZZLE_UNUSED>());
    return v.GetX();
}

float Vec3::ReduceMax() const
{
    Vec3 v = sMax(mValue, Swizzle<SWIZZLE_Y, SWIZZLE_UNUSED, SWIZZLE_Z>());
    v = sMax(v, v.Swizzle<SWIZZLE_Z, SWIZZLE_UNUSED, SWIZZLE_UNUSED>());
    return v.GetX();
}
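
// Branches on the larger of |X| and |Y| so the constructed vector cannot be (near) zero
// for a non-zero input; in both branches the dot product with *this is identically zero.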
Vec3 Vec3::GetNormalizedPerpendicular() const
{
    if (abs(mF32[0]) > abs(mF32[1]))
    {
        float len = sqrt(mF32[0] * mF32[0] + mF32[2] * mF32[2]);
        return Vec3(mF32[2], 0.0f, -mF32[0]) / len;
    }
    else
    {
        float len = sqrt(mF32[1] * mF32[1] + mF32[2] * mF32[2]);
        return Vec3(0.0f, mF32[2], -mF32[1]) / len;
    }
}
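
// Returns +1.0 or -1.0 per component based on the sign bit, so -0.0f maps to -1.0. The
// SSE bit trick ANDs with -1.0f (keeping the sign bit; the other surviving bits are ones
// that 1.0f sets anyway) and then ORs in 1.0f. The AVX-512 constant is a _mm_fixupimm_ps
// lookup table, which presumably encodes the same sign-to-±1 mapping.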
Vec3 Vec3::GetSign() const
{
#if defined(JPH_USE_AVX512)
    return _mm_fixupimm_ps(mValue, mValue, _mm_set1_epi32(0xA9A90A00), 0);
#elif defined(JPH_USE_SSE)
    Type minus_one = _mm_set1_ps(-1.0f);
    Type one = _mm_set1_ps(1.0f);
    return _mm_or_ps(_mm_and_ps(mValue, minus_one), one);
#elif defined(JPH_USE_NEON)
    Type minus_one = vdupq_n_f32(-1.0f);
    Type one = vdupq_n_f32(1.0f);
    return vreinterpretq_f32_u32(vorrq_u32(vandq_u32(vreinterpretq_u32_f32(mValue), vreinterpretq_u32_f32(minus_one)), vreinterpretq_u32_f32(one)));
#else
    return Vec3(signbit(mF32[0])? -1.0f : 1.0f,
                signbit(mF32[1])? -1.0f : 1.0f,
                signbit(mF32[2])? -1.0f : 1.0f);
#endif
}

JPH_NAMESPACE_END