b3Vector3.h 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345
  1. /*
  2. Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org
  3. This software is provided 'as-is', without any express or implied warranty.
  4. In no event will the authors be held liable for any damages arising from the use of this software.
  5. Permission is granted to anyone to use this software for any purpose,
  6. including commercial applications, and to alter it and redistribute it freely,
  7. subject to the following restrictions:
  8. 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
  9. 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
  10. 3. This notice may not be removed or altered from any source distribution.
  11. */
  12. #ifndef B3_VECTOR3_H
  13. #define B3_VECTOR3_H
  14. //#include <stdint.h>
  15. #include "b3Scalar.h"
  16. #include "b3MinMax.h"
  17. #include "b3AlignedAllocator.h"
  // Pick the serialized vector layout, and the string that names it, to match
  // the scalar precision selected at build time.
  #if defined(B3_USE_DOUBLE_PRECISION)
  #define b3Vector3Data b3Vector3DoubleData
  #define b3Vector3DataName "b3Vector3DoubleData"
  #else
  #define b3Vector3Data b3Vector3FloatData
  #define b3Vector3DataName "b3Vector3FloatData"
  #endif  // B3_USE_DOUBLE_PRECISION
  #ifdef B3_USE_SSE
  //typedef uint32_t __m128i __attribute__ ((vector_size(16)));
  #ifdef _MSC_VER
  #pragma warning(disable: 4556) // value of intrinsic immediate argument '4294967239' is out of range '0 - 255'
  #endif

  // Packs four 2-bit lane indices into one shuffle immediate; x lands in the lowest two bits, w in the highest.
  #define B3_SHUFFLE(x,y,z,w) ((w)<<6 | (z)<<4 | (y)<<2 | (x))

  // Shuffles the lanes of one register by passing it as both operands of _mm_shuffle_ps.
  //#define b3_pshufd_ps( _a, _mask ) (__m128) _mm_shuffle_epi32((__m128i)(_a), (_mask) )
  #define b3_pshufd_ps( _a, _mask ) _mm_shuffle_ps((_a), (_a), (_mask) )
  // Broadcasts lane _i into all four lanes.
  #define b3_splat_ps( _a, _i ) b3_pshufd_ps((_a), B3_SHUFFLE(_i,_i,_i,_i) )
  // Broadcasts lane _i into x, y and z; the w selector is fixed at lane 3.
  #define b3_splat3_ps( _a, _i ) b3_pshufd_ps((_a), B3_SHUFFLE(_i,_i,_i, 3) )

  // Integer bit masks (sign-bit clearing and xyz selection).
  #define b3vAbsMask (_mm_set_epi32( 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
  #define b3v3AbsiMask (_mm_set_epi32(0x00000000, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
  #define b3vFFF0Mask (_mm_set_epi32(0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF))

  // The same masks reinterpreted as float registers for use with _mm_and_ps and friends.
  #define b3vAbsfMask b3CastiTo128f(b3vAbsMask)
  #define b3v3AbsfMask b3CastiTo128f(b3v3AbsiMask)
  #define b3vFFF0fMask b3CastiTo128f(b3vFFF0Mask)
  #define b3vxyzMaskf b3vFFF0fMask

  // Constant registers shared by the SSE code paths.
  const __m128 B3_ATTRIBUTE_ALIGNED16(b3vMzeroMask) = {-0.0f, -0.0f, -0.0f, -0.0f};
  const __m128 B3_ATTRIBUTE_ALIGNED16(b3v1110) = {1.0f, 1.0f, 1.0f, 0.0f};
  const __m128 B3_ATTRIBUTE_ALIGNED16(b3vHalf) = {0.5f, 0.5f, 0.5f, 0.5f};
  const __m128 B3_ATTRIBUTE_ALIGNED16(b3v1_5) = {1.5f, 1.5f, 1.5f, 1.5f};
  #endif  // B3_USE_SSE
  #if defined(B3_USE_NEON)
  // Constant registers shared by the NEON code paths: an all-lanes sign-bit
  // pattern (-0.0f), an xyz selection mask, and sign-clearing masks for four
  // and for three lanes.
  const float32x4_t B3_ATTRIBUTE_ALIGNED16(b3vMzeroMask) = (float32x4_t){-0.0f, -0.0f, -0.0f, -0.0f};
  const int32x4_t B3_ATTRIBUTE_ALIGNED16(b3vFFF0Mask) = (int32x4_t){0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0};
  const int32x4_t B3_ATTRIBUTE_ALIGNED16(b3vAbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
  const int32x4_t B3_ATTRIBUTE_ALIGNED16(b3v3AbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x0};
  #endif  // B3_USE_NEON
  53. class b3Vector3;
  54. class b3Vector4;
  55. #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
  56. //#if defined (B3_USE_SSE) || defined (B3_USE_NEON)
  57. inline b3Vector3 b3MakeVector3( b3SimdFloat4 v);
  58. inline b3Vector4 b3MakeVector4( b3SimdFloat4 vec);
  59. #endif
  60. inline b3Vector3 b3MakeVector3(b3Scalar x,b3Scalar y,b3Scalar z);
  61. inline b3Vector3 b3MakeVector3(b3Scalar x,b3Scalar y,b3Scalar z, b3Scalar w);
  62. inline b3Vector4 b3MakeVector4(b3Scalar x,b3Scalar y,b3Scalar z,b3Scalar w);
  /**@brief b3Vector3 can be used to represent 3D points and vectors.
   * It has an unused w component to suit 16-byte alignment when b3Vector3 is stored in containers. This extra component can be used by derived classes (Quaternion?) or by the user.
   * Ideally, this class should be replaced by a platform-optimized SIMD version that keeps the data in registers.
   */
  67. B3_ATTRIBUTE_ALIGNED16(class) b3Vector3
  68. {
  69. public:
  70. #if defined (B3_USE_SSE) || defined(B3_USE_NEON) // _WIN32 || ARM
  71. union {
  72. b3SimdFloat4 mVec128;
  73. float m_floats[4];
  74. struct {float x,y,z,w;};
  75. };
  76. #else
  77. union
  78. {
  79. float m_floats[4];
  80. struct {float x,y,z,w;};
  81. };
  82. #endif
  83. public:
  84. B3_DECLARE_ALIGNED_ALLOCATOR();
  85. #if defined (B3_USE_SSE) || defined(B3_USE_NEON) // _WIN32 || ARM
  86. /*B3_FORCE_INLINE b3Vector3()
  87. {
  88. }
  89. */
  90. B3_FORCE_INLINE b3SimdFloat4 get128() const
  91. {
  92. return mVec128;
  93. }
  94. B3_FORCE_INLINE void set128(b3SimdFloat4 v128)
  95. {
  96. mVec128 = v128;
  97. }
  98. #endif
  99. public:
  100. /**@brief Add a vector to this one
  101. * @param The vector to add to this one */
  102. B3_FORCE_INLINE b3Vector3& operator+=(const b3Vector3& v)
  103. {
  104. #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
  105. mVec128 = _mm_add_ps(mVec128, v.mVec128);
  106. #elif defined(B3_USE_NEON)
  107. mVec128 = vaddq_f32(mVec128, v.mVec128);
  108. #else
  109. m_floats[0] += v.m_floats[0];
  110. m_floats[1] += v.m_floats[1];
  111. m_floats[2] += v.m_floats[2];
  112. #endif
  113. return *this;
  114. }
  115. /**@brief Subtract a vector from this one
  116. * @param The vector to subtract */
  117. B3_FORCE_INLINE b3Vector3& operator-=(const b3Vector3& v)
  118. {
  119. #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
  120. mVec128 = _mm_sub_ps(mVec128, v.mVec128);
  121. #elif defined(B3_USE_NEON)
  122. mVec128 = vsubq_f32(mVec128, v.mVec128);
  123. #else
  124. m_floats[0] -= v.m_floats[0];
  125. m_floats[1] -= v.m_floats[1];
  126. m_floats[2] -= v.m_floats[2];
  127. #endif
  128. return *this;
  129. }
  130. /**@brief Scale the vector
  131. * @param s Scale factor */
  132. B3_FORCE_INLINE b3Vector3& operator*=(const b3Scalar& s)
  133. {
  134. #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
  135. __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
  136. vs = b3_pshufd_ps(vs, 0x80); // (S S S 0.0)
  137. mVec128 = _mm_mul_ps(mVec128, vs);
  138. #elif defined(B3_USE_NEON)
  139. mVec128 = vmulq_n_f32(mVec128, s);
  140. #else
  141. m_floats[0] *= s;
  142. m_floats[1] *= s;
  143. m_floats[2] *= s;
  144. #endif
  145. return *this;
  146. }
  147. /**@brief Inversely scale the vector
  148. * @param s Scale factor to divide by */
  149. B3_FORCE_INLINE b3Vector3& operator/=(const b3Scalar& s)
  150. {
  151. b3FullAssert(s != b3Scalar(0.0));
  152. #if 0 //defined(B3_USE_SSE_IN_API)
  153. // this code is not faster !
  154. __m128 vs = _mm_load_ss(&s);
  155. vs = _mm_div_ss(b3v1110, vs);
  156. vs = b3_pshufd_ps(vs, 0x00); // (S S S S)
  157. mVec128 = _mm_mul_ps(mVec128, vs);
  158. return *this;
  159. #else
  160. return *this *= b3Scalar(1.0) / s;
  161. #endif
  162. }
  163. /**@brief Return the dot product
  164. * @param v The other vector in the dot product */
  165. B3_FORCE_INLINE b3Scalar dot(const b3Vector3& v) const
  166. {
  167. #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
  168. __m128 vd = _mm_mul_ps(mVec128, v.mVec128);
  169. __m128 z = _mm_movehl_ps(vd, vd);
  170. __m128 y = _mm_shuffle_ps(vd, vd, 0x55);
  171. vd = _mm_add_ss(vd, y);
  172. vd = _mm_add_ss(vd, z);
  173. return _mm_cvtss_f32(vd);
  174. #elif defined(B3_USE_NEON)
  175. float32x4_t vd = vmulq_f32(mVec128, v.mVec128);
  176. float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_low_f32(vd));
  177. x = vadd_f32(x, vget_high_f32(vd));
  178. return vget_lane_f32(x, 0);
  179. #else
  180. return m_floats[0] * v.m_floats[0] +
  181. m_floats[1] * v.m_floats[1] +
  182. m_floats[2] * v.m_floats[2];
  183. #endif
  184. }
  185. /**@brief Return the length of the vector squared */
  186. B3_FORCE_INLINE b3Scalar length2() const
  187. {
  188. return dot(*this);
  189. }
  190. /**@brief Return the length of the vector */
  191. B3_FORCE_INLINE b3Scalar length() const
  192. {
  193. return b3Sqrt(length2());
  194. }
  /**@brief Return the distance squared between the ends of this and another vector
   * This is semantically treating the vector like a point */
  197. B3_FORCE_INLINE b3Scalar distance2(const b3Vector3& v) const;
  /**@brief Return the distance between the ends of this and another vector
   * This is semantically treating the vector like a point */
  200. B3_FORCE_INLINE b3Scalar distance(const b3Vector3& v) const;
  201. B3_FORCE_INLINE b3Vector3& safeNormalize()
  202. {
  203. b3Vector3 absVec = this->absolute();
  204. int maxIndex = absVec.maxAxis();
  205. if (absVec[maxIndex]>0)
  206. {
  207. *this /= absVec[maxIndex];
  208. return *this /= length();
  209. }
  210. setValue(1,0,0);
  211. return *this;
  212. }
  213. /**@brief Normalize this vector
  214. * x^2 + y^2 + z^2 = 1 */
  215. B3_FORCE_INLINE b3Vector3& normalize()
  216. {
  217. #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
  218. // dot product first
  219. __m128 vd = _mm_mul_ps(mVec128, mVec128);
  220. __m128 z = _mm_movehl_ps(vd, vd);
  221. __m128 y = _mm_shuffle_ps(vd, vd, 0x55);
  222. vd = _mm_add_ss(vd, y);
  223. vd = _mm_add_ss(vd, z);
  224. #if 0
  225. vd = _mm_sqrt_ss(vd);
  226. vd = _mm_div_ss(b3v1110, vd);
  227. vd = b3_splat_ps(vd, 0x80);
  228. mVec128 = _mm_mul_ps(mVec128, vd);
  229. #else
  230. // NR step 1/sqrt(x) - vd is x, y is output
  231. y = _mm_rsqrt_ss(vd); // estimate
  232. // one step NR
  233. z = b3v1_5;
  234. vd = _mm_mul_ss(vd, b3vHalf); // vd * 0.5
  235. //x2 = vd;
  236. vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0
  237. vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0 * y0
  238. z = _mm_sub_ss(z, vd); // 1.5 - vd * 0.5 * y0 * y0
  239. y = _mm_mul_ss(y, z); // y0 * (1.5 - vd * 0.5 * y0 * y0)
  240. y = b3_splat_ps(y, 0x80);
  241. mVec128 = _mm_mul_ps(mVec128, y);
  242. #endif
  243. return *this;
  244. #else
  245. return *this /= length();
  246. #endif
  247. }
  248. /**@brief Return a normalized version of this vector */
  249. B3_FORCE_INLINE b3Vector3 normalized() const;
  250. /**@brief Return a rotated version of this vector
  251. * @param wAxis The axis to rotate about
  252. * @param angle The angle to rotate by */
  253. B3_FORCE_INLINE b3Vector3 rotate( const b3Vector3& wAxis, const b3Scalar angle ) const;
  254. /**@brief Return the angle between this and another vector
  255. * @param v The other vector */
  256. B3_FORCE_INLINE b3Scalar angle(const b3Vector3& v) const
  257. {
  258. b3Scalar s = b3Sqrt(length2() * v.length2());
  259. b3FullAssert(s != b3Scalar(0.0));
  260. return b3Acos(dot(v) / s);
  261. }
  262. /**@brief Return a vector will the absolute values of each element */
  263. B3_FORCE_INLINE b3Vector3 absolute() const
  264. {
  265. #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
  266. return b3MakeVector3(_mm_and_ps(mVec128, b3v3AbsfMask));
  267. #elif defined(B3_USE_NEON)
  268. return b3Vector3(vabsq_f32(mVec128));
  269. #else
  270. return b3MakeVector3(
  271. b3Fabs(m_floats[0]),
  272. b3Fabs(m_floats[1]),
  273. b3Fabs(m_floats[2]));
  274. #endif
  275. }
  276. /**@brief Return the cross product between this and another vector
  277. * @param v The other vector */
  278. B3_FORCE_INLINE b3Vector3 cross(const b3Vector3& v) const
  279. {
  280. #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
  281. __m128 T, V;
  282. T = b3_pshufd_ps(mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
  283. V = b3_pshufd_ps(v.mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
  284. V = _mm_mul_ps(V, mVec128);
  285. T = _mm_mul_ps(T, v.mVec128);
  286. V = _mm_sub_ps(V, T);
  287. V = b3_pshufd_ps(V, B3_SHUFFLE(1, 2, 0, 3));
  288. return b3MakeVector3(V);
  289. #elif defined(B3_USE_NEON)
  290. float32x4_t T, V;
  291. // form (Y, Z, X, _) of mVec128 and v.mVec128
  292. float32x2_t Tlow = vget_low_f32(mVec128);
  293. float32x2_t Vlow = vget_low_f32(v.mVec128);
  294. T = vcombine_f32(vext_f32(Tlow, vget_high_f32(mVec128), 1), Tlow);
  295. V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v.mVec128), 1), Vlow);
  296. V = vmulq_f32(V, mVec128);
  297. T = vmulq_f32(T, v.mVec128);
  298. V = vsubq_f32(V, T);
  299. Vlow = vget_low_f32(V);
  300. // form (Y, Z, X, _);
  301. V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);
  302. V = (float32x4_t)vandq_s32((int32x4_t)V, b3vFFF0Mask);
  303. return b3Vector3(V);
  304. #else
  305. return b3MakeVector3(
  306. m_floats[1] * v.m_floats[2] - m_floats[2] * v.m_floats[1],
  307. m_floats[2] * v.m_floats[0] - m_floats[0] * v.m_floats[2],
  308. m_floats[0] * v.m_floats[1] - m_floats[1] * v.m_floats[0]);
  309. #endif
  310. }
  311. B3_FORCE_INLINE b3Scalar triple(const b3Vector3& v1, const b3Vector3& v2) const
  312. {
  313. #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
  314. // cross:
  315. __m128 T = _mm_shuffle_ps(v1.mVec128, v1.mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
  316. __m128 V = _mm_shuffle_ps(v2.mVec128, v2.mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
  317. V = _mm_mul_ps(V, v1.mVec128);
  318. T = _mm_mul_ps(T, v2.mVec128);
  319. V = _mm_sub_ps(V, T);
  320. V = _mm_shuffle_ps(V, V, B3_SHUFFLE(1, 2, 0, 3));
  321. // dot:
  322. V = _mm_mul_ps(V, mVec128);
  323. __m128 z = _mm_movehl_ps(V, V);
  324. __m128 y = _mm_shuffle_ps(V, V, 0x55);
  325. V = _mm_add_ss(V, y);
  326. V = _mm_add_ss(V, z);
  327. return _mm_cvtss_f32(V);
  328. #elif defined(B3_USE_NEON)
  329. // cross:
  330. float32x4_t T, V;
  331. // form (Y, Z, X, _) of mVec128 and v.mVec128
  332. float32x2_t Tlow = vget_low_f32(v1.mVec128);
  333. float32x2_t Vlow = vget_low_f32(v2.mVec128);
  334. T = vcombine_f32(vext_f32(Tlow, vget_high_f32(v1.mVec128), 1), Tlow);
  335. V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v2.mVec128), 1), Vlow);
  336. V = vmulq_f32(V, v1.mVec128);
  337. T = vmulq_f32(T, v2.mVec128);
  338. V = vsubq_f32(V, T);
  339. Vlow = vget_low_f32(V);
  340. // form (Y, Z, X, _);
  341. V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);
  342. // dot:
  343. V = vmulq_f32(mVec128, V);
  344. float32x2_t x = vpadd_f32(vget_low_f32(V), vget_low_f32(V));
  345. x = vadd_f32(x, vget_high_f32(V));
  346. return vget_lane_f32(x, 0);
  347. #else
  348. return
  349. m_floats[0] * (v1.m_floats[1] * v2.m_floats[2] - v1.m_floats[2] * v2.m_floats[1]) +
  350. m_floats[1] * (v1.m_floats[2] * v2.m_floats[0] - v1.m_floats[0] * v2.m_floats[2]) +
  351. m_floats[2] * (v1.m_floats[0] * v2.m_floats[1] - v1.m_floats[1] * v2.m_floats[0]);
  352. #endif
  353. }
  354. /**@brief Return the axis with the smallest value
  355. * Note return values are 0,1,2 for x, y, or z */
  356. B3_FORCE_INLINE int minAxis() const
  357. {
  358. return m_floats[0] < m_floats[1] ? (m_floats[0] <m_floats[2] ? 0 : 2) : (m_floats[1] <m_floats[2] ? 1 : 2);
  359. }
  360. /**@brief Return the axis with the largest value
  361. * Note return values are 0,1,2 for x, y, or z */
  362. B3_FORCE_INLINE int maxAxis() const
  363. {
  364. return m_floats[0] < m_floats[1] ? (m_floats[1] <m_floats[2] ? 2 : 1) : (m_floats[0] <m_floats[2] ? 2 : 0);
  365. }
  366. B3_FORCE_INLINE int furthestAxis() const
  367. {
  368. return absolute().minAxis();
  369. }
  370. B3_FORCE_INLINE int closestAxis() const
  371. {
  372. return absolute().maxAxis();
  373. }
  374. B3_FORCE_INLINE void setInterpolate3(const b3Vector3& v0, const b3Vector3& v1, b3Scalar rt)
  375. {
  376. #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
  377. __m128 vrt = _mm_load_ss(&rt); // (rt 0 0 0)
  378. b3Scalar s = b3Scalar(1.0) - rt;
  379. __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
  380. vs = b3_pshufd_ps(vs, 0x80); // (S S S 0.0)
  381. __m128 r0 = _mm_mul_ps(v0.mVec128, vs);
  382. vrt = b3_pshufd_ps(vrt, 0x80); // (rt rt rt 0.0)
  383. __m128 r1 = _mm_mul_ps(v1.mVec128, vrt);
  384. __m128 tmp3 = _mm_add_ps(r0,r1);
  385. mVec128 = tmp3;
  386. #elif defined(B3_USE_NEON)
  387. float32x4_t vl = vsubq_f32(v1.mVec128, v0.mVec128);
  388. vl = vmulq_n_f32(vl, rt);
  389. mVec128 = vaddq_f32(vl, v0.mVec128);
  390. #else
  391. b3Scalar s = b3Scalar(1.0) - rt;
  392. m_floats[0] = s * v0.m_floats[0] + rt * v1.m_floats[0];
  393. m_floats[1] = s * v0.m_floats[1] + rt * v1.m_floats[1];
  394. m_floats[2] = s * v0.m_floats[2] + rt * v1.m_floats[2];
  395. //don't do the unused w component
  396. // m_co[3] = s * v0[3] + rt * v1[3];
  397. #endif
  398. }
  399. /**@brief Return the linear interpolation between this and another vector
  400. * @param v The other vector
  401. * @param t The ration of this to v (t = 0 => return this, t=1 => return other) */
  402. B3_FORCE_INLINE b3Vector3 lerp(const b3Vector3& v, const b3Scalar& t) const
  403. {
  404. #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
  405. __m128 vt = _mm_load_ss(&t); // (t 0 0 0)
  406. vt = b3_pshufd_ps(vt, 0x80); // (rt rt rt 0.0)
  407. __m128 vl = _mm_sub_ps(v.mVec128, mVec128);
  408. vl = _mm_mul_ps(vl, vt);
  409. vl = _mm_add_ps(vl, mVec128);
  410. return b3MakeVector3(vl);
  411. #elif defined(B3_USE_NEON)
  412. float32x4_t vl = vsubq_f32(v.mVec128, mVec128);
  413. vl = vmulq_n_f32(vl, t);
  414. vl = vaddq_f32(vl, mVec128);
  415. return b3Vector3(vl);
  416. #else
  417. return
  418. b3MakeVector3( m_floats[0] + (v.m_floats[0] - m_floats[0]) * t,
  419. m_floats[1] + (v.m_floats[1] - m_floats[1]) * t,
  420. m_floats[2] + (v.m_floats[2] - m_floats[2]) * t);
  421. #endif
  422. }
  423. /**@brief Elementwise multiply this vector by the other
  424. * @param v The other vector */
  425. B3_FORCE_INLINE b3Vector3& operator*=(const b3Vector3& v)
  426. {
  427. #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
  428. mVec128 = _mm_mul_ps(mVec128, v.mVec128);
  429. #elif defined(B3_USE_NEON)
  430. mVec128 = vmulq_f32(mVec128, v.mVec128);
  431. #else
  432. m_floats[0] *= v.m_floats[0];
  433. m_floats[1] *= v.m_floats[1];
  434. m_floats[2] *= v.m_floats[2];
  435. #endif
  436. return *this;
  437. }
  438. /**@brief Return the x value */
  439. B3_FORCE_INLINE const b3Scalar& getX() const { return m_floats[0]; }
  440. /**@brief Return the y value */
  441. B3_FORCE_INLINE const b3Scalar& getY() const { return m_floats[1]; }
  442. /**@brief Return the z value */
  443. B3_FORCE_INLINE const b3Scalar& getZ() const { return m_floats[2]; }
  444. /**@brief Return the w value */
  445. B3_FORCE_INLINE const b3Scalar& getW() const { return m_floats[3]; }
  446. /**@brief Set the x value */
  447. B3_FORCE_INLINE void setX(b3Scalar _x) { m_floats[0] = _x;};
  448. /**@brief Set the y value */
  449. B3_FORCE_INLINE void setY(b3Scalar _y) { m_floats[1] = _y;};
  450. /**@brief Set the z value */
  451. B3_FORCE_INLINE void setZ(b3Scalar _z) { m_floats[2] = _z;};
  452. /**@brief Set the w value */
  453. B3_FORCE_INLINE void setW(b3Scalar _w) { m_floats[3] = _w;};
  454. //B3_FORCE_INLINE b3Scalar& operator[](int i) { return (&m_floats[0])[i]; }
  455. //B3_FORCE_INLINE const b3Scalar& operator[](int i) const { return (&m_floats[0])[i]; }
  456. ///operator b3Scalar*() replaces operator[], using implicit conversion. We added operator != and operator == to avoid pointer comparisons.
  457. B3_FORCE_INLINE operator b3Scalar *() { return &m_floats[0]; }
  458. B3_FORCE_INLINE operator const b3Scalar *() const { return &m_floats[0]; }
  459. B3_FORCE_INLINE bool operator==(const b3Vector3& other) const
  460. {
  461. #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
  462. return (0xf == _mm_movemask_ps((__m128)_mm_cmpeq_ps(mVec128, other.mVec128)));
  463. #else
  464. return ((m_floats[3]==other.m_floats[3]) &&
  465. (m_floats[2]==other.m_floats[2]) &&
  466. (m_floats[1]==other.m_floats[1]) &&
  467. (m_floats[0]==other.m_floats[0]));
  468. #endif
  469. }
  470. B3_FORCE_INLINE bool operator!=(const b3Vector3& other) const
  471. {
  472. return !(*this == other);
  473. }
  474. /**@brief Set each element to the max of the current values and the values of another b3Vector3
  475. * @param other The other b3Vector3 to compare with
  476. */
  477. B3_FORCE_INLINE void setMax(const b3Vector3& other)
  478. {
  479. #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
  480. mVec128 = _mm_max_ps(mVec128, other.mVec128);
  481. #elif defined(B3_USE_NEON)
  482. mVec128 = vmaxq_f32(mVec128, other.mVec128);
  483. #else
  484. b3SetMax(m_floats[0], other.m_floats[0]);
  485. b3SetMax(m_floats[1], other.m_floats[1]);
  486. b3SetMax(m_floats[2], other.m_floats[2]);
  487. b3SetMax(m_floats[3], other.m_floats[3]);
  488. #endif
  489. }
  490. /**@brief Set each element to the min of the current values and the values of another b3Vector3
  491. * @param other The other b3Vector3 to compare with
  492. */
  493. B3_FORCE_INLINE void setMin(const b3Vector3& other)
  494. {
  495. #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
  496. mVec128 = _mm_min_ps(mVec128, other.mVec128);
  497. #elif defined(B3_USE_NEON)
  498. mVec128 = vminq_f32(mVec128, other.mVec128);
  499. #else
  500. b3SetMin(m_floats[0], other.m_floats[0]);
  501. b3SetMin(m_floats[1], other.m_floats[1]);
  502. b3SetMin(m_floats[2], other.m_floats[2]);
  503. b3SetMin(m_floats[3], other.m_floats[3]);
  504. #endif
  505. }
  506. B3_FORCE_INLINE void setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z)
  507. {
  508. m_floats[0]=_x;
  509. m_floats[1]=_y;
  510. m_floats[2]=_z;
  511. m_floats[3] = b3Scalar(0.f);
  512. }
  513. void getSkewSymmetricMatrix(b3Vector3* v0,b3Vector3* v1,b3Vector3* v2) const
  514. {
  515. #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
  516. __m128 V = _mm_and_ps(mVec128, b3vFFF0fMask);
  517. __m128 V0 = _mm_xor_ps(b3vMzeroMask, V);
  518. __m128 V2 = _mm_movelh_ps(V0, V);
  519. __m128 V1 = _mm_shuffle_ps(V, V0, 0xCE);
  520. V0 = _mm_shuffle_ps(V0, V, 0xDB);
  521. V2 = _mm_shuffle_ps(V2, V, 0xF9);
  522. v0->mVec128 = V0;
  523. v1->mVec128 = V1;
  524. v2->mVec128 = V2;
  525. #else
  526. v0->setValue(0. ,-getZ() ,getY());
  527. v1->setValue(getZ() ,0. ,-getX());
  528. v2->setValue(-getY() ,getX() ,0.);
  529. #endif
  530. }
  531. void setZero()
  532. {
  533. #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
  534. mVec128 = (__m128)_mm_xor_ps(mVec128, mVec128);
  535. #elif defined(B3_USE_NEON)
  536. int32x4_t vi = vdupq_n_s32(0);
  537. mVec128 = vreinterpretq_f32_s32(vi);
  538. #else
  539. setValue(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.));
  540. #endif
  541. }
  542. B3_FORCE_INLINE bool isZero() const
  543. {
  544. return m_floats[0] == b3Scalar(0) && m_floats[1] == b3Scalar(0) && m_floats[2] == b3Scalar(0);
  545. }
  546. B3_FORCE_INLINE bool fuzzyZero() const
  547. {
  548. return length2() < B3_EPSILON;
  549. }
  550. B3_FORCE_INLINE void serialize(struct b3Vector3Data& dataOut) const;
  551. B3_FORCE_INLINE void deSerialize(const struct b3Vector3Data& dataIn);
  552. B3_FORCE_INLINE void serializeFloat(struct b3Vector3FloatData& dataOut) const;
  553. B3_FORCE_INLINE void deSerializeFloat(const struct b3Vector3FloatData& dataIn);
  554. B3_FORCE_INLINE void serializeDouble(struct b3Vector3DoubleData& dataOut) const;
  555. B3_FORCE_INLINE void deSerializeDouble(const struct b3Vector3DoubleData& dataIn);
  556. /**@brief returns index of maximum dot product between this and vectors in array[]
  557. * @param array The other vectors
  558. * @param array_count The number of other vectors
  559. * @param dotOut The maximum dot product */
  560. B3_FORCE_INLINE long maxDot( const b3Vector3 *array, long array_count, b3Scalar &dotOut ) const;
  561. /**@brief returns index of minimum dot product between this and vectors in array[]
  562. * @param array The other vectors
  563. * @param array_count The number of other vectors
  564. * @param dotOut The minimum dot product */
  565. B3_FORCE_INLINE long minDot( const b3Vector3 *array, long array_count, b3Scalar &dotOut ) const;
  566. /* create a vector as b3Vector3( this->dot( b3Vector3 v0 ), this->dot( b3Vector3 v1), this->dot( b3Vector3 v2 )) */
  567. B3_FORCE_INLINE b3Vector3 dot3( const b3Vector3 &v0, const b3Vector3 &v1, const b3Vector3 &v2 ) const
  568. {
  569. #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
  570. __m128 a0 = _mm_mul_ps( v0.mVec128, this->mVec128 );
  571. __m128 a1 = _mm_mul_ps( v1.mVec128, this->mVec128 );
  572. __m128 a2 = _mm_mul_ps( v2.mVec128, this->mVec128 );
  573. __m128 b0 = _mm_unpacklo_ps( a0, a1 );
  574. __m128 b1 = _mm_unpackhi_ps( a0, a1 );
  575. __m128 b2 = _mm_unpacklo_ps( a2, _mm_setzero_ps() );
  576. __m128 r = _mm_movelh_ps( b0, b2 );
  577. r = _mm_add_ps( r, _mm_movehl_ps( b2, b0 ));
  578. a2 = _mm_and_ps( a2, b3vxyzMaskf);
  579. r = _mm_add_ps( r, b3CastdTo128f (_mm_move_sd( b3CastfTo128d(a2), b3CastfTo128d(b1) )));
  580. return b3MakeVector3(r);
  581. #elif defined(B3_USE_NEON)
  582. static const uint32x4_t xyzMask = (const uint32x4_t){ -1, -1, -1, 0 };
  583. float32x4_t a0 = vmulq_f32( v0.mVec128, this->mVec128);
  584. float32x4_t a1 = vmulq_f32( v1.mVec128, this->mVec128);
  585. float32x4_t a2 = vmulq_f32( v2.mVec128, this->mVec128);
  586. float32x2x2_t zLo = vtrn_f32( vget_high_f32(a0), vget_high_f32(a1));
  587. a2 = (float32x4_t) vandq_u32((uint32x4_t) a2, xyzMask );
  588. float32x2_t b0 = vadd_f32( vpadd_f32( vget_low_f32(a0), vget_low_f32(a1)), zLo.val[0] );
  589. float32x2_t b1 = vpadd_f32( vpadd_f32( vget_low_f32(a2), vget_high_f32(a2)), vdup_n_f32(0.0f));
  590. return b3Vector3( vcombine_f32(b0, b1) );
  591. #else
  592. return b3MakeVector3( dot(v0), dot(v1), dot(v2));
  593. #endif
  594. }
  595. };
  596. /**@brief Return the sum of two vectors (Point symantics)*/
  597. B3_FORCE_INLINE b3Vector3
  598. operator+(const b3Vector3& v1, const b3Vector3& v2)
  599. {
  600. #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
  601. return b3MakeVector3(_mm_add_ps(v1.mVec128, v2.mVec128));
  602. #elif defined(B3_USE_NEON)
  603. return b3MakeVector3(vaddq_f32(v1.mVec128, v2.mVec128));
  604. #else
  605. return b3MakeVector3(
  606. v1.m_floats[0] + v2.m_floats[0],
  607. v1.m_floats[1] + v2.m_floats[1],
  608. v1.m_floats[2] + v2.m_floats[2]);
  609. #endif
  610. }
  611. /**@brief Return the elementwise product of two vectors */
  612. B3_FORCE_INLINE b3Vector3
  613. operator*(const b3Vector3& v1, const b3Vector3& v2)
  614. {
  615. #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
  616. return b3MakeVector3(_mm_mul_ps(v1.mVec128, v2.mVec128));
  617. #elif defined(B3_USE_NEON)
  618. return b3MakeVector3(vmulq_f32(v1.mVec128, v2.mVec128));
  619. #else
  620. return b3MakeVector3(
  621. v1.m_floats[0] * v2.m_floats[0],
  622. v1.m_floats[1] * v2.m_floats[1],
  623. v1.m_floats[2] * v2.m_floats[2]);
  624. #endif
  625. }
  626. /**@brief Return the difference between two vectors */
  627. B3_FORCE_INLINE b3Vector3
  628. operator-(const b3Vector3& v1, const b3Vector3& v2)
  629. {
  630. #if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
  631. // without _mm_and_ps this code causes slowdown in Concave moving
  632. __m128 r = _mm_sub_ps(v1.mVec128, v2.mVec128);
  633. return b3MakeVector3(_mm_and_ps(r, b3vFFF0fMask));
  634. #elif defined(B3_USE_NEON)
  635. float32x4_t r = vsubq_f32(v1.mVec128, v2.mVec128);
  636. return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)r, b3vFFF0Mask));
  637. #else
  638. return b3MakeVector3(
  639. v1.m_floats[0] - v2.m_floats[0],
  640. v1.m_floats[1] - v2.m_floats[1],
  641. v1.m_floats[2] - v2.m_floats[2]);
  642. #endif
  643. }
  644. /**@brief Return the negative of the vector */
  645. B3_FORCE_INLINE b3Vector3
  646. operator-(const b3Vector3& v)
  647. {
  648. #if (defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE))
  649. __m128 r = _mm_xor_ps(v.mVec128, b3vMzeroMask);
  650. return b3MakeVector3(_mm_and_ps(r, b3vFFF0fMask));
  651. #elif defined(B3_USE_NEON)
  652. return b3MakeVector3((b3SimdFloat4)veorq_s32((int32x4_t)v.mVec128, (int32x4_t)b3vMzeroMask));
  653. #else
  654. return b3MakeVector3(-v.m_floats[0], -v.m_floats[1], -v.m_floats[2]);
  655. #endif
  656. }
  657. /**@brief Return the vector scaled by s */
  658. B3_FORCE_INLINE b3Vector3
  659. operator*(const b3Vector3& v, const b3Scalar& s)
  660. {
  661. #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
  662. __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
  663. vs = b3_pshufd_ps(vs, 0x80); // (S S S 0.0)
  664. return b3MakeVector3(_mm_mul_ps(v.mVec128, vs));
  665. #elif defined(B3_USE_NEON)
  666. float32x4_t r = vmulq_n_f32(v.mVec128, s);
  667. return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)r, b3vFFF0Mask));
  668. #else
  669. return b3MakeVector3(v.m_floats[0] * s, v.m_floats[1] * s, v.m_floats[2] * s);
  670. #endif
  671. }
  672. /**@brief Return the vector scaled by s */
  673. B3_FORCE_INLINE b3Vector3
  674. operator*(const b3Scalar& s, const b3Vector3& v)
  675. {
  676. return v * s;
  677. }
  678. /**@brief Return the vector inversely scaled by s */
  679. B3_FORCE_INLINE b3Vector3
  680. operator/(const b3Vector3& v, const b3Scalar& s)
  681. {
  682. b3FullAssert(s != b3Scalar(0.0));
  683. #if 0 //defined(B3_USE_SSE_IN_API)
  684. // this code is not faster !
  685. __m128 vs = _mm_load_ss(&s);
  686. vs = _mm_div_ss(b3v1110, vs);
  687. vs = b3_pshufd_ps(vs, 0x00); // (S S S S)
  688. return b3Vector3(_mm_mul_ps(v.mVec128, vs));
  689. #else
  690. return v * (b3Scalar(1.0) / s);
  691. #endif
  692. }
  693. /**@brief Return the vector inversely scaled by s */
  694. B3_FORCE_INLINE b3Vector3
  695. operator/(const b3Vector3& v1, const b3Vector3& v2)
  696. {
  697. #if (defined(B3_USE_SSE_IN_API)&& defined (B3_USE_SSE))
  698. __m128 vec = _mm_div_ps(v1.mVec128, v2.mVec128);
  699. vec = _mm_and_ps(vec, b3vFFF0fMask);
  700. return b3MakeVector3(vec);
  701. #elif defined(B3_USE_NEON)
  702. float32x4_t x, y, v, m;
  703. x = v1.mVec128;
  704. y = v2.mVec128;
  705. v = vrecpeq_f32(y); // v ~ 1/y
  706. m = vrecpsq_f32(y, v); // m = (2-v*y)
  707. v = vmulq_f32(v, m); // vv = v*m ~~ 1/y
  708. m = vrecpsq_f32(y, v); // mm = (2-vv*y)
  709. v = vmulq_f32(v, x); // x*vv
  710. v = vmulq_f32(v, m); // (x*vv)*(2-vv*y) = x*(vv(2-vv*y)) ~~~ x/y
  711. return b3Vector3(v);
  712. #else
  713. return b3MakeVector3(
  714. v1.m_floats[0] / v2.m_floats[0],
  715. v1.m_floats[1] / v2.m_floats[1],
  716. v1.m_floats[2] / v2.m_floats[2]);
  717. #endif
  718. }
  719. /**@brief Return the dot product between two vectors */
  720. B3_FORCE_INLINE b3Scalar
  721. b3Dot(const b3Vector3& v1, const b3Vector3& v2)
  722. {
  723. return v1.dot(v2);
  724. }
  725. /**@brief Return the distance squared between two vectors */
  726. B3_FORCE_INLINE b3Scalar
  727. b3Distance2(const b3Vector3& v1, const b3Vector3& v2)
  728. {
  729. return v1.distance2(v2);
  730. }
  731. /**@brief Return the distance between two vectors */
  732. B3_FORCE_INLINE b3Scalar
  733. b3Distance(const b3Vector3& v1, const b3Vector3& v2)
  734. {
  735. return v1.distance(v2);
  736. }
  737. /**@brief Return the angle between two vectors */
  738. B3_FORCE_INLINE b3Scalar
  739. b3Angle(const b3Vector3& v1, const b3Vector3& v2)
  740. {
  741. return v1.angle(v2);
  742. }
  743. /**@brief Return the cross product of two vectors */
  744. B3_FORCE_INLINE b3Vector3
  745. b3Cross(const b3Vector3& v1, const b3Vector3& v2)
  746. {
  747. return v1.cross(v2);
  748. }
  749. B3_FORCE_INLINE b3Scalar
  750. b3Triple(const b3Vector3& v1, const b3Vector3& v2, const b3Vector3& v3)
  751. {
  752. return v1.triple(v2, v3);
  753. }
  754. /**@brief Return the linear interpolation between two vectors
  755. * @param v1 One vector
  756. * @param v2 The other vector
  757. * @param t The ration of this to v (t = 0 => return v1, t=1 => return v2) */
  758. B3_FORCE_INLINE b3Vector3
  759. b3Lerp(const b3Vector3& v1, const b3Vector3& v2, const b3Scalar& t)
  760. {
  761. return v1.lerp(v2, t);
  762. }
  763. B3_FORCE_INLINE b3Scalar b3Vector3::distance2(const b3Vector3& v) const
  764. {
  765. return (v - *this).length2();
  766. }
  767. B3_FORCE_INLINE b3Scalar b3Vector3::distance(const b3Vector3& v) const
  768. {
  769. return (v - *this).length();
  770. }
  771. B3_FORCE_INLINE b3Vector3 b3Vector3::normalized() const
  772. {
  773. #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
  774. b3Vector3 norm = *this;
  775. return norm.normalize();
  776. #else
  777. return *this / length();
  778. #endif
  779. }
  780. B3_FORCE_INLINE b3Vector3 b3Vector3::rotate( const b3Vector3& wAxis, const b3Scalar _angle ) const
  781. {
  782. // wAxis must be a unit lenght vector
  783. #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
  784. __m128 O = _mm_mul_ps(wAxis.mVec128, mVec128);
  785. b3Scalar ssin = b3Sin( _angle );
  786. __m128 C = wAxis.cross( b3MakeVector3(mVec128) ).mVec128;
  787. O = _mm_and_ps(O, b3vFFF0fMask);
  788. b3Scalar scos = b3Cos( _angle );
  789. __m128 vsin = _mm_load_ss(&ssin); // (S 0 0 0)
  790. __m128 vcos = _mm_load_ss(&scos); // (S 0 0 0)
  791. __m128 Y = b3_pshufd_ps(O, 0xC9); // (Y Z X 0)
  792. __m128 Z = b3_pshufd_ps(O, 0xD2); // (Z X Y 0)
  793. O = _mm_add_ps(O, Y);
  794. vsin = b3_pshufd_ps(vsin, 0x80); // (S S S 0)
  795. O = _mm_add_ps(O, Z);
  796. vcos = b3_pshufd_ps(vcos, 0x80); // (S S S 0)
  797. vsin = vsin * C;
  798. O = O * wAxis.mVec128;
  799. __m128 X = mVec128 - O;
  800. O = O + vsin;
  801. vcos = vcos * X;
  802. O = O + vcos;
  803. return b3MakeVector3(O);
  804. #else
  805. b3Vector3 o = wAxis * wAxis.dot( *this );
  806. b3Vector3 _x = *this - o;
  807. b3Vector3 _y;
  808. _y = wAxis.cross( *this );
  809. return ( o + _x * b3Cos( _angle ) + _y * b3Sin( _angle ) );
  810. #endif
  811. }
  812. B3_FORCE_INLINE long b3Vector3::maxDot( const b3Vector3 *array, long array_count, b3Scalar &dotOut ) const
  813. {
  814. #if defined (B3_USE_SSE) || defined (B3_USE_NEON)
  815. #if defined _WIN32 || defined (B3_USE_SSE)
  816. const long scalar_cutoff = 10;
  817. long b3_maxdot_large( const float *array, const float *vec, unsigned long array_count, float *dotOut );
  818. #elif defined B3_USE_NEON
  819. const long scalar_cutoff = 4;
  820. extern long (*_maxdot_large)( const float *array, const float *vec, unsigned long array_count, float *dotOut );
  821. #endif
  822. if( array_count < scalar_cutoff )
  823. #else
  824. #endif//B3_USE_SSE || B3_USE_NEON
  825. {
  826. b3Scalar maxDot = -B3_INFINITY;
  827. int i = 0;
  828. int ptIndex = -1;
  829. for( i = 0; i < array_count; i++ )
  830. {
  831. b3Scalar dot = array[i].dot(*this);
  832. if( dot > maxDot )
  833. {
  834. maxDot = dot;
  835. ptIndex = i;
  836. }
  837. }
  838. b3Assert(ptIndex>=0);
  839. if (ptIndex<0)
  840. {
  841. ptIndex = 0;
  842. }
  843. dotOut = maxDot;
  844. return ptIndex;
  845. }
  846. #if defined (B3_USE_SSE) || defined (B3_USE_NEON)
  847. return b3_maxdot_large( (float*) array, (float*) &m_floats[0], array_count, &dotOut );
  848. #endif
  849. }
  850. B3_FORCE_INLINE long b3Vector3::minDot( const b3Vector3 *array, long array_count, b3Scalar &dotOut ) const
  851. {
  852. #if defined (B3_USE_SSE) || defined (B3_USE_NEON)
  853. #if defined B3_USE_SSE
  854. const long scalar_cutoff = 10;
  855. long b3_mindot_large( const float *array, const float *vec, unsigned long array_count, float *dotOut );
  856. #elif defined B3_USE_NEON
  857. const long scalar_cutoff = 4;
  858. extern long (*b3_mindot_large)( const float *array, const float *vec, unsigned long array_count, float *dotOut );
  859. #else
  860. #error unhandled arch!
  861. #endif
  862. if( array_count < scalar_cutoff )
  863. #endif//B3_USE_SSE || B3_USE_NEON
  864. {
  865. b3Scalar minDot = B3_INFINITY;
  866. int i = 0;
  867. int ptIndex = -1;
  868. for( i = 0; i < array_count; i++ )
  869. {
  870. b3Scalar dot = array[i].dot(*this);
  871. if( dot < minDot )
  872. {
  873. minDot = dot;
  874. ptIndex = i;
  875. }
  876. }
  877. dotOut = minDot;
  878. return ptIndex;
  879. }
  880. #if defined (B3_USE_SSE) || defined (B3_USE_NEON)
  881. return b3_mindot_large( (float*) array, (float*) &m_floats[0], array_count, &dotOut );
  882. #endif
  883. }
  884. class b3Vector4 : public b3Vector3
  885. {
  886. public:
  887. B3_FORCE_INLINE b3Vector4 absolute4() const
  888. {
  889. #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
  890. return b3MakeVector4(_mm_and_ps(mVec128, b3vAbsfMask));
  891. #elif defined(B3_USE_NEON)
  892. return b3Vector4(vabsq_f32(mVec128));
  893. #else
  894. return b3MakeVector4(
  895. b3Fabs(m_floats[0]),
  896. b3Fabs(m_floats[1]),
  897. b3Fabs(m_floats[2]),
  898. b3Fabs(m_floats[3]));
  899. #endif
  900. }
  901. b3Scalar getW() const { return m_floats[3];}
  902. B3_FORCE_INLINE int maxAxis4() const
  903. {
  904. int maxIndex = -1;
  905. b3Scalar maxVal = b3Scalar(-B3_LARGE_FLOAT);
  906. if (m_floats[0] > maxVal)
  907. {
  908. maxIndex = 0;
  909. maxVal = m_floats[0];
  910. }
  911. if (m_floats[1] > maxVal)
  912. {
  913. maxIndex = 1;
  914. maxVal = m_floats[1];
  915. }
  916. if (m_floats[2] > maxVal)
  917. {
  918. maxIndex = 2;
  919. maxVal =m_floats[2];
  920. }
  921. if (m_floats[3] > maxVal)
  922. {
  923. maxIndex = 3;
  924. maxVal = m_floats[3];
  925. }
  926. return maxIndex;
  927. }
  928. B3_FORCE_INLINE int minAxis4() const
  929. {
  930. int minIndex = -1;
  931. b3Scalar minVal = b3Scalar(B3_LARGE_FLOAT);
  932. if (m_floats[0] < minVal)
  933. {
  934. minIndex = 0;
  935. minVal = m_floats[0];
  936. }
  937. if (m_floats[1] < minVal)
  938. {
  939. minIndex = 1;
  940. minVal = m_floats[1];
  941. }
  942. if (m_floats[2] < minVal)
  943. {
  944. minIndex = 2;
  945. minVal =m_floats[2];
  946. }
  947. if (m_floats[3] < minVal)
  948. {
  949. minIndex = 3;
  950. minVal = m_floats[3];
  951. }
  952. return minIndex;
  953. }
  954. B3_FORCE_INLINE int closestAxis4() const
  955. {
  956. return absolute4().maxAxis4();
  957. }
  958. /**@brief Set x,y,z and zero w
  959. * @param x Value of x
  960. * @param y Value of y
  961. * @param z Value of z
  962. */
  963. /* void getValue(b3Scalar *m) const
  964. {
  965. m[0] = m_floats[0];
  966. m[1] = m_floats[1];
  967. m[2] =m_floats[2];
  968. }
  969. */
  970. /**@brief Set the values
  971. * @param x Value of x
  972. * @param y Value of y
  973. * @param z Value of z
  974. * @param w Value of w
  975. */
  976. B3_FORCE_INLINE void setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z,const b3Scalar& _w)
  977. {
  978. m_floats[0]=_x;
  979. m_floats[1]=_y;
  980. m_floats[2]=_z;
  981. m_floats[3]=_w;
  982. }
  983. };
  984. ///b3SwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
  985. B3_FORCE_INLINE void b3SwapScalarEndian(const b3Scalar& sourceVal, b3Scalar& destVal)
  986. {
  987. #ifdef B3_USE_DOUBLE_PRECISION
  988. unsigned char* dest = (unsigned char*) &destVal;
  989. unsigned char* src = (unsigned char*) &sourceVal;
  990. dest[0] = src[7];
  991. dest[1] = src[6];
  992. dest[2] = src[5];
  993. dest[3] = src[4];
  994. dest[4] = src[3];
  995. dest[5] = src[2];
  996. dest[6] = src[1];
  997. dest[7] = src[0];
  998. #else
  999. unsigned char* dest = (unsigned char*) &destVal;
  1000. unsigned char* src = (unsigned char*) &sourceVal;
  1001. dest[0] = src[3];
  1002. dest[1] = src[2];
  1003. dest[2] = src[1];
  1004. dest[3] = src[0];
  1005. #endif //B3_USE_DOUBLE_PRECISION
  1006. }
  1007. ///b3SwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
  1008. B3_FORCE_INLINE void b3SwapVector3Endian(const b3Vector3& sourceVec, b3Vector3& destVec)
  1009. {
  1010. for (int i=0;i<4;i++)
  1011. {
  1012. b3SwapScalarEndian(sourceVec[i],destVec[i]);
  1013. }
  1014. }
  1015. ///b3UnSwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
  1016. B3_FORCE_INLINE void b3UnSwapVector3Endian(b3Vector3& vector)
  1017. {
  1018. b3Vector3 swappedVec;
  1019. for (int i=0;i<4;i++)
  1020. {
  1021. b3SwapScalarEndian(vector[i],swappedVec[i]);
  1022. }
  1023. vector = swappedVec;
  1024. }
  1025. template <class T>
  1026. B3_FORCE_INLINE void b3PlaneSpace1 (const T& n, T& p, T& q)
  1027. {
  1028. if (b3Fabs(n[2]) > B3_SQRT12) {
  1029. // choose p in y-z plane
  1030. b3Scalar a = n[1]*n[1] + n[2]*n[2];
  1031. b3Scalar k = b3RecipSqrt (a);
  1032. p[0] = 0;
  1033. p[1] = -n[2]*k;
  1034. p[2] = n[1]*k;
  1035. // set q = n x p
  1036. q[0] = a*k;
  1037. q[1] = -n[0]*p[2];
  1038. q[2] = n[0]*p[1];
  1039. }
  1040. else {
  1041. // choose p in x-y plane
  1042. b3Scalar a = n[0]*n[0] + n[1]*n[1];
  1043. b3Scalar k = b3RecipSqrt (a);
  1044. p[0] = -n[1]*k;
  1045. p[1] = n[0]*k;
  1046. p[2] = 0;
  1047. // set q = n x p
  1048. q[0] = -n[2]*p[1];
  1049. q[1] = n[2]*p[0];
  1050. q[2] = a*k;
  1051. }
  1052. }
  /// Plain single-precision storage for a vector: four floats, filled by
  /// b3Vector3::serializeFloat and read by b3Vector3::deSerializeFloat.
  struct b3Vector3FloatData
  {
      float m_floats[4];  ///< the four stored components
  };
  /// Plain double-precision storage for a vector: four doubles, filled by
  /// b3Vector3::serializeDouble and read by b3Vector3::deSerializeDouble.
  struct b3Vector3DoubleData
  {
      double m_floats[4];  ///< the four stored components
  };
  1061. B3_FORCE_INLINE void b3Vector3::serializeFloat(struct b3Vector3FloatData& dataOut) const
  1062. {
  1063. ///could also do a memcpy, check if it is worth it
  1064. for (int i=0;i<4;i++)
  1065. dataOut.m_floats[i] = float(m_floats[i]);
  1066. }
  1067. B3_FORCE_INLINE void b3Vector3::deSerializeFloat(const struct b3Vector3FloatData& dataIn)
  1068. {
  1069. for (int i=0;i<4;i++)
  1070. m_floats[i] = b3Scalar(dataIn.m_floats[i]);
  1071. }
  1072. B3_FORCE_INLINE void b3Vector3::serializeDouble(struct b3Vector3DoubleData& dataOut) const
  1073. {
  1074. ///could also do a memcpy, check if it is worth it
  1075. for (int i=0;i<4;i++)
  1076. dataOut.m_floats[i] = double(m_floats[i]);
  1077. }
  1078. B3_FORCE_INLINE void b3Vector3::deSerializeDouble(const struct b3Vector3DoubleData& dataIn)
  1079. {
  1080. for (int i=0;i<4;i++)
  1081. m_floats[i] = b3Scalar(dataIn.m_floats[i]);
  1082. }
  1083. B3_FORCE_INLINE void b3Vector3::serialize(struct b3Vector3Data& dataOut) const
  1084. {
  1085. ///could also do a memcpy, check if it is worth it
  1086. for (int i=0;i<4;i++)
  1087. dataOut.m_floats[i] = m_floats[i];
  1088. }
  1089. B3_FORCE_INLINE void b3Vector3::deSerialize(const struct b3Vector3Data& dataIn)
  1090. {
  1091. for (int i=0;i<4;i++)
  1092. m_floats[i] = dataIn.m_floats[i];
  1093. }
  1094. inline b3Vector3 b3MakeVector3(b3Scalar x,b3Scalar y,b3Scalar z)
  1095. {
  1096. b3Vector3 tmp;
  1097. tmp.setValue(x,y,z);
  1098. return tmp;
  1099. }
  1100. inline b3Vector3 b3MakeVector3(b3Scalar x,b3Scalar y,b3Scalar z, b3Scalar w)
  1101. {
  1102. b3Vector3 tmp;
  1103. tmp.setValue(x,y,z);
  1104. tmp.w = w;
  1105. return tmp;
  1106. }
  1107. inline b3Vector4 b3MakeVector4(b3Scalar x,b3Scalar y,b3Scalar z,b3Scalar w)
  1108. {
  1109. b3Vector4 tmp;
  1110. tmp.setValue(x,y,z,w);
  1111. return tmp;
  1112. }
  1113. #if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
  1114. inline b3Vector3 b3MakeVector3( b3SimdFloat4 v)
  1115. {
  1116. b3Vector3 tmp;
  1117. tmp.set128(v);
  1118. return tmp;
  1119. }
  1120. inline b3Vector4 b3MakeVector4(b3SimdFloat4 vec)
  1121. {
  1122. b3Vector4 tmp;
  1123. tmp.set128(vec);
  1124. return tmp;
  1125. }
  1126. #endif
  1127. #endif //B3_VECTOR3_H