mMath_C.cpp 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953
  1. //-----------------------------------------------------------------------------
  2. // Copyright (c) 2012 GarageGames, LLC
  3. //
  4. // Permission is hereby granted, free of charge, to any person obtaining a copy
  5. // of this software and associated documentation files (the "Software"), to
  6. // deal in the Software without restriction, including without limitation the
  7. // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  8. // sell copies of the Software, and to permit persons to whom the Software is
  9. // furnished to do so, subject to the following conditions:
  10. //
  11. // The above copyright notice and this permission notice shall be included in
  12. // all copies or substantial portions of the Software.
  13. //
  14. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  19. // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  20. // IN THE SOFTWARE.
  21. //-----------------------------------------------------------------------------
  22. #include "platform/platform.h"
  23. #include "math/mMath.h"
  24. #include "math/util/frustum.h"
  25. #include <math.h> // Caution!!! Possible platform specific include
  26. //------------------------------------------------------------------------------
  27. // C version of Math Library
  28. // asm externals
  29. extern "C" {
  30. S32 m_mulDivS32_ASM( S32 a, S32 b, S32 c );
  31. U32 m_mulDivU32_ASM( U32 a, U32 b, U32 c );
  32. F32 m_fmod_ASM(F32 val, F32 modulo);
  33. F32 m_fmodD_ASM(F64 val, F64 modulo);
  34. void m_sincos_ASM( F32 angle, F32 *s, F32 *c );
  35. void m_sincosD_ASM( F64 angle, F64 *s, F64 *c );
  36. }
  37. //--------------------------------------
  38. static S32 m_mulDivS32_C(S32 a, S32 b, S32 c)
  39. {
  40. // S64/U64 support in most 32-bit compilers generate
  41. // horrible code, the C version are here just for porting
  42. // assembly implementation is recommended
  43. return (S32) ((S64)a*(S64)b) / (S64)c;
  44. }
  45. static U32 m_mulDivU32_C(S32 a, S32 b, U32 c)
  46. {
  47. return (U32) ((S64)a*(S64)b) / (U64)c;
  48. }
  49. //--------------------------------------
  50. static F32 m_catmullrom_C(F32 t, F32 p0, F32 p1, F32 p2, F32 p3)
  51. {
  52. return 0.5f * ((3.0f*p1 - 3.0f*p2 + p3 - p0)*t*t*t
  53. + (2.0f*p0 - 5.0f*p1 + 4.0f*p2 - p3)*t*t
  54. + (p2-p0)*t
  55. + 2.0f*p1);
  56. }
  57. //--------------------------------------
  58. static void m_sincos_C( F32 angle, F32 *s, F32 *c )
  59. {
  60. *s = mSin( angle );
  61. *c = mCos( angle );
  62. }
  63. static void m_sincosD_C( F64 angle, F64 *s, F64 *c )
  64. {
  65. *s = mSin( angle );
  66. *c = mCos( angle );
  67. }
  68. //--------------------------------------
  69. static void m_point2F_normalize_C(F32 *p)
  70. {
  71. F32 factor = 1.0f / mSqrt(p[0]*p[0] + p[1]*p[1] );
  72. p[0] *= factor;
  73. p[1] *= factor;
  74. }
  75. //--------------------------------------
  76. static void m_point2F_normalize_f_C(F32 *p, F32 val)
  77. {
  78. F32 factor = val / mSqrt(p[0]*p[0] + p[1]*p[1] );
  79. p[0] *= factor;
  80. p[1] *= factor;
  81. }
  82. //--------------------------------------
  83. static void m_point2D_normalize_C(F64 *p)
  84. {
  85. F64 factor = 1.0f / mSqrtD(p[0]*p[0] + p[1]*p[1] );
  86. p[0] *= factor;
  87. p[1] *= factor;
  88. }
  89. //--------------------------------------
  90. static void m_point2D_normalize_f_C(F64 *p, F64 val)
  91. {
  92. F64 factor = val / mSqrtD(p[0]*p[0] + p[1]*p[1] );
  93. p[0] *= factor;
  94. p[1] *= factor;
  95. }
  96. //--------------------------------------
  97. static void m_point3D_normalize_f_C(F64 *p, F64 val)
  98. {
  99. F64 factor = val / mSqrtD(p[0]*p[0] + p[1]*p[1] + p[2]*p[2]);
  100. p[0] *= factor;
  101. p[1] *= factor;
  102. p[2] *= factor;
  103. }
  104. //--------------------------------------
  105. static void m_point3F_normalize_C(F32 *p)
  106. {
  107. F32 squared = p[0]*p[0] + p[1]*p[1] + p[2]*p[2];
  108. // This can happen in Container::castRay -> ForceFieldBare::castRay
  109. //AssertFatal(squared != 0.0, "Error, zero length vector normalized!");
  110. if (squared != 0.0f) {
  111. F32 factor = 1.0f / mSqrt(squared);
  112. p[0] *= factor;
  113. p[1] *= factor;
  114. p[2] *= factor;
  115. } else {
  116. p[0] = 0.0f;
  117. p[1] = 0.0f;
  118. p[2] = 1.0f;
  119. }
  120. }
  121. //--------------------------------------
  122. static void m_point3F_normalize_f_C(F32 *p, F32 val)
  123. {
  124. F32 factor = val / mSqrt(p[0]*p[0] + p[1]*p[1] + p[2]*p[2] );
  125. p[0] *= factor;
  126. p[1] *= factor;
  127. p[2] *= factor;
  128. }
  129. //--------------------------------------
  130. static void m_point3F_interpolate_C(const F32 *from, const F32 *to, F32 factor, F32 *result )
  131. {
  132. #ifdef TORQUE_COMPILER_GCC
  133. // remove possibility of aliases
  134. const F32 inverse = 1.0f - factor;
  135. const F32 from0 = from[0], from1 = from[1], from2 = from[2];
  136. const F32 to0 = to[0], to1 = to[1], to2 = to[2];
  137. result[0] = from0 * inverse + to0 * factor;
  138. result[1] = from1 * inverse + to1 * factor;
  139. result[2] = from2 * inverse + to2 * factor;
  140. #else
  141. F32 inverse = 1.0f - factor;
  142. result[0] = from[0] * inverse + to[0] * factor;
  143. result[1] = from[1] * inverse + to[1] * factor;
  144. result[2] = from[2] * inverse + to[2] * factor;
  145. #endif
  146. }
  147. //--------------------------------------
  148. static void m_point3D_normalize_C(F64 *p)
  149. {
  150. F64 factor = 1.0f / mSqrtD(p[0]*p[0] + p[1]*p[1] + p[2]*p[2] );
  151. p[0] *= factor;
  152. p[1] *= factor;
  153. p[2] *= factor;
  154. }
  155. //--------------------------------------
  156. static void m_point3D_interpolate_C(const F64 *from, const F64 *to, F64 factor, F64 *result )
  157. {
  158. #ifdef TORQUE_COMPILER_GCC
  159. // remove possibility of aliases
  160. const F64 inverse = 1.0f - factor;
  161. const F64 from0 = from[0], from1 = from[1], from2 = from[2];
  162. const F64 to0 = to[0], to1 = to[1], to2 = to[2];
  163. result[0] = from0 * inverse + to0 * factor;
  164. result[1] = from1 * inverse + to1 * factor;
  165. result[2] = from2 * inverse + to2 * factor;
  166. #else
  167. F64 inverse = 1.0f - factor;
  168. result[0] = from[0] * inverse + to[0] * factor;
  169. result[1] = from[1] * inverse + to[1] * factor;
  170. result[2] = from[2] * inverse + to[2] * factor;
  171. #endif
  172. }
  173. static void m_quatF_set_matF_C( F32 x, F32 y, F32 z, F32 w, F32* m )
  174. {
  175. #define qidx(r,c) (r*4 + c)
  176. F32 xs = x * 2.0f;
  177. F32 ys = y * 2.0f;
  178. F32 zs = z * 2.0f;
  179. F32 wx = w * xs;
  180. F32 wy = w * ys;
  181. F32 wz = w * zs;
  182. F32 xx = x * xs;
  183. F32 xy = x * ys;
  184. F32 xz = x * zs;
  185. F32 yy = y * ys;
  186. F32 yz = y * zs;
  187. F32 zz = z * zs;
  188. m[qidx(0,0)] = 1.0f - (yy + zz);
  189. m[qidx(1,0)] = xy - wz;
  190. m[qidx(2,0)] = xz + wy;
  191. m[qidx(3,0)] = 0.0f;
  192. m[qidx(0,1)] = xy + wz;
  193. m[qidx(1,1)] = 1.0f - (xx + zz);
  194. m[qidx(2,1)] = yz - wx;
  195. m[qidx(3,1)] = 0.0f;
  196. m[qidx(0,2)] = xz - wy;
  197. m[qidx(1,2)] = yz + wx;
  198. m[qidx(2,2)] = 1.0f - (xx + yy);
  199. m[qidx(3,2)] = 0.0f;
  200. m[qidx(0,3)] = 0.0f;
  201. m[qidx(1,3)] = 0.0f;
  202. m[qidx(2,3)] = 0.0f;
  203. m[qidx(3,3)] = 1.0f;
  204. #undef qidx
  205. }
  206. //--------------------------------------
  207. static void m_matF_set_euler_C(const F32 *e, F32 *result)
  208. {
  209. enum {
  210. AXIS_X = (1<<0),
  211. AXIS_Y = (1<<1),
  212. AXIS_Z = (1<<2)
  213. };
  214. U32 axis = 0;
  215. if (e[0] != 0.0f) axis |= AXIS_X;
  216. if (e[1] != 0.0f) axis |= AXIS_Y;
  217. if (e[2] != 0.0f) axis |= AXIS_Z;
  218. switch (axis)
  219. {
  220. case 0:
  221. m_matF_identity(result);
  222. break;
  223. case AXIS_X:
  224. {
  225. F32 cx,sx;
  226. mSinCos( e[0], sx, cx );
  227. result[0] = 1.0f;
  228. result[1] = 0.0f;
  229. result[2] = 0.0f;
  230. result[3] = 0.0f;
  231. result[4] = 0.0f;
  232. result[5] = cx;
  233. result[6] = sx;
  234. result[7] = 0.0f;
  235. result[8] = 0.0f;
  236. result[9] = -sx;
  237. result[10]= cx;
  238. result[11]= 0.0f;
  239. result[12]= 0.0f;
  240. result[13]= 0.0f;
  241. result[14]= 0.0f;
  242. result[15]= 1.0f;
  243. break;
  244. }
  245. case AXIS_Y:
  246. {
  247. F32 cy,sy;
  248. mSinCos( e[1], sy, cy );
  249. result[0] = cy;
  250. result[1] = 0.0f;
  251. result[2] = -sy;
  252. result[3] = 0.0f;
  253. result[4] = 0.0f;
  254. result[5] = 1.0f;
  255. result[6] = 0.0f;
  256. result[7] = 0.0f;
  257. result[8] = sy;
  258. result[9] = 0.0f;
  259. result[10]= cy;
  260. result[11]= 0.0f;
  261. result[12]= 0.0f;
  262. result[13]= 0.0f;
  263. result[14]= 0.0f;
  264. result[15]= 1.0f;
  265. break;
  266. }
  267. case AXIS_Z:
  268. {
  269. // the matrix looks like this:
  270. // r1 - (r4 * sin(x)) r2 + (r3 * sin(x)) -cos(x) * sin(y)
  271. // -cos(x) * sin(z) cos(x) * cos(z) sin(x)
  272. // r3 + (r2 * sin(x)) r4 - (r1 * sin(x)) cos(x) * cos(y)
  273. //
  274. // where:
  275. // r1 = cos(y) * cos(z)
  276. // r2 = cos(y) * sin(z)
  277. // r3 = sin(y) * cos(z)
  278. // r4 = sin(y) * sin(z)
  279. F32 cz,sz;
  280. mSinCos( e[2], sz, cz );
  281. result[0] = cz;
  282. result[1] = sz;
  283. result[2] = 0.0f;
  284. result[3] = 0.0f;
  285. result[4] = -sz;
  286. result[5] = cz;
  287. result[6] = 0.0f;
  288. result[7] = 0.0f;
  289. result[8] = 0.0f;
  290. result[9] = 0.0f;
  291. result[10]= 1.0f;
  292. result[11]= 0.0f;
  293. result[12]= 0.0f;
  294. result[13]= 0.0f;
  295. result[14]= 0.0f;
  296. result[15]= 1.0f;
  297. break;
  298. }
  299. default:
  300. // the matrix looks like this:
  301. // r1 - (r4 * sin(x)) r2 + (r3 * sin(x)) -cos(x) * sin(y)
  302. // -cos(x) * sin(z) cos(x) * cos(z) sin(x)
  303. // r3 + (r2 * sin(x)) r4 - (r1 * sin(x)) cos(x) * cos(y)
  304. //
  305. // where:
  306. // r1 = cos(y) * cos(z)
  307. // r2 = cos(y) * sin(z)
  308. // r3 = sin(y) * cos(z)
  309. // r4 = sin(y) * sin(z)
  310. F32 cx,sx;
  311. mSinCos( e[0], sx, cx );
  312. F32 cy,sy;
  313. mSinCos( e[1], sy, cy );
  314. F32 cz,sz;
  315. mSinCos( e[2], sz, cz );
  316. F32 r1 = cy * cz;
  317. F32 r2 = cy * sz;
  318. F32 r3 = sy * cz;
  319. F32 r4 = sy * sz;
  320. result[0] = r1 - (r4 * sx);
  321. result[1] = r2 + (r3 * sx);
  322. result[2] = -cx * sy;
  323. result[3] = 0.0f;
  324. result[4] = -cx * sz;
  325. result[5] = cx * cz;
  326. result[6] = sx;
  327. result[7] = 0.0f;
  328. result[8] = r3 + (r2 * sx);
  329. result[9] = r4 - (r1 * sx);
  330. result[10]= cx * cy;
  331. result[11]= 0.0f;
  332. result[12]= 0.0f;
  333. result[13]= 0.0f;
  334. result[14]= 0.0f;
  335. result[15]= 1.0f;
  336. break;
  337. }
  338. }
  339. //--------------------------------------
  340. static void m_matF_set_euler_point_C(const F32 *e, const F32 *p, F32 *result)
  341. {
  342. m_matF_set_euler(e, result);
  343. result[3] = p[0];
  344. result[7] = p[1];
  345. result[11]= p[2];
  346. }
  347. //--------------------------------------
  348. static void m_matF_identity_C(F32 *m)
  349. {
  350. *m++ = 1.0f;
  351. *m++ = 0.0f;
  352. *m++ = 0.0f;
  353. *m++ = 0.0f;
  354. *m++ = 0.0f;
  355. *m++ = 1.0f;
  356. *m++ = 0.0f;
  357. *m++ = 0.0f;
  358. *m++ = 0.0f;
  359. *m++ = 0.0f;
  360. *m++ = 1.0f;
  361. *m++ = 0.0f;
  362. *m++ = 0.0f;
  363. *m++ = 0.0f;
  364. *m++ = 0.0f;
  365. *m = 1.0f;
  366. }
  367. #if 0
  368. // Compile this out till we hook it up. It's a more efficient matrix
  369. // inverse than what we have (it uses intermediate results of determinant
  370. // to same about 1/4 of the operations.
  371. static void affineInvertTo(const F32 * m, F32 * out)
  372. {
  373. #define idx(r,c) (r*4 + c)
  374. F32 d1 = m[idx(2,2)] * m[idx(1,1)] - m[idx(2,1)] * m[idx(1,2)];
  375. F32 d2 = m[idx(2,0)] * m[idx(1,2)] - m[idx(2,2)] * m[idx(1,0)];
  376. F32 d3 = m[idx(2,1)] * m[idx(1,0)] - m[idx(2,0)] * m[idx(1,1)];
  377. F32 invDet = 1.0f / (m[idx(0,0)] * d1 + m[idx(0,1)] * d2 + m[idx(0,2)] * d3);
  378. F32 m00 = m[idx(0,0)] * invDet;
  379. F32 m01 = m[idx(0,1)] * invDet;
  380. F32 m02 = m[idx(0,2)] * invDet;
  381. F32 * result = out;
  382. *out++ = d1 * invDet;
  383. *out++ = m02 * m[idx(2,1)] - m01 * m[idx(2,2)];
  384. *out++ = m01 * m[idx(1,2)] - m02 * m[idx(1,1)];
  385. *out++ = 0.0f;
  386. *out++ = d2 * invDet;
  387. *out++ = m00 * m[idx(2,2)] - m02 * m[idx(2,0)];
  388. *out++ = m02 * m[idx(1,0)] - m00 * m[idx(1,2)];
  389. *out++ = 0.0f;
  390. *out++ = d3 * invDet;
  391. *out++ = m01 * m[idx(2,0)] - m00 * m[idx(2,1)];
  392. *out++ = m00 * m[idx(1,1)] - m01 * m[idx(1,0)];
  393. *out++ = 0.0f;
  394. *out++ = -result[idx(0,0)] * m[idx(0,3)] - result[idx(0,1)] * m[idx(1,3)] - result[idx(0,2)] * m[idx(2,3)];
  395. *out++ = -result[idx(1,0)] * m[idx(0,3)] - result[idx(1,1)] * m[idx(1,3)] - result[idx(1,2)] * m[idx(2,3)];
  396. *out++ = -result[idx(2,0)] * m[idx(0,3)] - result[idx(2,1)] * m[idx(1,3)] - result[idx(2,2)] * m[idx(2,3)];
  397. *out++ = 1.0f;
  398. #undef idx
  399. }
  400. #endif
  401. //--------------------------------------
  402. static void m_matF_inverse_C(F32 *m)
  403. {
  404. // using Cramers Rule find the Inverse
  405. // Minv = (1/det(M)) * adjoint(M)
  406. F32 det = m_matF_determinant( m );
  407. AssertFatal( det != 0.0f, "MatrixF::inverse: non-singular matrix, no inverse.");
  408. F32 invDet = 1.0f/det;
  409. F32 temp[16];
  410. temp[0] = (m[5] * m[10]- m[6] * m[9]) * invDet;
  411. temp[1] = (m[9] * m[2] - m[10]* m[1]) * invDet;
  412. temp[2] = (m[1] * m[6] - m[2] * m[5]) * invDet;
  413. temp[4] = (m[6] * m[8] - m[4] * m[10])* invDet;
  414. temp[5] = (m[10]* m[0] - m[8] * m[2]) * invDet;
  415. temp[6] = (m[2] * m[4] - m[0] * m[6]) * invDet;
  416. temp[8] = (m[4] * m[9] - m[5] * m[8]) * invDet;
  417. temp[9] = (m[8] * m[1] - m[9] * m[0]) * invDet;
  418. temp[10]= (m[0] * m[5] - m[1] * m[4]) * invDet;
  419. m[0] = temp[0];
  420. m[1] = temp[1];
  421. m[2] = temp[2];
  422. m[4] = temp[4];
  423. m[5] = temp[5];
  424. m[6] = temp[6];
  425. m[8] = temp[8];
  426. m[9] = temp[9];
  427. m[10] = temp[10];
  428. // invert the translation
  429. temp[0] = -m[3];
  430. temp[1] = -m[7];
  431. temp[2] = -m[11];
  432. m_matF_x_vectorF(m, temp, &temp[4]);
  433. m[3] = temp[4];
  434. m[7] = temp[5];
  435. m[11]= temp[6];
  436. }
  437. static void m_matF_invert_to_C(const F32 *m, F32 *d)
  438. {
  439. // using Cramers Rule find the Inverse
  440. // Minv = (1/det(M)) * adjoint(M)
  441. F32 det = m_matF_determinant( m );
  442. AssertFatal( det != 0.0f, "MatrixF::inverse: non-singular matrix, no inverse.");
  443. F32 invDet = 1.0f/det;
  444. d[0] = (m[5] * m[10]- m[6] * m[9]) * invDet;
  445. d[1] = (m[9] * m[2] - m[10]* m[1]) * invDet;
  446. d[2] = (m[1] * m[6] - m[2] * m[5]) * invDet;
  447. d[4] = (m[6] * m[8] - m[4] * m[10])* invDet;
  448. d[5] = (m[10]* m[0] - m[8] * m[2]) * invDet;
  449. d[6] = (m[2] * m[4] - m[0] * m[6]) * invDet;
  450. d[8] = (m[4] * m[9] - m[5] * m[8]) * invDet;
  451. d[9] = (m[8] * m[1] - m[9] * m[0]) * invDet;
  452. d[10]= (m[0] * m[5] - m[1] * m[4]) * invDet;
  453. // invert the translation
  454. F32 temp[6];
  455. temp[0] = -m[3];
  456. temp[1] = -m[7];
  457. temp[2] = -m[11];
  458. m_matF_x_vectorF(d, temp, &temp[3]);
  459. d[3] = temp[3];
  460. d[7] = temp[4];
  461. d[11]= temp[5];
  462. d[ 12 ] = m[ 12 ];
  463. d[ 13 ] = m[ 13 ];
  464. d[ 14 ] = m[ 14 ];
  465. d[ 15 ] = m[ 15 ];
  466. }
  467. //--------------------------------------
  468. static void m_matF_affineInverse_C(F32 *m)
  469. {
  470. // Matrix class checks to make sure this is an affine transform before calling
  471. // this function, so we can proceed assuming it is...
  472. F32 temp[16];
  473. dMemcpy(temp, m, 16 * sizeof(F32));
  474. // Transpose rotation
  475. m[1] = temp[4];
  476. m[4] = temp[1];
  477. m[2] = temp[8];
  478. m[8] = temp[2];
  479. m[6] = temp[9];
  480. m[9] = temp[6];
  481. m[3] = -(temp[0]*temp[3] + temp[4]*temp[7] + temp[8]*temp[11]);
  482. m[7] = -(temp[1]*temp[3] + temp[5]*temp[7] + temp[9]*temp[11]);
  483. m[11] = -(temp[2]*temp[3] + temp[6]*temp[7] + temp[10]*temp[11]);
  484. }
  485. inline void swap(F32 &a, F32 &b)
  486. {
  487. F32 temp = a;
  488. a = b;
  489. b = temp;
  490. }
  491. //--------------------------------------
  492. static void m_matF_transpose_C(F32 *m)
  493. {
  494. swap(m[1], m[4]);
  495. swap(m[2], m[8]);
  496. swap(m[3], m[12]);
  497. swap(m[6], m[9]);
  498. swap(m[7], m[13]);
  499. swap(m[11],m[14]);
  500. }
  501. //--------------------------------------
  502. static void m_matF_scale_C(F32 *m,const F32 *p)
  503. {
  504. // Note, doesn't allow scaling w...
  505. m[0] *= p[0]; m[1] *= p[1]; m[2] *= p[2];
  506. m[4] *= p[0]; m[5] *= p[1]; m[6] *= p[2];
  507. m[8] *= p[0]; m[9] *= p[1]; m[10] *= p[2];
  508. m[12] *= p[0]; m[13] *= p[1]; m[14] *= p[2];
  509. }
  510. //--------------------------------------
  511. static void m_matF_normalize_C(F32 *m)
  512. {
  513. F32 col0[3], col1[3], col2[3];
  514. // extract columns 0 and 1
  515. col0[0] = m[0];
  516. col0[1] = m[4];
  517. col0[2] = m[8];
  518. col1[0] = m[1];
  519. col1[1] = m[5];
  520. col1[2] = m[9];
  521. // assure their relationships to one another
  522. mCross(*(Point3F*)col0, *(Point3F*)col1, (Point3F*)col2);
  523. mCross(*(Point3F*)col2, *(Point3F*)col0, (Point3F*)col1);
  524. // assure their length is 1.0f
  525. m_point3F_normalize( col0 );
  526. m_point3F_normalize( col1 );
  527. m_point3F_normalize( col2 );
  528. // store the normalized columns
  529. m[0] = col0[0];
  530. m[4] = col0[1];
  531. m[8] = col0[2];
  532. m[1] = col1[0];
  533. m[5] = col1[1];
  534. m[9] = col1[2];
  535. m[2] = col2[0];
  536. m[6] = col2[1];
  537. m[10]= col2[2];
  538. }
  539. //--------------------------------------
  540. static F32 m_matF_determinant_C(const F32 *m)
  541. {
  542. return m[0] * (m[5] * m[10] - m[6] * m[9]) +
  543. m[4] * (m[2] * m[9] - m[1] * m[10]) +
  544. m[8] * (m[1] * m[6] - m[2] * m[5]) ;
  545. }
  546. //--------------------------------------
  547. // Removed static in order to write benchmarking code (that compares against
  548. // specialized SSE/AMD versions) elsewhere.
  549. void default_matF_x_matF_C(const F32 *a, const F32 *b, F32 *mresult)
  550. {
  551. mresult[0] = a[0]*b[0] + a[1]*b[4] + a[2]*b[8] + a[3]*b[12];
  552. mresult[1] = a[0]*b[1] + a[1]*b[5] + a[2]*b[9] + a[3]*b[13];
  553. mresult[2] = a[0]*b[2] + a[1]*b[6] + a[2]*b[10] + a[3]*b[14];
  554. mresult[3] = a[0]*b[3] + a[1]*b[7] + a[2]*b[11] + a[3]*b[15];
  555. mresult[4] = a[4]*b[0] + a[5]*b[4] + a[6]*b[8] + a[7]*b[12];
  556. mresult[5] = a[4]*b[1] + a[5]*b[5] + a[6]*b[9] + a[7]*b[13];
  557. mresult[6] = a[4]*b[2] + a[5]*b[6] + a[6]*b[10] + a[7]*b[14];
  558. mresult[7] = a[4]*b[3] + a[5]*b[7] + a[6]*b[11] + a[7]*b[15];
  559. mresult[8] = a[8]*b[0] + a[9]*b[4] + a[10]*b[8] + a[11]*b[12];
  560. mresult[9] = a[8]*b[1] + a[9]*b[5] + a[10]*b[9] + a[11]*b[13];
  561. mresult[10]= a[8]*b[2] + a[9]*b[6] + a[10]*b[10]+ a[11]*b[14];
  562. mresult[11]= a[8]*b[3] + a[9]*b[7] + a[10]*b[11]+ a[11]*b[15];
  563. mresult[12]= a[12]*b[0]+ a[13]*b[4]+ a[14]*b[8] + a[15]*b[12];
  564. mresult[13]= a[12]*b[1]+ a[13]*b[5]+ a[14]*b[9] + a[15]*b[13];
  565. mresult[14]= a[12]*b[2]+ a[13]*b[6]+ a[14]*b[10]+ a[15]*b[14];
  566. mresult[15]= a[12]*b[3]+ a[13]*b[7]+ a[14]*b[11]+ a[15]*b[15];
  567. }
  568. // //--------------------------------------
  569. // static void m_matF_x_point3F_C(const F32 *m, const F32 *p, F32 *presult)
  570. // {
  571. // AssertFatal(p != presult, "Error, aliasing matrix mul pointers not allowed here!");
  572. // presult[0] = m[0]*p[0] + m[1]*p[1] + m[2]*p[2] + m[3];
  573. // presult[1] = m[4]*p[0] + m[5]*p[1] + m[6]*p[2] + m[7];
  574. // presult[2] = m[8]*p[0] + m[9]*p[1] + m[10]*p[2] + m[11];
  575. // }
  576. // //--------------------------------------
  577. // static void m_matF_x_vectorF_C(const F32 *m, const F32 *v, F32 *vresult)
  578. // {
  579. // AssertFatal(v != vresult, "Error, aliasing matrix mul pointers not allowed here!");
  580. // vresult[0] = m[0]*v[0] + m[1]*v[1] + m[2]*v[2];
  581. // vresult[1] = m[4]*v[0] + m[5]*v[1] + m[6]*v[2];
  582. // vresult[2] = m[8]*v[0] + m[9]*v[1] + m[10]*v[2];
  583. // }
  584. //--------------------------------------
  585. static void m_matF_x_point4F_C(const F32 *m, const F32 *p, F32 *presult)
  586. {
  587. AssertFatal(p != presult, "Error, aliasing matrix mul pointers not allowed here!");
  588. presult[0] = m[0]*p[0] + m[1]*p[1] + m[2]*p[2] + m[3]*p[3];
  589. presult[1] = m[4]*p[0] + m[5]*p[1] + m[6]*p[2] + m[7]*p[3];
  590. presult[2] = m[8]*p[0] + m[9]*p[1] + m[10]*p[2] + m[11]*p[3];
  591. presult[3] = m[12]*p[0]+ m[13]*p[1]+ m[14]*p[2] + m[15]*p[3];
  592. }
  593. //--------------------------------------
  594. static void m_matF_x_scale_x_planeF_C(const F32* m, const F32* s, const F32* p, F32* presult)
  595. {
  596. // We take in a matrix, a scale factor, and a plane equation. We want to output
  597. // the resultant normal
  598. // We have T = m*s
  599. // To multiply the normal, we want Inv(Tr(m*s))
  600. // Inv(Tr(ms)) = Inv(Tr(s) * Tr(m))
  601. // = Inv(Tr(m)) * Inv(Tr(s))
  602. //
  603. // Inv(Tr(s)) = Inv(s) = [ 1/x 0 0 0]
  604. // [ 0 1/y 0 0]
  605. // [ 0 0 1/z 0]
  606. // [ 0 0 0 1]
  607. //
  608. // Since m is an affine matrix,
  609. // Tr(m) = [ [ ] 0 ]
  610. // [ [ R ] 0 ]
  611. // [ [ ] 0 ]
  612. // [ [ x y z ] 1 ]
  613. //
  614. // Inv(Tr(m)) = [ [ -1 ] 0 ]
  615. // [ [ R ] 0 ]
  616. // [ [ ] 0 ]
  617. // [ [ A B C ] 1 ]
  618. // Where:
  619. //
  620. // P = (x, y, z)
  621. // A = -(Row(0, r) * P);
  622. // B = -(Row(1, r) * P);
  623. // C = -(Row(2, r) * P);
  624. MatrixF invScale(true);
  625. F32* pScaleElems = invScale;
  626. pScaleElems[MatrixF::idx(0, 0)] = 1.0f / s[0];
  627. pScaleElems[MatrixF::idx(1, 1)] = 1.0f / s[1];
  628. pScaleElems[MatrixF::idx(2, 2)] = 1.0f / s[2];
  629. const Point3F shear( m[MatrixF::idx(3, 0)], m[MatrixF::idx(3, 1)], m[MatrixF::idx(3, 2)] );
  630. const Point3F row0(m[MatrixF::idx(0, 0)], m[MatrixF::idx(0, 1)], m[MatrixF::idx(0, 2)]);
  631. const Point3F row1(m[MatrixF::idx(1, 0)], m[MatrixF::idx(1, 1)], m[MatrixF::idx(1, 2)]);
  632. const Point3F row2(m[MatrixF::idx(2, 0)], m[MatrixF::idx(2, 1)], m[MatrixF::idx(2, 2)]);
  633. const F32 A = -mDot(row0, shear);
  634. const F32 B = -mDot(row1, shear);
  635. const F32 C = -mDot(row2, shear);
  636. MatrixF invTrMatrix(true);
  637. F32* destMat = invTrMatrix;
  638. destMat[MatrixF::idx(0, 0)] = m[MatrixF::idx(0, 0)];
  639. destMat[MatrixF::idx(1, 0)] = m[MatrixF::idx(1, 0)];
  640. destMat[MatrixF::idx(2, 0)] = m[MatrixF::idx(2, 0)];
  641. destMat[MatrixF::idx(0, 1)] = m[MatrixF::idx(0, 1)];
  642. destMat[MatrixF::idx(1, 1)] = m[MatrixF::idx(1, 1)];
  643. destMat[MatrixF::idx(2, 1)] = m[MatrixF::idx(2, 1)];
  644. destMat[MatrixF::idx(0, 2)] = m[MatrixF::idx(0, 2)];
  645. destMat[MatrixF::idx(1, 2)] = m[MatrixF::idx(1, 2)];
  646. destMat[MatrixF::idx(2, 2)] = m[MatrixF::idx(2, 2)];
  647. destMat[MatrixF::idx(0, 3)] = A;
  648. destMat[MatrixF::idx(1, 3)] = B;
  649. destMat[MatrixF::idx(2, 3)] = C;
  650. invTrMatrix.mul(invScale);
  651. Point3F norm(p[0], p[1], p[2]);
  652. Point3F point = norm * -p[3];
  653. invTrMatrix.mulP(norm);
  654. norm.normalize();
  655. MatrixF temp;
  656. dMemcpy(temp, m, sizeof(F32) * 16);
  657. point.x *= s[0];
  658. point.y *= s[1];
  659. point.z *= s[2];
  660. temp.mulP(point);
  661. PlaneF resultPlane(point, norm);
  662. presult[0] = resultPlane.x;
  663. presult[1] = resultPlane.y;
  664. presult[2] = resultPlane.z;
  665. presult[3] = resultPlane.d;
  666. }
  667. static void m_matF_x_box3F_C(const F32 *m, F32* min, F32* max)
  668. {
  669. // Algorithm for axis aligned bounding box adapted from
  670. // Graphic Gems I, pp 548-550
  671. //
  672. F32 originalMin[3];
  673. F32 originalMax[3];
  674. originalMin[0] = min[0];
  675. originalMin[1] = min[1];
  676. originalMin[2] = min[2];
  677. originalMax[0] = max[0];
  678. originalMax[1] = max[1];
  679. originalMax[2] = max[2];
  680. min[0] = max[0] = m[3];
  681. min[1] = max[1] = m[7];
  682. min[2] = max[2] = m[11];
  683. const F32 * row = &m[0];
  684. for (U32 i = 0; i < 3; i++)
  685. {
  686. #define Do_One_Row(j) { \
  687. F32 a = (row[j] * originalMin[j]); \
  688. F32 b = (row[j] * originalMax[j]); \
  689. if (a < b) { *min += a; *max += b; } \
  690. else { *min += b; *max += a; } }
  691. // Simpler addressing (avoiding things like [ecx+edi*4]) might be worthwhile (LH):
  692. Do_One_Row(0);
  693. Do_One_Row(1);
  694. Do_One_Row(2);
  695. row += 4;
  696. min++;
  697. max++;
  698. }
  699. }
  700. void m_point3F_bulk_dot_C(const F32* refVector,
  701. const F32* dotPoints,
  702. const U32 numPoints,
  703. const U32 pointStride,
  704. F32* output)
  705. {
  706. for (U32 i = 0; i < numPoints; i++)
  707. {
  708. F32* pPoint = (F32*)(((U8*)dotPoints) + (pointStride * i));
  709. output[i] = ((refVector[0] * pPoint[0]) +
  710. (refVector[1] * pPoint[1]) +
  711. (refVector[2] * pPoint[2]));
  712. }
  713. }
  714. void m_point3F_bulk_dot_indexed_C(const F32* refVector,
  715. const F32* dotPoints,
  716. const U32 numPoints,
  717. const U32 pointStride,
  718. const U32* pointIndices,
  719. F32* output)
  720. {
  721. for (U32 i = 0; i < numPoints; i++)
  722. {
  723. F32* pPoint = (F32*)(((U8*)dotPoints) + (pointStride * pointIndices[i]));
  724. output[i] = ((refVector[0] * pPoint[0]) +
  725. (refVector[1] * pPoint[1]) +
  726. (refVector[2] * pPoint[2]));
  727. }
  728. }
  729. //------------------------------------------------------------------------------
  730. // Math function pointer declarations
  731. S32 (*m_mulDivS32)(S32 a, S32 b, S32 c) = m_mulDivS32_C;
  732. U32 (*m_mulDivU32)(S32 a, S32 b, U32 c) = m_mulDivU32_C;
  733. F32 (*m_catmullrom)(F32 t, F32 p0, F32 p1, F32 p2, F32 p3) = m_catmullrom_C;
  734. void (*m_sincos)( F32 angle, F32 *s, F32 *c ) = m_sincos_C;
  735. void (*m_sincosD)( F64 angle, F64 *s, F64 *c ) = m_sincosD_C;
  736. void (*m_point2F_normalize)(F32 *p) = m_point2F_normalize_C;
  737. void (*m_point2F_normalize_f)(F32 *p, F32 val) = m_point2F_normalize_f_C;
  738. void (*m_point2D_normalize)(F64 *p) = m_point2D_normalize_C;
  739. void (*m_point2D_normalize_f)(F64 *p, F64 val) = m_point2D_normalize_f_C;
  740. void (*m_point3D_normalize_f)(F64 *p, F64 val) = m_point3D_normalize_f_C;
  741. void (*m_point3F_normalize)(F32 *p) = m_point3F_normalize_C;
  742. void (*m_point3F_normalize_f)(F32 *p, F32 len) = m_point3F_normalize_f_C;
  743. void (*m_point3F_interpolate)(const F32 *from, const F32 *to, F32 factor, F32 *result) = m_point3F_interpolate_C;
  744. void (*m_point3D_normalize)(F64 *p) = m_point3D_normalize_C;
  745. void (*m_point3D_interpolate)(const F64 *from, const F64 *to, F64 factor, F64 *result) = m_point3D_interpolate_C;
  746. void (*m_point3F_bulk_dot)(const F32* refVector,
  747. const F32* dotPoints,
  748. const U32 numPoints,
  749. const U32 pointStride,
  750. F32* output) = m_point3F_bulk_dot_C;
  751. void (*m_point3F_bulk_dot_indexed)(const F32* refVector,
  752. const F32* dotPoints,
  753. const U32 numPoints,
  754. const U32 pointStride,
  755. const U32* pointIndices,
  756. F32* output) = m_point3F_bulk_dot_indexed_C;
  757. void (*m_quatF_set_matF)( F32 x, F32 y, F32 z, F32 w, F32* m ) = m_quatF_set_matF_C;
  758. void (*m_matF_set_euler)(const F32 *e, F32 *result) = m_matF_set_euler_C;
  759. void (*m_matF_set_euler_point)(const F32 *e, const F32 *p, F32 *result) = m_matF_set_euler_point_C;
  760. void (*m_matF_identity)(F32 *m) = m_matF_identity_C;
  761. void (*m_matF_inverse)(F32 *m) = m_matF_inverse_C;
  762. void (*m_matF_affineInverse)(F32 *m) = m_matF_affineInverse_C;
  763. void (*m_matF_invert_to)(const F32 *m, F32 *d) = m_matF_invert_to_C;
  764. void (*m_matF_transpose)(F32 *m) = m_matF_transpose_C;
  765. void (*m_matF_scale)(F32 *m,const F32* p) = m_matF_scale_C;
  766. void (*m_matF_normalize)(F32 *m) = m_matF_normalize_C;
  767. F32 (*m_matF_determinant)(const F32 *m) = m_matF_determinant_C;
  768. void (*m_matF_x_matF)(const F32 *a, const F32 *b, F32 *mresult) = default_matF_x_matF_C;
  769. void (*m_matF_x_matF_aligned)(const F32 *a, const F32 *b, F32 *mresult) = default_matF_x_matF_C;
  770. // void (*m_matF_x_point3F)(const F32 *m, const F32 *p, F32 *presult) = m_matF_x_point3F_C;
  771. // void (*m_matF_x_vectorF)(const F32 *m, const F32 *v, F32 *vresult) = m_matF_x_vectorF_C;
  772. void (*m_matF_x_point4F)(const F32 *m, const F32 *p, F32 *presult) = m_matF_x_point4F_C;
  773. void (*m_matF_x_scale_x_planeF)(const F32 *m, const F32* s, const F32 *p, F32 *presult) = m_matF_x_scale_x_planeF_C;
  774. void (*m_matF_x_box3F)(const F32 *m, F32 *min, F32 *max) = m_matF_x_box3F_C;
  775. //------------------------------------------------------------------------------
  776. void mInstallLibrary_C()
  777. {
  778. m_mulDivS32 = m_mulDivS32_C;
  779. m_mulDivU32 = m_mulDivU32_C;
  780. m_catmullrom = m_catmullrom_C;
  781. m_sincos = m_sincos_C;
  782. m_sincosD = m_sincosD_C;
  783. m_point2F_normalize = m_point2F_normalize_C;
  784. m_point2F_normalize_f = m_point2F_normalize_f_C;
  785. m_point2D_normalize = m_point2D_normalize_C;
  786. m_point2D_normalize_f = m_point2D_normalize_f_C;
  787. m_point3F_normalize = m_point3F_normalize_C;
  788. m_point3F_normalize_f = m_point3F_normalize_f_C;
  789. m_point3F_interpolate = m_point3F_interpolate_C;
  790. m_point3D_normalize = m_point3D_normalize_C;
  791. m_point3D_interpolate = m_point3D_interpolate_C;
  792. m_point3F_bulk_dot = m_point3F_bulk_dot_C;
  793. m_point3F_bulk_dot_indexed = m_point3F_bulk_dot_indexed_C;
  794. m_quatF_set_matF = m_quatF_set_matF_C;
  795. m_matF_set_euler = m_matF_set_euler_C;
  796. m_matF_set_euler_point = m_matF_set_euler_point_C;
  797. m_matF_identity = m_matF_identity_C;
  798. m_matF_inverse = m_matF_inverse_C;
  799. m_matF_affineInverse = m_matF_affineInverse_C;
  800. m_matF_invert_to = m_matF_invert_to_C;
  801. m_matF_transpose = m_matF_transpose_C;
  802. m_matF_scale = m_matF_scale_C;
  803. m_matF_normalize = m_matF_normalize_C;
  804. m_matF_determinant = m_matF_determinant_C;
  805. m_matF_x_matF = default_matF_x_matF_C;
  806. m_matF_x_matF_aligned = default_matF_x_matF_C;
  807. // m_matF_x_point3F = m_matF_x_point3F_C;
  808. // m_matF_x_vectorF = m_matF_x_vectorF_C;
  809. m_matF_x_point4F = m_matF_x_point4F_C;
  810. m_matF_x_scale_x_planeF = m_matF_x_scale_x_planeF_C;
  811. m_matF_x_box3F = m_matF_x_box3F_C;
  812. }