mathlib.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781
  1. /*----------------------------------------------------------------------------*/
  2. /**
  3. * This confidential and proprietary software may be used only as
  4. * authorised by a licensing agreement from ARM Limited
  5. * (C) COPYRIGHT 2011-2012 ARM Limited
  6. * ALL RIGHTS RESERVED
  7. *
  8. * The entire notice above must be reproduced on all authorised
  9. * copies and copies may only be made to the extent permitted
  10. * by a licensing agreement from ARM Limited.
  11. *
  12. * @brief Library of math functions.
  13. */
  14. /*----------------------------------------------------------------------------*/
  15. #define _USE_MATH_DEFINES // for M_PI on windows
  16. #include <time.h>
  17. #include <stdlib.h>
  18. #include <stdio.h>
  19. #include <math.h>
  20. #include "mathlib.h"
  21. #ifdef WIN32
  22. double cbrt(double n)
  23. {
  24. return n < 0 ? -pow(-n, 1.0 / 3.0) : pow(n, 1.0 / 3.0);
  25. }
  26. #endif
  27. /**************************
  28. basic OpenCL functions
  29. **************************/
  30. float inversesqrt(float p)
  31. {
  32. return 1.0f / sqrt(p);
  33. }
  34. float acospi(float p)
  35. {
  36. return static_cast < float >(acos(p) * (1.0f / M_PI));
  37. };
  38. float sinpi(float p)
  39. {
  40. return static_cast < float >(sin(p * M_PI));
  41. }
  42. float cospi(float p)
  43. {
  44. return static_cast < float >(cos(p * M_PI));
  45. }
  46. float nan(int p)
  47. {
  48. union
  49. {
  50. int p;
  51. float q;
  52. } v;
  53. v.p = p | 0x7FC00000U;
  54. return v.q;
  55. }
  56. #if !defined(_MSC_VER) && (__cplusplus < 201103L)
  57. float fmax(float p, float q)
  58. {
  59. if (p != p)
  60. return q;
  61. if (q != q)
  62. return p;
  63. if (p > q)
  64. return p;
  65. return q;
  66. }
  67. float fmin(float p, float q)
  68. {
  69. if (p != p)
  70. return q;
  71. if (q != q)
  72. return p;
  73. if (p < q)
  74. return p;
  75. return q;
  76. }
  77. #endif // C++11
  78. float2 fmax(float2 p, float2 q)
  79. {
  80. return float2(fmax(p.x, q.x), fmax(p.y, q.y));
  81. }
  82. float3 fmax(float3 p, float3 q)
  83. {
  84. return float3(fmax(p.x, q.x), fmax(p.y, q.y), fmax(p.z, q.z));
  85. }
  86. float4 fmax(float4 p, float4 q)
  87. {
  88. return float4(fmax(p.x, q.x), fmax(p.y, q.y), fmax(p.z, q.z), fmax(p.w, q.w));
  89. }
  90. float2 fmin(float2 p, float2 q)
  91. {
  92. return float2(fmin(p.x, q.x), fmin(p.y, q.y));
  93. }
  94. float3 fmin(float3 p, float3 q)
  95. {
  96. return float3(fmin(p.x, q.x), fmin(p.y, q.y), fmin(p.z, q.z));
  97. }
  98. float4 fmin(float4 p, float4 q)
  99. {
  100. return float4(fmin(p.x, q.x), fmin(p.y, q.y), fmin(p.z, q.z), fmin(p.w, q.w));
  101. }
  102. /*
  103. float dot( float2 p, float2 q ) { return p.x*q.x + p.y*q.y; } float dot( float3 p, float3 q ) { return p.x*q.x + p.y*q.y + p.z*q.z; } float dot( float4 p, float4 q ) { return p.x*q.x + p.y*q.y +
  104. p.z*q.z + p.w*q.w; } */
  105. float3 cross(float3 p, float3 q)
  106. {
  107. return p.yzx * q.zxy - p.zxy * q.yzx;
  108. }
  109. float4 cross(float4 p, float4 q)
  110. {
  111. return float4(p.yzx * q.zxy - p.zxy * q.yzx, 0.0f);
  112. }
  113. float length(float2 p)
  114. {
  115. return sqrt(dot(p, p));
  116. }
  117. float length(float3 p)
  118. {
  119. return sqrt(dot(p, p));
  120. }
  121. float length(float4 p)
  122. {
  123. return sqrt(dot(p, p));
  124. }
  125. float length_sqr(float2 p)
  126. {
  127. return dot(p, p);
  128. }
  129. float length_sqr(float3 p)
  130. {
  131. return dot(p, p);
  132. }
  133. float length_sqr(float4 p)
  134. {
  135. return dot(p, p);
  136. }
  137. float distance(float2 p, float2 q)
  138. {
  139. return length(q - p);
  140. }
  141. float distance(float3 p, float3 q)
  142. {
  143. return length(q - p);
  144. }
  145. float distance(float4 p, float4 q)
  146. {
  147. return length(q - p);
  148. }
  149. float distance_sqr(float2 p, float2 q)
  150. {
  151. return length_sqr(q - p);
  152. }
  153. float distance_sqr(float3 p, float3 q)
  154. {
  155. return length_sqr(q - p);
  156. }
  157. float distance_sqr(float4 p, float4 q)
  158. {
  159. return length_sqr(q - p);
  160. }
  161. float2 normalize(float2 p)
  162. {
  163. return p / length(p);
  164. }
  165. float3 normalize(float3 p)
  166. {
  167. return p / length(p);
  168. }
  169. float4 normalize(float4 p)
  170. {
  171. return p / length(p);
  172. }
  173. /**************************************************
  174. matrix functions, for 2x2, 3x3 and 4x4 matrices:
  175. * trace
  176. * determinant
  177. * transform
  178. * inverse
  179. * adjugate
  180. * characteristic polynomial
  181. * eigenvalue
  182. * eigenvector
  183. additionally, root solver
  184. for 2nd, 3rd and 4th degree monic polynomials.
  185. *************************************************/
  186. /*
  187. struct mat2 { float2 v[2]; };
  188. struct mat3 { float3 v[3]; };
  189. struct mat4 { float4 v[4]; };
  190. */
  191. float trace(mat2 p)
  192. {
  193. return p.v[0].x + p.v[1].y;
  194. }
  195. float trace(mat3 p)
  196. {
  197. return p.v[0].x + p.v[1].y + p.v[2].z;
  198. }
  199. float trace(mat4 p)
  200. {
  201. return p.v[0].x + p.v[1].y + p.v[2].z + p.v[3].w;
  202. }
  203. float determinant(mat2 p)
  204. {
  205. float2 v = p.v[0].xy * p.v[1].yx;
  206. return v.x - v.y;
  207. }
  208. float determinant(mat3 p)
  209. {
  210. return dot(p.v[0], cross(p.v[1], p.v[2]));
  211. }
  212. float determinant(mat4 p)
  213. {
  214. return dot(p.v[0],
  215. float4(dot(p.v[1].yzw, cross(p.v[2].yzw, p.v[3].yzw)),
  216. -dot(p.v[1].xzw, cross(p.v[2].xzw, p.v[3].xzw)), dot(p.v[1].xyw, cross(p.v[2].xyw, p.v[3].xyw)), -dot(p.v[1].xyz, cross(p.v[2].xyz, p.v[3].xyz))));
  217. }
  218. /*
  219. characteristic polynomials for matrices. These polynomials are monic, meaning that the coefficient of the highest component is 1; this component is omitted. The first component is the constant
  220. part. */
  221. float2 characteristic_poly(mat2 p)
  222. {
  223. return float2(determinant(p), -trace(p));
  224. }
  225. float3 characteristic_poly(mat3 p)
  226. {
  227. float2 v1 = (p.v[0].xy * p.v[1].yx) + (p.v[0].xz * p.v[2].zx) + (p.v[1].yz * p.v[2].zy);
  228. return float3(-determinant(p), v1.x - v1.y, -trace(p));
  229. }
  230. float4 characteristic_poly(mat4 p)
  231. {
  232. float2 v1 = (p.v[0].xy * p.v[1].yx) + (p.v[0].xz * p.v[2].zx) + (p.v[0].xw * p.v[3].wx) + (p.v[1].yz * p.v[2].zy) + (p.v[1].yw * p.v[3].wy) + (p.v[2].zw * p.v[3].wz);
  233. return float4(determinant(p),
  234. -dot(p.v[1].yzw, cross(p.v[2].yzw, p.v[3].yzw))
  235. - dot(p.v[0].xzw, cross(p.v[2].xzw, p.v[3].xzw)) - dot(p.v[0].xyw, cross(p.v[1].xyw, p.v[3].xyw)) - dot(p.v[0].xyz, cross(p.v[1].xyz, p.v[2].xyz)), v1.x - v1.y, -trace(p));
  236. }
  237. /*
  238. Root finders for monic polynomials (highest coefficient is equal to 1)
  239. Returns a vector with length equal to the number of roots that the polynomial has;
  240. for roots that do not genuinely exist, we return NaN.
  241. The polynomial is basically
  242. poly(n) = p.x + p.y*n + p.z*n^2 + p.w*n^3
  243. (including only the components of the vector that actually exist; the next coefficient
  244. has the value 1, and the remaining ones have value 0. )
  245. */
  246. float2 solve_monic(float2 p)
  247. {
  248. float v = sqrt(p.y * p.y - 4 * p.x);
  249. return (p.yy + float2(v, -v)) * -0.5f;
  250. }
  251. float3 solve_monic(float3 p)
  252. {
  253. p = p * (1.0f / 3.0f);
  254. float pz = p.z;
  255. // compute a normalization value to scale the vector by.
  256. // The normalization factor is divided by 2^20.
  257. // This is supposed to make internal calculations unlikely
  258. // to overflow while also making underflows unlikely.
  259. float scal = 1.0f;
  260. float cx = static_cast < float >(cbrt(fabs(p.x)));
  261. float cy = static_cast < float >(cbrt(fabs(p.y)));
  262. scal = fmax(fmax(fabsf(p.z), cx), cy * cy) * (1.0f / 1048576.0f);
  263. float rscal = 1.0f / scal;
  264. p = p * float3(rscal * rscal * rscal, rscal * rscal, rscal);
  265. float bb = p.z * p.z; // div scal^2
  266. float nq = bb - p.y; // div scal^2
  267. float r = 1.5f * (p.y * p.z - p.x) - p.z * bb; // div scal^3
  268. float nq3 = nq * nq * nq; // div scal^6
  269. float r2 = r * r; // div scal^6
  270. if (nq3 < r2)
  271. {
  272. // one root
  273. float root = sqrt(r2 - nq3); // div scal^3
  274. float s = static_cast < float >(cbrt(r + root)); // div scal
  275. float t = static_cast < float >(cbrt(r - root)); // div scal
  276. return float3((s + t) * scal - pz, nan(0), nan(0));
  277. }
  278. else
  279. {
  280. // three roots
  281. float phi_r = inversesqrt(nq3); // div scal ^ -3
  282. float phi_root = static_cast < float >(cbrt(phi_r * nq3)); // div scal
  283. float theta = acospi(r * phi_r);
  284. theta *= 1.0f / 3.0f;
  285. float ncprod = phi_root * cospi(theta);
  286. float dev = 1.73205080756887729353f * phi_root * sinpi(theta);
  287. return float3(2 * ncprod, -dev - ncprod, dev - ncprod) * scal - pz;
  288. }
  289. }
  290. /*
  291. * This function is not overflow-safe. Use with care.
  292. */
  293. float4 solve_monic(float4 p)
  294. {
  295. // step 1: depress the input polynomial
  296. float bias = p.w * 0.25f;
  297. float3 qv = float3((-3.0f / 256.0f) * p.w * p.w, (1.0f / 8.0f) * p.w, (-3.0 / 8.0f));
  298. float3 rv = float3((1.0f / 16.0f) * p.z * p.w - (1.0f / 4.0f) * p.y, (-1.0f / 2.0f) * p.z, 0.0f);
  299. float3 qx = float3(qv * p.w + rv) * p.w + p.xyz;
  300. // step 2: solve a cubic equation to get hold of a parameter p.
  301. float3 monicp = float3(-qx.y * qx.y, (qx.z * qx.z) - (4.0f * qx.x), 2.0f * qx.z);
  302. float4 v = float4(solve_monic(monicp), 1e-37f);
  303. // the cubic equation may have multiple solutions; at least one of them
  304. // is numerically at least nonnegative (but may have become negative as a result of
  305. // a roundoff error). We use fmax() to extract this value or a very small positive value.
  306. float2 v2 = fmax(v.xy, v.zw);
  307. float p2 = fmax(v2.x, v2.y); // p^2
  308. float pr = inversesqrt(p2); // 1/p
  309. float pm = p2 * pr; // p
  310. // step 3: use the solution for the cubic equation to set up two quadratic equations;
  311. // these two equations then result in the 4 possible roots.
  312. float f1 = qx.z + p2;
  313. float f2 = qx.y * pr;
  314. float s = 0.5f * (f1 + f2);
  315. float q = 0.5f * (f1 - f2);
  316. float4 res = float4(solve_monic(float2(q, pm)),
  317. solve_monic(float2(s, -pm)));
  318. // finally, order the results and apply the bias.
  319. if (res.x != res.x)
  320. return res.zwxy - bias;
  321. else
  322. return res - bias;
  323. }
  324. float2 transform(mat2 p, float2 q)
  325. {
  326. return float2(dot(p.v[0], q), dot(p.v[1], q));
  327. }
  328. float3 transform(mat3 p, float3 q)
  329. {
  330. return float3(dot(p.v[0], q), dot(p.v[1], q), dot(p.v[2], q));
  331. }
  332. float4 transform(mat4 p, float4 q)
  333. {
  334. return float4(dot(p.v[0], q), dot(p.v[1], q), dot(p.v[2], q), dot(p.v[3], q));
  335. }
  336. mat2 adjugate(mat2 p)
  337. {
  338. mat2 res;
  339. res.v[0] = float2(p.v[1].y, -p.v[0].y);
  340. res.v[1] = float2(-p.v[1].x, p.v[0].x);
  341. return res;
  342. }
  343. mat2 invert(mat2 p)
  344. {
  345. float rdet = 1.0f / determinant(p);
  346. mat2 res;
  347. res.v[0] = float2(p.v[1].y, -p.v[0].y) * rdet;
  348. res.v[1] = float2(-p.v[1].x, p.v[0].x) * rdet;
  349. return res;
  350. }
  351. mat3 adjugate(mat3 p)
  352. {
  353. mat3 res;
  354. float3 prd0 = cross(p.v[1], p.v[2]);
  355. float3 prd1 = cross(p.v[2], p.v[0]);
  356. float3 prd2 = cross(p.v[0], p.v[1]);
  357. res.v[0] = float3(prd0.x, prd1.x, prd2.x);
  358. res.v[1] = float3(prd0.y, prd1.y, prd2.y);
  359. res.v[2] = float3(prd0.z, prd1.z, prd2.z);
  360. return res;
  361. }
  362. mat3 invert(mat3 p)
  363. {
  364. float3 cross0 = cross(p.v[1], p.v[2]);
  365. float det = dot(cross0, p.v[0]);
  366. float rdet = 1.0f / det;
  367. mat3 res;
  368. float3 prd0 = cross0 * rdet;
  369. float3 prd1 = cross(p.v[2], p.v[0]) * rdet;
  370. float3 prd2 = cross(p.v[0], p.v[1]) * rdet;
  371. res.v[0] = float3(prd0.x, prd1.x, prd2.x);
  372. res.v[1] = float3(prd0.y, prd1.y, prd2.y);
  373. res.v[2] = float3(prd0.z, prd1.z, prd2.z);
  374. return res;
  375. }
  376. mat4 adjugate(mat4 p)
  377. {
  378. mat4 res;
  379. float3 bpc0 = cross(p.v[2].yzw, p.v[3].yzw);
  380. float3 tpc0 = cross(p.v[0].yzw, p.v[1].yzw);
  381. res.v[0] = float4(dot(bpc0, p.v[1].yzw), -dot(bpc0, p.v[0].yzw), dot(tpc0, p.v[3].yzw), -dot(tpc0, p.v[2].yzw));
  382. float3 bpc1 = cross(p.v[2].xzw, p.v[3].xzw);
  383. float3 tpc1 = cross(p.v[0].xzw, p.v[1].xzw);
  384. res.v[1] = float4(-dot(bpc1, p.v[1].xzw), dot(bpc1, p.v[0].xzw), -dot(tpc1, p.v[3].xzw), dot(tpc1, p.v[2].xzw));
  385. float3 bpc2 = cross(p.v[2].xyw, p.v[3].xyw);
  386. float3 tpc2 = cross(p.v[0].xyw, p.v[1].xyw);
  387. res.v[2] = float4(dot(bpc2, p.v[1].xyw), -dot(bpc2, p.v[0].xyw), dot(tpc2, p.v[3].xyw), -dot(tpc2, p.v[2].xyw));
  388. float3 bpc3 = cross(p.v[2].xyz, p.v[3].xyz);
  389. float3 tpc3 = cross(p.v[0].xyz, p.v[1].xyz);
  390. res.v[3] = float4(-dot(bpc3, p.v[1].xyz), dot(bpc3, p.v[0].xyz), -dot(tpc3, p.v[3].xyz), dot(tpc3, p.v[2].xyz));
  391. return res;
  392. }
  393. mat4 invert(mat4 p)
  394. {
  395. // cross products between the bottom two rows
  396. float3 bpc0 = cross(p.v[2].yzw, p.v[3].yzw);
  397. float3 bpc1 = cross(p.v[2].xzw, p.v[3].xzw);
  398. float3 bpc2 = cross(p.v[2].xyw, p.v[3].xyw);
  399. float3 bpc3 = cross(p.v[2].xyz, p.v[3].xyz);
  400. // dot-products for the top rows
  401. float4 row1 = float4(dot(bpc0, p.v[1].yzw),
  402. -dot(bpc1, p.v[1].xzw),
  403. dot(bpc2, p.v[1].xyw),
  404. -dot(bpc3, p.v[1].xyz));
  405. float det = dot(p.v[0], row1);
  406. float rdet = 1.0f / det;
  407. mat4 res;
  408. float3 tpc0 = cross(p.v[0].yzw, p.v[1].yzw);
  409. res.v[0] = float4(row1.x, -dot(bpc0, p.v[0].yzw), dot(tpc0, p.v[3].yzw), -dot(tpc0, p.v[2].yzw)) * rdet;
  410. float3 tpc1 = cross(p.v[0].xzw, p.v[1].xzw);
  411. res.v[1] = float4(row1.y, dot(bpc1, p.v[0].xzw), -dot(tpc1, p.v[3].xzw), dot(tpc1, p.v[2].xzw)) * rdet;
  412. float3 tpc2 = cross(p.v[0].xyw, p.v[1].xyw);
  413. res.v[2] = float4(row1.z, -dot(bpc2, p.v[0].xyw), dot(tpc2, p.v[3].xyw), -dot(tpc2, p.v[2].xyw)) * rdet;
  414. float3 tpc3 = cross(p.v[0].xyz, p.v[1].xyz);
  415. res.v[3] = float4(row1.w, dot(bpc3, p.v[0].xyz), -dot(tpc3, p.v[3].xyz), dot(tpc3, p.v[2].xyz)) * rdet;
  416. return res;
  417. }
  418. float2 eigenvalues(mat2 p)
  419. {
  420. return solve_monic(characteristic_poly(p));
  421. }
  422. float3 eigenvalues(mat3 p)
  423. {
  424. return solve_monic(characteristic_poly(p));
  425. }
  426. float4 eigenvalues(mat4 p)
  427. {
  428. return solve_monic(characteristic_poly(p));
  429. }
  430. float2 eigenvector(mat2 p, float eigvl)
  431. {
  432. // for a mat2, we first reverse-subtract the eigenvalue from the matrix diagonal,
  433. // then return whichever row had the larger sum-of-absolute-values.
  434. float4 v = float4(p.v[0], p.v[1]);
  435. v.xw = eigvl - v.xw;
  436. if (fabs(v.x) + fabs(v.y) > fabs(v.z) + fabs(v.w))
  437. return v.yx;
  438. else
  439. return v.wz;
  440. }
  441. float3 eigenvector(mat3 p, float eigvl)
  442. {
  443. // for a mat3, we obtain the eigenvector as follows:
  444. // step 1: subtract the eigenvalue from the matrix diagonal
  445. // step 2: take two cross products between rows in the matrix
  446. // step 3: return whichever of the cross products resulted in a longer vector.
  447. float3 r0 = p.v[0];
  448. float3 r1 = p.v[1];
  449. float3 r2 = p.v[2];
  450. r0.x = r0.x - eigvl;
  451. r1.y = r1.y - eigvl;
  452. r2.z = r2.z - eigvl;
  453. float3 v1 = cross(r0, r1);
  454. float3 v2 = cross(r1, r2);
  455. float len1 = dot(v1, v1);
  456. float len2 = dot(v2, v2);
  457. return len1 > len2 ? v1 : v2;
  458. }
  459. // generalized cross product: 3 vectors with 4 components each.
  460. // The result is a vector that is perpendicular to all the three specified vectors.
  461. // it works in the sense that it produces a perpendicular-to-everything vector,
  462. // but it has not been tested whether it points in the "right" direction.
  463. float4 gcross(float4 p, float4 q, float4 r)
  464. {
  465. return float4(dot(p.yzw, cross(q.yzw, r.yzw)), -dot(p.xzw, cross(q.xzw, r.xzw)), dot(p.xyw, cross(q.xyw, r.xyw)), -dot(p.xyz, cross(q.xyz, r.xyz)));
  466. }
  467. float4 eigenvector(mat4 p, float eigvl)
  468. {
  469. float4 r0 = p.v[0];
  470. float4 r1 = p.v[1];
  471. float4 r2 = p.v[2];
  472. float4 r3 = p.v[3];
  473. r0.x = r0.x - eigvl;
  474. r1.y = r1.y - eigvl;
  475. r2.z = r2.z - eigvl;
  476. r3.w = r3.w - eigvl;
  477. // generate four candidate vectors using the generalized cross product.
  478. // These will in general point in the same direction (or 180 degree opposite),
  479. // however they will have different lengths. Pick the longest one.
  480. float3 tpc0 = cross(r0.yzw, r1.yzw);
  481. float3 tpc1 = cross(r0.xzw, r1.xzw);
  482. float3 tpc2 = cross(r0.xyw, r1.xyw);
  483. float3 tpc3 = cross(r0.xyz, r1.xyz);
  484. float4 v1 = float4(dot(r2.yzw, tpc0),
  485. -dot(r2.xzw, tpc1),
  486. dot(r2.xyw, tpc2),
  487. -dot(r2.xyz, tpc3));
  488. float4 v2 = float4(dot(r3.yzw, tpc0),
  489. -dot(r3.xzw, tpc1),
  490. dot(r3.xyw, tpc2),
  491. -dot(r3.xyz, tpc3));
  492. float3 bpc0 = cross(r2.yzw, r3.yzw);
  493. float3 bpc1 = cross(r2.xzw, r3.xzw);
  494. float3 bpc2 = cross(r2.xyw, r3.xyw);
  495. float3 bpc3 = cross(r2.xyz, r3.xyz);
  496. float4 v3 = float4(dot(r0.yzw, bpc0),
  497. -dot(r0.xzw, bpc1),
  498. dot(r0.xyw, bpc2),
  499. -dot(r0.xyz, bpc3));
  500. float4 v4 = float4(dot(r1.yzw, bpc0),
  501. -dot(r1.xzw, bpc1),
  502. dot(r1.xyw, bpc2),
  503. -dot(r1.xyz, bpc3));
  504. float len1 = dot(v1, v1);
  505. float len2 = dot(v2, v2);
  506. float len3 = dot(v3, v3);
  507. float len4 = dot(v4, v4);
  508. if (fmax(len1, len2) > fmax(len3, len4))
  509. return len1 > len2 ? v1 : v2;
  510. else
  511. return len3 > len4 ? v3 : v4;
  512. }
  513. // matrix multiply
  514. mat2 operator *(mat2 a, mat2 b)
  515. {
  516. mat2 res;
  517. res.v[0] = a.v[0].x * b.v[0] + a.v[0].y * b.v[1];
  518. res.v[1] = a.v[1].x * b.v[0] + a.v[1].y * b.v[1];
  519. return res;
  520. }
  521. mat3 operator *(mat3 a, mat3 b)
  522. {
  523. mat3 res;
  524. res.v[0] = a.v[0].x * b.v[0] + a.v[0].y * b.v[1] + a.v[0].z * b.v[2];
  525. res.v[1] = a.v[1].x * b.v[0] + a.v[1].y * b.v[1] + a.v[1].z * b.v[2];
  526. res.v[2] = a.v[2].x * b.v[0] + a.v[2].y * b.v[1] + a.v[2].z * b.v[2];
  527. return res;
  528. }
  529. mat4 operator *(mat4 a, mat4 b)
  530. {
  531. mat4 res;
  532. res.v[0] = a.v[0].x * b.v[0] + a.v[0].y * b.v[1] + a.v[0].z * b.v[2] + a.v[0].w * b.v[3];
  533. res.v[1] = a.v[1].x * b.v[0] + a.v[1].y * b.v[1] + a.v[1].z * b.v[2] + a.v[1].w * b.v[3];
  534. res.v[2] = a.v[2].x * b.v[0] + a.v[2].y * b.v[1] + a.v[2].z * b.v[2] + a.v[2].w * b.v[3];
  535. res.v[3] = a.v[3].x * b.v[0] + a.v[3].y * b.v[1] + a.v[3].z * b.v[2] + a.v[3].w * b.v[3];
  536. return res;
  537. }
  538. /*************************
  539. simple geometric functions
  540. *************************/
  541. // return parameter value for the point on the line closest to the specified point
  542. float param_nearest_on_line(float2 point, line2 line)
  543. {
  544. return dot(point - line.a, line.b) / dot(line.b, line.b);
  545. }
  546. float param_nearest_on_line(float3 point, line3 line)
  547. {
  548. return dot(point - line.a, line.b) / dot(line.b, line.b);
  549. }
  550. float param_nearest_on_line(float4 point, line4 line)
  551. {
  552. return dot(point - line.a, line.b) / dot(line.b, line.b);
  553. }
  554. // return distance between point and line
  555. float point_line_distance(float2 point, line2 line)
  556. {
  557. return distance(point, line.a + line.b * param_nearest_on_line(point, line));
  558. }
  559. float point_line_distance(float3 point, line3 line)
  560. {
  561. return distance(point, line.a + line.b * param_nearest_on_line(point, line));
  562. }
  563. float point_line_distance(float4 point, line4 line)
  564. {
  565. return distance(point, line.a + line.b * param_nearest_on_line(point, line));
  566. }
  567. float point_line_distance_sqr(float2 point, line2 line)
  568. {
  569. return distance_sqr(point, line.a + line.b * param_nearest_on_line(point, line));
  570. }
  571. float point_line_distance_sqr(float3 point, line3 line)
  572. {
  573. return distance_sqr(point, line.a + line.b * param_nearest_on_line(point, line));
  574. }
  575. float point_line_distance_sqr(float4 point, line4 line)
  576. {
  577. return distance_sqr(point, line.a + line.b * param_nearest_on_line(point, line));
  578. }
  579. // distance between plane/hyperplane in 3D and 4D
  580. float point_plane_3d_distance(float3 point, plane_3d plane)
  581. {
  582. return dot(point - plane.root_point, plane.normal);
  583. }
  584. float point_hyperplane_4d_distance(float4 point, hyperplane_4d plane)
  585. {
  586. return dot(point - plane.root_point, plane.normal);
  587. }
  588. // helper functions to produce a 3D plane from three points and a 4D hyperplane from four points.
  589. plane_3d generate_plane_from_points(float3 point0, float3 point1, float3 point2)
  590. {
  591. plane_3d res;
  592. res.root_point = point0;
  593. res.normal = normalize(cross(point1 - point0, point2 - point0));
  594. return res;
  595. }
  596. hyperplane_4d generate_hyperplane_from_points(float4 point0, float4 point1, float4 point2, float4 point3)
  597. {
  598. hyperplane_4d res;
  599. res.root_point = point0;
  600. res.normal = normalize(gcross(point1 - point0, point2 - point0, point3 - point0));
  601. return res;
  602. }