mathlib.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772
  1. /*----------------------------------------------------------------------------*/
  2. /**
  3. * This confidential and proprietary software may be used only as
  4. * authorised by a licensing agreement from ARM Limited
  5. * (C) COPYRIGHT 2011-2012 ARM Limited
  6. * ALL RIGHTS RESERVED
  7. *
  8. * The entire notice above must be reproduced on all authorised
  9. * copies and copies may only be made to the extent permitted
  10. * by a licensing agreement from ARM Limited.
  11. *
  12. * @brief Library of math functions.
  13. */
  14. /*----------------------------------------------------------------------------*/
  15. #include <time.h>
  16. #include <stdlib.h>
  17. #include <stdio.h>
  18. #include <math.h>
  19. #include "mathlib.h"
  20. /**************************
  21. basic OpenCL functions
  22. **************************/
  23. float inversesqrt(float p)
  24. {
  25. return 1.0f / sqrt(p);
  26. }
  27. float acospi(float p)
  28. {
  29. return static_cast < float >(acos(p) * (1.0f / M_PI));
  30. };
  31. float sinpi(float p)
  32. {
  33. return static_cast < float >(sin(p * M_PI));
  34. }
  35. float cospi(float p)
  36. {
  37. return static_cast < float >(cos(p * M_PI));
  38. }
  39. float nan(int p)
  40. {
  41. union
  42. {
  43. int p;
  44. float q;
  45. } v;
  46. v.p = p | 0x7FC00000U;
  47. return v.q;
  48. }
  49. #if (!_MSC_VER) && (__cplusplus < 201103L)
  50. float fmax(float p, float q)
  51. {
  52. if (p != p)
  53. return q;
  54. if (q != q)
  55. return p;
  56. if (p > q)
  57. return p;
  58. return q;
  59. }
  60. float fmin(float p, float q)
  61. {
  62. if (p != p)
  63. return q;
  64. if (q != q)
  65. return p;
  66. if (p < q)
  67. return p;
  68. return q;
  69. }
  70. #endif // C++11
  71. float2 fmax(float2 p, float2 q)
  72. {
  73. return float2(fmax(p.x, q.x), fmax(p.y, q.y));
  74. }
  75. float3 fmax(float3 p, float3 q)
  76. {
  77. return float3(fmax(p.x, q.x), fmax(p.y, q.y), fmax(p.z, q.z));
  78. }
  79. float4 fmax(float4 p, float4 q)
  80. {
  81. return float4(fmax(p.x, q.x), fmax(p.y, q.y), fmax(p.z, q.z), fmax(p.w, q.w));
  82. }
  83. float2 fmin(float2 p, float2 q)
  84. {
  85. return float2(fmin(p.x, q.x), fmin(p.y, q.y));
  86. }
  87. float3 fmin(float3 p, float3 q)
  88. {
  89. return float3(fmin(p.x, q.x), fmin(p.y, q.y), fmin(p.z, q.z));
  90. }
  91. float4 fmin(float4 p, float4 q)
  92. {
  93. return float4(fmin(p.x, q.x), fmin(p.y, q.y), fmin(p.z, q.z), fmin(p.w, q.w));
  94. }
  95. /*
  96. float dot( float2 p, float2 q ) { return p.x*q.x + p.y*q.y; } float dot( float3 p, float3 q ) { return p.x*q.x + p.y*q.y + p.z*q.z; } float dot( float4 p, float4 q ) { return p.x*q.x + p.y*q.y +
  97. p.z*q.z + p.w*q.w; } */
  98. float3 cross(float3 p, float3 q)
  99. {
  100. return p.yzx * q.zxy - p.zxy * q.yzx;
  101. }
  102. float4 cross(float4 p, float4 q)
  103. {
  104. return float4(p.yzx * q.zxy - p.zxy * q.yzx, 0.0f);
  105. }
  106. float length(float2 p)
  107. {
  108. return sqrt(dot(p, p));
  109. }
  110. float length(float3 p)
  111. {
  112. return sqrt(dot(p, p));
  113. }
  114. float length(float4 p)
  115. {
  116. return sqrt(dot(p, p));
  117. }
  118. float length_sqr(float2 p)
  119. {
  120. return dot(p, p);
  121. }
  122. float length_sqr(float3 p)
  123. {
  124. return dot(p, p);
  125. }
  126. float length_sqr(float4 p)
  127. {
  128. return dot(p, p);
  129. }
  130. float distance(float2 p, float2 q)
  131. {
  132. return length(q - p);
  133. }
  134. float distance(float3 p, float3 q)
  135. {
  136. return length(q - p);
  137. }
  138. float distance(float4 p, float4 q)
  139. {
  140. return length(q - p);
  141. }
  142. float distance_sqr(float2 p, float2 q)
  143. {
  144. return length_sqr(q - p);
  145. }
  146. float distance_sqr(float3 p, float3 q)
  147. {
  148. return length_sqr(q - p);
  149. }
  150. float distance_sqr(float4 p, float4 q)
  151. {
  152. return length_sqr(q - p);
  153. }
  154. float2 normalize(float2 p)
  155. {
  156. return p / length(p);
  157. }
  158. float3 normalize(float3 p)
  159. {
  160. return p / length(p);
  161. }
  162. float4 normalize(float4 p)
  163. {
  164. return p / length(p);
  165. }
  166. /**************************************************
  167. matrix functions, for 2x2, 3x3 and 4x4 matrices:
  168. * trace
  169. * determinant
  170. * transform
  171. * inverse
  172. * adjugate
  173. * characteristic polynomial
  174. * eigenvalue
  175. * eigenvector
  176. additionally, root solver
  177. for 2nd, 3rd and 4th degree monic polynomials.
  178. *************************************************/
  179. /*
  180. struct mat2 { float2 v[2]; };
  181. struct mat3 { float3 v[3]; };
  182. struct mat4 { float4 v[4]; };
  183. */
  184. float trace(mat2 p)
  185. {
  186. return p.v[0].x + p.v[1].y;
  187. }
  188. float trace(mat3 p)
  189. {
  190. return p.v[0].x + p.v[1].y + p.v[2].z;
  191. }
  192. float trace(mat4 p)
  193. {
  194. return p.v[0].x + p.v[1].y + p.v[2].z + p.v[3].w;
  195. }
  196. float determinant(mat2 p)
  197. {
  198. float2 v = p.v[0].xy * p.v[1].yx;
  199. return v.x - v.y;
  200. }
  201. float determinant(mat3 p)
  202. {
  203. return dot(p.v[0], cross(p.v[1], p.v[2]));
  204. }
  205. float determinant(mat4 p)
  206. {
  207. return dot(p.v[0],
  208. float4(dot(p.v[1].yzw, cross(p.v[2].yzw, p.v[3].yzw)),
  209. -dot(p.v[1].xzw, cross(p.v[2].xzw, p.v[3].xzw)), dot(p.v[1].xyw, cross(p.v[2].xyw, p.v[3].xyw)), -dot(p.v[1].xyz, cross(p.v[2].xyz, p.v[3].xyz))));
  210. }
  211. /*
  212. characteristic polynomials for matrices. These polynomials are monic, meaning that the coefficient of the highest component is 1; this component is omitted. The first component is the constant
  213. part. */
  214. float2 characteristic_poly(mat2 p)
  215. {
  216. return float2(determinant(p), -trace(p));
  217. }
  218. float3 characteristic_poly(mat3 p)
  219. {
  220. float2 v1 = (p.v[0].xy * p.v[1].yx) + (p.v[0].xz * p.v[2].zx) + (p.v[1].yz * p.v[2].zy);
  221. return float3(-determinant(p), v1.x - v1.y, -trace(p));
  222. }
  223. float4 characteristic_poly(mat4 p)
  224. {
  225. float2 v1 = (p.v[0].xy * p.v[1].yx) + (p.v[0].xz * p.v[2].zx) + (p.v[0].xw * p.v[3].wx) + (p.v[1].yz * p.v[2].zy) + (p.v[1].yw * p.v[3].wy) + (p.v[2].zw * p.v[3].wz);
  226. return float4(determinant(p),
  227. -dot(p.v[1].yzw, cross(p.v[2].yzw, p.v[3].yzw))
  228. - dot(p.v[0].xzw, cross(p.v[2].xzw, p.v[3].xzw)) - dot(p.v[0].xyw, cross(p.v[1].xyw, p.v[3].xyw)) - dot(p.v[0].xyz, cross(p.v[1].xyz, p.v[2].xyz)), v1.x - v1.y, -trace(p));
  229. }
  230. /*
  231. Root finders for monic polynomials (highest coefficient is equal to 1)
  232. Returns a vector with length equal to the number of roots that the polynomial has;
  233. for roots that do not genuinely exist, we return NaN.
  234. The polynomial is basically
  235. poly(n) = p.x + p.y*n + p.z*n^2 + p.w*n^3
  236. (including only the components of the vector that actually exist; the next coefficient
  237. has the value 1, and the remaining ones have value 0. )
  238. */
  239. float2 solve_monic(float2 p)
  240. {
  241. float v = sqrt(p.y * p.y - 4 * p.x);
  242. return (p.yy + float2(v, -v)) * -0.5f;
  243. }
  244. float3 solve_monic(float3 p)
  245. {
  246. p = p * (1.0f / 3.0f);
  247. float pz = p.z;
  248. // compute a normalization value to scale the vector by.
  249. // The normalization factor is divided by 2^20.
  250. // This is supposed to make internal calculations unlikely
  251. // to overflow while also making underflows unlikely.
  252. float scal = 1.0f;
  253. float cx = static_cast < float >(cbrt(fabs(p.x)));
  254. float cy = static_cast < float >(cbrt(fabs(p.y)));
  255. scal = fmax(fmax(fabsf(p.z), cx), cy * cy) * (1.0f / 1048576.0f);
  256. float rscal = 1.0f / scal;
  257. p = p * float3(rscal * rscal * rscal, rscal * rscal, rscal);
  258. float bb = p.z * p.z; // div scal^2
  259. float nq = bb - p.y; // div scal^2
  260. float r = 1.5f * (p.y * p.z - p.x) - p.z * bb; // div scal^3
  261. float nq3 = nq * nq * nq; // div scal^6
  262. float r2 = r * r; // div scal^6
  263. if (nq3 < r2)
  264. {
  265. // one root
  266. float root = sqrt(r2 - nq3); // div scal^3
  267. float s = static_cast < float >(cbrt(r + root)); // div scal
  268. float t = static_cast < float >(cbrt(r - root)); // div scal
  269. return float3((s + t) * scal - pz, nan(0), nan(0));
  270. }
  271. else
  272. {
  273. // three roots
  274. float phi_r = inversesqrt(nq3); // div scal ^ -3
  275. float phi_root = static_cast < float >(cbrt(phi_r * nq3)); // div scal
  276. float theta = acospi(r * phi_r);
  277. theta *= 1.0f / 3.0f;
  278. float ncprod = phi_root * cospi(theta);
  279. float dev = 1.73205080756887729353f * phi_root * sinpi(theta);
  280. return float3(2 * ncprod, -dev - ncprod, dev - ncprod) * scal - pz;
  281. }
  282. }
  283. /*
  284. * This function is not overflow-safe. Use with care.
  285. */
  286. float4 solve_monic(float4 p)
  287. {
  288. // step 1: depress the input polynomial
  289. float bias = p.w * 0.25f;
  290. float3 qv = float3((-3.0f / 256.0f) * p.w * p.w, (1.0f / 8.0f) * p.w, (-3.0 / 8.0f));
  291. float3 rv = float3((1.0f / 16.0f) * p.z * p.w - (1.0f / 4.0f) * p.y, (-1.0f / 2.0f) * p.z, 0.0f);
  292. float3 qx = float3(qv * p.w + rv) * p.w + p.xyz;
  293. // step 2: solve a cubic equation to get hold of a parameter p.
  294. float3 monicp = float3(-qx.y * qx.y, (qx.z * qx.z) - (4.0f * qx.x), 2.0f * qx.z);
  295. float4 v = float4(solve_monic(monicp), 1e-37f);
  296. // the cubic equation may have multiple solutions; at least one of them
  297. // is numerically at least nonnegative (but may have become negative as a result of
  298. // a roundoff error). We use fmax() to extract this value or a very small positive value.
  299. float2 v2 = fmax(v.xy, v.zw);
  300. float p2 = fmax(v2.x, v2.y); // p^2
  301. float pr = inversesqrt(p2); // 1/p
  302. float pm = p2 * pr; // p
  303. // step 3: use the solution for the cubic equation to set up two quadratic equations;
  304. // these two equations then result in the 4 possible roots.
  305. float f1 = qx.z + p2;
  306. float f2 = qx.y * pr;
  307. float s = 0.5f * (f1 + f2);
  308. float q = 0.5f * (f1 - f2);
  309. float4 res = float4(solve_monic(float2(q, pm)),
  310. solve_monic(float2(s, -pm)));
  311. // finally, order the results and apply the bias.
  312. if (res.x != res.x)
  313. return res.zwxy - bias;
  314. else
  315. return res - bias;
  316. }
  317. float2 transform(mat2 p, float2 q)
  318. {
  319. return float2(dot(p.v[0], q), dot(p.v[1], q));
  320. }
  321. float3 transform(mat3 p, float3 q)
  322. {
  323. return float3(dot(p.v[0], q), dot(p.v[1], q), dot(p.v[2], q));
  324. }
  325. float4 transform(mat4 p, float4 q)
  326. {
  327. return float4(dot(p.v[0], q), dot(p.v[1], q), dot(p.v[2], q), dot(p.v[3], q));
  328. }
  329. mat2 adjugate(mat2 p)
  330. {
  331. mat2 res;
  332. res.v[0] = float2(p.v[1].y, -p.v[0].y);
  333. res.v[1] = float2(-p.v[1].x, p.v[0].x);
  334. return res;
  335. }
  336. mat2 invert(mat2 p)
  337. {
  338. float rdet = 1.0f / determinant(p);
  339. mat2 res;
  340. res.v[0] = float2(p.v[1].y, -p.v[0].y) * rdet;
  341. res.v[1] = float2(-p.v[1].x, p.v[0].x) * rdet;
  342. return res;
  343. }
  344. mat3 adjugate(mat3 p)
  345. {
  346. mat3 res;
  347. float3 prd0 = cross(p.v[1], p.v[2]);
  348. float3 prd1 = cross(p.v[2], p.v[0]);
  349. float3 prd2 = cross(p.v[0], p.v[1]);
  350. res.v[0] = float3(prd0.x, prd1.x, prd2.x);
  351. res.v[1] = float3(prd0.y, prd1.y, prd2.y);
  352. res.v[2] = float3(prd0.z, prd1.z, prd2.z);
  353. return res;
  354. }
  355. mat3 invert(mat3 p)
  356. {
  357. float3 cross0 = cross(p.v[1], p.v[2]);
  358. float det = dot(cross0, p.v[0]);
  359. float rdet = 1.0f / det;
  360. mat3 res;
  361. float3 prd0 = cross0 * rdet;
  362. float3 prd1 = cross(p.v[2], p.v[0]) * rdet;
  363. float3 prd2 = cross(p.v[0], p.v[1]) * rdet;
  364. res.v[0] = float3(prd0.x, prd1.x, prd2.x);
  365. res.v[1] = float3(prd0.y, prd1.y, prd2.y);
  366. res.v[2] = float3(prd0.z, prd1.z, prd2.z);
  367. return res;
  368. }
  369. mat4 adjugate(mat4 p)
  370. {
  371. mat4 res;
  372. float3 bpc0 = cross(p.v[2].yzw, p.v[3].yzw);
  373. float3 tpc0 = cross(p.v[0].yzw, p.v[1].yzw);
  374. res.v[0] = float4(dot(bpc0, p.v[1].yzw), -dot(bpc0, p.v[0].yzw), dot(tpc0, p.v[3].yzw), -dot(tpc0, p.v[2].yzw));
  375. float3 bpc1 = cross(p.v[2].xzw, p.v[3].xzw);
  376. float3 tpc1 = cross(p.v[0].xzw, p.v[1].xzw);
  377. res.v[1] = float4(-dot(bpc1, p.v[1].xzw), dot(bpc1, p.v[0].xzw), -dot(tpc1, p.v[3].xzw), dot(tpc1, p.v[2].xzw));
  378. float3 bpc2 = cross(p.v[2].xyw, p.v[3].xyw);
  379. float3 tpc2 = cross(p.v[0].xyw, p.v[1].xyw);
  380. res.v[2] = float4(dot(bpc2, p.v[1].xyw), -dot(bpc2, p.v[0].xyw), dot(tpc2, p.v[3].xyw), -dot(tpc2, p.v[2].xyw));
  381. float3 bpc3 = cross(p.v[2].xyz, p.v[3].xyz);
  382. float3 tpc3 = cross(p.v[0].xyz, p.v[1].xyz);
  383. res.v[3] = float4(-dot(bpc3, p.v[1].xyz), dot(bpc3, p.v[0].xyz), -dot(tpc3, p.v[3].xyz), dot(tpc3, p.v[2].xyz));
  384. return res;
  385. }
  386. mat4 invert(mat4 p)
  387. {
  388. // cross products between the bottom two rows
  389. float3 bpc0 = cross(p.v[2].yzw, p.v[3].yzw);
  390. float3 bpc1 = cross(p.v[2].xzw, p.v[3].xzw);
  391. float3 bpc2 = cross(p.v[2].xyw, p.v[3].xyw);
  392. float3 bpc3 = cross(p.v[2].xyz, p.v[3].xyz);
  393. // dot-products for the top rows
  394. float4 row1 = float4(dot(bpc0, p.v[1].yzw),
  395. -dot(bpc1, p.v[1].xzw),
  396. dot(bpc2, p.v[1].xyw),
  397. -dot(bpc3, p.v[1].xyz));
  398. float det = dot(p.v[0], row1);
  399. float rdet = 1.0f / det;
  400. mat4 res;
  401. float3 tpc0 = cross(p.v[0].yzw, p.v[1].yzw);
  402. res.v[0] = float4(row1.x, -dot(bpc0, p.v[0].yzw), dot(tpc0, p.v[3].yzw), -dot(tpc0, p.v[2].yzw)) * rdet;
  403. float3 tpc1 = cross(p.v[0].xzw, p.v[1].xzw);
  404. res.v[1] = float4(row1.y, dot(bpc1, p.v[0].xzw), -dot(tpc1, p.v[3].xzw), dot(tpc1, p.v[2].xzw)) * rdet;
  405. float3 tpc2 = cross(p.v[0].xyw, p.v[1].xyw);
  406. res.v[2] = float4(row1.z, -dot(bpc2, p.v[0].xyw), dot(tpc2, p.v[3].xyw), -dot(tpc2, p.v[2].xyw)) * rdet;
  407. float3 tpc3 = cross(p.v[0].xyz, p.v[1].xyz);
  408. res.v[3] = float4(row1.w, dot(bpc3, p.v[0].xyz), -dot(tpc3, p.v[3].xyz), dot(tpc3, p.v[2].xyz)) * rdet;
  409. return res;
  410. }
  411. float2 eigenvalues(mat2 p)
  412. {
  413. return solve_monic(characteristic_poly(p));
  414. }
  415. float3 eigenvalues(mat3 p)
  416. {
  417. return solve_monic(characteristic_poly(p));
  418. }
  419. float4 eigenvalues(mat4 p)
  420. {
  421. return solve_monic(characteristic_poly(p));
  422. }
  423. float2 eigenvector(mat2 p, float eigvl)
  424. {
  425. // for a mat2, we first reverse-subtract the eigenvalue from the matrix diagonal,
  426. // then return whichever row had the larger sum-of-absolute-values.
  427. float4 v = float4(p.v[0], p.v[1]);
  428. v.xw = eigvl - v.xw;
  429. if (fabs(v.x) + fabs(v.y) > fabs(v.z) + fabs(v.w))
  430. return v.yx;
  431. else
  432. return v.wz;
  433. }
  434. float3 eigenvector(mat3 p, float eigvl)
  435. {
  436. // for a mat3, we obtain the eigenvector as follows:
  437. // step 1: subtract the eigenvalue from the matrix diagonal
  438. // step 2: take two cross products between rows in the matrix
  439. // step 3: return whichever of the cross products resulted in a longer vector.
  440. float3 r0 = p.v[0];
  441. float3 r1 = p.v[1];
  442. float3 r2 = p.v[2];
  443. r0.x = r0.x - eigvl;
  444. r1.y = r1.y - eigvl;
  445. r2.z = r2.z - eigvl;
  446. float3 v1 = cross(r0, r1);
  447. float3 v2 = cross(r1, r2);
  448. float len1 = dot(v1, v1);
  449. float len2 = dot(v2, v2);
  450. return len1 > len2 ? v1 : v2;
  451. }
  452. // generalized cross product: 3 vectors with 4 components each.
  453. // The result is a vector that is perpendicular to all the three specified vectors.
  454. // it works in the sense that it produces a perpendicular-to-everything vector,
  455. // but it has not been tested whether it points in the "right" direction.
  456. float4 gcross(float4 p, float4 q, float4 r)
  457. {
  458. return float4(dot(p.yzw, cross(q.yzw, r.yzw)), -dot(p.xzw, cross(q.xzw, r.xzw)), dot(p.xyw, cross(q.xyw, r.xyw)), -dot(p.xyz, cross(q.xyz, r.xyz)));
  459. }
  460. float4 eigenvector(mat4 p, float eigvl)
  461. {
  462. float4 r0 = p.v[0];
  463. float4 r1 = p.v[1];
  464. float4 r2 = p.v[2];
  465. float4 r3 = p.v[3];
  466. r0.x = r0.x - eigvl;
  467. r1.y = r1.y - eigvl;
  468. r2.z = r2.z - eigvl;
  469. r3.w = r3.w - eigvl;
  470. // generate four candidate vectors using the generalized cross product.
  471. // These will in general point in the same direction (or 180 degree opposite),
  472. // however they will have different lengths. Pick the longest one.
  473. float3 tpc0 = cross(r0.yzw, r1.yzw);
  474. float3 tpc1 = cross(r0.xzw, r1.xzw);
  475. float3 tpc2 = cross(r0.xyw, r1.xyw);
  476. float3 tpc3 = cross(r0.xyz, r1.xyz);
  477. float4 v1 = float4(dot(r2.yzw, tpc0),
  478. -dot(r2.xzw, tpc1),
  479. dot(r2.xyw, tpc2),
  480. -dot(r2.xyz, tpc3));
  481. float4 v2 = float4(dot(r3.yzw, tpc0),
  482. -dot(r3.xzw, tpc1),
  483. dot(r3.xyw, tpc2),
  484. -dot(r3.xyz, tpc3));
  485. float3 bpc0 = cross(r2.yzw, r3.yzw);
  486. float3 bpc1 = cross(r2.xzw, r3.xzw);
  487. float3 bpc2 = cross(r2.xyw, r3.xyw);
  488. float3 bpc3 = cross(r2.xyz, r3.xyz);
  489. float4 v3 = float4(dot(r0.yzw, bpc0),
  490. -dot(r0.xzw, bpc1),
  491. dot(r0.xyw, bpc2),
  492. -dot(r0.xyz, bpc3));
  493. float4 v4 = float4(dot(r1.yzw, bpc0),
  494. -dot(r1.xzw, bpc1),
  495. dot(r1.xyw, bpc2),
  496. -dot(r1.xyz, bpc3));
  497. float len1 = dot(v1, v1);
  498. float len2 = dot(v2, v2);
  499. float len3 = dot(v3, v3);
  500. float len4 = dot(v4, v4);
  501. if (fmax(len1, len2) > fmax(len3, len4))
  502. return len1 > len2 ? v1 : v2;
  503. else
  504. return len3 > len4 ? v3 : v4;
  505. }
  506. // matrix multiply
  507. mat2 operator *(mat2 a, mat2 b)
  508. {
  509. mat2 res;
  510. res.v[0] = a.v[0].x * b.v[0] + a.v[0].y * b.v[1];
  511. res.v[1] = a.v[1].x * b.v[0] + a.v[1].y * b.v[1];
  512. return res;
  513. }
  514. mat3 operator *(mat3 a, mat3 b)
  515. {
  516. mat3 res;
  517. res.v[0] = a.v[0].x * b.v[0] + a.v[0].y * b.v[1] + a.v[0].z * b.v[2];
  518. res.v[1] = a.v[1].x * b.v[0] + a.v[1].y * b.v[1] + a.v[1].z * b.v[2];
  519. res.v[2] = a.v[2].x * b.v[0] + a.v[2].y * b.v[1] + a.v[2].z * b.v[2];
  520. return res;
  521. }
  522. mat4 operator *(mat4 a, mat4 b)
  523. {
  524. mat4 res;
  525. res.v[0] = a.v[0].x * b.v[0] + a.v[0].y * b.v[1] + a.v[0].z * b.v[2] + a.v[0].w * b.v[3];
  526. res.v[1] = a.v[1].x * b.v[0] + a.v[1].y * b.v[1] + a.v[1].z * b.v[2] + a.v[1].w * b.v[3];
  527. res.v[2] = a.v[2].x * b.v[0] + a.v[2].y * b.v[1] + a.v[2].z * b.v[2] + a.v[2].w * b.v[3];
  528. res.v[3] = a.v[3].x * b.v[0] + a.v[3].y * b.v[1] + a.v[3].z * b.v[2] + a.v[3].w * b.v[3];
  529. return res;
  530. }
  531. /*************************
  532. simple geometric functions
  533. *************************/
  534. // return parameter value for the point on the line closest to the specified point
  535. float param_nearest_on_line(float2 point, line2 line)
  536. {
  537. return dot(point - line.a, line.b) / dot(line.b, line.b);
  538. }
  539. float param_nearest_on_line(float3 point, line3 line)
  540. {
  541. return dot(point - line.a, line.b) / dot(line.b, line.b);
  542. }
  543. float param_nearest_on_line(float4 point, line4 line)
  544. {
  545. return dot(point - line.a, line.b) / dot(line.b, line.b);
  546. }
  547. // return distance between point and line
  548. float point_line_distance(float2 point, line2 line)
  549. {
  550. return distance(point, line.a + line.b * param_nearest_on_line(point, line));
  551. }
  552. float point_line_distance(float3 point, line3 line)
  553. {
  554. return distance(point, line.a + line.b * param_nearest_on_line(point, line));
  555. }
  556. float point_line_distance(float4 point, line4 line)
  557. {
  558. return distance(point, line.a + line.b * param_nearest_on_line(point, line));
  559. }
  560. float point_line_distance_sqr(float2 point, line2 line)
  561. {
  562. return distance_sqr(point, line.a + line.b * param_nearest_on_line(point, line));
  563. }
  564. float point_line_distance_sqr(float3 point, line3 line)
  565. {
  566. return distance_sqr(point, line.a + line.b * param_nearest_on_line(point, line));
  567. }
  568. float point_line_distance_sqr(float4 point, line4 line)
  569. {
  570. return distance_sqr(point, line.a + line.b * param_nearest_on_line(point, line));
  571. }
  572. // distance between plane/hyperplane in 3D and 4D
  573. float point_plane_3d_distance(float3 point, plane_3d plane)
  574. {
  575. return dot(point - plane.root_point, plane.normal);
  576. }
  577. float point_hyperplane_4d_distance(float4 point, hyperplane_4d plane)
  578. {
  579. return dot(point - plane.root_point, plane.normal);
  580. }
  581. // helper functions to produce a 3D plane from three points and a 4D hyperplane from four points.
  582. plane_3d generate_plane_from_points(float3 point0, float3 point1, float3 point2)
  583. {
  584. plane_3d res;
  585. res.root_point = point0;
  586. res.normal = normalize(cross(point1 - point0, point2 - point0));
  587. return res;
  588. }
  589. hyperplane_4d generate_hyperplane_from_points(float4 point0, float4 point1, float4 point2, float4 point3)
  590. {
  591. hyperplane_4d res;
  592. res.root_point = point0;
  593. res.normal = normalize(gcross(point1 - point0, point2 - point0, point3 - point0));
  594. return res;
  595. }