fitting.cpp 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200
  1. // This code is in the public domain -- Ignacio Castaño <[email protected]>
  2. #include "fitting.h"
  3. #include "vector.inl"
  4. #include "plane.inl"
  5. #include "matrix.inl"
  6. #include "nvcore/array.inl"
  7. #include "nvcore/utils.h" // max, swap
  8. using namespace nv;
  9. // @@ Move to EigenSolver.h
  10. // @@ We should be able to do something cheaper...
  11. static Vector3 estimatePrincipalComponent(const float * __restrict matrix)
  12. {
  13. const Vector3 row0(matrix[0], matrix[1], matrix[2]);
  14. const Vector3 row1(matrix[1], matrix[3], matrix[4]);
  15. const Vector3 row2(matrix[2], matrix[4], matrix[5]);
  16. float r0 = lengthSquared(row0);
  17. float r1 = lengthSquared(row1);
  18. float r2 = lengthSquared(row2);
  19. if (r0 > r1 && r0 > r2) return row0;
  20. if (r1 > r2) return row1;
  21. return row2;
  22. }
  23. static inline Vector3 firstEigenVector_PowerMethod(const float *__restrict matrix)
  24. {
  25. if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0)
  26. {
  27. return Vector3(0.0f);
  28. }
  29. Vector3 v = estimatePrincipalComponent(matrix);
  30. const int NUM = 8;
  31. for (int i = 0; i < NUM; i++)
  32. {
  33. float x = v.x * matrix[0] + v.y * matrix[1] + v.z * matrix[2];
  34. float y = v.x * matrix[1] + v.y * matrix[3] + v.z * matrix[4];
  35. float z = v.x * matrix[2] + v.y * matrix[4] + v.z * matrix[5];
  36. float norm = max(max(x, y), z);
  37. v = Vector3(x, y, z) / norm;
  38. }
  39. return v;
  40. }
  41. Vector3 nv::Fit::computeCentroid(int n, const Vector3 *__restrict points)
  42. {
  43. Vector3 centroid(0.0f);
  44. for (int i = 0; i < n; i++)
  45. {
  46. centroid += points[i];
  47. }
  48. centroid /= float(n);
  49. return centroid;
  50. }
  51. Vector3 nv::Fit::computeCentroid(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric)
  52. {
  53. Vector3 centroid(0.0f);
  54. float total = 0.0f;
  55. for (int i = 0; i < n; i++)
  56. {
  57. total += weights[i];
  58. centroid += weights[i]*points[i];
  59. }
  60. centroid /= total;
  61. return centroid;
  62. }
  63. Vector4 nv::Fit::computeCentroid(int n, const Vector4 *__restrict points)
  64. {
  65. Vector4 centroid(0.0f);
  66. for (int i = 0; i < n; i++)
  67. {
  68. centroid += points[i];
  69. }
  70. centroid /= float(n);
  71. return centroid;
  72. }
  73. Vector4 nv::Fit::computeCentroid(int n, const Vector4 *__restrict points, const float *__restrict weights, Vector4::Arg metric)
  74. {
  75. Vector4 centroid(0.0f);
  76. float total = 0.0f;
  77. for (int i = 0; i < n; i++)
  78. {
  79. total += weights[i];
  80. centroid += weights[i]*points[i];
  81. }
  82. centroid /= total;
  83. return centroid;
  84. }
  85. Vector3 nv::Fit::computeCovariance(int n, const Vector3 *__restrict points, float *__restrict covariance)
  86. {
  87. // compute the centroid
  88. Vector3 centroid = computeCentroid(n, points);
  89. // compute covariance matrix
  90. for (int i = 0; i < 6; i++)
  91. {
  92. covariance[i] = 0.0f;
  93. }
  94. for (int i = 0; i < n; i++)
  95. {
  96. Vector3 v = points[i] - centroid;
  97. covariance[0] += v.x * v.x;
  98. covariance[1] += v.x * v.y;
  99. covariance[2] += v.x * v.z;
  100. covariance[3] += v.y * v.y;
  101. covariance[4] += v.y * v.z;
  102. covariance[5] += v.z * v.z;
  103. }
  104. return centroid;
  105. }
  106. Vector3 nv::Fit::computeCovariance(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric, float *__restrict covariance)
  107. {
  108. // compute the centroid
  109. Vector3 centroid = computeCentroid(n, points, weights, metric);
  110. // compute covariance matrix
  111. for (int i = 0; i < 6; i++)
  112. {
  113. covariance[i] = 0.0f;
  114. }
  115. for (int i = 0; i < n; i++)
  116. {
  117. Vector3 a = (points[i] - centroid) * metric;
  118. Vector3 b = weights[i]*a;
  119. covariance[0] += a.x * b.x;
  120. covariance[1] += a.x * b.y;
  121. covariance[2] += a.x * b.z;
  122. covariance[3] += a.y * b.y;
  123. covariance[4] += a.y * b.z;
  124. covariance[5] += a.z * b.z;
  125. }
  126. return centroid;
  127. }
  128. Vector4 nv::Fit::computeCovariance(int n, const Vector4 *__restrict points, float *__restrict covariance)
  129. {
  130. // compute the centroid
  131. Vector4 centroid = computeCentroid(n, points);
  132. // compute covariance matrix
  133. for (int i = 0; i < 10; i++)
  134. {
  135. covariance[i] = 0.0f;
  136. }
  137. for (int i = 0; i < n; i++)
  138. {
  139. Vector4 v = points[i] - centroid;
  140. covariance[0] += v.x * v.x;
  141. covariance[1] += v.x * v.y;
  142. covariance[2] += v.x * v.z;
  143. covariance[3] += v.x * v.w;
  144. covariance[4] += v.y * v.y;
  145. covariance[5] += v.y * v.z;
  146. covariance[6] += v.y * v.w;
  147. covariance[7] += v.z * v.z;
  148. covariance[8] += v.z * v.w;
  149. covariance[9] += v.w * v.w;
  150. }
  151. return centroid;
  152. }
  153. Vector4 nv::Fit::computeCovariance(int n, const Vector4 *__restrict points, const float *__restrict weights, Vector4::Arg metric, float *__restrict covariance)
  154. {
  155. // compute the centroid
  156. Vector4 centroid = computeCentroid(n, points, weights, metric);
  157. // compute covariance matrix
  158. for (int i = 0; i < 10; i++)
  159. {
  160. covariance[i] = 0.0f;
  161. }
  162. for (int i = 0; i < n; i++)
  163. {
  164. Vector4 a = (points[i] - centroid) * metric;
  165. Vector4 b = weights[i]*a;
  166. covariance[0] += a.x * b.x;
  167. covariance[1] += a.x * b.y;
  168. covariance[2] += a.x * b.z;
  169. covariance[3] += a.x * b.w;
  170. covariance[4] += a.y * b.y;
  171. covariance[5] += a.y * b.z;
  172. covariance[6] += a.y * b.w;
  173. covariance[7] += a.z * b.z;
  174. covariance[8] += a.z * b.w;
  175. covariance[9] += a.w * b.w;
  176. }
  177. return centroid;
  178. }
  179. Vector3 nv::Fit::computePrincipalComponent_PowerMethod(int n, const Vector3 *__restrict points)
  180. {
  181. float matrix[6];
  182. computeCovariance(n, points, matrix);
  183. return firstEigenVector_PowerMethod(matrix);
  184. }
  185. Vector3 nv::Fit::computePrincipalComponent_PowerMethod(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric)
  186. {
  187. float matrix[6];
  188. computeCovariance(n, points, weights, metric, matrix);
  189. return firstEigenVector_PowerMethod(matrix);
  190. }
  191. static inline Vector3 firstEigenVector_EigenSolver3(const float *__restrict matrix)
  192. {
  193. if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0)
  194. {
  195. return Vector3(0.0f);
  196. }
  197. float eigenValues[3];
  198. Vector3 eigenVectors[3];
  199. if (!nv::Fit::eigenSolveSymmetric3(matrix, eigenValues, eigenVectors))
  200. {
  201. return Vector3(0.0f);
  202. }
  203. return eigenVectors[0];
  204. }
  205. Vector3 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector3 *__restrict points)
  206. {
  207. float matrix[6];
  208. computeCovariance(n, points, matrix);
  209. return firstEigenVector_EigenSolver3(matrix);
  210. }
  211. Vector3 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric)
  212. {
  213. float matrix[6];
  214. computeCovariance(n, points, weights, metric, matrix);
  215. return firstEigenVector_EigenSolver3(matrix);
  216. }
  217. static inline Vector4 firstEigenVector_EigenSolver4(const float *__restrict matrix)
  218. {
  219. if (matrix[0] == 0 && matrix[4] == 0 && matrix[7] == 0&& matrix[9] == 0)
  220. {
  221. return Vector4(0.0f);
  222. }
  223. float eigenValues[4];
  224. Vector4 eigenVectors[4];
  225. if (!nv::Fit::eigenSolveSymmetric4(matrix, eigenValues, eigenVectors))
  226. {
  227. return Vector4(0.0f);
  228. }
  229. return eigenVectors[0];
  230. }
  231. Vector4 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector4 *__restrict points)
  232. {
  233. float matrix[10];
  234. computeCovariance(n, points, matrix);
  235. return firstEigenVector_EigenSolver4(matrix);
  236. }
  237. Vector4 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector4 *__restrict points, const float *__restrict weights, Vector4::Arg metric)
  238. {
  239. float matrix[10];
  240. computeCovariance(n, points, weights, metric, matrix);
  241. return firstEigenVector_EigenSolver4(matrix);
  242. }
  243. void ArvoSVD(int rows, int cols, float * Q, float * diag, float * R);
  244. Vector3 nv::Fit::computePrincipalComponent_SVD(int n, const Vector3 *__restrict points)
  245. {
  246. // Store the points in an n x n matrix
  247. Array<float> Q; Q.resize(n*n, 0.0f);
  248. for (int i = 0; i < n; ++i)
  249. {
  250. Q[i*n+0] = points[i].x;
  251. Q[i*n+1] = points[i].y;
  252. Q[i*n+2] = points[i].z;
  253. }
  254. // Alloc space for the SVD outputs
  255. Array<float> diag; diag.resize(n, 0.0f);
  256. Array<float> R; R.resize(n*n, 0.0f);
  257. ArvoSVD(n, n, &Q[0], &diag[0], &R[0]);
  258. // Get the principal component
  259. return Vector3(R[0], R[1], R[2]);
  260. }
  261. Vector4 nv::Fit::computePrincipalComponent_SVD(int n, const Vector4 *__restrict points)
  262. {
  263. // Store the points in an n x n matrix
  264. Array<float> Q; Q.resize(n*n, 0.0f);
  265. for (int i = 0; i < n; ++i)
  266. {
  267. Q[i*n+0] = points[i].x;
  268. Q[i*n+1] = points[i].y;
  269. Q[i*n+2] = points[i].z;
  270. Q[i*n+3] = points[i].w;
  271. }
  272. // Alloc space for the SVD outputs
  273. Array<float> diag; diag.resize(n, 0.0f);
  274. Array<float> R; R.resize(n*n, 0.0f);
  275. ArvoSVD(n, n, &Q[0], &diag[0], &R[0]);
  276. // Get the principal component
  277. return Vector4(R[0], R[1], R[2], R[3]);
  278. }
  279. Plane nv::Fit::bestPlane(int n, const Vector3 *__restrict points)
  280. {
  281. // compute the centroid and covariance
  282. float matrix[6];
  283. Vector3 centroid = computeCovariance(n, points, matrix);
  284. if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0)
  285. {
  286. // If no plane defined, then return a horizontal plane.
  287. return Plane(Vector3(0, 0, 1), centroid);
  288. }
  289. float eigenValues[3];
  290. Vector3 eigenVectors[3];
  291. if (!eigenSolveSymmetric3(matrix, eigenValues, eigenVectors)) {
  292. // If no plane defined, then return a horizontal plane.
  293. return Plane(Vector3(0, 0, 1), centroid);
  294. }
  295. return Plane(eigenVectors[2], centroid);
  296. }
  297. bool nv::Fit::isPlanar(int n, const Vector3 * points, float epsilon/*=NV_EPSILON*/)
  298. {
  299. // compute the centroid and covariance
  300. float matrix[6];
  301. computeCovariance(n, points, matrix);
  302. float eigenValues[3];
  303. Vector3 eigenVectors[3];
  304. if (!eigenSolveSymmetric3(matrix, eigenValues, eigenVectors)) {
  305. return false;
  306. }
  307. return eigenValues[2] < epsilon;
  308. }
  309. // Tridiagonal solver from Charles Bloom.
  310. // Householder transforms followed by QL decomposition.
  311. // Seems to be based on the code from Numerical Recipes in C.
  312. static void EigenSolver3_Tridiagonal(float mat[3][3], float * diag, float * subd);
  313. static bool EigenSolver3_QLAlgorithm(float mat[3][3], float * diag, float * subd);
  314. bool nv::Fit::eigenSolveSymmetric3(const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3])
  315. {
  316. nvDebugCheck(matrix != NULL && eigenValues != NULL && eigenVectors != NULL);
  317. float subd[3];
  318. float diag[3];
  319. float work[3][3];
  320. work[0][0] = matrix[0];
  321. work[0][1] = work[1][0] = matrix[1];
  322. work[0][2] = work[2][0] = matrix[2];
  323. work[1][1] = matrix[3];
  324. work[1][2] = work[2][1] = matrix[4];
  325. work[2][2] = matrix[5];
  326. EigenSolver3_Tridiagonal(work, diag, subd);
  327. if (!EigenSolver3_QLAlgorithm(work, diag, subd))
  328. {
  329. for (int i = 0; i < 3; i++) {
  330. eigenValues[i] = 0;
  331. eigenVectors[i] = Vector3(0);
  332. }
  333. return false;
  334. }
  335. for (int i = 0; i < 3; i++) {
  336. eigenValues[i] = (float)diag[i];
  337. }
  338. // eigenvectors are the columns; make them the rows :
  339. for (int i=0; i < 3; i++)
  340. {
  341. for (int j = 0; j < 3; j++)
  342. {
  343. eigenVectors[j].component[i] = (float) work[i][j];
  344. }
  345. }
  346. // shuffle to sort by singular value :
  347. if (eigenValues[2] > eigenValues[0] && eigenValues[2] > eigenValues[1])
  348. {
  349. swap(eigenValues[0], eigenValues[2]);
  350. swap(eigenVectors[0], eigenVectors[2]);
  351. }
  352. if (eigenValues[1] > eigenValues[0])
  353. {
  354. swap(eigenValues[0], eigenValues[1]);
  355. swap(eigenVectors[0], eigenVectors[1]);
  356. }
  357. if (eigenValues[2] > eigenValues[1])
  358. {
  359. swap(eigenValues[1], eigenValues[2]);
  360. swap(eigenVectors[1], eigenVectors[2]);
  361. }
  362. nvDebugCheck(eigenValues[0] >= eigenValues[1] && eigenValues[0] >= eigenValues[2]);
  363. nvDebugCheck(eigenValues[1] >= eigenValues[2]);
  364. return true;
  365. }
  366. static void EigenSolver3_Tridiagonal(float mat[3][3], float * diag, float * subd)
  367. {
  368. // Householder reduction T = Q^t M Q
  369. // Input:
  370. // mat, symmetric 3x3 matrix M
  371. // Output:
  372. // mat, orthogonal matrix Q
  373. // diag, diagonal entries of T
  374. // subd, subdiagonal entries of T (T is symmetric)
  375. const float epsilon = 1e-08f;
  376. float a = mat[0][0];
  377. float b = mat[0][1];
  378. float c = mat[0][2];
  379. float d = mat[1][1];
  380. float e = mat[1][2];
  381. float f = mat[2][2];
  382. diag[0] = a;
  383. subd[2] = 0.f;
  384. if (fabsf(c) >= epsilon)
  385. {
  386. const float ell = sqrtf(b*b+c*c);
  387. b /= ell;
  388. c /= ell;
  389. const float q = 2*b*e+c*(f-d);
  390. diag[1] = d+c*q;
  391. diag[2] = f-c*q;
  392. subd[0] = ell;
  393. subd[1] = e-b*q;
  394. mat[0][0] = 1; mat[0][1] = 0; mat[0][2] = 0;
  395. mat[1][0] = 0; mat[1][1] = b; mat[1][2] = c;
  396. mat[2][0] = 0; mat[2][1] = c; mat[2][2] = -b;
  397. }
  398. else
  399. {
  400. diag[1] = d;
  401. diag[2] = f;
  402. subd[0] = b;
  403. subd[1] = e;
  404. mat[0][0] = 1; mat[0][1] = 0; mat[0][2] = 0;
  405. mat[1][0] = 0; mat[1][1] = 1; mat[1][2] = 0;
  406. mat[2][0] = 0; mat[2][1] = 0; mat[2][2] = 1;
  407. }
  408. }
  409. static bool EigenSolver3_QLAlgorithm(float mat[3][3], float * diag, float * subd)
  410. {
  411. // QL iteration with implicit shifting to reduce matrix from tridiagonal
  412. // to diagonal
  413. const int maxiter = 32;
  414. for (int ell = 0; ell < 3; ell++)
  415. {
  416. int iter;
  417. for (iter = 0; iter < maxiter; iter++)
  418. {
  419. int m;
  420. for (m = ell; m <= 1; m++)
  421. {
  422. float dd = fabsf(diag[m]) + fabsf(diag[m+1]);
  423. if ( fabsf(subd[m]) + dd == dd )
  424. break;
  425. }
  426. if ( m == ell )
  427. break;
  428. float g = (diag[ell+1]-diag[ell])/(2*subd[ell]);
  429. float r = sqrtf(g*g+1);
  430. if ( g < 0 )
  431. g = diag[m]-diag[ell]+subd[ell]/(g-r);
  432. else
  433. g = diag[m]-diag[ell]+subd[ell]/(g+r);
  434. float s = 1, c = 1, p = 0;
  435. for (int i = m-1; i >= ell; i--)
  436. {
  437. float f = s*subd[i], b = c*subd[i];
  438. if ( fabsf(f) >= fabsf(g) )
  439. {
  440. c = g/f;
  441. r = sqrtf(c*c+1);
  442. subd[i+1] = f*r;
  443. c *= (s = 1/r);
  444. }
  445. else
  446. {
  447. s = f/g;
  448. r = sqrtf(s*s+1);
  449. subd[i+1] = g*r;
  450. s *= (c = 1/r);
  451. }
  452. g = diag[i+1]-p;
  453. r = (diag[i]-g)*s+2*b*c;
  454. p = s*r;
  455. diag[i+1] = g+p;
  456. g = c*r-b;
  457. for (int k = 0; k < 3; k++)
  458. {
  459. f = mat[k][i+1];
  460. mat[k][i+1] = s*mat[k][i]+c*f;
  461. mat[k][i] = c*mat[k][i]-s*f;
  462. }
  463. }
  464. diag[ell] -= p;
  465. subd[ell] = g;
  466. subd[m] = 0;
  467. }
  468. if ( iter == maxiter )
  469. // should not get here under normal circumstances
  470. return false;
  471. }
  472. return true;
  473. }
  474. // Tridiagonal solver for 4x4 symmetric matrices.
  475. static void EigenSolver4_Tridiagonal(float mat[4][4], float * diag, float * subd);
  476. static bool EigenSolver4_QLAlgorithm(float mat[4][4], float * diag, float * subd);
  477. bool nv::Fit::eigenSolveSymmetric4(const float matrix[10], float eigenValues[4], Vector4 eigenVectors[4])
  478. {
  479. nvDebugCheck(matrix != NULL && eigenValues != NULL && eigenVectors != NULL);
  480. float subd[4];
  481. float diag[4];
  482. float work[4][4];
  483. work[0][0] = matrix[0];
  484. work[0][1] = work[1][0] = matrix[1];
  485. work[0][2] = work[2][0] = matrix[2];
  486. work[0][3] = work[3][0] = matrix[3];
  487. work[1][1] = matrix[4];
  488. work[1][2] = work[2][1] = matrix[5];
  489. work[1][3] = work[3][1] = matrix[6];
  490. work[2][2] = matrix[7];
  491. work[2][3] = work[3][2] = matrix[8];
  492. work[3][3] = matrix[9];
  493. EigenSolver4_Tridiagonal(work, diag, subd);
  494. if (!EigenSolver4_QLAlgorithm(work, diag, subd))
  495. {
  496. for (int i = 0; i < 4; i++) {
  497. eigenValues[i] = 0;
  498. eigenVectors[i] = Vector4(0);
  499. }
  500. return false;
  501. }
  502. for (int i = 0; i < 4; i++) {
  503. eigenValues[i] = (float)diag[i];
  504. }
  505. // eigenvectors are the columns; make them the rows
  506. for (int i = 0; i < 4; i++)
  507. {
  508. for (int j = 0; j < 4; j++)
  509. {
  510. eigenVectors[j].component[i] = (float) work[i][j];
  511. }
  512. }
  513. // sort by singular value
  514. for (int i = 0; i < 3; ++i)
  515. {
  516. for (int j = i+1; j < 4; ++j)
  517. {
  518. if (eigenValues[j] > eigenValues[i])
  519. {
  520. swap(eigenValues[i], eigenValues[j]);
  521. swap(eigenVectors[i], eigenVectors[j]);
  522. }
  523. }
  524. }
  525. nvDebugCheck(eigenValues[0] >= eigenValues[1] && eigenValues[0] >= eigenValues[2] && eigenValues[0] >= eigenValues[3]);
  526. nvDebugCheck(eigenValues[1] >= eigenValues[2] && eigenValues[1] >= eigenValues[3]);
  527. nvDebugCheck(eigenValues[2] >= eigenValues[2]);
  528. return true;
  529. }
  530. inline float signNonzero(float x)
  531. {
  532. return (x >= 0.0f) ? 1.0f : -1.0f;
  533. }
  534. static void EigenSolver4_Tridiagonal(float mat[4][4], float * diag, float * subd)
  535. {
  536. // Householder reduction T = Q^t M Q
  537. // Input:
  538. // mat, symmetric 3x3 matrix M
  539. // Output:
  540. // mat, orthogonal matrix Q
  541. // diag, diagonal entries of T
  542. // subd, subdiagonal entries of T (T is symmetric)
  543. static const int n = 4;
  544. // Set epsilon relative to size of elements in matrix
  545. static const float relEpsilon = 1e-6f;
  546. float maxElement = FLT_MAX;
  547. for (int i = 0; i < n; ++i)
  548. for (int j = 0; j < n; ++j)
  549. maxElement = max(maxElement, fabsf(mat[i][j]));
  550. float epsilon = relEpsilon * maxElement;
  551. // Iterative algorithm, works for any size of matrix but might be slower than
  552. // a closed-form solution for symmetric 4x4 matrices. Based on this article:
  553. // http://en.wikipedia.org/wiki/Householder_transformation#Tridiagonalization
  554. Matrix A, Q(identity);
  555. memcpy(&A, mat, sizeof(float)*n*n);
  556. // We proceed from left to right, making the off-tridiagonal entries zero in
  557. // one column of the matrix at a time.
  558. for (int k = 0; k < n - 2; ++k)
  559. {
  560. float sum = 0.0f;
  561. for (int j = k+1; j < n; ++j)
  562. sum += A(j,k)*A(j,k);
  563. float alpha = -signNonzero(A(k+1,k)) * sqrtf(sum);
  564. float r = sqrtf(0.5f * (alpha*alpha - A(k+1,k)*alpha));
  565. // If r is zero, skip this column - already in tridiagonal form
  566. if (fabsf(r) < epsilon)
  567. continue;
  568. float v[n] = {};
  569. v[k+1] = 0.5f * (A(k+1,k) - alpha) / r;
  570. for (int j = k+2; j < n; ++j)
  571. v[j] = 0.5f * A(j,k) / r;
  572. Matrix P(identity);
  573. for (int i = 0; i < n; ++i)
  574. for (int j = 0; j < n; ++j)
  575. P(i,j) -= 2.0f * v[i] * v[j];
  576. A = mul(mul(P, A), P);
  577. Q = mul(Q, P);
  578. }
  579. nvDebugCheck(fabsf(A(2,0)) < epsilon);
  580. nvDebugCheck(fabsf(A(0,2)) < epsilon);
  581. nvDebugCheck(fabsf(A(3,0)) < epsilon);
  582. nvDebugCheck(fabsf(A(0,3)) < epsilon);
  583. nvDebugCheck(fabsf(A(3,1)) < epsilon);
  584. nvDebugCheck(fabsf(A(1,3)) < epsilon);
  585. for (int i = 0; i < n; ++i)
  586. diag[i] = A(i,i);
  587. for (int i = 0; i < n - 1; ++i)
  588. subd[i] = A(i+1,i);
  589. subd[n-1] = 0.0f;
  590. memcpy(mat, &Q, sizeof(float)*n*n);
  591. }
  592. static bool EigenSolver4_QLAlgorithm(float mat[4][4], float * diag, float * subd)
  593. {
  594. // QL iteration with implicit shifting to reduce matrix from tridiagonal
  595. // to diagonal
  596. const int maxiter = 32;
  597. for (int ell = 0; ell < 4; ell++)
  598. {
  599. int iter;
  600. for (iter = 0; iter < maxiter; iter++)
  601. {
  602. int m;
  603. for (m = ell; m < 3; m++)
  604. {
  605. float dd = fabsf(diag[m]) + fabsf(diag[m+1]);
  606. if ( fabsf(subd[m]) + dd == dd )
  607. break;
  608. }
  609. if ( m == ell )
  610. break;
  611. float g = (diag[ell+1]-diag[ell])/(2*subd[ell]);
  612. float r = sqrtf(g*g+1);
  613. if ( g < 0 )
  614. g = diag[m]-diag[ell]+subd[ell]/(g-r);
  615. else
  616. g = diag[m]-diag[ell]+subd[ell]/(g+r);
  617. float s = 1, c = 1, p = 0;
  618. for (int i = m-1; i >= ell; i--)
  619. {
  620. float f = s*subd[i], b = c*subd[i];
  621. if ( fabsf(f) >= fabsf(g) )
  622. {
  623. c = g/f;
  624. r = sqrtf(c*c+1);
  625. subd[i+1] = f*r;
  626. c *= (s = 1/r);
  627. }
  628. else
  629. {
  630. s = f/g;
  631. r = sqrtf(s*s+1);
  632. subd[i+1] = g*r;
  633. s *= (c = 1/r);
  634. }
  635. g = diag[i+1]-p;
  636. r = (diag[i]-g)*s+2*b*c;
  637. p = s*r;
  638. diag[i+1] = g+p;
  639. g = c*r-b;
  640. for (int k = 0; k < 4; k++)
  641. {
  642. f = mat[k][i+1];
  643. mat[k][i+1] = s*mat[k][i]+c*f;
  644. mat[k][i] = c*mat[k][i]-s*f;
  645. }
  646. }
  647. diag[ell] -= p;
  648. subd[ell] = g;
  649. subd[m] = 0;
  650. }
  651. if ( iter == maxiter )
  652. // should not get here under normal circumstances
  653. return false;
  654. }
  655. return true;
  656. }
  657. int nv::Fit::compute4Means(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric, Vector3 *__restrict cluster)
  658. {
  659. // Compute principal component.
  660. float matrix[6];
  661. Vector3 centroid = computeCovariance(n, points, weights, metric, matrix);
  662. Vector3 principal = firstEigenVector_PowerMethod(matrix);
  663. // Pick initial solution.
  664. int mini, maxi;
  665. mini = maxi = 0;
  666. float mindps, maxdps;
  667. mindps = maxdps = dot(points[0] - centroid, principal);
  668. for (int i = 1; i < n; ++i)
  669. {
  670. float dps = dot(points[i] - centroid, principal);
  671. if (dps < mindps) {
  672. mindps = dps;
  673. mini = i;
  674. }
  675. else {
  676. maxdps = dps;
  677. maxi = i;
  678. }
  679. }
  680. cluster[0] = centroid + mindps * principal;
  681. cluster[1] = centroid + maxdps * principal;
  682. cluster[2] = (2.0f * cluster[0] + cluster[1]) / 3.0f;
  683. cluster[3] = (2.0f * cluster[1] + cluster[0]) / 3.0f;
  684. // Now we have to iteratively refine the clusters.
  685. while (true)
  686. {
  687. Vector3 newCluster[4] = { Vector3(0.0f), Vector3(0.0f), Vector3(0.0f), Vector3(0.0f) };
  688. float total[4] = {0, 0, 0, 0};
  689. for (int i = 0; i < n; ++i)
  690. {
  691. // Find nearest cluster.
  692. int nearest = 0;
  693. float mindist = FLT_MAX;
  694. for (int j = 0; j < 4; j++)
  695. {
  696. float dist = lengthSquared((cluster[j] - points[i]) * metric);
  697. if (dist < mindist)
  698. {
  699. mindist = dist;
  700. nearest = j;
  701. }
  702. }
  703. newCluster[nearest] += weights[i] * points[i];
  704. total[nearest] += weights[i];
  705. }
  706. for (int j = 0; j < 4; j++)
  707. {
  708. if (total[j] != 0)
  709. newCluster[j] /= total[j];
  710. }
  711. if (equal(cluster[0], newCluster[0]) && equal(cluster[1], newCluster[1]) &&
  712. equal(cluster[2], newCluster[2]) && equal(cluster[3], newCluster[3]))
  713. {
  714. return (total[0] != 0) + (total[1] != 0) + (total[2] != 0) + (total[3] != 0);
  715. }
  716. cluster[0] = newCluster[0];
  717. cluster[1] = newCluster[1];
  718. cluster[2] = newCluster[2];
  719. cluster[3] = newCluster[3];
  720. // Sort clusters by weight.
  721. for (int i = 0; i < 4; i++)
  722. {
  723. for (int j = i; j > 0 && total[j] > total[j - 1]; j--)
  724. {
  725. swap( total[j], total[j - 1] );
  726. swap( cluster[j], cluster[j - 1] );
  727. }
  728. }
  729. }
  730. }
  731. // Adaptation of James Arvo's SVD code, as found in ZOH.
  732. inline float Sqr(float x) { return x*x; }
  733. inline float svd_pythag( float a, float b )
  734. {
  735. float at = fabsf(a);
  736. float bt = fabsf(b);
  737. if( at > bt )
  738. return at * sqrtf( 1.0f + Sqr( bt / at ) );
  739. else if( bt > 0.0f )
  740. return bt * sqrtf( 1.0f + Sqr( at / bt ) );
  741. else return 0.0f;
  742. }
  743. inline float SameSign( float a, float b )
  744. {
  745. float t;
  746. if( b >= 0.0f ) t = fabsf( a );
  747. else t = -fabsf( a );
  748. return t;
  749. }
  750. void ArvoSVD(int rows, int cols, float * Q, float * diag, float * R)
  751. {
  752. static const int MaxIterations = 30;
  753. int i, j, k, l, p, q, iter;
  754. float c, f, h, s, x, y, z;
  755. float norm = 0.0f;
  756. float g = 0.0f;
  757. float scale = 0.0f;
  758. Array<float> temp; temp.resize(cols, 0.0f);
  759. for( i = 0; i < cols; i++ )
  760. {
  761. temp[i] = scale * g;
  762. scale = 0.0f;
  763. g = 0.0f;
  764. s = 0.0f;
  765. l = i + 1;
  766. if( i < rows )
  767. {
  768. for( k = i; k < rows; k++ ) scale += fabsf( Q[k*cols+i] );
  769. if( scale != 0.0f )
  770. {
  771. for( k = i; k < rows; k++ )
  772. {
  773. Q[k*cols+i] /= scale;
  774. s += Sqr( Q[k*cols+i] );
  775. }
  776. f = Q[i*cols+i];
  777. g = -SameSign( sqrtf(s), f );
  778. h = f * g - s;
  779. Q[i*cols+i] = f - g;
  780. if( i != cols - 1 )
  781. {
  782. for( j = l; j < cols; j++ )
  783. {
  784. s = 0.0f;
  785. for( k = i; k < rows; k++ ) s += Q[k*cols+i] * Q[k*cols+j];
  786. f = s / h;
  787. for( k = i; k < rows; k++ ) Q[k*cols+j] += f * Q[k*cols+i];
  788. }
  789. }
  790. for( k = i; k < rows; k++ ) Q[k*cols+i] *= scale;
  791. }
  792. }
  793. diag[i] = scale * g;
  794. g = 0.0f;
  795. s = 0.0f;
  796. scale = 0.0f;
  797. if( i < rows && i != cols - 1 )
  798. {
  799. for( k = l; k < cols; k++ ) scale += fabsf( Q[i*cols+k] );
  800. if( scale != 0.0f )
  801. {
  802. for( k = l; k < cols; k++ )
  803. {
  804. Q[i*cols+k] /= scale;
  805. s += Sqr( Q[i*cols+k] );
  806. }
  807. f = Q[i*cols+l];
  808. g = -SameSign( sqrtf(s), f );
  809. h = f * g - s;
  810. Q[i*cols+l] = f - g;
  811. for( k = l; k < cols; k++ ) temp[k] = Q[i*cols+k] / h;
  812. if( i != rows - 1 )
  813. {
  814. for( j = l; j < rows; j++ )
  815. {
  816. s = 0.0f;
  817. for( k = l; k < cols; k++ ) s += Q[j*cols+k] * Q[i*cols+k];
  818. for( k = l; k < cols; k++ ) Q[j*cols+k] += s * temp[k];
  819. }
  820. }
  821. for( k = l; k < cols; k++ ) Q[i*cols+k] *= scale;
  822. }
  823. }
  824. norm = max( norm, fabsf( diag[i] ) + fabsf( temp[i] ) );
  825. }
  826. for( i = cols - 1; i >= 0; i-- )
  827. {
  828. if( i < cols - 1 )
  829. {
  830. if( g != 0.0f )
  831. {
  832. for( j = l; j < cols; j++ ) R[i*cols+j] = ( Q[i*cols+j] / Q[i*cols+l] ) / g;
  833. for( j = l; j < cols; j++ )
  834. {
  835. s = 0.0f;
  836. for( k = l; k < cols; k++ ) s += Q[i*cols+k] * R[j*cols+k];
  837. for( k = l; k < cols; k++ ) R[j*cols+k] += s * R[i*cols+k];
  838. }
  839. }
  840. for( j = l; j < cols; j++ )
  841. {
  842. R[i*cols+j] = 0.0f;
  843. R[j*cols+i] = 0.0f;
  844. }
  845. }
  846. R[i*cols+i] = 1.0f;
  847. g = temp[i];
  848. l = i;
  849. }
  850. for( i = cols - 1; i >= 0; i-- )
  851. {
  852. l = i + 1;
  853. g = diag[i];
  854. if( i < cols - 1 ) for( j = l; j < cols; j++ ) Q[i*cols+j] = 0.0f;
  855. if( g != 0.0f )
  856. {
  857. g = 1.0f / g;
  858. if( i != cols - 1 )
  859. {
  860. for( j = l; j < cols; j++ )
  861. {
  862. s = 0.0f;
  863. for( k = l; k < rows; k++ ) s += Q[k*cols+i] * Q[k*cols+j];
  864. f = ( s / Q[i*cols+i] ) * g;
  865. for( k = i; k < rows; k++ ) Q[k*cols+j] += f * Q[k*cols+i];
  866. }
  867. }
  868. for( j = i; j < rows; j++ ) Q[j*cols+i] *= g;
  869. }
  870. else
  871. {
  872. for( j = i; j < rows; j++ ) Q[j*cols+i] = 0.0f;
  873. }
  874. Q[i*cols+i] += 1.0f;
  875. }
  876. for( k = cols - 1; k >= 0; k-- )
  877. {
  878. for( iter = 1; iter <= MaxIterations; iter++ )
  879. {
  880. int jump;
  881. for( l = k; l >= 0; l-- )
  882. {
  883. q = l - 1;
  884. if( fabsf( temp[l] ) + norm == norm ) { jump = 1; break; }
  885. if( fabsf( diag[q] ) + norm == norm ) { jump = 0; break; }
  886. }
  887. if( !jump )
  888. {
  889. c = 0.0f;
  890. s = 1.0f;
  891. for( i = l; i <= k; i++ )
  892. {
  893. f = s * temp[i];
  894. temp[i] *= c;
  895. if( fabsf( f ) + norm == norm ) break;
  896. g = diag[i];
  897. h = svd_pythag( f, g );
  898. diag[i] = h;
  899. h = 1.0f / h;
  900. c = g * h;
  901. s = -f * h;
  902. for( j = 0; j < rows; j++ )
  903. {
  904. y = Q[j*cols+q];
  905. z = Q[j*cols+i];
  906. Q[j*cols+q] = y * c + z * s;
  907. Q[j*cols+i] = z * c - y * s;
  908. }
  909. }
  910. }
  911. z = diag[k];
  912. if( l == k )
  913. {
  914. if( z < 0.0f )
  915. {
  916. diag[k] = -z;
  917. for( j = 0; j < cols; j++ ) R[k*cols+j] *= -1.0f;
  918. }
  919. break;
  920. }
  921. if( iter >= MaxIterations ) return;
  922. x = diag[l];
  923. q = k - 1;
  924. y = diag[q];
  925. g = temp[q];
  926. h = temp[k];
  927. f = ( ( y - z ) * ( y + z ) + ( g - h ) * ( g + h ) ) / ( 2.0f * h * y );
  928. g = svd_pythag( f, 1.0f );
  929. f = ( ( x - z ) * ( x + z ) + h * ( ( y / ( f + SameSign( g, f ) ) ) - h ) ) / x;
  930. c = 1.0f;
  931. s = 1.0f;
  932. for( j = l; j <= q; j++ )
  933. {
  934. i = j + 1;
  935. g = temp[i];
  936. y = diag[i];
  937. h = s * g;
  938. g = c * g;
  939. z = svd_pythag( f, h );
  940. temp[j] = z;
  941. c = f / z;
  942. s = h / z;
  943. f = x * c + g * s;
  944. g = g * c - x * s;
  945. h = y * s;
  946. y = y * c;
  947. for( p = 0; p < cols; p++ )
  948. {
  949. x = R[j*cols+p];
  950. z = R[i*cols+p];
  951. R[j*cols+p] = x * c + z * s;
  952. R[i*cols+p] = z * c - x * s;
  953. }
  954. z = svd_pythag( f, h );
  955. diag[j] = z;
  956. if( z != 0.0f )
  957. {
  958. z = 1.0f / z;
  959. c = f * z;
  960. s = h * z;
  961. }
  962. f = c * g + s * y;
  963. x = c * y - s * g;
  964. for( p = 0; p < rows; p++ )
  965. {
  966. y = Q[p*cols+j];
  967. z = Q[p*cols+i];
  968. Q[p*cols+j] = y * c + z * s;
  969. Q[p*cols+i] = z * c - y * s;
  970. }
  971. }
  972. temp[l] = 0.0f;
  973. temp[k] = f;
  974. diag[k] = x;
  975. }
  976. }
  977. // Sort the singular values into descending order.
  978. for( i = 0; i < cols - 1; i++ )
  979. {
  980. float biggest = diag[i]; // Biggest singular value so far.
  981. int bindex = i; // The row/col it occurred in.
  982. for( j = i + 1; j < cols; j++ )
  983. {
  984. if( diag[j] > biggest )
  985. {
  986. biggest = diag[j];
  987. bindex = j;
  988. }
  989. }
  990. if( bindex != i ) // Need to swap rows and columns.
  991. {
  992. // Swap columns in Q.
  993. for (int j = 0; j < rows; ++j)
  994. swap(Q[j*cols+i], Q[j*cols+bindex]);
  995. // Swap rows in R.
  996. for (int j = 0; j < rows; ++j)
  997. swap(R[i*cols+j], R[bindex*cols+j]);
  998. // Swap elements in diag.
  999. swap(diag[i], diag[bindex]);
  1000. }
  1001. }
  1002. }