lcp.cpp 49 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317
  1. /*************************************************************************
  2. * *
  3. * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith. *
  4. * All rights reserved. Email: [email protected] Web: www.q12.org *
  5. * *
  6. * This library is free software; you can redistribute it and/or *
  7. * modify it under the terms of EITHER: *
  8. * (1) The GNU Lesser General Public License as published by the Free *
  9. * Software Foundation; either version 2.1 of the License, or (at *
  10. * your option) any later version. The text of the GNU Lesser *
  11. * General Public License is included with this library in the *
  12. * file LICENSE.TXT. *
  13. * (2) The BSD-style license that is included with this library in *
  14. * the file LICENSE-BSD.TXT. *
  15. * *
  16. * This library is distributed in the hope that it will be useful, *
  17. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  18. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files *
  19. * LICENSE.TXT and LICENSE-BSD.TXT for more details. *
  20. * *
  21. *************************************************************************/
  22. /*
  23. THE ALGORITHM
  24. -------------
  25. solve A*x = b+w, with x and w subject to certain LCP conditions.
  26. each x(i),w(i) must lie on one of the three line segments in the following
  27. diagram. each line segment corresponds to one index set :
  28. w(i)
  29. /|\ | :
  30. | | :
  31. | |i in N :
  32. w>0 | |state[i]=0 :
  33. | | :
  34. | | : i in C
  35. w=0 + +-----------------------+
  36. | : |
  37. | : |
  38. w<0 | : |i in N
  39. | : |state[i]=1
  40. | : |
  41. | : |
  42. +-------|-----------|-----------|----------> x(i)
  43. lo 0 hi
  44. the Dantzig algorithm proceeds as follows:
  45. for i=1:n
  46. * if (x(i),w(i)) is not on the line, push x(i) and w(i) positive or
  47. negative towards the line. as this is done, the other (x(j),w(j))
  48. for j<i are constrained to be on the line. if any (x,w) reaches the
  49. end of a line segment then it is switched between index sets.
  50. * i is added to the appropriate index set depending on what line segment
  51. it hits.
  52. we restrict lo(i) <= 0 and hi(i) >= 0. this makes the algorithm a bit
  53. simpler, because the starting point for x(i),w(i) is always on the dotted
  54. line x=0 and x will only ever increase in one direction, so it can only hit
  55. two out of the three line segments.
  56. NOTES
  57. -----
  58. this is an implementation of "lcp_dantzig2_ldlt.m" and "lcp_dantzig_lohi.m".
  59. the implementation is split into an LCP problem object (dLCP) and an LCP
  60. driver function. most optimization occurs in the dLCP object.
  61. a naive implementation of the algorithm requires either a lot of data motion
  62. or a lot of permutation-array lookup, because we are constantly re-ordering
  63. rows and columns. to avoid this and make a more optimized algorithm, a
  64. non-trivial data structure is used to represent the matrix A (this is
  65. implemented in the fast version of the dLCP object).
  66. during execution of this algorithm, some indexes in A are clamped (set C),
  67. some are non-clamped (set N), and some are "don't care" (where x=0).
  68. A,x,b,w (and other problem vectors) are permuted such that the clamped
  69. indexes are first, the unclamped indexes are next, and the don't-care
  70. indexes are last. this permutation is recorded in the array `p'.
  71. initially p = 0..n-1, and as the rows and columns of A,x,b,w are swapped,
  72. the corresponding elements of p are swapped.
  73. because the C and N elements are grouped together in the rows of A, we can do
  74. lots of work with a fast dot product function. if A,x,etc were not permuted
  75. and we only had a permutation array, then those dot products would be much
  76. slower as we would have a permutation array lookup in some inner loops.
  77. A is accessed through an array of row pointers, so that element (i,j) of the
  78. permuted matrix is A[i][j]. this makes row swapping fast. for column swapping
  79. we still have to actually move the data.
  80. during execution of this algorithm we maintain an L*D*L' factorization of
  81. the clamped submatrix of A (call it `AC') which is the top left nC*nC
  82. submatrix of A. there are two ways we could arrange the rows/columns in AC.
  83. (1) AC is always permuted such that L*D*L' = AC. this causes a problem
  84. when a row/column is removed from C, because then all the rows/columns of A
  85. between the deleted index and the end of C need to be rotated downward.
  86. this results in a lot of data motion and slows things down.
  87. (2) L*D*L' is actually a factorization of a *permutation* of AC (which is
  88. itself a permutation of the underlying A). this is what we do - the
  89. permutation is recorded in the vector C. call this permutation A[C,C].
  90. when a row/column is removed from C, all we have to do is swap two
  91. rows/columns and manipulate C.
  92. */
  93. #include <ode/common.h>
  94. #include <ode/misc.h>
  95. #include <ode/timer.h> // for testing
  96. #include "config.h"
  97. #include "lcp.h"
  98. #include "util.h"
  99. #include "matrix.h"
  100. #include "mat.h" // for testing
  101. #include "threaded_solver_ldlt.h"
  102. #include "fastdot_impl.h"
  103. #include "fastldltfactor_impl.h"
  104. #include "fastldltsolve_impl.h"
  105. //***************************************************************************
  106. // code generation parameters
  107. // LCP debugging (mostly for fast dLCP) - this slows things down a lot
  108. //#define DEBUG_LCP
  109. #define dLCP_FAST // use fast dLCP object
  110. #define NUB_OPTIMIZATIONS // use NUB optimizations
  111. // option 1 : matrix row pointers (less data copying)
  112. #define ROWPTRS
  113. #define ATYPE dReal **
  114. #define AROW(i) (m_A[i])
  115. // option 2 : no matrix row pointers (slightly faster inner loops)
  116. //#define NOROWPTRS
  117. //#define ATYPE dReal *
  118. //#define AROW(i) (m_A+(i)*m_nskip)
  119. //***************************************************************************
  120. #define dMIN(A,B) ((A)>(B) ? (B) : (A))
  121. #define dMAX(A,B) ((B)>(A) ? (B) : (A))
  122. #define LMATRIX_ALIGNMENT dMAX(64, EFFICIENT_ALIGNMENT)
  123. //***************************************************************************
  124. // transfer b-values to x-values
  125. template<bool zero_b>
  126. inline
  127. void transfer_b_to_x(dReal pairsbx[PBX__MAX], unsigned n)
  128. {
  129. dReal *const endbx = pairsbx + (sizeint)n * PBX__MAX;
  130. for (dReal *currbx = pairsbx; currbx != endbx; currbx += PBX__MAX) {
  131. currbx[PBX_X] = currbx[PBX_B];
  132. if (zero_b) {
  133. currbx[PBX_B] = REAL(0.0);
  134. }
  135. }
  136. }
  137. // swap row/column i1 with i2 in the n*n matrix A. the leading dimension of
  138. // A is nskip. this only references and swaps the lower triangle.
  139. // if `do_fast_row_swaps' is nonzero and row pointers are being used, then
  140. // rows will be swapped by exchanging row pointers. otherwise the data will
  141. // be copied.
  142. static
  143. void swapRowsAndCols (ATYPE A, unsigned n, unsigned i1, unsigned i2, unsigned nskip,
  144. int do_fast_row_swaps)
  145. {
  146. dAASSERT (A && n > 0 && i1 >= 0 && i2 >= 0 && i1 < n && i2 < n &&
  147. nskip >= n && i1 < i2);
  148. # ifdef ROWPTRS
  149. dReal *A_i1 = A[i1];
  150. dReal *A_i2 = A[i2];
  151. for (unsigned i=i1+1; i<i2; ++i) {
  152. dReal *A_i_i1 = A[i] + i1;
  153. A_i1[i] = *A_i_i1;
  154. *A_i_i1 = A_i2[i];
  155. }
  156. A_i1[i2] = A_i1[i1];
  157. A_i1[i1] = A_i2[i1];
  158. A_i2[i1] = A_i2[i2];
  159. // swap rows, by swapping row pointers
  160. if (do_fast_row_swaps) {
  161. A[i1] = A_i2;
  162. A[i2] = A_i1;
  163. }
  164. else {
  165. // Only swap till i2 column to match A plain storage variant.
  166. for (unsigned k = 0; k <= i2; ++k) {
  167. dxSwap(A_i1[k], A_i2[k]);
  168. }
  169. }
  170. // swap columns the hard way
  171. for (unsigned j = i2 + 1; j < n; ++j) {
  172. dReal *A_j = A[j];
  173. dxSwap(A_j[i1], A_j[i2]);
  174. }
  175. # else
  176. dReal *A_i1 = A + (sizeint)nskip * i1;
  177. dReal *A_i2 = A + (sizeint)nskip * i2;
  178. for (unsigned k = 0; k < i1; ++k) {
  179. dxSwap(A_i1[k], A_i2[k]);
  180. }
  181. dReal *A_i = A_i1 + nskip;
  182. for (unsigned i= i1 + 1; i < i2; A_i += nskip, ++i) {
  183. dxSwap(A_i2[i], A_i[i1]);
  184. }
  185. dxSwap(A_i1[i1], A_i2[i2]);
  186. dReal *A_j = A_i2 + nskip;
  187. for (unsigned j = i2 + 1; j < n; A_j += nskip, ++j) {
  188. dxSwap(A_j[i1], A_j[i2]);
  189. }
  190. # endif
  191. }
  192. // swap two indexes in the n*n LCP problem. i1 must be <= i2.
  193. static
  194. void swapProblem (ATYPE A, dReal pairsbx[PBX__MAX], dReal *w, dReal pairslh[PLH__MAX],
  195. unsigned *p, bool *state, int *findex,
  196. unsigned n, unsigned i1, unsigned i2, unsigned nskip,
  197. int do_fast_row_swaps)
  198. {
  199. dIASSERT (n>0 && i1 < n && i2 < n && nskip >= n && i1 <= i2);
  200. if (i1 != i2) {
  201. swapRowsAndCols (A, n, i1, i2, nskip, do_fast_row_swaps);
  202. dxSwap((pairsbx + (sizeint)i1 * PBX__MAX)[PBX_B], (pairsbx + (sizeint)i2 * PBX__MAX)[PBX_B]);
  203. dxSwap((pairsbx + (sizeint)i1 * PBX__MAX)[PBX_X], (pairsbx + (sizeint)i2 * PBX__MAX)[PBX_X]);
  204. dSASSERT(PBX__MAX == 2);
  205. dxSwap(w[i1], w[i2]);
  206. dxSwap((pairslh + (sizeint)i1 * PLH__MAX)[PLH_LO], (pairslh + (sizeint)i2 * PLH__MAX)[PLH_LO]);
  207. dxSwap((pairslh + (sizeint)i1 * PLH__MAX)[PLH_HI], (pairslh + (sizeint)i2 * PLH__MAX)[PLH_HI]);
  208. dSASSERT(PLH__MAX == 2);
  209. dxSwap(p[i1], p[i2]);
  210. dxSwap(state[i1], state[i2]);
  211. if (findex != NULL) {
  212. dxSwap(findex[i1], findex[i2]);
  213. }
  214. }
  215. }
  216. // for debugging - check that L,d is the factorization of A[C,C].
  217. // A[C,C] has size nC*nC and leading dimension nskip.
  218. // L has size nC*nC and leading dimension nskip.
  219. // d has size nC.
  220. #ifdef DEBUG_LCP
  221. static
  222. void checkFactorization (ATYPE A, dReal *_L, dReal *_d,
  223. unsigned nC, unsigned *C, unsigned nskip)
  224. {
  225. unsigned i, j;
  226. if (nC == 0) return;
  227. // get A1=A, copy the lower triangle to the upper triangle, get A2=A[C,C]
  228. dMatrix A1 (nC, nC);
  229. for (i=0; i < nC; i++) {
  230. for (j = 0; j <= i; j++) A1(i, j) = A1(j, i) = AROW(i)[j];
  231. }
  232. dMatrix A2 = A1.select (nC, C, nC, C);
  233. // printf ("A1=\n"); A1.print(); printf ("\n");
  234. // printf ("A2=\n"); A2.print(); printf ("\n");
  235. // compute A3 = L*D*L'
  236. dMatrix L (nC, nC, _L, nskip, 1);
  237. dMatrix D (nC, nC);
  238. for (i = 0; i < nC; i++) D(i, i) = 1.0 / _d[i];
  239. L.clearUpperTriangle();
  240. for (i = 0; i < nC; i++) L(i, i) = 1;
  241. dMatrix A3 = L * D * L.transpose();
  242. // printf ("L=\n"); L.print(); printf ("\n");
  243. // printf ("D=\n"); D.print(); printf ("\n");
  244. // printf ("A3=\n"); A2.print(); printf ("\n");
  245. // compare A2 and A3
  246. dReal diff = A2.maxDifference (A3);
  247. if (diff > 1e-8)
  248. dDebug (0, "L*D*L' check, maximum difference = %.6e\n", diff);
  249. }
  250. #endif
  251. // for debugging
  252. #ifdef DEBUG_LCP
  253. static
  254. void checkPermutations (unsigned i, unsigned n, unsigned nC, unsigned nN, unsigned *p, unsigned *C)
  255. {
  256. unsigned j,k;
  257. dIASSERT (/*nC >= 0 && nN >= 0 && */(nC + nN) == i && i < n);
  258. for (k=0; k<i; k++) dIASSERT (p[k] >= 0 && p[k] < i);
  259. for (k=i; k<n; k++) dIASSERT (p[k] == k);
  260. for (j=0; j<nC; j++) {
  261. int C_is_bad = 1;
  262. for (k=0; k<nC; k++) if (C[k]==j) C_is_bad = 0;
  263. dIASSERT (C_is_bad==0);
  264. }
  265. }
  266. #endif
  267. //***************************************************************************
  268. // dLCP manipulator object. this represents an n*n LCP problem.
  269. //
  270. // two index sets C and N are kept. each set holds a subset of
  271. // the variable indexes 0..n-1. an index can only be in one set.
  272. // initially both sets are empty.
  273. //
  274. // the index set C is special: solutions to A(C,C)\A(C,i) can be generated.
  275. //***************************************************************************
  276. // fast implementation of dLCP. see the above definition of dLCP for
  277. // interface comments.
  278. //
  279. // `p' records the permutation of A,x,b,w,etc. p is initially 1:n and is
  280. // permuted as the other vectors/matrices are permuted.
  281. //
  282. // A,x,b,w,lo,hi,state,findex,p,c are permuted such that sets C,N have
  283. // contiguous indexes. the don't-care indexes follow N.
  284. //
  285. // an L*D*L' factorization is maintained of A(C,C), and whenever indexes are
  286. // added or removed from the set C the factorization is updated.
  287. // thus L*D*L'=A[C,C], i.e. a permuted top left nC*nC submatrix of A.
  288. // the leading dimension of the matrix L is always `nskip'.
  289. //
  290. // at the start there may be other indexes that are unbounded but are not
  291. // included in `nub'. dLCP will permute the matrix so that absolutely all
  292. // unbounded vectors are at the start. thus there may be some initial
  293. // permutation.
  294. //
  295. // the algorithms here assume certain patterns, particularly with respect to
  296. // index transfer.
  297. #ifdef dLCP_FAST
  298. struct dLCP {
  299. const unsigned m_n;
  300. const unsigned m_nskip;
  301. unsigned m_nub;
  302. unsigned m_nC, m_nN; // size of each index set
  303. ATYPE const m_A; // A rows
  304. dReal *const m_pairsbx, *const m_w, *const m_pairslh; // permuted LCP problem data
  305. dReal *const m_L, *const m_d; // L*D*L' factorization of set C
  306. dReal *const m_Dell, *const m_ell, *const m_tmp;
  307. bool *const m_state;
  308. int *const m_findex;
  309. unsigned *const m_p, *const m_C;
  310. dLCP (unsigned _n, unsigned _nskip, unsigned _nub, dReal *_Adata, dReal *_pairsbx, dReal *_w,
  311. dReal *_pairslh, dReal *_L, dReal *_d,
  312. dReal *_Dell, dReal *_ell, dReal *_tmp,
  313. bool *_state, int *_findex, unsigned *_p, unsigned *_C, dReal **Arows);
  314. unsigned getNub() const { return m_nub; }
  315. void transfer_i_to_C (unsigned i);
  316. void transfer_i_to_N (unsigned /*i*/) { m_nN++; } // because we can assume C and N span 1:i-1
  317. void transfer_i_from_N_to_C (unsigned i);
  318. void transfer_i_from_C_to_N (unsigned i, void *tmpbuf);
  319. static sizeint estimate_transfer_i_from_C_to_N_mem_req(unsigned nC, unsigned nskip) { return dEstimateLDLTRemoveTmpbufSize(nC, nskip); }
  320. unsigned numC() const { return m_nC; }
  321. unsigned numN() const { return m_nN; }
  322. unsigned indexC (unsigned i) const { return i; }
  323. unsigned indexN (unsigned i) const { return i+m_nC; }
  324. dReal Aii (unsigned i) const { return AROW(i)[i]; }
  325. template<unsigned q_stride>
  326. dReal AiC_times_qC (unsigned i, dReal *q) const { return calculateLargeVectorDot<q_stride> (AROW(i), q, m_nC); }
  327. template<unsigned q_stride>
  328. dReal AiN_times_qN (unsigned i, dReal *q) const { return calculateLargeVectorDot<q_stride> (AROW(i) + m_nC, q + (sizeint)m_nC * q_stride, m_nN); }
  329. void pN_equals_ANC_times_qC (dReal *p, dReal *q);
  330. void pN_plusequals_ANi (dReal *p, unsigned i, bool dir_positive);
  331. template<unsigned p_stride>
  332. void pC_plusequals_s_times_qC (dReal *p, dReal s, dReal *q);
  333. void pN_plusequals_s_times_qN (dReal *p, dReal s, dReal *q);
  334. void solve1 (dReal *a, unsigned i, bool dir_positive, int only_transfer=0);
  335. void unpermute_X();
  336. void unpermute_W();
  337. };
  338. dLCP::dLCP (unsigned _n, unsigned _nskip, unsigned _nub, dReal *_Adata, dReal *_pairsbx, dReal *_w,
  339. dReal *_pairslh, dReal *_L, dReal *_d,
  340. dReal *_Dell, dReal *_ell, dReal *_tmp,
  341. bool *_state, int *_findex, unsigned *_p, unsigned *_C, dReal **Arows):
  342. m_n(_n), m_nskip(_nskip), m_nub(_nub), m_nC(0), m_nN(0),
  343. # ifdef ROWPTRS
  344. m_A(Arows),
  345. #else
  346. m_A(_Adata),
  347. #endif
  348. m_pairsbx(_pairsbx), m_w(_w), m_pairslh(_pairslh),
  349. m_L(_L), m_d(_d), m_Dell(_Dell), m_ell(_ell), m_tmp(_tmp),
  350. m_state(_state), m_findex(_findex), m_p(_p), m_C(_C)
  351. {
  352. dxtSetZero<PBX__MAX>(m_pairsbx + PBX_X, m_n);
  353. {
  354. # ifdef ROWPTRS
  355. // make matrix row pointers
  356. dReal *aptr = _Adata;
  357. ATYPE A = m_A;
  358. const unsigned n = m_n, nskip = m_nskip;
  359. for (unsigned k=0; k<n; aptr+=nskip, ++k) A[k] = aptr;
  360. # endif
  361. }
  362. {
  363. unsigned *p = m_p;
  364. const unsigned n = m_n;
  365. for (unsigned k=0; k != n; ++k) p[k] = k; // initially unpermutted
  366. }
  367. /*
  368. // for testing, we can do some random swaps in the area i > nub
  369. {
  370. const unsigned n = m_n;
  371. const unsigned nub = m_nub;
  372. if (nub < n) {
  373. for (unsigned k=0; k<100; k++) {
  374. unsigned i1,i2;
  375. do {
  376. i1 = dRandInt(n-nub)+nub;
  377. i2 = dRandInt(n-nub)+nub;
  378. }
  379. while (i1 > i2);
  380. //printf ("--> %d %d\n",i1,i2);
  381. swapProblem (m_A, m_pairsbx, m_w, m_pairslh, m_p, m_state, m_findex, n, i1, i2, m_nskip, 0);
  382. }
  383. }
  384. */
  385. // permute the problem so that *all* the unbounded variables are at the
  386. // start, i.e. look for unbounded variables not included in `nub'. we can
  387. // potentially push up `nub' this way and get a bigger initial factorization.
  388. // note that when we swap rows/cols here we must not just swap row pointers,
  389. // as the initial factorization relies on the data being all in one chunk.
  390. // variables that have findex >= 0 are *not* considered to be unbounded even
  391. // if lo=-inf and hi=inf - this is because these limits may change during the
  392. // solution process.
  393. {
  394. int *findex = m_findex;
  395. dReal *pairslh = m_pairslh;
  396. const unsigned n = m_n;
  397. for (unsigned k = m_nub; k < n; ++k) {
  398. if (findex && findex[k] >= 0) continue;
  399. if ((pairslh + (sizeint)k * PLH__MAX)[PLH_LO] == -dInfinity && (pairslh + (sizeint)k * PLH__MAX)[PLH_HI] == dInfinity) {
  400. swapProblem (m_A, m_pairsbx, m_w, pairslh, m_p, m_state, findex, n, m_nub, k, m_nskip, 0);
  401. m_nub++;
  402. }
  403. }
  404. }
  405. // if there are unbounded variables at the start, factorize A up to that
  406. // point and solve for x. this puts all indexes 0..nub-1 into C.
  407. if (m_nub > 0) {
  408. const unsigned nub = m_nub;
  409. {
  410. dReal *Lrow = m_L;
  411. const unsigned nskip = m_nskip;
  412. for (unsigned j = 0; j < nub; Lrow += nskip, ++j) memcpy(Lrow, AROW(j), (j + 1) * sizeof(dReal));
  413. }
  414. transfer_b_to_x<false> (m_pairsbx, nub);
  415. factorMatrixAsLDLT<1> (m_L, m_d, nub, m_nskip);
  416. solveEquationSystemWithLDLT<1, PBX__MAX> (m_L, m_d, m_pairsbx + PBX_X, nub, m_nskip);
  417. dSetZero (m_w, nub);
  418. {
  419. unsigned *C = m_C;
  420. for (unsigned k = 0; k < nub; ++k) C[k] = k;
  421. }
  422. m_nC = nub;
  423. }
  424. // permute the indexes > nub such that all findex variables are at the end
  425. if (m_findex) {
  426. const unsigned nub = m_nub;
  427. int *findex = m_findex;
  428. unsigned num_at_end = 0;
  429. for (unsigned k = m_n; k > nub; ) {
  430. --k;
  431. if (findex[k] >= 0) {
  432. swapProblem (m_A, m_pairsbx, m_w, m_pairslh, m_p, m_state, findex, m_n, k, m_n - 1 - num_at_end, m_nskip, 1);
  433. num_at_end++;
  434. }
  435. }
  436. }
  437. // print info about indexes
  438. /*
  439. {
  440. const unsigned n = m_n;
  441. const unsigned nub = m_nub;
  442. for (unsigned k=0; k<n; k++) {
  443. if (k<nub) printf ("C");
  444. else if ((m_pairslh + (sizeint)k * PLH__MAX)[PLH_LO] == -dInfinity && (m_pairslh + (sizeint)k * PLH__MAX)[PLH_HI] == dInfinity) printf ("c");
  445. else printf (".");
  446. }
  447. printf ("\n");
  448. }
  449. */
  450. }
  451. void dLCP::transfer_i_to_C (unsigned i)
  452. {
  453. {
  454. const unsigned nC = m_nC;
  455. if (nC > 0) {
  456. // ell,Dell were computed by solve1(). note, ell = D \ L1solve (L,A(i,C))
  457. dReal *const Ltgt = m_L + (sizeint)m_nskip * nC, *ell = m_ell;
  458. memcpy(Ltgt, ell, nC * sizeof(dReal));
  459. dReal ell_Dell_dot = dxDot(m_ell, m_Dell, nC);
  460. dReal AROW_i_i = AROW(i)[i] != ell_Dell_dot ? AROW(i)[i] : dNextAfter(AROW(i)[i], dInfinity); // A hack to avoid getting a zero in the denominator
  461. m_d[nC] = dRecip (AROW_i_i - ell_Dell_dot);
  462. }
  463. else {
  464. m_d[0] = dRecip (AROW(i)[i]);
  465. }
  466. swapProblem (m_A, m_pairsbx, m_w, m_pairslh, m_p, m_state, m_findex, m_n, nC, i, m_nskip, 1);
  467. m_C[nC] = nC;
  468. m_nC = nC + 1; // nC value is outdated after this line
  469. }
  470. # ifdef DEBUG_LCP
  471. checkFactorization (m_A, m_L, m_d, m_nC, m_C, m_nskip);
  472. if (i < (m_n-1)) checkPermutations (i+1, m_n, m_nC, m_nN, m_p, m_C);
  473. # endif
  474. }
  475. void dLCP::transfer_i_from_N_to_C (unsigned i)
  476. {
  477. {
  478. const unsigned nC = m_nC;
  479. if (nC > 0) {
  480. {
  481. dReal *const aptr = AROW(i);
  482. dReal *Dell = m_Dell;
  483. const unsigned *C = m_C;
  484. # ifdef NUB_OPTIMIZATIONS
  485. // if nub>0, initial part of aptr unpermuted
  486. const unsigned nub = m_nub;
  487. unsigned j=0;
  488. for ( ; j<nub; ++j) Dell[j] = aptr[j];
  489. for ( ; j<nC; ++j) Dell[j] = aptr[C[j]];
  490. # else
  491. for (unsigned j=0; j<nC; ++j) Dell[j] = aptr[C[j]];
  492. # endif
  493. }
  494. solveL1Straight<1>(m_L, m_Dell, nC, m_nskip);
  495. dReal ell_Dell_dot = REAL(0.0);
  496. dReal *const Ltgt = m_L + (sizeint)m_nskip * nC;
  497. dReal *ell = m_ell, *Dell = m_Dell, *d = m_d;
  498. for (unsigned j = 0; j < nC; ++j) {
  499. dReal ell_j, Dell_j = Dell[j];
  500. Ltgt[j] = ell[j] = ell_j = Dell_j * d[j];
  501. ell_Dell_dot += ell_j * Dell_j;
  502. }
  503. dReal AROW_i_i = AROW(i)[i] != ell_Dell_dot ? AROW(i)[i] : dNextAfter(AROW(i)[i], dInfinity); // A hack to avoid getting a zero in the denominator
  504. m_d[nC] = dRecip (AROW_i_i - ell_Dell_dot);
  505. }
  506. else {
  507. m_d[0] = dRecip (AROW(i)[i]);
  508. }
  509. swapProblem (m_A, m_pairsbx, m_w, m_pairslh, m_p, m_state, m_findex, m_n, nC, i, m_nskip, 1);
  510. m_C[nC] = nC;
  511. m_nN--;
  512. m_nC = nC + 1; // nC value is outdated after this line
  513. }
  514. // @@@ TO DO LATER
  515. // if we just finish here then we'll go back and re-solve for
  516. // delta_x. but actually we can be more efficient and incrementally
  517. // update delta_x here. but if we do this, we wont have ell and Dell
  518. // to use in updating the factorization later.
  519. # ifdef DEBUG_LCP
  520. checkFactorization (m_A,m_L,m_d,m_nC,m_C,m_nskip);
  521. # endif
  522. }
  523. void dLCP::transfer_i_from_C_to_N (unsigned i, void *tmpbuf)
  524. {
  525. {
  526. unsigned *C = m_C;
  527. // remove a row/column from the factorization, and adjust the
  528. // indexes (black magic!)
  529. int last_idx = -1;
  530. const unsigned nC = m_nC;
  531. unsigned j = 0;
  532. for ( ; j < nC; ++j) {
  533. if (C[j] == nC - 1) {
  534. last_idx = j;
  535. }
  536. if (C[j] == i) {
  537. dxLDLTRemove (m_A, C, m_L, m_d, m_n, nC, j, m_nskip, tmpbuf);
  538. unsigned k;
  539. if (last_idx == -1) {
  540. for (k = j + 1 ; k < nC; ++k) {
  541. if (C[k] == nC - 1) {
  542. break;
  543. }
  544. }
  545. dIASSERT (k < nC);
  546. }
  547. else {
  548. k = last_idx;
  549. }
  550. C[k] = C[j];
  551. if (j != (nC - 1)) memmove (C + j, C + j + 1, (nC - j - 1) * sizeof(C[0]));
  552. break;
  553. }
  554. }
  555. dIASSERT (j < nC);
  556. swapProblem (m_A, m_pairsbx, m_w, m_pairslh, m_p, m_state, m_findex, m_n, i, nC - 1, m_nskip, 1);
  557. m_nN++;
  558. m_nC = nC - 1; // nC value is outdated after this line
  559. }
  560. # ifdef DEBUG_LCP
  561. checkFactorization (m_A, m_L, m_d, m_nC, m_C, m_nskip);
  562. # endif
  563. }
  564. void dLCP::pN_equals_ANC_times_qC (dReal *p, dReal *q)
  565. {
  566. // we could try to make this matrix-vector multiplication faster using
  567. // outer product matrix tricks, e.g. with the dMultidotX() functions.
  568. // but i tried it and it actually made things slower on random 100x100
  569. // problems because of the overhead involved. so we'll stick with the
  570. // simple method for now.
  571. const unsigned nC = m_nC;
  572. dReal *ptgt = p + nC;
  573. const unsigned nN = m_nN;
  574. for (unsigned i = 0; i < nN; ++i) {
  575. ptgt[i] = dxDot (AROW(i + nC), q, nC);
  576. }
  577. }
  578. void dLCP::pN_plusequals_ANi (dReal *p, unsigned i, bool dir_positive)
  579. {
  580. const unsigned nC = m_nC;
  581. dReal *aptr = AROW(i) + nC;
  582. dReal *ptgt = p + nC;
  583. if (dir_positive) {
  584. const unsigned nN = m_nN;
  585. for (unsigned j=0; j < nN; ++j) ptgt[j] += aptr[j];
  586. }
  587. else {
  588. const unsigned nN = m_nN;
  589. for (unsigned j=0; j < nN; ++j) ptgt[j] -= aptr[j];
  590. }
  591. }
  592. template<unsigned p_stride>
  593. void dLCP::pC_plusequals_s_times_qC (dReal *p, dReal s, dReal *q)
  594. {
  595. const unsigned nC = m_nC;
  596. dReal *q_end = q + nC;
  597. for (; q != q_end; p += p_stride, ++q) {
  598. *p += s * (*q);
  599. }
  600. }
  601. void dLCP::pN_plusequals_s_times_qN (dReal *p, dReal s, dReal *q)
  602. {
  603. const unsigned nC = m_nC;
  604. dReal *ptgt = p + nC, *qsrc = q + nC;
  605. const unsigned nN = m_nN;
  606. for (unsigned i = 0; i < nN; ++i) {
  607. ptgt[i] += s * qsrc[i];
  608. }
  609. }
  610. void dLCP::solve1 (dReal *a, unsigned i, bool dir_positive, int only_transfer)
  611. {
  612. // the `Dell' and `ell' that are computed here are saved. if index i is
  613. // later added to the factorization then they can be reused.
  614. //
  615. // @@@ question: do we need to solve for entire delta_x??? yes, but
  616. // only if an x goes below 0 during the step.
  617. const unsigned nC = m_nC;
  618. if (nC > 0) {
  619. {
  620. dReal *Dell = m_Dell;
  621. unsigned *C = m_C;
  622. dReal *aptr = AROW(i);
  623. # ifdef NUB_OPTIMIZATIONS
  624. // if nub>0, initial part of aptr[] is guaranteed unpermuted
  625. const unsigned nub = m_nub;
  626. unsigned j = 0;
  627. for ( ; j < nub; ++j) Dell[j] = aptr[j];
  628. for ( ; j < nC; ++j) Dell[j] = aptr[C[j]];
  629. # else
  630. for (unsigned j = 0; j < nC; ++j) Dell[j] = aptr[C[j]];
  631. # endif
  632. }
  633. solveL1Straight<1>(m_L, m_Dell, nC, m_nskip);
  634. {
  635. dReal *ell = m_ell, *Dell = m_Dell, *d = m_d;
  636. for (unsigned j = 0; j < nC; ++j) ell[j] = Dell[j] * d[j];
  637. }
  638. if (!only_transfer) {
  639. dReal *tmp = m_tmp, *ell = m_ell;
  640. {
  641. for (unsigned j = 0; j < nC; ++j) tmp[j] = ell[j];
  642. }
  643. solveL1Transposed<1>(m_L, tmp, nC, m_nskip);
  644. if (dir_positive) {
  645. unsigned *C = m_C;
  646. dReal *tmp = m_tmp;
  647. for (unsigned j = 0; j < nC; ++j) a[C[j]] = -tmp[j];
  648. } else {
  649. unsigned *C = m_C;
  650. dReal *tmp = m_tmp;
  651. for (unsigned j = 0; j < nC; ++j) a[C[j]] = tmp[j];
  652. }
  653. }
  654. }
  655. }
  656. void dLCP::unpermute_X()
  657. {
  658. unsigned *p = m_p;
  659. dReal *pairsbx = m_pairsbx;
  660. const unsigned n = m_n;
  661. for (unsigned j = 0; j < n; ++j) {
  662. unsigned k = p[j];
  663. if (k != j) {
  664. // p[j] = j; -- not going to be checked anymore anyway
  665. dReal x_j = (pairsbx + (sizeint)j * PBX__MAX)[PBX_X];
  666. for (;;) {
  667. dxSwap(x_j, (pairsbx + (sizeint)k * PBX__MAX)[PBX_X]);
  668. unsigned orig_k = p[k];
  669. p[k] = k;
  670. if (orig_k == j) {
  671. break;
  672. }
  673. k = orig_k;
  674. }
  675. (pairsbx + (sizeint)j * PBX__MAX)[PBX_X] = x_j;
  676. }
  677. }
  678. }
  679. void dLCP::unpermute_W()
  680. {
  681. memcpy (m_tmp, m_w, m_n * sizeof(dReal));
  682. const unsigned *p = m_p;
  683. dReal *w = m_w, *tmp = m_tmp;
  684. const unsigned n = m_n;
  685. for (unsigned j = 0; j < n; ++j) {
  686. unsigned k = p[j];
  687. w[k] = tmp[j];
  688. }
  689. }
  690. #endif // dLCP_FAST
  691. static void dxSolveLCP_AllUnbounded (dxWorldProcessMemArena *memarena, unsigned n, dReal *A, dReal pairsbx[PBX__MAX]);
  692. static void dxSolveLCP_Generic (dxWorldProcessMemArena *memarena, unsigned n, dReal *A, dReal pairsbx[PBX__MAX],
  693. dReal *outer_w/*=NULL*/, unsigned nub, dReal pairslh[PLH__MAX], int *findex);
  694. /*extern */
  695. void dxSolveLCP (dxWorldProcessMemArena *memarena, unsigned n, dReal *A, dReal pairsbx[PBX__MAX],
  696. dReal *outer_w/*=NULL*/, unsigned nub, dReal pairslh[PLH__MAX], int *findex)
  697. {
  698. if (nub >= n)
  699. {
  700. dxSolveLCP_AllUnbounded (memarena, n, A, pairsbx);
  701. }
  702. else
  703. {
  704. dxSolveLCP_Generic (memarena, n, A, pairsbx, outer_w, nub, pairslh, findex);
  705. }
  706. }
  707. //***************************************************************************
  708. // if all the variables are unbounded then we can just factor, solve, and return
  709. static
  710. void dxSolveLCP_AllUnbounded (dxWorldProcessMemArena *memarena, unsigned n, dReal *A, dReal pairsbx[PBX__MAX])
  711. {
  712. dAASSERT(A != NULL);
  713. dAASSERT(pairsbx != NULL);
  714. dAASSERT(n != 0);
  715. transfer_b_to_x<true>(pairsbx, n);
  716. unsigned nskip = dPAD(n);
  717. factorMatrixAsLDLT<PBX__MAX> (A, pairsbx + PBX_B, n, nskip);
  718. solveEquationSystemWithLDLT<PBX__MAX, PBX__MAX> (A, pairsbx + PBX_B, pairsbx + PBX_X, n, nskip);
  719. }
  720. //***************************************************************************
  721. // an optimized Dantzig LCP driver routine for the lo-hi LCP problem.
  722. static
  723. void dxSolveLCP_Generic (dxWorldProcessMemArena *memarena, unsigned n, dReal *A, dReal pairsbx[PBX__MAX],
  724. dReal *outer_w/*=NULL*/, unsigned nub, dReal pairslh[PLH__MAX], int *findex)
  725. {
  726. dAASSERT (n > 0 && A && pairsbx && pairslh && nub >= 0 && nub < n);
  727. # ifndef dNODEBUG
  728. {
  729. // check restrictions on lo and hi
  730. dReal *endlh = pairslh + (sizeint)n * PLH__MAX;
  731. for (dReal *currlh = pairslh; currlh != endlh; currlh += PLH__MAX) dIASSERT (currlh[PLH_LO] <= 0 && currlh[PLH_HI] >= 0);
  732. }
  733. # endif
  734. const unsigned nskip = dPAD(n);
  735. dReal *L = memarena->AllocateOveralignedArray<dReal> ((sizeint)nskip * n, LMATRIX_ALIGNMENT);
  736. dReal *d = memarena->AllocateArray<dReal> (n);
  737. dReal *w = outer_w != NULL ? outer_w : memarena->AllocateArray<dReal> (n);
  738. dReal *delta_w = memarena->AllocateArray<dReal> (n);
  739. dReal *delta_x = memarena->AllocateArray<dReal> (n);
  740. dReal *Dell = memarena->AllocateArray<dReal> (n);
  741. dReal *ell = memarena->AllocateArray<dReal> (n);
  742. #ifdef ROWPTRS
  743. dReal **Arows = memarena->AllocateArray<dReal *> (n);
  744. #else
  745. dReal **Arows = NULL;
  746. #endif
  747. unsigned *p = memarena->AllocateArray<unsigned> (n);
  748. unsigned *C = memarena->AllocateArray<unsigned> (n);
  749. // for i in N, state[i] is 0 if x(i)==lo(i) or 1 if x(i)==hi(i)
  750. bool *state = memarena->AllocateArray<bool> (n);
  751. // create LCP object. note that tmp is set to delta_w to save space, this
  752. // optimization relies on knowledge of how tmp is used, so be careful!
  753. dLCP lcp(n, nskip, nub, A, pairsbx, w, pairslh, L, d, Dell, ell, delta_w, state, findex, p, C, Arows);
  754. unsigned adj_nub = lcp.getNub();
  755. // loop over all indexes adj_nub..n-1. for index i, if x(i),w(i) satisfy the
  756. // LCP conditions then i is added to the appropriate index set. otherwise
  757. // x(i),w(i) is driven either +ve or -ve to force it to the valid region.
  758. // as we drive x(i), x(C) is also adjusted to keep w(C) at zero.
  759. // while driving x(i) we maintain the LCP conditions on the other variables
  760. // 0..i-1. we do this by watching out for other x(i),w(i) values going
  761. // outside the valid region, and then switching them between index sets
  762. // when that happens.
  763. bool hit_first_friction_index = false;
  764. for (unsigned i = adj_nub; i < n; ++i) {
  765. bool s_error = false;
  766. // the index i is the driving index and indexes i+1..n-1 are "dont care",
  767. // i.e. when we make changes to the system those x's will be zero and we
  768. // don't care what happens to those w's. in other words, we only consider
  769. // an (i+1)*(i+1) sub-problem of A*x=b+w.
  770. // if we've hit the first friction index, we have to compute the lo and
  771. // hi values based on the values of x already computed. we have been
  772. // permuting the indexes, so the values stored in the findex vector are
  773. // no longer valid. thus we have to temporarily unpermute the x vector.
  774. // for the purposes of this computation, 0*infinity = 0 ... so if the
  775. // contact constraint's normal force is 0, there should be no tangential
  776. // force applied.
  777. if (!hit_first_friction_index && findex && findex[i] >= 0) {
  778. // un-permute x into delta_w, which is not being used at the moment
  779. for (unsigned j = 0; j < n; ++j) delta_w[p[j]] = (pairsbx + (sizeint)j * PBX__MAX)[PBX_X];
  780. // set lo and hi values
  781. for (unsigned k = i; k < n; ++k) {
  782. dReal *currlh = pairslh + (sizeint)k * PLH__MAX;
  783. dReal wfk = delta_w[findex[k]];
  784. if (wfk == 0) {
  785. currlh[PLH_HI] = 0;
  786. currlh[PLH_LO] = 0;
  787. }
  788. else {
  789. currlh[PLH_HI] = dFabs (currlh[PLH_HI] * wfk);
  790. currlh[PLH_LO] = -currlh[PLH_HI];
  791. }
  792. }
  793. hit_first_friction_index = true;
  794. }
  795. // thus far we have not even been computing the w values for indexes
  796. // greater than i, so compute w[i] now.
  797. dReal wPrep = lcp.AiC_times_qC<PBX__MAX> (i, pairsbx + PBX_X) + lcp.AiN_times_qN<PBX__MAX> (i, pairsbx + PBX_X);
  798. dReal *currbx = pairsbx + (sizeint)i * PBX__MAX;
  799. w[i] = wPrep - currbx[PBX_B];
  800. // if lo=hi=0 (which can happen for tangential friction when normals are
  801. // 0) then the index will be assigned to set N with some state. however,
  802. // set C's line has zero size, so the index will always remain in set N.
  803. // with the "normal" switching logic, if w changed sign then the index
  804. // would have to switch to set C and then back to set N with an inverted
  805. // state. this is pointless, and also computationally expensive. to
  806. // prevent this from happening, we use the rule that indexes with lo=hi=0
  807. // will never be checked for set changes. this means that the state for
  808. // these indexes may be incorrect, but that doesn't matter.
  809. dReal *currlh = pairslh + (sizeint)i * PLH__MAX;
  810. // see if x(i),w(i) is in a valid region
  811. if (currlh[PLH_LO] == 0 && w[i] >= 0) {
  812. lcp.transfer_i_to_N (i);
  813. state[i] = false;
  814. }
  815. else if (currlh[PLH_HI] == 0 && w[i] <= 0) {
  816. lcp.transfer_i_to_N (i);
  817. state[i] = true;
  818. }
  819. else if (w[i] == 0) {
  820. // this is a degenerate case. by the time we get to this test we know
  821. // that lo != 0, which means that lo < 0 as lo is not allowed to be +ve,
  822. // and similarly that hi > 0. this means that the line segment
  823. // corresponding to set C is at least finite in extent, and we are on it.
  824. // NOTE: we must call lcp.solve1() before lcp.transfer_i_to_C()
  825. lcp.solve1 (delta_x, i, false, 1);
  826. lcp.transfer_i_to_C (i);
  827. }
  828. else {
  829. // we must push x(i) and w(i)
  830. for (;;) {
  831. // find direction to push on x(i)
  832. bool dir_positive = (w[i] <= 0);
  833. // compute: delta_x(C) = -dir*A(C,C)\A(C,i)
  834. lcp.solve1 (delta_x, i, dir_positive);
  835. // note that delta_x[i] = (dir_positive ? 1 : -1), but we wont bother to set it
  836. // compute: delta_w = A*delta_x ... note we only care about
  837. // delta_w(N) and delta_w(i), the rest is ignored
  838. lcp.pN_equals_ANC_times_qC (delta_w, delta_x);
  839. lcp.pN_plusequals_ANi (delta_w, i, dir_positive);
  840. delta_w[i] = dir_positive
  841. ? lcp.AiC_times_qC<1> (i, delta_x) + lcp.Aii(i)
  842. : lcp.AiC_times_qC<1> (i, delta_x) - lcp.Aii(i);
  843. // find largest step we can take (size=s), either to drive x(i),w(i)
  844. // to the valid LCP region or to drive an already-valid variable
  845. // outside the valid region.
  846. int cmd = 1; // index switching command
  847. unsigned si = 0; // si = index to switch if cmd>3
  848. dReal s = delta_w[i] != REAL(0.0)
  849. ? -w[i] / delta_w[i]
  850. : (w[i] != REAL(0.0) ? dCopySign(dInfinity, -w[i]) : REAL(0.0));
  851. if (dir_positive) {
  852. if (currlh[PLH_HI] < dInfinity) {
  853. dReal s2 = (currlh[PLH_HI] - currbx[PBX_X]); // was (hi[i]-x[i])/dirf // step to x(i)=hi(i)
  854. if (s2 < s) {
  855. s = s2;
  856. cmd = 3;
  857. }
  858. }
  859. }
  860. else {
  861. if (currlh[PLH_LO] > -dInfinity) {
  862. dReal s2 = (currbx[PBX_X] - currlh[PLH_LO]); // was (lo[i]-x[i])/dirf // step to x(i)=lo(i)
  863. if (s2 < s) {
  864. s = s2;
  865. cmd = 2;
  866. }
  867. }
  868. }
  869. {
  870. const unsigned numN = lcp.numN();
  871. for (unsigned k = 0; k < numN; ++k) {
  872. const unsigned indexN_k = lcp.indexN(k);
  873. if (!state[indexN_k] ? delta_w[indexN_k] < 0 : delta_w[indexN_k] > 0) {
  874. // don't bother checking if lo=hi=0
  875. dReal *indexlh = pairslh + (sizeint)indexN_k * PLH__MAX;
  876. if (indexlh[PLH_LO] == 0 && indexlh[PLH_HI] == 0) continue;
  877. dReal s2 = -w[indexN_k] / delta_w[indexN_k];
  878. if (s2 < s) {
  879. s = s2;
  880. cmd = 4;
  881. si = indexN_k;
  882. }
  883. }
  884. }
  885. }
  886. {
  887. const unsigned numC = lcp.numC();
  888. for (unsigned k = adj_nub; k < numC; ++k) {
  889. const unsigned indexC_k = lcp.indexC(k);
  890. dReal *indexlh = pairslh + (sizeint)indexC_k * PLH__MAX;
  891. if (delta_x[indexC_k] < 0 && indexlh[PLH_LO] > -dInfinity) {
  892. dReal s2 = (indexlh[PLH_LO] - (pairsbx + (sizeint)indexC_k * PBX__MAX)[PBX_X]) / delta_x[indexC_k];
  893. if (s2 < s) {
  894. s = s2;
  895. cmd = 5;
  896. si = indexC_k;
  897. }
  898. }
  899. if (delta_x[indexC_k] > 0 && indexlh[PLH_HI] < dInfinity) {
  900. dReal s2 = (indexlh[PLH_HI] - (pairsbx + (sizeint)indexC_k * PBX__MAX)[PBX_X]) / delta_x[indexC_k];
  901. if (s2 < s) {
  902. s = s2;
  903. cmd = 6;
  904. si = indexC_k;
  905. }
  906. }
  907. }
  908. }
  909. //static char* cmdstring[8] = {0,"->C","->NL","->NH","N->C",
  910. // "C->NL","C->NH"};
  911. //printf ("cmd=%d (%s), si=%d\n",cmd,cmdstring[cmd],(cmd>3) ? si : i);
  912. // if s <= 0 then we've got a problem. if we just keep going then
  913. // we're going to get stuck in an infinite loop. instead, just cross
  914. // our fingers and exit with the current solution.
  915. if (s <= REAL(0.0)) {
  916. dMessage (d_ERR_LCP, "LCP internal error, s <= 0 (s=%.4e)",(double)s);
  917. if (i < n) {
  918. dxtSetZero<PBX__MAX>(currbx + PBX_X, n - i);
  919. dxSetZero (w + i, n - i);
  920. }
  921. s_error = true;
  922. break;
  923. }
  924. // apply x = x + s * delta_x
  925. lcp.pC_plusequals_s_times_qC<PBX__MAX> (pairsbx + PBX_X, s, delta_x);
  926. currbx[PBX_X] = dir_positive
  927. ? currbx[PBX_X] + s
  928. : currbx[PBX_X] - s;
  929. // apply w = w + s * delta_w
  930. lcp.pN_plusequals_s_times_qN (w, s, delta_w);
  931. w[i] += s * delta_w[i];
  932. void *tmpbuf;
  933. // switch indexes between sets if necessary
  934. switch (cmd) {
  935. case 1: // done
  936. w[i] = 0;
  937. lcp.transfer_i_to_C (i);
  938. break;
  939. case 2: // done
  940. currbx[PBX_X] = currlh[PLH_LO];
  941. state[i] = false;
  942. lcp.transfer_i_to_N (i);
  943. break;
  944. case 3: // done
  945. currbx[PBX_X] = currlh[PLH_HI];
  946. state[i] = true;
  947. lcp.transfer_i_to_N (i);
  948. break;
  949. case 4: // keep going
  950. w[si] = 0;
  951. lcp.transfer_i_from_N_to_C (si);
  952. break;
  953. case 5: // keep going
  954. (pairsbx + (sizeint)si * PBX__MAX)[PBX_X] = (pairslh + (sizeint)si * PLH__MAX)[PLH_LO];
  955. state[si] = false;
  956. tmpbuf = memarena->PeekBufferRemainder();
  957. lcp.transfer_i_from_C_to_N (si, tmpbuf);
  958. break;
  959. case 6: // keep going
  960. (pairsbx + (sizeint)si * PBX__MAX)[PBX_X] = (pairslh + (sizeint)si * PLH__MAX)[PLH_HI];
  961. state[si] = true;
  962. tmpbuf = memarena->PeekBufferRemainder();
  963. lcp.transfer_i_from_C_to_N (si, tmpbuf);
  964. break;
  965. }
  966. if (cmd <= 3) break;
  967. } // for (;;)
  968. } // else
  969. if (s_error) {
  970. break;
  971. }
  972. } // for (unsigned i = adj_nub; i < n; ++i)
  973. // now we have to un-permute x and w
  974. if (outer_w != NULL) {
  975. lcp.unpermute_W();
  976. }
  977. lcp.unpermute_X(); // This destroys p[] and must be done last
  978. }
  979. sizeint dxEstimateSolveLCPMemoryReq(unsigned n, bool outer_w_avail)
  980. {
  981. const unsigned nskip = dPAD(n);
  982. sizeint res = 0;
  983. res += dOVERALIGNED_SIZE(sizeof(dReal) * ((sizeint)n * nskip), LMATRIX_ALIGNMENT); // for L
  984. res += 5 * dEFFICIENT_SIZE(sizeof(dReal) * n); // for d, delta_w, delta_x, Dell, ell
  985. if (!outer_w_avail) {
  986. res += dEFFICIENT_SIZE(sizeof(dReal) * n); // for w
  987. }
  988. #ifdef ROWPTRS
  989. res += dEFFICIENT_SIZE(sizeof(dReal *) * n); // for Arows
  990. #endif
  991. res += 2 * dEFFICIENT_SIZE(sizeof(unsigned) * n); // for p, C
  992. res += dEFFICIENT_SIZE(sizeof(bool) * n); // for state
  993. // Use n instead of nC as nC varies at runtime while n is greater or equal to nC
  994. sizeint lcp_transfer_req = dLCP::estimate_transfer_i_from_C_to_N_mem_req(n, nskip);
  995. res += dEFFICIENT_SIZE(lcp_transfer_req); // for dLCP::transfer_i_from_C_to_N
  996. return res;
  997. }
  998. //***************************************************************************
  999. // accuracy and timing test
  1000. static sizeint EstimateTestSolveLCPMemoryReq(unsigned n)
  1001. {
  1002. const unsigned nskip = dPAD(n);
  1003. sizeint res = 0;
  1004. res += 2 * dEFFICIENT_SIZE(sizeof(dReal) * ((sizeint)n * nskip)); // for A, A2
  1005. res += 7 * dEFFICIENT_SIZE(sizeof(dReal) * n); // for x, b, w, lo, hi, tmp1, tmp2
  1006. res += dEFFICIENT_SIZE(sizeof(dReal) * PBX__MAX * n); // for pairsbx,
  1007. res += dEFFICIENT_SIZE(sizeof(dReal) * PLH__MAX * n); // for pairslh
  1008. res += dxEstimateSolveLCPMemoryReq(n, true);
  1009. return res;
  1010. }
  1011. extern "C" ODE_API int dTestSolveLCP()
  1012. {
  1013. const unsigned n = 100;
  1014. sizeint memreq = EstimateTestSolveLCPMemoryReq(n);
  1015. dxWorldProcessMemArena *arena = dxAllocateTemporaryWorldProcessMemArena(memreq, NULL, NULL);
  1016. if (arena == NULL) {
  1017. return 0;
  1018. }
  1019. arena->ResetState();
  1020. unsigned i,nskip = dPAD(n);
  1021. #ifdef dDOUBLE
  1022. const dReal tol = REAL(1e-9);
  1023. #endif
  1024. #ifdef dSINGLE
  1025. const dReal tol = REAL(1e-4);
  1026. #endif
  1027. printf ("dTestSolveLCP()\n");
  1028. dReal *A = arena->AllocateArray<dReal> (n*nskip);
  1029. dReal *x = arena->AllocateArray<dReal> (n);
  1030. dReal *b = arena->AllocateArray<dReal> (n);
  1031. dReal *w = arena->AllocateArray<dReal> (n);
  1032. dReal *lo = arena->AllocateArray<dReal> (n);
  1033. dReal *hi = arena->AllocateArray<dReal> (n);
  1034. dReal *A2 = arena->AllocateArray<dReal> (n*nskip);
  1035. dReal *pairsbx = arena->AllocateArray<dReal> (n * PBX__MAX);
  1036. dReal *pairslh = arena->AllocateArray<dReal> (n * PLH__MAX);
  1037. dReal *tmp1 = arena->AllocateArray<dReal> (n);
  1038. dReal *tmp2 = arena->AllocateArray<dReal> (n);
  1039. double total_time = 0;
  1040. for (unsigned count=0; count < 1000; count++) {
  1041. BEGIN_STATE_SAVE(arena, saveInner) {
  1042. // form (A,b) = a random positive definite LCP problem
  1043. dMakeRandomMatrix (A2,n,n,1.0);
  1044. dMultiply2 (A,A2,A2,n,n,n);
  1045. dMakeRandomMatrix (x,n,1,1.0);
  1046. dMultiply0 (b,A,x,n,n,1);
  1047. for (i=0; i<n; i++) b[i] += (dRandReal()*REAL(0.2))-REAL(0.1);
  1048. // choose `nub' in the range 0..n-1
  1049. unsigned nub = 50; //dRandInt (n);
  1050. // make limits
  1051. for (i=0; i<nub; i++) lo[i] = -dInfinity;
  1052. for (i=0; i<nub; i++) hi[i] = dInfinity;
  1053. //for (i=nub; i<n; i++) lo[i] = 0;
  1054. //for (i=nub; i<n; i++) hi[i] = dInfinity;
  1055. //for (i=nub; i<n; i++) lo[i] = -dInfinity;
  1056. //for (i=nub; i<n; i++) hi[i] = 0;
  1057. for (i=nub; i<n; i++) lo[i] = -(dRandReal()*REAL(1.0))-REAL(0.01);
  1058. for (i=nub; i<n; i++) hi[i] = (dRandReal()*REAL(1.0))+REAL(0.01);
  1059. // set a few limits to lo=hi=0
  1060. /*
  1061. for (i=0; i<10; i++) {
  1062. unsigned j = dRandInt (n-nub) + nub;
  1063. lo[j] = 0;
  1064. hi[j] = 0;
  1065. }
  1066. */
  1067. // solve the LCP. we must make copy of A,b,lo,hi (A2,b2,lo2,hi2) for
  1068. // SolveLCP() to permute. also, we'll clear the upper triangle of A2 to
  1069. // ensure that it doesn't get referenced (if it does, the answer will be
  1070. // wrong).
  1071. memcpy (A2, A, n * nskip * sizeof(dReal));
  1072. dClearUpperTriangle (A2, n);
  1073. for (i = 0; i != n; ++i) {
  1074. dReal *currbx = pairsbx + i * PBX__MAX;
  1075. currbx[PBX_B] = b[i];
  1076. currbx[PBX_X] = 0;
  1077. }
  1078. for (i = 0; i != n; ++i) {
  1079. dReal *currlh = pairslh + i * PLH__MAX;
  1080. currlh[PLH_LO] = lo[i];
  1081. currlh[PLH_HI] = hi[i];
  1082. }
  1083. dSetZero (w,n);
  1084. dStopwatch sw;
  1085. dStopwatchReset (&sw);
  1086. dStopwatchStart (&sw);
  1087. dxSolveLCP (arena,n,A2,pairsbx,w,nub,pairslh,0);
  1088. dStopwatchStop (&sw);
  1089. double time = dStopwatchTime(&sw);
  1090. total_time += time;
  1091. double average = total_time / double(count+1) * 1000.0;
  1092. for (i = 0; i != n; ++i) {
  1093. const dReal *currbx = pairsbx + i * PBX__MAX;
  1094. x[i] = currbx[PBX_X];
  1095. }
  1096. // check the solution
  1097. dMultiply0 (tmp1,A,x,n,n,1);
  1098. for (i=0; i<n; i++) tmp2[i] = b[i] + w[i];
  1099. dReal diff = dMaxDifference (tmp1,tmp2,n,1);
  1100. // printf ("\tA*x = b+w, maximum difference = %.6e - %s (1)\n",diff,
  1101. // diff > tol ? "FAILED" : "passed");
  1102. if (diff > tol) dDebug (0,"A*x = b+w, maximum difference = %.6e",diff);
  1103. unsigned n1=0,n2=0,n3=0;
  1104. for (i=0; i<n; i++) {
  1105. if (x[i]==lo[i] && w[i] >= 0) {
  1106. n1++; // ok
  1107. }
  1108. else if (x[i]==hi[i] && w[i] <= 0) {
  1109. n2++; // ok
  1110. }
  1111. else if (x[i] >= lo[i] && x[i] <= hi[i] && w[i] == 0) {
  1112. n3++; // ok
  1113. }
  1114. else {
  1115. dDebug (0,"FAILED: i=%d x=%.4e w=%.4e lo=%.4e hi=%.4e",i,
  1116. x[i],w[i],lo[i],hi[i]);
  1117. }
  1118. }
  1119. // pacifier
  1120. printf ("passed: NL=%3d NH=%3d C=%3d ",n1,n2,n3);
  1121. printf ("time=%10.3f ms avg=%10.4f\n",time * 1000.0,average);
  1122. } END_STATE_SAVE(arena, saveInner);
  1123. }
  1124. dxFreeTemporaryWorldProcessMemArena(arena);
  1125. return 1;
  1126. }