fastltsolve.c 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232
  1. /*************************************************************************
  2. * *
  3. * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith. *
  4. * All rights reserved. Email: [email protected] Web: www.q12.org *
  5. * *
  6. * This library is free software; you can redistribute it and/or *
  7. * modify it under the terms of EITHER: *
  8. * (1) The GNU Lesser General Public License as published by the Free *
  9. * Software Foundation; either version 2.1 of the License, or (at *
  10. * your option) any later version. The text of the GNU Lesser *
  11. * General Public License is included with this library in the *
  12. * file LICENSE.TXT. *
  13. * (2) The BSD-style license that is included with this library in *
  14. * the file LICENSE-BSD.TXT. *
  15. * *
  16. * This library is distributed in the hope that it will be useful, *
  17. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  18. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files *
  19. * LICENSE.TXT and LICENSE-BSD.TXT for more details. *
  20. * *
  21. *************************************************************************/
  22. /* generated code, do not edit. */
  23. #include <ode/common.h>
  24. #include "config.h"
  25. #include "matrix.h"
  26. /* solve L^T * x=b, with b containing 1 right hand side.
  27. * L is an n*n lower triangular matrix with ones on the diagonal.
  28. * L is stored by rows and its leading dimension is lskip.
  29. * b is an n*1 matrix that contains the right hand side.
  30. * b is overwritten with x.
  31. * this processes blocks of 4.
  32. */
  33. void _dSolveL1T (const dReal *L, dReal *B, int n, int lskip1)
  34. {
  35. /* declare variables - Z matrix, p and q vectors, etc */
  36. dReal Z11,m11,Z21,m21,Z31,m31,Z41,m41,p1,q1,p2,p3,p4,*ex;
  37. const dReal *ell;
  38. int lskip2/*,lskip3*/,i,j;
  39. /* special handling for L and B because we're solving L1 *transpose* */
  40. L = L + (n-1)*(lskip1+1);
  41. B = B + n-1;
  42. lskip1 = -lskip1;
  43. /* compute lskip values */
  44. lskip2 = 2*lskip1;
  45. /*lskip3 = 3*lskip1;*/
  46. /* compute all 4 x 1 blocks of X */
  47. for (i=0; i <= n-4; i+=4) {
  48. /* compute all 4 x 1 block of X, from rows i..i+4-1 */
  49. /* set the Z matrix to 0 */
  50. Z11=0;
  51. Z21=0;
  52. Z31=0;
  53. Z41=0;
  54. ell = L - i;
  55. ex = B;
  56. /* the inner loop that computes outer products and adds them to Z */
  57. for (j=i-4; j >= 0; j -= 4) {
  58. /* load p and q values */
  59. p1=ell[0];
  60. q1=ex[0];
  61. p2=ell[-1];
  62. p3=ell[-2];
  63. p4=ell[-3];
  64. /* compute outer product and add it to the Z matrix */
  65. m11 = p1 * q1;
  66. m21 = p2 * q1;
  67. m31 = p3 * q1;
  68. m41 = p4 * q1;
  69. ell += lskip1;
  70. Z11 += m11;
  71. Z21 += m21;
  72. Z31 += m31;
  73. Z41 += m41;
  74. /* load p and q values */
  75. p1=ell[0];
  76. q1=ex[-1];
  77. p2=ell[-1];
  78. p3=ell[-2];
  79. p4=ell[-3];
  80. /* compute outer product and add it to the Z matrix */
  81. m11 = p1 * q1;
  82. m21 = p2 * q1;
  83. m31 = p3 * q1;
  84. m41 = p4 * q1;
  85. ell += lskip1;
  86. Z11 += m11;
  87. Z21 += m21;
  88. Z31 += m31;
  89. Z41 += m41;
  90. /* load p and q values */
  91. p1=ell[0];
  92. q1=ex[-2];
  93. p2=ell[-1];
  94. p3=ell[-2];
  95. p4=ell[-3];
  96. /* compute outer product and add it to the Z matrix */
  97. m11 = p1 * q1;
  98. m21 = p2 * q1;
  99. m31 = p3 * q1;
  100. m41 = p4 * q1;
  101. ell += lskip1;
  102. Z11 += m11;
  103. Z21 += m21;
  104. Z31 += m31;
  105. Z41 += m41;
  106. /* load p and q values */
  107. p1=ell[0];
  108. q1=ex[-3];
  109. p2=ell[-1];
  110. p3=ell[-2];
  111. p4=ell[-3];
  112. /* compute outer product and add it to the Z matrix */
  113. m11 = p1 * q1;
  114. m21 = p2 * q1;
  115. m31 = p3 * q1;
  116. m41 = p4 * q1;
  117. ell += lskip1;
  118. ex -= 4;
  119. Z11 += m11;
  120. Z21 += m21;
  121. Z31 += m31;
  122. Z41 += m41;
  123. /* end of inner loop */
  124. }
  125. /* compute left-over iterations */
  126. j += 4;
  127. for (; j > 0; j--) {
  128. /* load p and q values */
  129. p1=ell[0];
  130. q1=ex[0];
  131. p2=ell[-1];
  132. p3=ell[-2];
  133. p4=ell[-3];
  134. /* compute outer product and add it to the Z matrix */
  135. m11 = p1 * q1;
  136. m21 = p2 * q1;
  137. m31 = p3 * q1;
  138. m41 = p4 * q1;
  139. ell += lskip1;
  140. ex -= 1;
  141. Z11 += m11;
  142. Z21 += m21;
  143. Z31 += m31;
  144. Z41 += m41;
  145. }
  146. /* finish computing the X(i) block */
  147. Z11 = ex[0] - Z11;
  148. ex[0] = Z11;
  149. p1 = ell[-1];
  150. Z21 = ex[-1] - Z21 - p1*Z11;
  151. ex[-1] = Z21;
  152. p1 = ell[-2];
  153. p2 = ell[-2+lskip1];
  154. Z31 = ex[-2] - Z31 - p1*Z11 - p2*Z21;
  155. ex[-2] = Z31;
  156. p1 = ell[-3];
  157. p2 = ell[-3+lskip1];
  158. p3 = ell[-3+lskip2];
  159. Z41 = ex[-3] - Z41 - p1*Z11 - p2*Z21 - p3*Z31;
  160. ex[-3] = Z41;
  161. /* end of outer loop */
  162. }
  163. /* compute rows at end that are not a multiple of block size */
  164. for (; i < n; i++) {
  165. /* compute all 1 x 1 block of X, from rows i..i+1-1 */
  166. /* set the Z matrix to 0 */
  167. Z11=0;
  168. ell = L - i;
  169. ex = B;
  170. /* the inner loop that computes outer products and adds them to Z */
  171. for (j=i-4; j >= 0; j -= 4) {
  172. /* load p and q values */
  173. p1=ell[0];
  174. q1=ex[0];
  175. /* compute outer product and add it to the Z matrix */
  176. m11 = p1 * q1;
  177. ell += lskip1;
  178. Z11 += m11;
  179. /* load p and q values */
  180. p1=ell[0];
  181. q1=ex[-1];
  182. /* compute outer product and add it to the Z matrix */
  183. m11 = p1 * q1;
  184. ell += lskip1;
  185. Z11 += m11;
  186. /* load p and q values */
  187. p1=ell[0];
  188. q1=ex[-2];
  189. /* compute outer product and add it to the Z matrix */
  190. m11 = p1 * q1;
  191. ell += lskip1;
  192. Z11 += m11;
  193. /* load p and q values */
  194. p1=ell[0];
  195. q1=ex[-3];
  196. /* compute outer product and add it to the Z matrix */
  197. m11 = p1 * q1;
  198. ell += lskip1;
  199. ex -= 4;
  200. Z11 += m11;
  201. /* end of inner loop */
  202. }
  203. /* compute left-over iterations */
  204. j += 4;
  205. for (; j > 0; j--) {
  206. /* load p and q values */
  207. p1=ell[0];
  208. q1=ex[0];
  209. /* compute outer product and add it to the Z matrix */
  210. m11 = p1 * q1;
  211. ell += lskip1;
  212. ex -= 1;
  213. Z11 += m11;
  214. }
  215. /* finish computing the X(i) block */
  216. Z11 = ex[0] - Z11;
  217. ex[0] = Z11;
  218. }
  219. }
  220. #undef dSolveL1T
  221. void dSolveL1T (const dReal *L, dReal *B, int n, int lskip1)
  222. {
  223. _dSolveL1T (L, B, n, lskip1);
  224. }