vec3n.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340
  1. //
  2. // Big Vector and Sparse Matrix Classes
  3. //
  4. // (c) S Melax 2006
  5. //
  6. // The focus is on 3D applications, so
  7. // the big vector is an array of float3s
  8. // and the matrix class uses 3x3 blocks.
  9. //
  10. // This file includes both:
  11. // - basic non-optimized version
  12. // - an expression optimized version
  13. //
  14. // Optimized Expressions
  15. //
  16. // We want to write sweet looking code such as V=As+Bt with big vectors.
  17. // However, we dont want the extra overheads with allocating memory for temps and excessing copying.
  18. // Instead of a full Template Metaprogramming approach, we explicitly write
  19. // classes to specifically handle all the expressions we are likely to use.
  20. // Most applicable lines of code will be of the same handful of basic forms,
  21. // but with different parameters for the operands.
  22. // In the future, if we ever need a longer expression with more operands,
  23. // then we will just add whatever additional building blocks that are necessary - not a big deal.
  24. // This approach is much simpler to develop, debug and optimize (restrict keyword, simd etc)
  25. // than template metaprogramming is. We do not rely on the implementation
  26. // of a particular compiler to be able to expand extensive nested inline codes.
  27. // Additionally, we reliably get our optimizations even within a debug build.
  28. // Therefore we believe that our Optimized Expressions
  29. // are a good compromise that give us the best of both worlds.
  30. // The code within those important algorithms, which use this library,
  31. // can now remain clean and readable yet still execute quickly.
  32. //
  33. #ifndef SM_VEC3N_H
  34. #define SM_VEC3N_H
  35. #include "vecmath.h"
  36. #include "array.h"
  37. //#include <malloc.h>
  38. //template <class T> void * vec4<T>::operator new[](size_t n){ return _mm_malloc(n,64); }
  39. //template <class T> void vec4<T>::operator delete[](void *a) { _mm_free(a); }
  40. struct HalfConstraint {
  41. float3 n;int vi;
  42. float s,t;
  43. HalfConstraint(const float3& _n,int _vi,float _t):n(_n),vi(_vi),s(0),t(_t){}
  44. HalfConstraint():vi(-1){}
  45. };
  46. class float3Nx3N
  47. {
  48. public:
  49. class Block
  50. {
  51. public:
  52. float3x3 m;
  53. int r,c;
  54. float unused[16];
  55. Block(){}
  56. Block(short _r,short _c):r(_r),c(_c){m.x=m.y=m.z=float3(0,0,0);}
  57. };
  58. Array<Block> blocks; // the first n blocks use as the diagonal.
  59. int n;
  60. void Zero();
  61. void InitDiagonal(float d);
  62. void Identity(){InitDiagonal(1.0f);}
  63. float3Nx3N():n(0){}
  64. float3Nx3N(int _n):n(_n) {for(int i=0;i<n;i++) blocks.Add(Block((short)i,(short)i));}
  65. template<class E> float3Nx3N &operator= (const E& expression) {expression.evalequals(*this);return *this;}
  66. template<class E> float3Nx3N &operator+=(const E& expression) {expression.evalpluseq(*this);return *this;}
  67. template<class E> float3Nx3N &operator-=(const E& expression) {expression.evalmnuseq(*this);return *this;}
  68. };
  69. class float3N: public Array<float3>
  70. {
  71. public:
  72. float3N(int _count=0)
  73. {
  74. SetSize(_count);
  75. }
  76. void Zero();
  77. void Init(const float3 &v); // sets each subvector to v
  78. template<class E> float3N &operator= (const E& expression) {expression.evalequals(*this);return *this;}
  79. template<class E> float3N &operator+=(const E& expression) {expression.evalpluseq(*this);return *this;}
  80. template<class E> float3N &operator-=(const E& expression) {expression.evalmnuseq(*this);return *this;}
  81. float3N &operator=( const float3N &V) { this->copy(V); return *this;}
  82. };
  83. int ConjGradient(float3N &X, float3Nx3N &A, float3N &B);
  84. int ConjGradientFiltered(float3N &X, const float3Nx3N &A, const float3N &B,const float3Nx3N &S,Array<HalfConstraint> &H);
  85. int ConjGradientFiltered(float3N &X, const float3Nx3N &A, const float3N &B,const float3Nx3N &S);
  86. inline float3N& Mul(float3N &r,const float3Nx3N &m, const float3N &v)
  87. {
  88. int i;
  89. for(i=0;i<r.count;i++) r[i]=float3(0,0,0);
  90. for(i=0;i<m.blocks.count;i++)
  91. {
  92. r[m.blocks[i].r] += m.blocks[i].m * v[m.blocks[i].c];
  93. }
  94. return r;
  95. }
  96. inline float dot(const float3N &a,const float3N &b)
  97. {
  98. float d=0;
  99. for(int i=0;i<a.count;i++)
  100. {
  101. d+= dot(a[i],b[i]);
  102. }
  103. return d;
  104. }
  105. inline void float3Nx3N::Zero()
  106. {
  107. for(int i=0;i<blocks.count;i++)
  108. {
  109. blocks[i].m = float3x3(0,0,0,0,0,0,0,0,0);
  110. }
  111. }
  112. inline void float3Nx3N::InitDiagonal(float d)
  113. {
  114. for(int i=0;i<blocks.count;i++)
  115. {
  116. blocks[i].m = (blocks[i].c==blocks[i].r) ? float3x3(d,0,0,0,d,0,0,0,d) : float3x3(0,0,0,0,0,0,0,0,0);
  117. }
  118. }
  119. inline void float3N::Zero()
  120. {
  121. for(int i=0;i<count;i++)
  122. {
  123. element[i] = float3(0,0,0);
  124. }
  125. }
  126. inline void float3N::Init(const float3 &v)
  127. {
  128. for(int i=0;i<count;i++)
  129. {
  130. element[i] = v;
  131. }
  132. }
  133. #ifdef WE_LIKE_SLOW_CODE
  134. // Unoptimized Slow Basic Version of big vector operators.
  135. // Uses typical implmentation for operators +/-*=
  136. // These operators cause lots of unnecessary construction, memory allocation, and copying.
  137. inline float3N operator +(const float3N &a,const float3N &b)
  138. {
  139. float3N r(a.count);
  140. for(int i=0;i<a.count;i++) r[i]=a[i]+b[i];
  141. return r;
  142. }
  143. inline float3N operator *(const float3N &a,const float &s)
  144. {
  145. float3N r(a.count);
  146. for(int i=0;i<a.count;i++) r[i]=a[i]*s;
  147. return r;
  148. }
  149. inline float3N operator /(const float3N &a,const float &s)
  150. {
  151. float3N r(a.count);
  152. return Mul(r,a, 1.0f/s );
  153. }
  154. inline float3N operator -(const float3N &a,const float3N &b)
  155. {
  156. float3N r(a.count);
  157. for(int i=0;i<a.count;i++) r[i]=a[i]-b[i];
  158. return r;
  159. }
  160. inline float3N operator -(const float3N &a)
  161. {
  162. float3N r(a.count);
  163. for(int i=0;i<a.count;i++) r[i]=-a[i];
  164. return r;
  165. }
  166. inline float3N operator *(const float3Nx3N &m,const float3N &v)
  167. {
  168. float3N r(v.count);
  169. return Mul(r,m,v);
  170. }
  171. inline float3N &operator-=(float3N &A, const float3N &B)
  172. {
  173. assert(A.count==B.count);
  174. for(int i=0;i<A.count;i++) A[i] -= B[i];
  175. return A;
  176. }
  177. inline float3N &operator+=(float3N &A, const float3N &B)
  178. {
  179. assert(A.count==B.count);
  180. for(int i=0;i<A.count;i++) A[i] += B[i];
  181. return A;
  182. }
  183. #else
  184. // Optimized Expressions
  185. class exVneg
  186. {
  187. public:
  188. const float3N &v;
  189. exVneg(const float3N &_v): v(_v){}
  190. void evalequals(float3N &r)const { for(int i=0;i<v.count;i++) r[i] =-v[i];}
  191. void evalpluseq(float3N &r)const { for(int i=0;i<v.count;i++) r[i]+=-v[i];}
  192. void evalmnuseq(float3N &r)const { for(int i=0;i<v.count;i++) r[i]-=-v[i];}
  193. };
  194. class exVaddV
  195. {
  196. public:
  197. const float3N &a;
  198. const float3N &b;
  199. exVaddV(const float3N &_a,const float3N &_b): a(_a),b(_b){}
  200. void evalequals(float3N &r)const { for(int i=0;i<a.count;i++) r[i] =a[i]+b[i];}
  201. void evalpluseq(float3N &r)const { for(int i=0;i<a.count;i++) r[i]+=a[i]+b[i];}
  202. void evalmnuseq(float3N &r)const { for(int i=0;i<a.count;i++) r[i]-=a[i]+b[i];}
  203. };
  204. class exVsubV
  205. {
  206. public:
  207. const float3N &a;
  208. const float3N &b;
  209. exVsubV(const float3N &_a,const float3N &_b): a(_a),b(_b){}
  210. void evalequals(float3N &r)const { for(int i=0;i<a.count;i++) r[i] =a[i]-b[i];}
  211. void evalpluseq(float3N &r)const { for(int i=0;i<a.count;i++) r[i]+=a[i]-b[i];}
  212. void evalmnuseq(float3N &r)const { for(int i=0;i<a.count;i++) r[i]-=a[i]-b[i];}
  213. };
  214. class exVs
  215. {
  216. public:
  217. const float3N &v;
  218. const float s;
  219. exVs(const float3N &_v,const float &_s): v(_v),s(_s){}
  220. void evalequals(float3N &r)const { for(int i=0;i<v.count;i++) r[i] =v[i]*s;}
  221. void evalpluseq(float3N &r)const { for(int i=0;i<v.count;i++) r[i]+=v[i]*s;}
  222. void evalmnuseq(float3N &r)const { for(int i=0;i<v.count;i++) r[i]-=v[i]*s;}
  223. };
  224. class exAsaddB
  225. {
  226. public:
  227. const float3N &a;
  228. const float3N &b;
  229. const float s;
  230. exAsaddB(const float3N &_a,const float &_s,const float3N &_b): a(_a),s(_s),b(_b){}
  231. void evalequals(float3N &r)const { for(int i=0;i<a.count;i++) r[i] =a[i]*s+b[i];}
  232. void evalpluseq(float3N &r)const { for(int i=0;i<a.count;i++) r[i]+=a[i]*s+b[i];}
  233. void evalmnuseq(float3N &r)const { for(int i=0;i<a.count;i++) r[i]-=a[i]*s+b[i];}
  234. };
  235. class exAsaddBt
  236. {
  237. public:
  238. const float3N &a;
  239. const float3N &b;
  240. const float s;
  241. const float t;
  242. exAsaddBt(const float3N &_a,const float &_s,const float3N &_b,const float &_t): a(_a),s(_s),b(_b),t(_t){}
  243. void evalequals(float3N &r)const { for(int i=0;i<a.count;i++) r[i] =a[i]*s+b[i]*t;}
  244. void evalpluseq(float3N &r)const { for(int i=0;i<a.count;i++) r[i]+=a[i]*s+b[i]*t;}
  245. void evalmnuseq(float3N &r)const { for(int i=0;i<a.count;i++) r[i]-=a[i]*s+b[i]*t;}
  246. };
  247. class exMv
  248. {
  249. public:
  250. const float3Nx3N &m;
  251. const float3N &v;
  252. exMv(const float3Nx3N &_m,const float3N &_v): m(_m),v(_v){}
  253. void evalequals(float3N &r)const { Mul(r,m,v);}
  254. };
  255. class exMs
  256. {
  257. public:
  258. const float3Nx3N &m;
  259. const float s;
  260. exMs(const float3Nx3N &_m,const float &_s): m(_m),s(_s){}
  261. void evalequals(float3Nx3N &r)const { for(int i=0;i<r.blocks.count;i++) r.blocks[i].m = m.blocks[i].m*s;}
  262. void evalpluseq(float3Nx3N &r)const { for(int i=0;i<r.blocks.count;i++) r.blocks[i].m += m.blocks[i].m*s;}
  263. void evalmnuseq(float3Nx3N &r)const { for(int i=0;i<r.blocks.count;i++) r.blocks[i].m -= m.blocks[i].m*s;}
  264. };
  265. class exMAsMBt
  266. {
  267. public:
  268. const float3Nx3N &a;
  269. const float s;
  270. const float3Nx3N &b;
  271. const float t;
  272. exMAsMBt(const float3Nx3N &_a,const float &_s,const float3Nx3N &_b,const float &_t): a(_a),s(_s),b(_b),t(_t){}
  273. void evalequals(float3Nx3N &r)const { for(int i=0;i<r.blocks.count;i++) r.blocks[i].m = a.blocks[i].m*s + b.blocks[i].m*t;}
  274. void evalpluseq(float3Nx3N &r)const { for(int i=0;i<r.blocks.count;i++) r.blocks[i].m += a.blocks[i].m*s + b.blocks[i].m*t;}
  275. void evalmnuseq(float3Nx3N &r)const { for(int i=0;i<r.blocks.count;i++) r.blocks[i].m -= a.blocks[i].m*s + b.blocks[i].m*t;}
  276. };
  277. inline exVaddV operator +(const float3N &a,const float3N &b) {return exVaddV(a,b);}
  278. inline exVsubV operator +(const exVneg &E,const float3N &b) {return exVsubV(b,E.v);}
  279. inline exVsubV operator -(const float3N &a,const float3N &b) {return exVsubV(a,b);}
  280. inline exVs operator *(const float3N &V,const float &s) {return exVs(V,s); }
  281. inline exVs operator *(const exVs &E,const float &s) {return exVs(E.v,E.s*s); }
  282. inline exAsaddB operator +(const exVs &E,const float3N &b) {return exAsaddB(E.v, E.s,b);}
  283. inline exAsaddB operator +(const float3N &b,const exVs &E) {return exAsaddB(E.v, E.s,b);}
  284. inline exAsaddB operator -(const float3N &b,const exVs &E) {return exAsaddB(E.v,-E.s,b);}
  285. inline exAsaddBt operator +(const exVs &Ea,const exVs &Eb) {return exAsaddBt(Ea.v,Ea.s,Eb.v, Eb.s);}
  286. inline exAsaddBt operator -(const exVs &Ea,const exVs &Eb) {return exAsaddBt(Ea.v,Ea.s,Eb.v,-Eb.s);}
  287. inline exMv operator *(const float3Nx3N &m,const float3N &v) {return exMv(m,v); }
  288. inline exMs operator *(const exMs &E,const float &s) {return exMs(E.m,E.s*s); }
  289. inline exMs operator *(const float3Nx3N &m,const float &s) {return exMs(m,s); }
  290. inline exMAsMBt operator +(const exMs &Ea,const exMs &Eb) {return exMAsMBt(Ea.m,Ea.s, Eb.m,Eb.s);}
  291. inline exMAsMBt operator -(const exMs &Ea,const exMs &Eb) {return exMAsMBt(Ea.m,Ea.s, Eb.m,-Eb.s);}
  292. #endif
  293. #endif