vec4.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259
  1. // ======================================================================== //
  2. // Copyright 2009-2017 Intel Corporation //
  3. // //
  4. // Licensed under the Apache License, Version 2.0 (the "License"); //
  5. // you may not use this file except in compliance with the License. //
  6. // You may obtain a copy of the License at //
  7. // //
  8. // http://www.apache.org/licenses/LICENSE-2.0 //
  9. // //
  10. // Unless required by applicable law or agreed to in writing, software //
  11. // distributed under the License is distributed on an "AS IS" BASIS, //
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
  13. // See the License for the specific language governing permissions and //
  14. // limitations under the License. //
  15. // ======================================================================== //
  16. #pragma once
  17. #include "math.h"
  18. #include "vec3.h"
  19. namespace embree
  20. {
  21. ////////////////////////////////////////////////////////////////////////////////
  22. /// Generic 4D vector Class
  23. ////////////////////////////////////////////////////////////////////////////////
  24. template<typename T> struct Vec4
  25. {
  26. T x, y, z, w;
  27. typedef T Scalar;
  28. enum { N = 4 };
  29. ////////////////////////////////////////////////////////////////////////////////
  30. /// Construction
  31. ////////////////////////////////////////////////////////////////////////////////
  32. __forceinline Vec4( ) {}
  33. __forceinline explicit Vec4( const T& a ) : x(a), y(a), z(a), w(a) {}
  34. __forceinline Vec4( const T& x, const T& y, const T& z, const T& w ) : x(x), y(y), z(z), w(w) {}
  35. __forceinline Vec4( const Vec3<T>& xyz, const T& w ) : x(xyz.x), y(xyz.y), z(xyz.z), w(w) {}
  36. __forceinline Vec4( const Vec4& other ) { x = other.x; y = other.y; z = other.z; w = other.w; }
  37. __forceinline Vec4( const Vec3fa& other );
  38. template<typename T1> __forceinline Vec4( const Vec4<T1>& a ) : x(T(a.x)), y(T(a.y)), z(T(a.z)), w(T(a.w)) {}
  39. template<typename T1> __forceinline Vec4& operator =(const Vec4<T1>& other) { x = other.x; y = other.y; z = other.z; w = other.w; return *this; }
  40. __forceinline operator Vec3<T> () const { return Vec3<T>(x,y,z); }
  41. ////////////////////////////////////////////////////////////////////////////////
  42. /// Constants
  43. ////////////////////////////////////////////////////////////////////////////////
  44. __forceinline Vec4( ZeroTy ) : x(zero), y(zero), z(zero), w(zero) {}
  45. __forceinline Vec4( OneTy ) : x(one), y(one), z(one), w(one) {}
  46. __forceinline Vec4( PosInfTy ) : x(pos_inf), y(pos_inf), z(pos_inf), w(pos_inf) {}
  47. __forceinline Vec4( NegInfTy ) : x(neg_inf), y(neg_inf), z(neg_inf), w(neg_inf) {}
  48. __forceinline const T& operator []( const size_t axis ) const { assert(axis < 4); return (&x)[axis]; }
  49. __forceinline T& operator []( const size_t axis ) { assert(axis < 4); return (&x)[axis]; }
  50. ////////////////////////////////////////////////////////////////////////////////
  51. /// Swizzles
  52. ////////////////////////////////////////////////////////////////////////////////
  53. __forceinline Vec3<T> xyz() const { return Vec3<T>(x, y, z); }
  54. };
  55. ////////////////////////////////////////////////////////////////////////////////
  56. /// Unary Operators
  57. ////////////////////////////////////////////////////////////////////////////////
  58. template<typename T> __forceinline Vec4<T> operator +( const Vec4<T>& a ) { return Vec4<T>(+a.x, +a.y, +a.z, +a.w); }
  59. template<typename T> __forceinline Vec4<T> operator -( const Vec4<T>& a ) { return Vec4<T>(-a.x, -a.y, -a.z, -a.w); }
  60. template<typename T> __forceinline Vec4<T> abs ( const Vec4<T>& a ) { return Vec4<T>(abs (a.x), abs (a.y), abs (a.z), abs (a.w)); }
  61. template<typename T> __forceinline Vec4<T> rcp ( const Vec4<T>& a ) { return Vec4<T>(rcp (a.x), rcp (a.y), rcp (a.z), rcp (a.w)); }
  62. template<typename T> __forceinline Vec4<T> rsqrt ( const Vec4<T>& a ) { return Vec4<T>(rsqrt(a.x), rsqrt(a.y), rsqrt(a.z), rsqrt(a.w)); }
  63. template<typename T> __forceinline Vec4<T> sqrt ( const Vec4<T>& a ) { return Vec4<T>(sqrt (a.x), sqrt (a.y), sqrt (a.z), sqrt (a.w)); }
  64. ////////////////////////////////////////////////////////////////////////////////
  65. /// Binary Operators
  66. ////////////////////////////////////////////////////////////////////////////////
  67. template<typename T> __forceinline Vec4<T> operator +( const Vec4<T>& a, const Vec4<T>& b ) { return Vec4<T>(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); }
  68. template<typename T> __forceinline Vec4<T> operator -( const Vec4<T>& a, const Vec4<T>& b ) { return Vec4<T>(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); }
  69. template<typename T> __forceinline Vec4<T> operator *( const Vec4<T>& a, const Vec4<T>& b ) { return Vec4<T>(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); }
  70. template<typename T> __forceinline Vec4<T> operator *( const T& a, const Vec4<T>& b ) { return Vec4<T>(a * b.x, a * b.y, a * b.z, a * b.w); }
  71. template<typename T> __forceinline Vec4<T> operator *( const Vec4<T>& a, const T& b ) { return Vec4<T>(a.x * b , a.y * b , a.z * b , a.w * b ); }
  72. template<typename T> __forceinline Vec4<T> operator /( const Vec4<T>& a, const Vec4<T>& b ) { return Vec4<T>(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); }
  73. template<typename T> __forceinline Vec4<T> operator /( const Vec4<T>& a, const T& b ) { return Vec4<T>(a.x / b , a.y / b , a.z / b , a.w / b ); }
  74. template<typename T> __forceinline Vec4<T> operator /( const T& a, const Vec4<T>& b ) { return Vec4<T>(a / b.x, a / b.y, a / b.z, a / b.w); }
  75. template<typename T> __forceinline Vec4<T> min(const Vec4<T>& a, const Vec4<T>& b) { return Vec4<T>(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); }
  76. template<typename T> __forceinline Vec4<T> max(const Vec4<T>& a, const Vec4<T>& b) { return Vec4<T>(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); }
  77. ////////////////////////////////////////////////////////////////////////////////
  78. /// Ternary Operators
  79. ////////////////////////////////////////////////////////////////////////////////
  80. template<typename T> __forceinline const Vec4<T> madd ( const Vec4<T>& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>( madd(a.x,b.x,c.x), madd(a.y,b.y,c.y), madd(a.z,b.z,c.z), madd(a.w,b.w,c.w)); }
  81. template<typename T> __forceinline const Vec4<T> msub ( const Vec4<T>& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>( msub(a.x,b.x,c.x), msub(a.y,b.y,c.y), msub(a.z,b.z,c.z), msub(a.w,b.w,c.w)); }
  82. template<typename T> __forceinline const Vec4<T> nmadd ( const Vec4<T>& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>(nmadd(a.x,b.x,c.x),nmadd(a.y,b.y,c.y),nmadd(a.z,b.z,c.z),nmadd(a.w,b.w,c.w)); }
  83. template<typename T> __forceinline const Vec4<T> nmsub ( const Vec4<T>& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>(nmsub(a.x,b.x,c.x),nmsub(a.y,b.y,c.y),nmsub(a.z,b.z,c.z),nmsub(a.w,b.w,c.w)); }
  84. template<typename T> __forceinline const Vec4<T> madd ( const T& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>( madd(a,b.x,c.x), madd(a,b.y,c.y), madd(a,b.z,c.z), madd(a,b.w,c.w)); }
  85. template<typename T> __forceinline const Vec4<T> msub ( const T& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>( msub(a,b.x,c.x), msub(a,b.y,c.y), msub(a,b.z,c.z), msub(a,b.w,c.w)); }
  86. template<typename T> __forceinline const Vec4<T> nmadd ( const T& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>(nmadd(a,b.x,c.x),nmadd(a,b.y,c.y),nmadd(a,b.z,c.z),nmadd(a,b.w,c.w)); }
  87. template<typename T> __forceinline const Vec4<T> nmsub ( const T& a, const Vec4<T>& b, const Vec4<T>& c) { return Vec4<T>(nmsub(a,b.x,c.x),nmsub(a,b.y,c.y),nmsub(a,b.z,c.z),nmsub(a,b.w,c.w)); }
  88. ////////////////////////////////////////////////////////////////////////////////
  89. /// Assignment Operators
  90. ////////////////////////////////////////////////////////////////////////////////
  91. template<typename T> __forceinline Vec4<T>& operator +=( Vec4<T>& a, const Vec4<T>& b ) { a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; return a; }
  92. template<typename T> __forceinline Vec4<T>& operator -=( Vec4<T>& a, const Vec4<T>& b ) { a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w; return a; }
  93. template<typename T> __forceinline Vec4<T>& operator *=( Vec4<T>& a, const T& b ) { a.x *= b ; a.y *= b ; a.z *= b ; a.w *= b ; return a; }
  94. template<typename T> __forceinline Vec4<T>& operator /=( Vec4<T>& a, const T& b ) { a.x /= b ; a.y /= b ; a.z /= b ; a.w /= b ; return a; }
  95. ////////////////////////////////////////////////////////////////////////////////
  96. /// Reduction Operators
  97. ////////////////////////////////////////////////////////////////////////////////
  98. template<typename T> __forceinline T reduce_add( const Vec4<T>& a ) { return a.x + a.y + a.z + a.w; }
  99. template<typename T> __forceinline T reduce_mul( const Vec4<T>& a ) { return a.x * a.y * a.z * a.w; }
  100. template<typename T> __forceinline T reduce_min( const Vec4<T>& a ) { return min(a.x, a.y, a.z, a.w); }
  101. template<typename T> __forceinline T reduce_max( const Vec4<T>& a ) { return max(a.x, a.y, a.z, a.w); }
  102. ////////////////////////////////////////////////////////////////////////////////
  103. /// Comparison Operators
  104. ////////////////////////////////////////////////////////////////////////////////
  105. template<typename T> __forceinline bool operator ==( const Vec4<T>& a, const Vec4<T>& b ) { return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w; }
  106. template<typename T> __forceinline bool operator !=( const Vec4<T>& a, const Vec4<T>& b ) { return a.x != b.x || a.y != b.y || a.z != b.z || a.w != b.w; }
  107. template<typename T> __forceinline bool operator < ( const Vec4<T>& a, const Vec4<T>& b ) {
  108. if (a.x != b.x) return a.x < b.x;
  109. if (a.y != b.y) return a.y < b.y;
  110. if (a.z != b.z) return a.z < b.z;
  111. if (a.w != b.w) return a.w < b.w;
  112. return false;
  113. }
  114. ////////////////////////////////////////////////////////////////////////////////
  115. /// Shift Operators
  116. ////////////////////////////////////////////////////////////////////////////////
  117. template<typename T> __forceinline Vec4<T> shift_right_1( const Vec4<T>& a ) {
  118. return Vec4<T>(shift_right_1(a.x),shift_right_1(a.y),shift_right_1(a.z),shift_right_1(a.w));
  119. }
  120. ////////////////////////////////////////////////////////////////////////////////
  121. /// Euclidean Space Operators
  122. ////////////////////////////////////////////////////////////////////////////////
  123. template<typename T> __forceinline T dot ( const Vec4<T>& a, const Vec4<T>& b ) { return madd(a.x,b.x,madd(a.y,b.y,madd(a.z,b.z,a.w*b.w))); }
  124. template<typename T> __forceinline T length ( const Vec4<T>& a ) { return sqrt(dot(a,a)); }
  125. template<typename T> __forceinline Vec4<T> normalize( const Vec4<T>& a ) { return a*rsqrt(dot(a,a)); }
  126. template<typename T> __forceinline T distance ( const Vec4<T>& a, const Vec4<T>& b ) { return length(a-b); }
  127. ////////////////////////////////////////////////////////////////////////////////
  128. /// Select
  129. ////////////////////////////////////////////////////////////////////////////////
  130. template<typename T> __forceinline Vec4<T> select ( bool s, const Vec4<T>& t, const Vec4<T>& f ) {
  131. return Vec4<T>(select(s,t.x,f.x),select(s,t.y,f.y),select(s,t.z,f.z),select(s,t.w,f.w));
  132. }
  133. template<typename T> __forceinline Vec4<T> select ( const Vec4<bool>& s, const Vec4<T>& t, const Vec4<T>& f ) {
  134. return Vec4<T>(select(s.x,t.x,f.x),select(s.y,t.y,f.y),select(s.z,t.z,f.z),select(s.w,t.w,f.w));
  135. }
  136. template<typename T> __forceinline Vec4<T> select ( const typename T::Bool& s, const Vec4<T>& t, const Vec4<T>& f ) {
  137. return Vec4<T>(select(s,t.x,f.x),select(s,t.y,f.y),select(s,t.z,f.z),select(s,t.w,f.w));
  138. }
  139. template<typename T>
  140. __forceinline Vec4<T> lerp(const Vec4<T>& v0, const Vec4<T>& v1, const T& t) {
  141. return madd(Vec4<T>(T(1.0f)-t),v0,t*v1);
  142. }
  143. ////////////////////////////////////////////////////////////////////////////////
  144. /// Output Operators
  145. ////////////////////////////////////////////////////////////////////////////////
  146. template<typename T> inline std::ostream& operator<<(std::ostream& cout, const Vec4<T>& a) {
  147. return cout << "(" << a.x << ", " << a.y << ", " << a.z << ", " << a.w << ")";
  148. }
  149. ////////////////////////////////////////////////////////////////////////////////
  150. /// Default template instantiations
  151. ////////////////////////////////////////////////////////////////////////////////
  152. typedef Vec4<bool > Vec4b;
  153. typedef Vec4<unsigned char> Vec4uc;
  154. typedef Vec4<int > Vec4i;
  155. typedef Vec4<float > Vec4f;
  156. }
  157. #include "vec3ba.h"
  158. #include "vec3ia.h"
  159. #include "vec3fa.h"
  160. ////////////////////////////////////////////////////////////////////////////////
  161. /// SSE / AVX / MIC specializations
  162. ////////////////////////////////////////////////////////////////////////////////
  163. #if defined __SSE__
  164. #include "../simd/sse.h"
  165. #endif
  166. #if defined __AVX__
  167. #include "../simd/avx.h"
  168. #endif
  169. #if defined __AVX512F__
  170. #include "../simd/avx512.h"
  171. #endif
  172. namespace embree
  173. {
  174. template<> __forceinline Vec4<float>::Vec4( const Vec3fa& a ) { x = a.x; y = a.y; z = a.z; w = a.w; }
  175. #if defined(__AVX__)
  176. template<> __forceinline Vec4<vfloat4>::Vec4( const Vec3fa& a ) {
  177. x = a.x; y = a.y; z = a.z; w = a.w;
  178. }
  179. #elif defined(__SSE__)
  180. template<> __forceinline Vec4<vfloat4>::Vec4( const Vec3fa& a ) {
  181. const vfloat4 v = vfloat4(a); x = shuffle<0,0,0,0>(v); y = shuffle<1,1,1,1>(v); z = shuffle<2,2,2,2>(v); w = shuffle<3,3,3,3>(v);
  182. }
  183. #endif
  184. #if defined(__SSE__)
  185. __forceinline Vec4<vfloat4> broadcast4f( const Vec4<vfloat4>& a, const size_t k ) {
  186. return Vec4<vfloat4>(vfloat4::broadcast(&a.x[k]), vfloat4::broadcast(&a.y[k]), vfloat4::broadcast(&a.z[k]), vfloat4::broadcast(&a.w[k]));
  187. }
  188. #endif
  189. #if defined(__AVX__)
  190. template<> __forceinline Vec4<vfloat8>::Vec4( const Vec3fa& a ) {
  191. x = a.x; y = a.y; z = a.z; w = a.w;
  192. }
  193. __forceinline Vec4<vfloat4> broadcast4f( const Vec4<vfloat8>& a, const size_t k ) {
  194. return Vec4<vfloat4>(vfloat4::broadcast(&a.x[k]), vfloat4::broadcast(&a.y[k]), vfloat4::broadcast(&a.z[k]), vfloat4::broadcast(&a.w[k]));
  195. }
  196. __forceinline Vec4<vfloat8> broadcast8f( const Vec4<vfloat4>& a, const size_t k ) {
  197. return Vec4<vfloat8>(vfloat8::broadcast(&a.x[k]), vfloat8::broadcast(&a.y[k]), vfloat8::broadcast(&a.z[k]), vfloat8::broadcast(&a.w[k]));
  198. }
  199. __forceinline Vec4<vfloat8> broadcast8f( const Vec4<vfloat8>& a, const size_t k ) {
  200. return Vec4<vfloat8>(vfloat8::broadcast(&a.x[k]), vfloat8::broadcast(&a.y[k]), vfloat8::broadcast(&a.z[k]), vfloat8::broadcast(&a.w[k]));
  201. }
  202. #endif
  203. #if defined(__AVX512F__)
  204. template<> __forceinline Vec4<vfloat16>::Vec4( const Vec3fa& a ) : x(a.x), y(a.y), z(a.z), w(a.w) {}
  205. #endif
  206. }