// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "../sys/platform.h"
#include "../sys/intrinsics.h"
#include "constants.h"
#include <cmath>

#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
# include "math_sycl.h"
#else

#if defined(__ARM_NEON)
#include "../simd/arm/emulation.h"
#else
#include <emmintrin.h>
#include <xmmintrin.h>
#include <immintrin.h>
#endif

#if defined(__WIN32__)
#if defined(_MSC_VER) && (_MSC_VER <= 1700)
namespace std
{
  __forceinline bool isinf    ( const float x ) { return _finite(x) == 0; }
  __forceinline bool isnan    ( const float x ) { return _isnan(x) != 0; }
  __forceinline bool isfinite ( const float x ) { return _finite(x) != 0; }
}
#endif
#endif

namespace embree
{
  __forceinline bool isvalid ( const float& v ) {
    return (v > -FLT_LARGE) & (v < +FLT_LARGE);
  }
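  // isvalid() also rejects NaNs: a NaN compares false against both bounds,
  // so the bitwise & of the two comparisons yields false.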
  __forceinline int cast_f2i(float f) {
    union { float f; int i; } v; v.f = f; return v.i;
  }

  __forceinline float cast_i2f(int i) {
    union { float f; int i; } v; v.i = i; return v.f;
  }

  __forceinline int   toInt  (const float& a) { return int(a); }
  __forceinline float toFloat(const int&   a) { return float(a); }

  __forceinline int   asInt  (const float& a) { return *((int*)&a); }
  __forceinline float asFloat(const int&   a) { return *((float*)&a); }
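  // Note: cast_f2i/cast_i2f type-pun through a union, while asInt/asFloat
  // reinterpret through a pointer cast; both preserve the IEEE-754 bit
  // pattern rather than converting the value (contrast toInt/toFloat above).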
#if defined(__WIN32__)
  __forceinline bool finite ( const float x ) { return _finite(x) != 0; }
#endif

  __forceinline float sign ( const float x ) { return x<0?-1.0f:1.0f; }
  __forceinline float sqr  ( const float x ) { return x*x; }

  __forceinline float rcp  ( const float x )
  {
#if defined(__aarch64__)
    // Move scalar to vector register and do rcp.
    __m128 a;
    a[0] = x;
    float32x4_t reciprocal = vrecpeq_f32(a);
    reciprocal = vmulq_f32(vrecpsq_f32(a, reciprocal), reciprocal);
    reciprocal = vmulq_f32(vrecpsq_f32(a, reciprocal), reciprocal);
    return reciprocal[0];
#else
    const __m128 a = _mm_set_ss(x);

#if defined(__AVX512VL__)
    const __m128 r = _mm_rcp14_ss(_mm_set_ss(0.0f),a);
#else
    const __m128 r = _mm_rcp_ss(a);
#endif

#if defined(__AVX2__)
    return _mm_cvtss_f32(_mm_mul_ss(r,_mm_fnmadd_ss(r, a, _mm_set_ss(2.0f))));
#else
    return _mm_cvtss_f32(_mm_mul_ss(r,_mm_sub_ss(_mm_set_ss(2.0f), _mm_mul_ss(r, a))));
#endif
#endif //defined(__aarch64__)
  }
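  // rcp() starts from the hardware reciprocal estimate (rcp14 on AVX-512VL,
  // rcpss otherwise, vrecpe on NEON) and applies one Newton-Raphson step,
  // r' = r*(2 - a*r), to the low-precision estimate; the NEON path applies
  // two vrecps refinement steps instead.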
  __forceinline float signmsk ( const float x ) {
#if defined(__aarch64__)
    // FP and NEON share the same vector registers on arm64.
    __m128  a;
    __m128i b;
    a[0] = x;
    b[0] = 0x80000000;
    a = _mm_and_ps(a, vreinterpretq_f32_s32(b));
    return a[0];
#else
    return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(0x80000000))));
#endif
  }

  __forceinline float xorf( const float x, const float y ) {
#if defined(__aarch64__)
    // FP and NEON share the same vector registers on arm64.
    __m128 a;
    __m128 b;
    a[0] = x;
    b[0] = y;
    a = _mm_xor_ps(a, b);
    return a[0];
#else
    return _mm_cvtss_f32(_mm_xor_ps(_mm_set_ss(x),_mm_set_ss(y)));
#endif
  }

  __forceinline float andf( const float x, const unsigned y ) {
#if defined(__aarch64__)
    // FP and NEON share the same vector registers on arm64.
    __m128  a;
    __m128i b;
    a[0] = x;
    b[0] = y;
    a = _mm_and_ps(a, vreinterpretq_f32_s32(b));
    return a[0];
#else
    return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(y))));
#endif
  }
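  // signmsk() isolates the IEEE-754 sign bit (mask 0x80000000); xorf() and
  // andf() apply the bitwise operation to the float's bit pattern, e.g. to
  // flip or clear the sign without a branch.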
  __forceinline float rsqrt( const float x )
  {
#if defined(__aarch64__)
    // FP and NEON share the same vector registers on arm64.
    __m128 a;
    a[0] = x;
    __m128 value = _mm_rsqrt_ps(a);
    value = vmulq_f32(value, vrsqrtsq_f32(vmulq_f32(a, value), value));
    value = vmulq_f32(value, vrsqrtsq_f32(vmulq_f32(a, value), value));
    return value[0];
#else
    const __m128 a = _mm_set_ss(x);
#if defined(__AVX512VL__)
    __m128 r = _mm_rsqrt14_ss(_mm_set_ss(0.0f),a);
#else
    __m128 r = _mm_rsqrt_ss(a);
#endif
    const __m128 c = _mm_add_ss(_mm_mul_ss(_mm_set_ss(1.5f), r),
                                _mm_mul_ss(_mm_mul_ss(_mm_mul_ss(a, _mm_set_ss(-0.5f)), r), _mm_mul_ss(r, r)));
    return _mm_cvtss_f32(c);
#endif
  }
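  // rsqrt() refines the hardware estimate with one Newton-Raphson step,
  // r' = r*(1.5 - 0.5*a*r*r); the NEON path uses two vrsqrts iterations.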
#if defined(__WIN32__) && defined(_MSC_VER) && (_MSC_VER <= 1700)
  __forceinline float  nextafter(float  x, float  y) { if ((x<y) == (x>0)) return x*(1.1f+float(ulp)); else return x*(0.9f-float(ulp)); }
  __forceinline double nextafter(double x, double y) { return _nextafter(x, y); }
  __forceinline int    roundf(float f) { return (int)(f + 0.5f); }
#else
  __forceinline float  nextafter(float  x, float  y) { return ::nextafterf(x, y); }
  __forceinline double nextafter(double x, double y) { return ::nextafter(x, y); }
#endif

  __forceinline float abs  ( const float x ) { return ::fabsf(x); }
  __forceinline float acos ( const float x ) { return ::acosf (x); }
  __forceinline float asin ( const float x ) { return ::asinf (x); }
  __forceinline float atan ( const float x ) { return ::atanf (x); }
  __forceinline float atan2( const float y, const float x ) { return ::atan2f(y, x); }
  __forceinline float cos  ( const float x ) { return ::cosf  (x); }
  __forceinline float cosh ( const float x ) { return ::coshf (x); }
  __forceinline float exp  ( const float x ) { return ::expf  (x); }
  __forceinline float fmod ( const float x, const float y ) { return ::fmodf (x, y); }
  __forceinline float log  ( const float x ) { return ::logf  (x); }
  __forceinline float log10( const float x ) { return ::log10f(x); }
  __forceinline float pow  ( const float x, const float y ) { return ::powf  (x, y); }
  __forceinline float sin  ( const float x ) { return ::sinf  (x); }
  __forceinline float sinh ( const float x ) { return ::sinhf (x); }
  __forceinline float sqrt ( const float x ) { return ::sqrtf (x); }
  __forceinline float tan  ( const float x ) { return ::tanf  (x); }
  __forceinline float tanh ( const float x ) { return ::tanhf (x); }
  __forceinline float floor( const float x ) { return ::floorf(x); }
  __forceinline float ceil ( const float x ) { return ::ceilf (x); }
  __forceinline float frac ( const float x ) { return x-floor(x); }

  __forceinline double abs  ( const double x ) { return ::fabs(x); }
  __forceinline double sign ( const double x ) { return x<0?-1.0:1.0; }
  __forceinline double acos ( const double x ) { return ::acos (x); }
  __forceinline double asin ( const double x ) { return ::asin (x); }
  __forceinline double atan ( const double x ) { return ::atan (x); }
  __forceinline double atan2( const double y, const double x ) { return ::atan2(y, x); }
  __forceinline double cos  ( const double x ) { return ::cos  (x); }
  __forceinline double cosh ( const double x ) { return ::cosh (x); }
  __forceinline double exp  ( const double x ) { return ::exp  (x); }
  __forceinline double fmod ( const double x, const double y ) { return ::fmod (x, y); }
  __forceinline double log  ( const double x ) { return ::log  (x); }
  __forceinline double log10( const double x ) { return ::log10(x); }
  __forceinline double pow  ( const double x, const double y ) { return ::pow  (x, y); }
  __forceinline double rcp  ( const double x ) { return 1.0/x; }
  __forceinline double rsqrt( const double x ) { return 1.0/::sqrt(x); }
  __forceinline double sin  ( const double x ) { return ::sin  (x); }
  __forceinline double sinh ( const double x ) { return ::sinh (x); }
  __forceinline double sqr  ( const double x ) { return x*x; }
  __forceinline double sqrt ( const double x ) { return ::sqrt (x); }
  __forceinline double tan  ( const double x ) { return ::tan  (x); }
  __forceinline double tanh ( const double x ) { return ::tanh (x); }
  __forceinline double floor( const double x ) { return ::floor(x); }
  __forceinline double ceil ( const double x ) { return ::ceil (x); }
#if defined(__aarch64__)
  __forceinline float mini(float a, float b) {
    // FP and NEON share the same vector registers on arm64.
    __m128 x;
    __m128 y;
    x[0] = a;
    y[0] = b;
    x = _mm_min_ps(x, y);
    return x[0];
  }
#elif defined(__SSE4_1__)
  __forceinline float mini(float a, float b) {
    const __m128i ai = _mm_castps_si128(_mm_set_ss(a));
    const __m128i bi = _mm_castps_si128(_mm_set_ss(b));
    const __m128i ci = _mm_min_epi32(ai,bi);
    return _mm_cvtss_f32(_mm_castsi128_ps(ci));
  }
#endif

#if defined(__aarch64__)
  __forceinline float maxi(float a, float b) {
    // FP and NEON share the same vector registers on arm64.
    __m128 x;
    __m128 y;
    x[0] = a;
    y[0] = b;
    x = _mm_max_ps(x, y);
    return x[0];
  }
#elif defined(__SSE4_1__)
  __forceinline float maxi(float a, float b) {
    const __m128i ai = _mm_castps_si128(_mm_set_ss(a));
    const __m128i bi = _mm_castps_si128(_mm_set_ss(b));
    const __m128i ci = _mm_max_epi32(ai,bi);
    return _mm_cvtss_f32(_mm_castsi128_ps(ci));
  }
#endif
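  // The SSE4.1 mini/maxi compare the raw float bit patterns as signed 32-bit
  // integers; this matches the floating-point ordering only for non-negative
  // inputs, so these helpers are presumably intended for values known to be
  // >= 0. The aarch64 variants use true float min/max instead.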
  template<typename T>
  __forceinline T twice(const T& a) { return a+a; }

  __forceinline      int min(int      a, int      b) { return a<b ? a:b; }
  __forceinline unsigned min(unsigned a, unsigned b) { return a<b ? a:b; }
  __forceinline  int64_t min(int64_t  a, int64_t  b) { return a<b ? a:b; }
  __forceinline    float min(float    a, float    b) { return a<b ? a:b; }
  __forceinline   double min(double   a, double   b) { return a<b ? a:b; }
#if defined(__64BIT__) || defined(__EMSCRIPTEN__)
  __forceinline   size_t min(size_t   a, size_t   b) { return a<b ? a:b; }
#endif
#if defined(__EMSCRIPTEN__)
  __forceinline     long min(long     a, long     b) { return a<b ? a:b; }
#endif

  template<typename T> __forceinline T min(const T& a, const T& b, const T& c) { return min(min(a,b),c); }
  template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d) { return min(min(a,b),min(c,d)); }
  template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d, const T& e) { return min(min(min(a,b),min(c,d)),e); }

  template<typename T> __forceinline T mini(const T& a, const T& b, const T& c) { return mini(mini(a,b),c); }
  template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d) { return mini(mini(a,b),mini(c,d)); }
  template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d, const T& e) { return mini(mini(mini(a,b),mini(c,d)),e); }

  __forceinline      int max(int      a, int      b) { return a<b ? b:a; }
  __forceinline unsigned max(unsigned a, unsigned b) { return a<b ? b:a; }
  __forceinline  int64_t max(int64_t  a, int64_t  b) { return a<b ? b:a; }
  __forceinline    float max(float    a, float    b) { return a<b ? b:a; }
  __forceinline   double max(double   a, double   b) { return a<b ? b:a; }
#if defined(__64BIT__) || defined(__EMSCRIPTEN__)
  __forceinline   size_t max(size_t   a, size_t   b) { return a<b ? b:a; }
#endif
#if defined(__EMSCRIPTEN__)
  __forceinline     long max(long     a, long     b) { return a<b ? b:a; }
#endif

  template<typename T> __forceinline T max(const T& a, const T& b, const T& c) { return max(max(a,b),c); }
  template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d) { return max(max(a,b),max(c,d)); }
  template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d, const T& e) { return max(max(max(a,b),max(c,d)),e); }

  template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c) { return maxi(maxi(a,b),c); }
  template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d) { return maxi(maxi(a,b),maxi(c,d)); }
  template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d, const T& e) { return maxi(maxi(maxi(a,b),maxi(c,d)),e); }

#if defined(__MACOSX__)
  __forceinline ssize_t min(ssize_t a, ssize_t b) { return a<b ? a:b; }
  __forceinline ssize_t max(ssize_t a, ssize_t b) { return a<b ? b:a; }
#endif

#if defined(__MACOSX__) && !defined(__INTEL_COMPILER)
  __forceinline void sincosf(float x, float *sin, float *cos) {
    __sincosf(x,sin,cos);
  }
#endif

#if defined(__WIN32__) || defined(__FreeBSD__)
  __forceinline void sincosf(float x, float *s, float *c) {
    *s = sinf(x); *c = cosf(x);
  }
#endif

  template<typename T> __forceinline T clamp ( const T& x, const T& lower = T(zero), const T& upper = T(one) ) { return max(min(x,upper),lower); }
  template<typename T> __forceinline T clampz( const T& x, const T& upper ) { return max(T(zero), min(x,upper)); }

  template<typename T> __forceinline T deg2rad ( const T& x ) { return x * T(1.74532925199432957692e-2f); }
  template<typename T> __forceinline T rad2deg ( const T& x ) { return x * T(5.72957795130823208768e1f); }
  template<typename T> __forceinline T sin2cos ( const T& x ) { return sqrt(max(T(zero),T(one)-x*x)); }
  template<typename T> __forceinline T cos2sin ( const T& x ) { return sin2cos(x); }
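  // sin2cos() evaluates sqrt(1 - x*x), converting sin(t) into cos(t) (and
  // vice versa via cos2sin); the max with zero guards against small negative
  // arguments caused by rounding.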
#if defined(__AVX2__)
  __forceinline float madd  ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fmadd_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); }
  __forceinline float msub  ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fmsub_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); }
  __forceinline float nmadd ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fnmadd_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); }
  __forceinline float nmsub ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fnmsub_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); }
#elif defined (__aarch64__) && defined(__clang__)
#pragma clang fp contract(fast)
  __forceinline float madd  ( const float a, const float b, const float c) { return a*b + c; }
  __forceinline float msub  ( const float a, const float b, const float c) { return a*b - c; }
  __forceinline float nmadd ( const float a, const float b, const float c) { return c - a*b; }
  __forceinline float nmsub ( const float a, const float b, const float c) { return -(c + a*b); }
#pragma clang fp contract(on)
#else
  __forceinline float madd  ( const float a, const float b, const float c) { return a*b+c; }
  __forceinline float msub  ( const float a, const float b, const float c) { return a*b-c; }
  __forceinline float nmadd ( const float a, const float b, const float c) { return -a*b+c; }
  __forceinline float nmsub ( const float a, const float b, const float c) { return -a*b-c; }
#endif
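  // madd/msub/nmadd/nmsub compute a*b+c, a*b-c, -a*b+c and -a*b-c. On AVX2
  // they map to single-rounding FMA instructions; on aarch64 the clang
  // "fp contract(fast)" pragma lets the compiler fuse the expressions, and
  // the generic fallback uses a separate multiply and add.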
  /*! random functions */
  template<typename T> T random() { return T(0); }
#if defined(_WIN32)
  template<> __forceinline int      random() { return int(rand()) ^ (int(rand()) << 8) ^ (int(rand()) << 16); }
  template<> __forceinline uint32_t random() { return uint32_t(rand()) ^ (uint32_t(rand()) << 8) ^ (uint32_t(rand()) << 16); }
#else
  template<> __forceinline int      random() { return int(rand()); }
  template<> __forceinline uint32_t random() { return uint32_t(rand()) ^ (uint32_t(rand()) << 16); }
#endif
  template<> __forceinline float  random() { return rand()/float(RAND_MAX); }
  template<> __forceinline double random() { return rand()/double(RAND_MAX); }

#if _WIN32
  __forceinline double drand48() {
    return double(rand())/double(RAND_MAX);
  }

  __forceinline void srand48(long seed) {
    return srand(seed);
  }
#endif
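  // On Windows, drand48()/srand48() are shimmed on top of rand()/srand();
  // the result has only RAND_MAX resolution rather than the 48-bit state of
  // the POSIX generators.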
  /*! selects */
  __forceinline bool  select(bool s, bool  t , bool f) { return s ? t : f; }
  __forceinline int   select(bool s, int   t,   int f) { return s ? t : f; }
  __forceinline float select(bool s, float t, float f) { return s ? t : f; }

  __forceinline bool none(bool s) { return !s; }
  __forceinline bool all (bool s) { return s; }
  __forceinline bool any (bool s) { return s; }

  __forceinline unsigned movemask (bool s) { return (unsigned)s; }

  __forceinline float lerp(const float v0, const float v1, const float t) {
    return madd(1.0f-t,v0,t*v1);
  }
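  // lerp() uses the two-product form (1-t)*v0 + t*v1, which returns v0 and v1
  // exactly at t = 0 and t = 1 (unlike the form v0 + t*(v1-v0)).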
  template<typename T>
  __forceinline T lerp2(const float x0, const float x1, const float x2, const float x3, const T& u, const T& v) {
    return madd((1.0f-u),madd((1.0f-v),T(x0),v*T(x2)),u*madd((1.0f-v),T(x1),v*T(x3)));
  }
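  // lerp2() bilinearly interpolates the four corner values: x0 at (u,v)=(0,0),
  // x1 at (1,0), x2 at (0,1) and x3 at (1,1).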
  /*! exchange */
  template<typename T> __forceinline void xchg ( T& a, T& b ) { const T tmp = a; a = b; b = tmp; }

  /* load/store */
  template<typename Ty> struct mem;

  template<> struct mem<float> {
    static __forceinline float load (bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; }
    static __forceinline float loadu(bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; }

    static __forceinline void store (bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; }
    static __forceinline void storeu(bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; }
  };
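  // mem<float> provides masked scalar loads and stores: a false mask yields
  // 0.0f on load and suppresses the store; the aligned (load/store) and
  // unaligned (loadu/storeu) variants coincide for a single float.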
  /*! bit reverse operation */
  template<class T>
  __forceinline T bitReverse(const T& vin)
  {
    T v = vin;
    v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
    v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
    v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);
    v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);
    v = ( v >> 16 ) | ( v << 16);
    return v;
  }
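  // bitReverse() reverses the bits of a 32-bit value by swapping neighbouring
  // bits, then pairs, nibbles, bytes, and finally the two 16-bit halves.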
  /*! bit interleave operation */
  template<class T>
  __forceinline T bitInterleave(const T& xin, const T& yin, const T& zin)
  {
    T x = xin, y = yin, z = zin;

    x = (x | (x << 16)) & 0x030000FF;
    x = (x | (x <<  8)) & 0x0300F00F;
    x = (x | (x <<  4)) & 0x030C30C3;
    x = (x | (x <<  2)) & 0x09249249;

    y = (y | (y << 16)) & 0x030000FF;
    y = (y | (y <<  8)) & 0x0300F00F;
    y = (y | (y <<  4)) & 0x030C30C3;
    y = (y | (y <<  2)) & 0x09249249;

    z = (z | (z << 16)) & 0x030000FF;
    z = (z | (z <<  8)) & 0x0300F00F;
    z = (z | (z <<  4)) & 0x030C30C3;
    z = (z | (z <<  2)) & 0x09249249;

    return x | (y << 1) | (z << 2);
  }
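  // bitInterleave() spreads the low 10 bits of each coordinate so that every
  // bit is followed by two zero bits, then interleaves x, y and z into a
  // 30-bit Morton (Z-order) code with x occupying bits 0, 3, 6, ...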
#if defined(__AVX2__) && !defined(__aarch64__)
  template<>
  __forceinline unsigned int bitInterleave(const unsigned int &xi, const unsigned int& yi, const unsigned int& zi)
  {
    const unsigned int xx = pdep(xi,0x49249249 /* 0b01001001001001001001001001001001 */ );
    const unsigned int yy = pdep(yi,0x92492492 /* 0b10010010010010010010010010010010 */);
    const unsigned int zz = pdep(zi,0x24924924 /* 0b00100100100100100100100100100100 */);
    return xx | yy | zz;
  }
#endif
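  // The AVX2 specialization uses the BMI2 parallel-bit-deposit instruction
  // (via the pdep helper, presumably provided by ../sys/intrinsics.h) to
  // scatter each coordinate's bits directly into every third bit position,
  // replacing the shift/mask cascade above.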
  /*! bit interleave operation for 64-bit data types */
  template<class T>
  __forceinline T bitInterleave64(const T& xin, const T& yin, const T& zin){
    T x = xin & 0x1fffff;
    T y = yin & 0x1fffff;
    T z = zin & 0x1fffff;

    x = (x | x << 32) & 0x1f00000000ffff;
    x = (x | x << 16) & 0x1f0000ff0000ff;
    x = (x | x << 8) & 0x100f00f00f00f00f;
    x = (x | x << 4) & 0x10c30c30c30c30c3;
    x = (x | x << 2) & 0x1249249249249249;

    y = (y | y << 32) & 0x1f00000000ffff;
    y = (y | y << 16) & 0x1f0000ff0000ff;
    y = (y | y << 8) & 0x100f00f00f00f00f;
    y = (y | y << 4) & 0x10c30c30c30c30c3;
    y = (y | y << 2) & 0x1249249249249249;

    z = (z | z << 32) & 0x1f00000000ffff;
    z = (z | z << 16) & 0x1f0000ff0000ff;
    z = (z | z << 8) & 0x100f00f00f00f00f;
    z = (z | z << 4) & 0x10c30c30c30c30c3;
    z = (z | z << 2) & 0x1249249249249249;

    return x | (y << 1) | (z << 2);
  }
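  // bitInterleave64() interleaves the low 21 bits of each coordinate into a
  // 63-bit Morton code stored in a 64-bit integer.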
}

#endif