varying.h 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. // ======================================================================== //
  2. // Copyright 2009-2017 Intel Corporation //
  3. // //
  4. // Licensed under the Apache License, Version 2.0 (the "License"); //
  5. // you may not use this file except in compliance with the License. //
  6. // You may obtain a copy of the License at //
  7. // //
  8. // http://www.apache.org/licenses/LICENSE-2.0 //
  9. // //
  10. // Unless required by applicable law or agreed to in writing, software //
  11. // distributed under the License is distributed on an "AS IS" BASIS, //
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
  13. // See the License for the specific language governing permissions and //
  14. // limitations under the License. //
  15. // ======================================================================== //
  16. #pragma once
  17. #include "../sys/platform.h"
  18. namespace embree
  19. {
  20. /* Varying numeric types */
  21. template<int N>
  22. struct vfloat
  23. {
  24. union { float f[N]; int i[N]; };
  25. __forceinline const float& operator [](size_t index) const { assert(index < N); return f[index]; }
  26. __forceinline float& operator [](size_t index) { assert(index < N); return f[index]; }
  27. };
  28. template<int N>
  29. struct vdouble
  30. {
  31. union { double f[N]; long long i[N]; };
  32. __forceinline const double& operator [](size_t index) const { assert(index < N); return f[index]; }
  33. __forceinline double& operator [](size_t index) { assert(index < N); return f[index]; }
  34. };
  35. template<int N>
  36. struct vint
  37. {
  38. int i[N];
  39. __forceinline const int& operator [](size_t index) const { assert(index < N); return i[index]; }
  40. __forceinline int& operator [](size_t index) { assert(index < N); return i[index]; }
  41. };
  42. template<int N>
  43. struct vuint
  44. {
  45. unsigned int i[N];
  46. __forceinline const unsigned int& operator [](size_t index) const { assert(index < N); return i[index]; }
  47. __forceinline unsigned int& operator [](size_t index) { assert(index < N); return i[index]; }
  48. };
  49. template<int N>
  50. struct vllong
  51. {
  52. long long i[N];
  53. __forceinline const long long& operator [](size_t index) const { assert(index < N); return i[index]; }
  54. __forceinline long long& operator [](size_t index) { assert(index < N); return i[index]; }
  55. };
  56. #if !defined(_MSC_VER) || _MSC_VER >= 1800
  57. /* Varying bool types */
  58. template<int N> struct vboolf { int i[N]; }; // for float/int
  59. template<int N> struct vboold { long long i[N]; }; // for double/long long
  60. /* Aliases to default types */
  61. template<int N> using vreal = vfloat<N>;
  62. template<int N> using vbool = vboolf<N>;
  63. #else
  64. /* Workaround for VS2012 */
  65. #define vreal vfloat
  66. #define vboolf vbool
  67. template<int N> struct vboolf { int i[N]; };
  68. template<int N> struct vboold { long long i[N]; };
  69. #endif
  /* Maximum supported varying size: 16 lanes when __AVX512F__ is defined,
     8 lanes when __AVX__ (but not __AVX512F__) is defined, 4 lanes otherwise */
  const int VSIZEX =
#if defined(__AVX512F__)
    16;
#elif defined(__AVX__)
    8;
#else
    4;
#endif
  78. /* Extends varying size N to optimal or up to max(N, N2) */
  79. template<int N, int N2 = VSIZEX>
  80. struct vextend
  81. {
  82. #if defined(__AVX512F__) && !defined(__AVX512VL__) // KNL
  83. /* use 16-wide SIMD calculations on KNL even for 4 and 8 wide SIMD */
  84. static const int size = (N2 == VSIZEX) ? VSIZEX : N;
  85. #define SIMD_MODE(N) N, 16
  86. #else
  87. /* calculate with same SIMD width otherwise */
  88. static const int size = N;
  89. #define SIMD_MODE(N) N, N
  90. #endif
  91. };
  92. /* 4-wide shortcuts */
  93. typedef vfloat<4> vfloat4;
  94. typedef vdouble<4> vdouble4;
  95. typedef vreal<4> vreal4;
  96. typedef vint<4> vint4;
  97. typedef vllong<4> vllong4;
  98. typedef vbool<4> vbool4;
  99. typedef vboolf<4> vboolf4;
  100. typedef vboold<4> vboold4;
  101. /* 8-wide shortcuts */
  102. typedef vfloat<8> vfloat8;
  103. typedef vdouble<8> vdouble8;
  104. typedef vreal<8> vreal8;
  105. typedef vint<8> vint8;
  106. typedef vllong<8> vllong8;
  107. typedef vbool<8> vbool8;
  108. typedef vboolf<8> vboolf8;
  109. typedef vboold<8> vboold8;
  110. /* 16-wide shortcuts */
  111. typedef vfloat<16> vfloat16;
  112. typedef vdouble<16> vdouble16;
  113. typedef vreal<16> vreal16;
  114. typedef vint<16> vint16;
  115. typedef vuint<16> vuint16;
  116. typedef vllong<16> vllong16;
  117. typedef vbool<16> vbool16;
  118. typedef vboolf<16> vboolf16;
  119. typedef vboold<16> vboold16;
  120. /* Maximum shortcuts */
  121. typedef vfloat<VSIZEX> vfloatx;
  122. typedef vdouble<VSIZEX> vdoublex;
  123. typedef vreal<VSIZEX> vrealx;
  124. typedef vint<VSIZEX> vintx;
  125. typedef vllong<VSIZEX> vllongx;
  126. typedef vbool<VSIZEX> vboolx;
  127. typedef vboolf<VSIZEX> vboolfx;
  128. typedef vboold<VSIZEX> vbooldx;
  129. }