simd.h 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100
  1. // ======================================================================== //
  2. // Copyright 2009-2017 Intel Corporation //
  3. // //
  4. // Licensed under the Apache License, Version 2.0 (the "License"); //
  5. // you may not use this file except in compliance with the License. //
  6. // You may obtain a copy of the License at //
  7. // //
  8. // http://www.apache.org/licenses/LICENSE-2.0 //
  9. // //
  10. // Unless required by applicable law or agreed to in writing, software //
  11. // distributed under the License is distributed on an "AS IS" BASIS, //
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
  13. // See the License for the specific language governing permissions and //
  14. // limitations under the License. //
  15. // ======================================================================== //
  16. #pragma once
  17. #include "../math/math.h"
  18. /* include SSE wrapper classes */
  19. #if defined(__SSE__)
  20. # include "sse.h"
  21. #endif
  22. /* include AVX wrapper classes */
  23. #if defined(__AVX__)
  24. # include "avx.h"
  25. #endif
  26. /* include AVX512 wrapper classes */
  27. #if defined (__AVX512F__)
  28. # include "avx512.h"
  29. #endif
  /* AVX_ZERO_UPPER() expands to a call to _mm256_zeroupper() only when the
     translation unit is compiled with AVX enabled but without AVX-512F.
     In every other configuration it expands to nothing. */
  #if defined(__AVX__) && !defined(__AVX512F__)
  #  define AVX_ZERO_UPPER() _mm256_zeroupper()
  #else
  #  define AVX_ZERO_UPPER()
  #endif
  37. namespace embree
  38. {
  39. /* foreach unique */
  40. template<typename vbool, typename vint, typename Closure>
  41. __forceinline void foreach_unique(const vbool& valid0, const vint& vi, const Closure& closure)
  42. {
  43. vbool valid1 = valid0;
  44. while (any(valid1)) {
  45. const int j = int(__bsf(movemask(valid1)));
  46. const int i = vi[j];
  47. const vbool valid2 = valid1 & (i == vi);
  48. valid1 = valid1 & !valid2;
  49. closure(valid2,i);
  50. }
  51. }
  52. /* foreach unique */
  53. template<typename vbool, typename vint, typename Closure>
  54. __forceinline void foreach_unique_index(const vbool& valid0, const vint& vi, const Closure& closure)
  55. {
  56. vbool valid1 = valid0;
  57. while (any(valid1)) {
  58. const int j = (int) __bsf(movemask(valid1));
  59. const int i = vi[j];
  60. const vbool valid2 = valid1 & (i == vi);
  61. valid1 = valid1 & !valid2;
  62. closure(valid2,i,j);
  63. }
  64. }
  65. template<typename Closure>
  66. __forceinline void foreach2(int x0, int x1, int y0, int y1, const Closure& closure)
  67. {
  68. __aligned(64) int U[128];
  69. __aligned(64) int V[128];
  70. int index = 0;
  71. for (int y=y0; y<y1; y++) {
  72. const bool lasty = y+1>=y1;
  73. const vintx vy = y;
  74. for (int x=x0; x<x1; ) { //x+=VSIZEX) {
  75. const bool lastx = x+VSIZEX >= x1;
  76. vintx vx = x+vintx(step);
  77. vintx::storeu(&U[index],vx);
  78. vintx::storeu(&V[index],vy);
  79. const int dx = min(x1-x,VSIZEX);
  80. index += dx;
  81. x += dx;
  82. if (index >= VSIZEX || (lastx && lasty)) {
  83. const vboolx valid = vintx(step) < vintx(index);
  84. closure(valid,vintx::load(U),vintx::load(V));
  85. x-= max(0,index-VSIZEX);
  86. index = 0;
  87. }
  88. }
  89. }
  90. }
  91. }