Vec8.inl 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. // SPDX-FileCopyrightText: 2021 Jorrit Rouwe
  2. // SPDX-License-Identifier: MIT
  3. #include <Jolt/Math/UVec8.h>
  4. JPH_NAMESPACE_BEGIN
  5. Vec8::Vec8(Vec4Arg inLo, Vec4Arg inHi) :
  6. mValue(_mm256_insertf128_ps(_mm256_castps128_ps256(inLo.mValue), inHi.mValue, 1))
  7. {
  8. }
  9. Vec8 Vec8::sZero()
  10. {
  11. return _mm256_setzero_ps();
  12. }
  13. Vec8 Vec8::sReplicate(float inV)
  14. {
  15. return _mm256_set1_ps(inV);
  16. }
  17. Vec8 Vec8::sSplatX(Vec4Arg inV)
  18. {
  19. return _mm256_set1_ps(inV.GetX());
  20. }
  21. Vec8 Vec8::sSplatY(Vec4Arg inV)
  22. {
  23. return _mm256_set1_ps(inV.GetY());
  24. }
  25. Vec8 Vec8::sSplatZ(Vec4Arg inV)
  26. {
  27. return _mm256_set1_ps(inV.GetZ());
  28. }
  29. Vec8 Vec8::sFusedMultiplyAdd(Vec8Arg inMul1, Vec8Arg inMul2, Vec8Arg inAdd)
  30. {
  31. #ifdef JPH_USE_FMADD
  32. return _mm256_fmadd_ps(inMul1.mValue, inMul2.mValue, inAdd.mValue);
  33. #else
  34. return _mm256_add_ps(_mm256_mul_ps(inMul1.mValue, inMul2.mValue), inAdd.mValue);
  35. #endif
  36. }
  37. Vec8 Vec8::sSelect(Vec8Arg inV1, Vec8Arg inV2, UVec8Arg inControl)
  38. {
  39. return _mm256_blendv_ps(inV1.mValue, inV2.mValue, _mm256_castsi256_ps(inControl.mValue));
  40. }
  41. Vec8 Vec8::sMin(Vec8Arg inV1, Vec8Arg inV2)
  42. {
  43. return _mm256_min_ps(inV1.mValue, inV2.mValue);
  44. }
  45. Vec8 Vec8::sMax(Vec8Arg inV1, Vec8Arg inV2)
  46. {
  47. return _mm256_max_ps(inV1.mValue, inV2.mValue);
  48. }
  49. UVec8 Vec8::sLess(Vec8Arg inV1, Vec8Arg inV2)
  50. {
  51. return _mm256_castps_si256(_mm256_cmp_ps(inV1.mValue, inV2.mValue, _CMP_LT_OQ));
  52. }
  53. UVec8 Vec8::sGreater(Vec8Arg inV1, Vec8Arg inV2)
  54. {
  55. return _mm256_castps_si256(_mm256_cmp_ps(inV1.mValue, inV2.mValue, _CMP_GT_OQ));
  56. }
  57. Vec8 Vec8::sLoadFloat8(const float *inV)
  58. {
  59. return _mm256_loadu_ps(inV);
  60. }
  61. Vec8 Vec8::sLoadFloat8Aligned(const float *inV)
  62. {
  63. return _mm256_load_ps(inV);
  64. }
  65. Vec8 Vec8::operator * (Vec8Arg inV2) const
  66. {
  67. return _mm256_mul_ps(mValue, inV2.mValue);
  68. }
  69. Vec8 Vec8::operator * (float inV2) const
  70. {
  71. return _mm256_mul_ps(mValue, _mm256_set1_ps(inV2));
  72. }
  73. Vec8 Vec8::operator + (Vec8Arg inV2) const
  74. {
  75. return _mm256_add_ps(mValue, inV2.mValue);
  76. }
  77. Vec8 Vec8::operator - (Vec8Arg inV2) const
  78. {
  79. return _mm256_sub_ps(mValue, inV2.mValue);
  80. }
  81. Vec8 Vec8::operator / (Vec8Arg inV2) const
  82. {
  83. return _mm256_div_ps(mValue, inV2.mValue);
  84. }
  85. Vec8 Vec8::Reciprocal() const
  86. {
  87. return Vec8::sReplicate(1.0f) / mValue;
  88. }
  89. template<uint32 SwizzleX, uint32 SwizzleY, uint32 SwizzleZ, uint32 SwizzleW>
  90. Vec8 Vec8::Swizzle() const
  91. {
  92. static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range");
  93. static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range");
  94. static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range");
  95. static_assert(SwizzleW <= 3, "SwizzleW template parameter out of range");
  96. return _mm256_shuffle_ps(mValue, mValue, _MM_SHUFFLE(SwizzleW, SwizzleZ, SwizzleY, SwizzleX));
  97. }
  98. Vec8 Vec8::Abs() const
  99. {
  100. #if defined(JPH_USE_AVX512)
  101. return _mm256_range_ps(mValue, mValue, 0b1000);
  102. #else
  103. return _mm256_max_ps(_mm256_sub_ps(_mm256_setzero_ps(), mValue), mValue);
  104. #endif
  105. }
  106. Vec4 Vec8::LowerVec4() const
  107. {
  108. return _mm256_castps256_ps128(mValue);
  109. }
  110. Vec4 Vec8::UpperVec4() const
  111. {
  112. return _mm256_extractf128_ps(mValue, 1);
  113. }
  114. float Vec8::ReduceMin() const
  115. {
  116. return Vec4::sMin(LowerVec4(), UpperVec4()).ReduceMin();
  117. }
  118. JPH_NAMESPACE_END