Vec8.inl 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. // Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
  2. // SPDX-FileCopyrightText: 2021 Jorrit Rouwe
  3. // SPDX-License-Identifier: MIT
  4. #include <Jolt/Math/UVec8.h>
  5. JPH_NAMESPACE_BEGIN
  6. Vec8::Vec8(Vec4Arg inLo, Vec4Arg inHi) :
  7. mValue(_mm256_insertf128_ps(_mm256_castps128_ps256(inLo.mValue), inHi.mValue, 1))
  8. {
  9. }
  10. Vec8 Vec8::sZero()
  11. {
  12. return _mm256_setzero_ps();
  13. }
  14. Vec8 Vec8::sReplicate(float inV)
  15. {
  16. return _mm256_set1_ps(inV);
  17. }
  18. Vec8 Vec8::sSplatX(Vec4Arg inV)
  19. {
  20. return _mm256_set1_ps(inV.GetX());
  21. }
  22. Vec8 Vec8::sSplatY(Vec4Arg inV)
  23. {
  24. return _mm256_set1_ps(inV.GetY());
  25. }
  26. Vec8 Vec8::sSplatZ(Vec4Arg inV)
  27. {
  28. return _mm256_set1_ps(inV.GetZ());
  29. }
  30. Vec8 Vec8::sFusedMultiplyAdd(Vec8Arg inMul1, Vec8Arg inMul2, Vec8Arg inAdd)
  31. {
  32. #ifdef JPH_USE_FMADD
  33. return _mm256_fmadd_ps(inMul1.mValue, inMul2.mValue, inAdd.mValue);
  34. #else
  35. return _mm256_add_ps(_mm256_mul_ps(inMul1.mValue, inMul2.mValue), inAdd.mValue);
  36. #endif
  37. }
  38. Vec8 Vec8::sSelect(Vec8Arg inV1, Vec8Arg inV2, UVec8Arg inControl)
  39. {
  40. return _mm256_blendv_ps(inV1.mValue, inV2.mValue, _mm256_castsi256_ps(inControl.mValue));
  41. }
  42. Vec8 Vec8::sMin(Vec8Arg inV1, Vec8Arg inV2)
  43. {
  44. return _mm256_min_ps(inV1.mValue, inV2.mValue);
  45. }
  46. Vec8 Vec8::sMax(Vec8Arg inV1, Vec8Arg inV2)
  47. {
  48. return _mm256_max_ps(inV1.mValue, inV2.mValue);
  49. }
  50. UVec8 Vec8::sLess(Vec8Arg inV1, Vec8Arg inV2)
  51. {
  52. return _mm256_castps_si256(_mm256_cmp_ps(inV1.mValue, inV2.mValue, _CMP_LT_OQ));
  53. }
  54. UVec8 Vec8::sGreater(Vec8Arg inV1, Vec8Arg inV2)
  55. {
  56. return _mm256_castps_si256(_mm256_cmp_ps(inV1.mValue, inV2.mValue, _CMP_GT_OQ));
  57. }
  58. Vec8 Vec8::sLoadFloat8(const float *inV)
  59. {
  60. return _mm256_loadu_ps(inV);
  61. }
  62. Vec8 Vec8::sLoadFloat8Aligned(const float *inV)
  63. {
  64. return _mm256_load_ps(inV);
  65. }
  66. Vec8 Vec8::operator * (Vec8Arg inV2) const
  67. {
  68. return _mm256_mul_ps(mValue, inV2.mValue);
  69. }
  70. Vec8 Vec8::operator * (float inV2) const
  71. {
  72. return _mm256_mul_ps(mValue, _mm256_set1_ps(inV2));
  73. }
  74. Vec8 Vec8::operator + (Vec8Arg inV2) const
  75. {
  76. return _mm256_add_ps(mValue, inV2.mValue);
  77. }
  78. Vec8 Vec8::operator - (Vec8Arg inV2) const
  79. {
  80. return _mm256_sub_ps(mValue, inV2.mValue);
  81. }
  82. Vec8 Vec8::operator / (Vec8Arg inV2) const
  83. {
  84. return _mm256_div_ps(mValue, inV2.mValue);
  85. }
  86. Vec8 Vec8::Reciprocal() const
  87. {
  88. return Vec8::sReplicate(1.0f) / mValue;
  89. }
  90. template<uint32 SwizzleX, uint32 SwizzleY, uint32 SwizzleZ, uint32 SwizzleW>
  91. Vec8 Vec8::Swizzle() const
  92. {
  93. static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range");
  94. static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range");
  95. static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range");
  96. static_assert(SwizzleW <= 3, "SwizzleW template parameter out of range");
  97. return _mm256_shuffle_ps(mValue, mValue, _MM_SHUFFLE(SwizzleW, SwizzleZ, SwizzleY, SwizzleX));
  98. }
  99. Vec8 Vec8::Abs() const
  100. {
  101. #if defined(JPH_USE_AVX512)
  102. return _mm256_range_ps(mValue, mValue, 0b1000);
  103. #else
  104. return _mm256_max_ps(_mm256_sub_ps(_mm256_setzero_ps(), mValue), mValue);
  105. #endif
  106. }
  107. Vec4 Vec8::LowerVec4() const
  108. {
  109. return _mm256_castps256_ps128(mValue);
  110. }
  111. Vec4 Vec8::UpperVec4() const
  112. {
  113. return _mm256_extractf128_ps(mValue, 1);
  114. }
  115. float Vec8::ReduceMin() const
  116. {
  117. return Vec4::sMin(LowerVec4(), UpperVec4()).ReduceMin();
  118. }
  119. JPH_NAMESPACE_END