mixer_neon.c 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. #include "config.h"
  2. #ifdef HAVE_ARM_NEON_H
  3. #include <arm_neon.h>
  4. #endif
  5. #include "AL/al.h"
  6. #include "AL/alc.h"
  7. #include "alMain.h"
  8. #include "alu.h"
  9. static inline void ApplyCoeffsStep(const ALuint IrSize,
  10. ALfloat (*restrict Coeffs)[2],
  11. const ALfloat (*restrict CoeffStep)[2])
  12. {
  13. float32x4_t coeffs, deltas;
  14. ALuint c;
  15. for(c = 0;c < IrSize;c += 2)
  16. {
  17. coeffs = vld1q_f32(&Coeffs[c][0]);
  18. deltas = vld1q_f32(&CoeffStep[c][0]);
  19. coeffs = vaddq_f32(coeffs, deltas);
  20. vst1q_f32(&Coeffs[c][0], coeffs);
  21. }
  22. }
  23. static inline void ApplyCoeffs(ALuint Offset, ALfloat (*restrict Values)[2],
  24. const ALuint IrSize,
  25. ALfloat (*restrict Coeffs)[2],
  26. ALfloat left, ALfloat right)
  27. {
  28. ALuint c;
  29. float32x4_t leftright4;
  30. {
  31. float32x2_t leftright2 = vdup_n_f32(0.0);
  32. leftright2 = vset_lane_f32(left, leftright2, 0);
  33. leftright2 = vset_lane_f32(right, leftright2, 1);
  34. leftright4 = vcombine_f32(leftright2, leftright2);
  35. }
  36. for(c = 0;c < IrSize;c += 2)
  37. {
  38. const ALuint o0 = (Offset+c)&HRIR_MASK;
  39. const ALuint o1 = (o0+1)&HRIR_MASK;
  40. float32x4_t vals = vcombine_f32(vld1_f32((float32_t*)&Values[o0][0]),
  41. vld1_f32((float32_t*)&Values[o1][0]));
  42. float32x4_t coefs = vld1q_f32((float32_t*)&Coeffs[c][0]);
  43. vals = vmlaq_f32(vals, coefs, leftright4);
  44. vst1_f32((float32_t*)&Values[o0][0], vget_low_f32(vals));
  45. vst1_f32((float32_t*)&Values[o1][0], vget_high_f32(vals));
  46. }
  47. }
  48. #define SUFFIX Neon
  49. #include "mixer_inc.c"
  50. #undef SUFFIX
  51. void MixDirect_Neon(const DirectParams *params, const ALfloat *restrict data, ALuint srcchan,
  52. ALuint OutPos, ALuint SamplesToDo, ALuint BufferSize)
  53. {
  54. ALfloat (*restrict OutBuffer)[BUFFERSIZE] = params->OutBuffer;
  55. ALfloat *restrict ClickRemoval = params->ClickRemoval;
  56. ALfloat *restrict PendingClicks = params->PendingClicks;
  57. ALfloat DrySend;
  58. float32x4_t gain;
  59. ALuint pos;
  60. ALuint c;
  61. for(c = 0;c < MaxChannels;c++)
  62. {
  63. DrySend = params->Gains[srcchan][c];
  64. if(!(DrySend > GAIN_SILENCE_THRESHOLD))
  65. continue;
  66. if(OutPos == 0)
  67. ClickRemoval[c] -= data[0]*DrySend;
  68. gain = vdupq_n_f32(DrySend);
  69. for(pos = 0;BufferSize-pos > 3;pos += 4)
  70. {
  71. const float32x4_t val4 = vld1q_f32(&data[pos]);
  72. float32x4_t dry4 = vld1q_f32(&OutBuffer[c][OutPos+pos]);
  73. dry4 = vaddq_f32(dry4, vmulq_f32(val4, gain));
  74. vst1q_f32(&OutBuffer[c][OutPos+pos], dry4);
  75. }
  76. for(;pos < BufferSize;pos++)
  77. OutBuffer[c][OutPos+pos] += data[pos]*DrySend;
  78. if(OutPos+pos == SamplesToDo)
  79. PendingClicks[c] += data[pos]*DrySend;
  80. }
  81. }
  82. void MixSend_Neon(const SendParams *params, const ALfloat *restrict data,
  83. ALuint OutPos, ALuint SamplesToDo, ALuint BufferSize)
  84. {
  85. ALfloat (*restrict OutBuffer)[BUFFERSIZE] = params->OutBuffer;
  86. ALfloat *restrict ClickRemoval = params->ClickRemoval;
  87. ALfloat *restrict PendingClicks = params->PendingClicks;
  88. ALfloat WetGain;
  89. float32x4_t gain;
  90. ALuint pos;
  91. WetGain = params->Gain;
  92. if(!(WetGain > GAIN_SILENCE_THRESHOLD))
  93. return;
  94. if(OutPos == 0)
  95. ClickRemoval[0] -= data[0] * WetGain;
  96. gain = vdupq_n_f32(WetGain);
  97. for(pos = 0;BufferSize-pos > 3;pos += 4)
  98. {
  99. const float32x4_t val4 = vld1q_f32(&data[pos]);
  100. float32x4_t wet4 = vld1q_f32(&OutBuffer[0][OutPos+pos]);
  101. wet4 = vaddq_f32(wet4, vmulq_f32(val4, gain));
  102. vst1q_f32(&OutBuffer[0][OutPos+pos], wet4);
  103. }
  104. for(;pos < BufferSize;pos++)
  105. OutBuffer[0][OutPos+pos] += data[pos] * WetGain;
  106. if(OutPos+pos == SamplesToDo)
  107. PendingClicks[0] += data[pos] * WetGain;
  108. }