Browse Source

Partially vectorized the WinMM audio backend.

David Piuva 2 years ago
parent
commit
7c4bdda3c6
2 changed files with 39 additions and 18 deletions
  1. 0 1
      Source/soundManagers/AlsaSound.cpp
  2. 39 17
      Source/soundManagers/WinMMSound.cpp

+ 0 - 1
Source/soundManagers/AlsaSound.cpp

@@ -75,7 +75,6 @@ bool sound_streamToSpeakers(int channels, int sampleRate, std::function<bool(flo
 	allocateBuffers(totalSamples);
 	while (true) {
 		safeMemorySet(floatData, 0, totalSamples * sizeof(float));
-		//memset(floatBuffer, 0, totalSamples * sizeof(float));
 		bool keepRunning = soundOutput(floatData.getUnsafe(), samplesPerChannel);
 		// Convert to target format so that the sound can be played
 		for (uint32_t t = 0; t < samplesPerChannel * channels; t+=8) {

+ 39 - 17
Source/soundManagers/WinMMSound.cpp

@@ -11,13 +11,19 @@ using namespace dsr;
 static const int samplesPerChannel = 2048;
 
 static int bufferElements = 0;
-static int16_t *outputBuffer[2] = {nullptr, nullptr};
-static float *floatBuffer = nullptr;
+static Buffer outputBuffer, floatBuffer;
+static SafePointer<int16_t> outputData[2];
+static SafePointer<float> floatData;
+
 static void allocateBuffers(int neededElements) {
-	// TODO: Use aligned memory with Buffer
-	outputBuffer[0] = (int16_t *)calloc(roundUp(neededElements, 8), sizeof(int16_t));
-	outputBuffer[1] = (int16_t *)calloc(roundUp(neededElements, 8), sizeof(int16_t));
-	floatBuffer = (float *)calloc(roundUp(neededElements, 8), sizeof(float));
+	int64_t roundedElements = roundUp(neededElements, 8); // Using the same padding for both the float and 16-bit buffers allows loading two whole SIMD vectors from the float input and writing a single output vector.
+	int64_t outputSize = roundedElements * sizeof(int16_t); // Size in bytes of one 16-bit output buffer.
+	outputBuffer = buffer_create(outputSize * 2); // A single allocation sized for both output buffers.
+	floatBuffer = buffer_create(roundedElements * sizeof(float));
+	SafePointer<int16_t> allOutputData = buffer_getSafeData<int16_t>(outputBuffer, "Output data");
+	outputData[0] = allOutputData.slice("Output data 0", 0, outputSize); // NOTE(review): outputSize is in bytes; presumably slice takes a byte offset and byte size — confirm against SafePointer::slice.
+	outputData[1] = allOutputData.slice("Output data 1", outputSize, outputSize); // Second half of the same allocation, under the same assumption about units.
+	floatData = buffer_getSafeData<float>(floatBuffer, "Output data"); // NOTE(review): reuses the name "Output data" for the float buffer — confirm whether a distinct name was intended.
 	bufferElements = neededElements; // Stores the requested element count, not the rounded-up one.
 }
 
@@ -41,10 +47,6 @@ static void terminateSound() {
 		CloseHandle(bufferEndEvent);
 		bufferEndEvent = 0;
 	}
-	for (int b = 0; b < 2; b++) {
-		if (outputBuffer[b]) { free(outputBuffer[b]); }
-	}
-	if (floatBuffer) { free(floatBuffer); }
 }
 
 bool sound_streamToSpeakers(int channels, int sampleRate, std::function<bool(float*, int)> soundOutput) {
@@ -75,7 +77,7 @@ bool sound_streamToSpeakers(int channels, int sampleRate, std::function<bool(flo
 	for (int b = 0; b < 2; b++) {
 		ZeroMemory(&header[b], sizeof(WAVEHDR));
 		header[b].dwBufferLength = totalSamples * sizeof(int16_t);
-		header[b].lpData = (LPSTR)(outputBuffer[b]);
+		header[b].lpData = (LPSTR)(outputData[b].getUnsafe());
 		if (waveOutPrepareHeader(waveOutput, &header[b], sizeof(WAVEHDR)) != MMSYSERR_NOERROR) {
 			terminateSound();
 			throwError(U"Failed to prepare buffer for streaming!");
@@ -86,12 +88,32 @@ bool sound_streamToSpeakers(int channels, int sampleRate, std::function<bool(flo
 		for (int b = 0; b < 2; b++) {
 			if ((header[b].dwFlags & WHDR_INQUEUE) == 0) {
 				// When one of the buffers is done playing, generate new sound and write more data to the output.
-				memset(floatBuffer, 0, totalSamples * sizeof(float));
-				// TODO: Use 128-bit aligned memory
-				running = soundOutput(floatBuffer, samplesPerChannel);
-				// TODO: Use SIMD
-				for (int i = 0; i < totalSamples; i++) {
-					outputBuffer[b][i] = sound_convertF32ToI16(floatBuffer[i]);
+				safeMemorySet(floatData, 0, totalSamples * sizeof(float));
+				running = soundOutput(floatData.getUnsafe(), samplesPerChannel);
+				//for (int i = 0; i < totalSamples; i++) {
+				//	outputData[b][i] = sound_convertF32ToI16(floatBuffer[i]);
+				//}
+				SafePointer<int16_t> target = outputData[b];
+				// Convert to target format so that the sound can be played
+				for (uint32_t t = 0; t < totalSamples; t+=8) {
+					// SIMD vectorized sound conversion with scaling and clamping to signed 16-bit integers.
+					F32x4 lowerFloats = F32x4::readAligned(floatData + t, "sound_streamToSpeakers: Reading lower floats");
+					F32x4 upperFloats = F32x4::readAligned(floatData + t + 4, "sound_streamToSpeakers: Reading upper floats");
+					I32x4 lowerInts = truncateToI32((lowerFloats * 32767.0f).clamp(-32768.0f, 32767.0f));
+					I32x4 upperInts = truncateToI32((upperFloats * 32767.0f).clamp(-32768.0f, 32767.0f));
+					// TODO: Create I16x8 SIMD vectors for processing sound as 16-bit integers?
+					//       Or just move unzip into simd.h with a fallback solution and remove simdExtra.h.
+					//       Or just implement reading and writing of 16-bit signed integers using multiple SIMD registers or smaller memory regions.
+					IVector4D lower = lowerInts.get();
+					IVector4D upper = upperInts.get();
+					target[t+0] = (int16_t)lower.x;
+					target[t+1] = (int16_t)lower.y;
+					target[t+2] = (int16_t)lower.z;
+					target[t+3] = (int16_t)lower.w;
+					target[t+4] = (int16_t)upper.x;
+					target[t+5] = (int16_t)upper.y;
+					target[t+6] = (int16_t)upper.z;
+					target[t+7] = (int16_t)upper.w;
 				}
 				if (waveOutWrite(waveOutput, &header[b], sizeof(WAVEHDR)) != MMSYSERR_NOERROR) {
 					terminateSound(); throwError(U"Failed to write wave output!");