2 tahun lalu · b8a5c98b79
--- a/Source/DFPSR/base/SafePointer.h
+++ b/Source/DFPSR/base/SafePointer.h
@@ -109,7 +109,9 @@ public:
 
				 	inline bool isNotNull() const {

			
 
				 		return this->data != nullptr;

			
 
				 	}

			
 
				-	// Get a new safe pointer from data to data + size

			
 
				+	// Get a new safe pointer from a sub-set of data

			
 
				+	//  byteOffset is which byte in the source will be index zero in the new pointer

			
 
				+	//  size is the new pointer's size, which may not exceed the remaining available space

			
 
				 	inline SafePointer<T> slice(const char* name, int byteOffset, int size) {

			
 
				 		T *newStart = (T*)(((uint8_t*)(this->data)) + (intptr_t)byteOffset);

			
 
				 		#ifdef SAFE_POINTER_CHECKS

			
--- a/Source/soundManagers/AlsaSound.cpp
+++ b/Source/soundManagers/AlsaSound.cpp
@@ -11,12 +11,16 @@ using namespace dsr;
 
				 

			
 
				 snd_pcm_t *pcm = nullptr;

			
 
				 static int bufferElements = 0;

			
 
				-static int16_t *outputBuffer = nullptr;

			
 
				-static float *floatBuffer = nullptr;

			
 
				+static Buffer outputBuffer, floatBuffer;

			
 
				+static SafePointer<int16_t> outputData;

			
 
				+static SafePointer<float> floatData;

			
 
				+

			
 
				 static void allocateBuffers(int neededElements) {

			
 
				-	// TODO: Use aligned memory with Buffer

			
 
				-	outputBuffer = (int16_t *)calloc(roundUp(neededElements, 8), sizeof(int16_t));

			
 
				-	floatBuffer = (float *)calloc(roundUp(neededElements, 8), sizeof(float));

			
 
				+	int64_t roundedElements = roundUp(neededElements, 8); // Using the same padding for both allow loading two whole SIMD vectors for large input and writing a single output vector.

			
 
				+	outputBuffer = buffer_create(roundedElements * sizeof(int16_t));

			
 
				+	floatBuffer = buffer_create(roundedElements * sizeof(float));

			
 
				+	outputData = buffer_getSafeData<int16_t>(outputBuffer, "Output data");

			
 
				+	floatData = buffer_getSafeData<float>(floatBuffer, "Output data");

			
 
				 	bufferElements = neededElements;

			
 
				 }

			
 
				 

			
@@ -27,8 +31,6 @@ static void terminateSound() {
 
				 		snd_pcm_close(pcm);

			
 
				 		pcm = nullptr;

			
 
				 	}

			
 
				-	if (outputBuffer) { free(outputBuffer); }

			
 
				-	if (floatBuffer) { free(floatBuffer); }

			
 
				 }

			
 
				 

			
 
				 bool sound_streamToSpeakers(int channels, int sampleRate, std::function<bool(float*, int)> soundOutput) {

			
@@ -72,14 +74,41 @@ bool sound_streamToSpeakers(int channels, int sampleRate, std::function<bool(flo
 
				 	int totalSamples = samplesPerChannel * channels;

			
 
				 	allocateBuffers(totalSamples);

			
 
				 	while (true) {

			
 
				-		memset(floatBuffer, 0, totalSamples * sizeof(float));

			
 
				-		bool keepRunning = soundOutput(floatBuffer, samplesPerChannel);

			
 
				+		safeMemorySet(floatData, 0, totalSamples * sizeof(float));

			
 
				+		//memset(floatBuffer, 0, totalSamples * sizeof(float));

			
 
				+		bool keepRunning = soundOutput(floatData.getUnsafe(), samplesPerChannel);

			
 
				 		// Convert to target format so that the sound can be played

			
 
				-		// TODO: Use SIMD

			
 
				-		for (uint32_t t = 0; t < samplesPerChannel * channels; t++) {

			
 
				-			outputBuffer[t] = sound_convertF32ToI16(floatBuffer[t]);

			
 
				+		for (uint32_t t = 0; t < samplesPerChannel * channels; t+=8) {

			
 
				+			// SIMD vectorized sound conversion with scaling and clamping to signed 16-bit integers.

			
 
				+			F32x4 lowerFloats = F32x4::readAligned(floatData + t, "sound_streamToSpeakers: Reading lower floats");

			
 
				+			F32x4 upperFloats = F32x4::readAligned(floatData + t + 4, "sound_streamToSpeakers: Reading upper floats");

			
 
				+			I32x4 lowerInts = truncateToI32((lowerFloats * 32767.0f).clamp(-32768.0f, 32767.0f));

			
 
				+			I32x4 upperInts = truncateToI32((upperFloats * 32767.0f).clamp(-32768.0f, 32767.0f));

			
 
				+			// TODO: Create I16x8 SIMD vectors for processing sound as 16-bit integers?

			
 
				+			//       Or just move unzip into simd.h with a fallback solution and remove simdExtra.h.

			
 
				+			//       Or just implement reading and writing of 16-bit signed integers using multiple SIMD registers or smaller memory regions.

			
 
				+			IVector4D lower = lowerInts.get();

			
 
				+			IVector4D upper = upperInts.get();

			
 
				+			outputData[t+0] = (int16_t)lower.x;

			
 
				+			outputData[t+1] = (int16_t)lower.y;

			
 
				+			outputData[t+2] = (int16_t)lower.z;

			
 
				+			outputData[t+3] = (int16_t)lower.w;

			
 
				+			outputData[t+4] = (int16_t)upper.x;

			
 
				+			outputData[t+5] = (int16_t)upper.y;

			
 
				+			outputData[t+6] = (int16_t)upper.z;

			
 
				+			outputData[t+7] = (int16_t)upper.w;

			
 
				+			/* Reference implementation without SIMD

			
 
				+			outputData[t+0] = sound_convertF32ToI16(floatData[t+0]);

			
 
				+			outputData[t+1] = sound_convertF32ToI16(floatData[t+1]);

			
 
				+			outputData[t+2] = sound_convertF32ToI16(floatData[t+2]);

			
 
				+			outputData[t+3] = sound_convertF32ToI16(floatData[t+3]);

			
 
				+			outputData[t+4] = sound_convertF32ToI16(floatData[t+4]);

			
 
				+			outputData[t+5] = sound_convertF32ToI16(floatData[t+5]);

			
 
				+			outputData[t+6] = sound_convertF32ToI16(floatData[t+6]);

			
 
				+			outputData[t+7] = sound_convertF32ToI16(floatData[t+7]);

			
 
				+			*/

			
 
				 		}

			
 
				-		errorCode = snd_pcm_writei(pcm, outputBuffer, samplesPerChannel);

			
 
				+		errorCode = snd_pcm_writei(pcm, outputData.getUnsafe(), samplesPerChannel);

			
 
				 		if (errorCode == -EPIPE) {

			
 
				 			// Came too late! Not enough written samples to play.

			
 
				 			snd_pcm_prepare(pcm);

			
--- a/Source/soundManagers/soundManagers.h
+++ b/Source/soundManagers/soundManagers.h
@@ -16,6 +16,8 @@ inline int sound_convertF32ToI16(float input) {
 
				 	return result;

			
 
				 }

			
 
				 

			
 
				+// TODO: The float array should be padded to at least 16 bytes for 128-bit SIMD.

			
 
				+

			
 
				 // Call this function from a separate thread in a sound engine to initialize the sound system, call back with sound output requests and terminate when the callback returns false.

			
 
				 // The float array given to soundOutput should be filled with samples from 0 to totalSamples - 1.

			
 
				 // Channels from the same point in time are packed together without any padding in between.