Browse Source

Trying a fixed size virtual stack in thread local memory.

David Piuva 1 year ago
parent
commit
d6981f34c8

+ 72 - 0
Source/DFPSR/base/virtualStack.cpp

@@ -0,0 +1,72 @@
+// zlib open source license
+//
+// Copyright (c) 2024 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include "virtualStack.h"
+
+namespace dsr {
+	// Capacity in bytes of the virtual stack buffer that is stored directly in thread local memory.
+	static constexpr size_t DSR_VIRTUAL_STACK_SIZE = 131072;
+
+	// TODO: Allow expanding using recycled heap memory when running out of stack space.
+	// TODO: Align the first allocation in address space from unaligned memory.
+	//       The easiest way would be to allocate memory in reverse order from the end.
+	//       * Subtract the amount of allocated memory from the previous uint8_t pointer.
+	//       * Use the pre-defined alignment mask that has zeroes for the rounded bits in the address.
+	//       * Place the allocation in the aligned location.
+	//       * Store an allocation size integer in front of the allocation to allow freeing.
+	//         The integer stores the total size of the size integer, allocation with padded size and alignment padding.
+	//         Arrays are allowed to access the padded size of all elements to allow optimizations.
+	//       * Store the new stack location pointing at the integer, with a fixed offset from the topmost allocation.
+	//       This would also make it easier to unwind the allocations when freeing memory.
+	//       * Read the integer pointed to and add it to the pointer.
+
+	struct StackMemory { // State of one fixed-capacity virtual stack: the byte buffer and what is needed to unwind it.
+		uint8_t data[DSR_VIRTUAL_STACK_SIZE]; // Raw storage that virtualStack_push returns pointers into.
+		uint64_t stackLocation = 0; // Byte offset into data where the unused space begins.
+		// TODO: Try to store stack locations between the allocations to avoid heap allocations.
+		List<uint64_t> allocationEnds; // The stackLocation saved before each push, restored again by virtualStack_pop.
+	};
+	thread_local StackMemory virtualStack; // Declared thread_local, so every thread works on its own instance.
+
+	// Allocates paddedSize zero-initialized bytes from the calling thread's virtual stack.
+	//   paddedSize is the number of bytes to reserve.
+	//   alignment is what the start offset is rounded up to a multiple of.
+	//     The rounding is applied to the offset within the stack's buffer, not to the final address.
+	//   Returns a pointer to the first byte of the allocation.
+	//   If the request does not fit, throwError is called, and nullptr is returned in case throwError returns.
+	//     A failed request leaves the stack exactly as it was before the call.
+	uint8_t *virtualStack_push(uint64_t paddedSize, uint64_t alignment) {
+		const uint64_t oldStackLocation = virtualStack.stackLocation;
+		// Align the start location by rounding up.
+		const uint64_t startOffset = roundUp(oldStackLocation, alignment);
+		// Compare against the remaining space instead of computing startOffset + paddedSize first,
+		//   so that a huge paddedSize cannot wrap around and slip past the bound check.
+		if (startOffset > DSR_VIRTUAL_STACK_SIZE || paddedSize > DSR_VIRTUAL_STACK_SIZE - startOffset) {
+			// Nothing has been modified yet, so a failed push leaves no state to undo.
+			throwError(U"Ran out of stack memory!\n"); // TODO: Expand automatically using more memory blocks instead of crashing.
+			return nullptr;
+		}
+		// Remember where the stack was before this allocation, so that virtualStack_pop can restore it.
+		//   This is done before moving the stack location, so that the two can not get out of sync if push fails.
+		virtualStack.allocationEnds.push(oldStackLocation);
+		virtualStack.stackLocation = startOffset + paddedSize;
+		uint8_t *allocation = virtualStack.data + startOffset;
+		// Clear the allocation for determinism.
+		std::memset(allocation, 0, paddedSize);
+		return allocation;
+	}
+
+	// Releases the most recent allocation by moving the stack back to the location saved by the matching push.
+	void virtualStack_pop() {
+		auto &savedLocations = virtualStack.allocationEnds;
+		virtualStack.stackLocation = savedLocations.last();
+		savedLocations.pop();
+	}
+
+}

+ 78 - 0
Source/DFPSR/base/virtualStack.h

@@ -0,0 +1,78 @@
+// zlib open source license
+//
+// Copyright (c) 2024 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include "SafePointer.h"
+#include "../api/stringAPI.h"
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <mutex>
+#include <thread>
+
+namespace dsr {
+	// Rounds size up to the nearest multiple of alignment, returning size itself when it is already a multiple.
+	//   alignment must not be zero, but does not have to be a power of two.
+	//   The result wraps around if the rounded value does not fit in 64 bits.
+	// TODO: Make overloaded versions for signed and unsigned integer types.
+	constexpr uint64_t roundUp(uint64_t size, uint64_t alignment) {
+		// Taking the remainder of size directly avoids the wrap-around of (size - 1) when size is zero,
+		//   which only gave the correct result for alignments that are powers of two.
+		return (size % alignment == 0) ? size : size + (alignment - (size % alignment));
+	}
+
+	// Gives the size in bytes of one element of type T, rounded up to a whole multiple of the alignment of T.
+	template <typename T>
+	constexpr uint64_t memory_getPaddedElementSize() {
+		return roundUp(static_cast<uint64_t>(sizeof(T)), static_cast<uint64_t>(alignof(T)));
+	}
+
+	// Allocate zero-initialized memory in the virtual stack owned by the current thread.
+	//   paddedSize is the number of bytes to allocate including all elements and internal padding.
+	//   alignment is what the start should be divisible by in bytes, which must be a power of two. NOTE(review): virtualStack.cpp rounds the offset within the stack's buffer rather than the address, so the address is only aligned if the buffer itself is; confirm for alignments above the buffer's own.
+	uint8_t *virtualStack_push(uint64_t paddedSize, uint64_t alignment);
+
+	// Allocates elementCount elements of type T in the current thread's virtual stack, aligned by the alignment of T.
+	//   This is a simpler way to get the correct alignment than calling the untyped overload directly.
+	//   name is passed along to the returned SafePointer.
+	// TODO: Create another function for manual alignment exceeding the type's alignment using another template argument.
+	// TODO: Let the address offset be negated and start with the allocation size going down to zero,
+	//       so that rounding up addresses can be done by simply masking the least significant bits.
+	template <typename T>
+	SafePointer<T> virtualStack_push(uint64_t elementCount, const char *name) {
+		// Each element occupies its padded size, so the total is the padded size times the element count.
+		const uint64_t totalBytes = memory_getPaddedElementSize<T>() * elementCount;
+		// Let the untyped overload do the allocation, using the alignment of T.
+		uint8_t *const rawBytes = virtualStack_push(totalBytes, static_cast<uint64_t>(alignof(T)));
+		// Wrap the allocated bytes in a safe pointer together with the name and the size in bytes.
+		return SafePointer<T>(name, reinterpret_cast<T*>(rawBytes), static_cast<intptr_t>(totalBytes));
+	}
+
+	// Free the last allocation from the virtual stack, by restoring the stack location that was saved by the most recent push.
+	//   Must be called from the same thread that pushed, because virtual stacks are local to their threads.
+	void virtualStack_pop();
+
+	// Allocate this array on the stack to automatically free the memory when the scope ends.
+	//   Replaces VLA or alloca.
+	//   The constructor pushes elementCount elements of type T to the current thread's virtual stack,
+	//     and the destructor pops the most recent allocation from the virtual stack.
+	//   Because the destructor always pops the topmost allocation, instances must be destroyed in the reverse order of construction,
+	//     which is what happens automatically for local variables.
+	//   Copying is disabled, because each copy would pop the virtual stack one more time in its own destructor.
+	template <typename T>
+	struct VirtualStackAllocation {
+		SafePointer<T> data;
+		VirtualStackAllocation(uint64_t elementCount)
+		: data(virtualStack_push<T>(elementCount, "virtual stack allocation")) {}
+		// One push must be matched by exactly one pop, so there can only be one owner of the allocation.
+		VirtualStackAllocation(const VirtualStackAllocation &) = delete;
+		VirtualStackAllocation &operator=(const VirtualStackAllocation &) = delete;
+		~VirtualStackAllocation() {
+			virtualStack_pop();
+		}
+	};
+}

+ 5 - 3
Source/DFPSR/render/renderCore.cpp

@@ -24,6 +24,7 @@
 #include <cassert>
 #include <cassert>
 #include "renderCore.h"
 #include "renderCore.h"
 #include "../image/internal/imageInternal.h"
 #include "../image/internal/imageInternal.h"
+#include "../base/virtualStack.h"
 #include "shader/Shader.h"
 #include "shader/Shader.h"
 #include "shader/RgbaMultiply.h"
 #include "shader/RgbaMultiply.h"
 #include "constants.h"
 #include "constants.h"
@@ -205,10 +206,11 @@ void dsr::executeTriangleDrawing(const TriangleDrawCommand &command, const IRect
 	int32_t rowCount = command.triangle.getBufferSize(finalClipBound, alignX, alignY);
 	int32_t rowCount = command.triangle.getBufferSize(finalClipBound, alignX, alignY);
 	if (rowCount > 0) {
 	if (rowCount > 0) {
 		int startRow;
 		int startRow;
-		RowInterval rows[rowCount];
-		command.triangle.getShape(startRow, rows, finalClipBound, alignX, alignY);
+		// TODO: Use SafePointer in shape functions.
+		VirtualStackAllocation<RowInterval> rows(rowCount);
+		command.triangle.getShape(startRow, rows.data.getUnsafe(), finalClipBound, alignX, alignY);
 		Projection projection = command.triangle.getProjection(command.subB, command.subC, command.perspective);
 		Projection projection = command.triangle.getProjection(command.subB, command.subC, command.perspective);
-		command.processTriangle(command.triangleInput, command.targetImage, command.depthBuffer, command.triangle, projection, RowShape(startRow, rowCount, rows), command.filter);
+		command.processTriangle(command.triangleInput, command.targetImage, command.depthBuffer, command.triangle, projection, RowShape(startRow, rowCount, rows.data.getUnsafe()), command.filter);
 		#ifdef SHOW_POST_CLIPPING_WIREFRAME
 		#ifdef SHOW_POST_CLIPPING_WIREFRAME
 			drawWireframe(command.targetImage, command.triangle);
 			drawWireframe(command.targetImage, command.triangle);
 		#endif
 		#endif