Pārlūkot izejas kodu

Using the virtual stack and doubling the stack memory.

David Piuva 1 gadu atpakaļ
vecāks
revīzija
935942e0cd

+ 3 - 2
Source/DFPSR/api/fileAPI.cpp

@@ -43,6 +43,7 @@
 #include <fstream>
 #include <cstdlib>
 #include "bufferAPI.h"
+#include "../base/virtualStack.h"
 
 namespace dsr {
 
@@ -775,7 +776,7 @@ DsrProcess process_execute(const ReadableString& programPath, List<String> argum
 		// Count arguments.
 		int argc = arguments.length() + 1;
 		// Allocate an array of pointers for each argument and a null terminator.
-		const NativeChar *argv[argc + 1]; // TODO: Implement without VLA.
+		VirtualStackAllocation<const NativeChar *> argv(argc + 1);
 		// Fill the array with pointers to the native strings.
 		int64_t startOffset = 0;
 		int currentArg = 0;
@@ -788,7 +789,7 @@ DsrProcess process_execute(const ReadableString& programPath, List<String> argum
 		}
 		argv[currentArg] = nullptr;
 		pid_t pid = 0;
-		if (posix_spawn(&pid, nativePath, nullptr, nullptr, (char* const*)argv, environ) == 0) {
+		if (posix_spawn(&pid, nativePath, nullptr, nullptr, (char**)argv.getUnsafe(), environ) == 0) {
 			return std::make_shared<DsrProcessImpl>(pid); // Success
 		} else {
 			return DsrProcess(); // Failure

+ 2 - 1
Source/DFPSR/api/modelAPI.cpp

@@ -28,6 +28,7 @@
 #include "drawAPI.h"
 #include "../render/model/Model.h"
 #include <limits>
+#include "../base/virtualStack.h"
 
 #define MUST_EXIST(OBJECT, METHOD) if (OBJECT.get() == nullptr) { throwError("The " #OBJECT " handle was null in " #METHOD "\n"); }
 
@@ -469,7 +470,7 @@ struct RendererImpl {
 	// Occlusion test for whole model bounds.
 	// Returns false if the convex hull of the corners has a chance to be seen from the camera.
 	bool isHullOccluded(ProjectedPoint* outputHullCorners, const FVector3D* inputHullCorners, int cornerCount, const Transform3D &modelToWorldTransform, const Camera &camera) {
-		FVector3D cameraPoints[cornerCount];
+		VirtualStackAllocation<FVector3D> cameraPoints(cornerCount);
 		for (int p = 0; p < cornerCount; p++) {
 			cameraPoints[p] = camera.worldToCamera(modelToWorldTransform.transformPoint(inputHullCorners[p]));
 			outputHullCorners[p] = camera.cameraToScreen(cameraPoints[p]);

+ 9 - 4
Source/DFPSR/base/threading.cpp

@@ -22,6 +22,7 @@
 //    distribution.
 
 #include "threading.h"
+#include "virtualStack.h"
 
 // Requires -pthread for linking
 #include <future>
@@ -61,8 +62,8 @@ void threadedWorkFromArray(std::function<void()>* jobs, int jobCount) {
 				// Multi-threaded work loop
 				int workerCount = std::min((int)std::thread::hardware_concurrency() - 1, jobCount); // All used threads
 				int helperCount = workerCount - 1; // Excluding the main thread
-				std::function<void()> workers[workerCount];
-				std::future<void> helpers[helperCount];
+				VirtualStackAllocation<std::function<void()>> workers(workerCount);
+				VirtualStackAllocation<std::future<void>> helpers(helperCount);
 				for (int w = 0; w < workerCount; w++) {
 					workers[w] = [jobs, jobCount]() {
 						while (true) {
@@ -95,6 +96,10 @@ void threadedWorkFromArray(std::function<void()>* jobs, int jobCount) {
 	#endif
 }
 
+void threadedWorkFromArray(SafePointer<std::function<void()>> jobs, int jobCount) { // SafePointer overload: takes the job array as a SafePointer plus the number of jobs and forwards both unchanged.
+	threadedWorkFromArray(jobs.getUnsafe(), jobCount); // Passes the result of getUnsafe() on; presumably this is the raw pointer that selects the std::function<void()>* overload — confirm against SafePointer.
+}
+
 void threadedWorkFromList(List<std::function<void()>> jobs) {
 	threadedWorkFromArray(&jobs[0], jobs.length());
 	jobs.clear();
@@ -111,7 +116,7 @@ void threadedSplit(int startIndex, int stopIndex, std::function<void(int startIn
 		task(startIndex, stopIndex);
 	} else {
 		// Use multiple threads
-		std::function<void()> jobs[jobCount];
+		VirtualStackAllocation<std::function<void()>> jobs(jobCount);
 		int givenRow = startIndex;
 		for (int s = 0; s < jobCount; s++) {
 			int remainingJobs = jobCount - s;
@@ -142,7 +147,7 @@ void threadedSplit(const IRect& bound, std::function<void(const IRect& bound)> t
 		task(bound);
 	} else {
 		// Use multiple threads
-		std::function<void()> jobs[jobCount];
+		VirtualStackAllocation<std::function<void()>> jobs(jobCount);
 		int givenRow = bound.top();
 		for (int s = 0; s < jobCount; s++) {
 			int remainingJobs = jobCount - s;

+ 1 - 0
Source/DFPSR/base/threading.h

@@ -31,6 +31,7 @@
 namespace dsr {
 
 // Executes every function in the array of jobs from jobs[0] to jobs[jobCount - 1].
+void threadedWorkFromArray(SafePointer<std::function<void()>> jobs, int jobCount);
 void threadedWorkFromArray(std::function<void()>* jobs, int jobCount);
 
 // Executes every function in the list of jobs.

+ 2 - 2
Source/DFPSR/base/virtualStack.cpp

@@ -34,7 +34,7 @@ namespace dsr {
 	};
 
 	// How many bytes are allocated directly in thread-local memory.
-	static const size_t VIRTUAL_STACK_SIZE = 131072;
+	static const size_t VIRTUAL_STACK_SIZE = 262144;
 	// How many bytes are reserved for the head.
 	static const size_t ALLOCATION_HEAD_SIZE = memory_getPaddedSize<StackAllocationHeader>();
 	
@@ -91,7 +91,7 @@ namespace dsr {
 		// Check that we did not run out of memory.
 		if (virtualStack.top < virtualStack.data) {
 			// TODO: Expand automatically using heap memory instead of crashing.
-			throwError(U"Ran out of stack memory to allocate!\n");
+			throwError(U"Ran out of virtual stack memory to allocate when trying to allocate ", paddedSize, U" bytes!\n");
 			virtualStack.top = oldTop;
 			return nullptr;
 		} else {

+ 5 - 4
Source/DFPSR/render/model/Model.cpp

@@ -28,6 +28,7 @@
 #include "../../api/imageAPI.h"
 #include "../../image/ImageRgbaU8.h"
 #include "../../image/ImageF32.h"
+#include "../../base/virtualStack.h"
 
 using namespace dsr;
 
@@ -189,12 +190,12 @@ void ModelImpl::render(CommandQueue *commandQueue, ImageRgbaU8& targetImage, Ima
 	if (camera.isBoxSeen(this->minBound, this->maxBound, modelToWorldTransform)) {
 		// Transform and project all vertices
 		int positionCount = positionBuffer.length();
-		ProjectedPoint projected[positionCount]; // TODO: Only use stack memory with VLA when the number of points is resonable
+		VirtualStackAllocation<ProjectedPoint> projected(positionCount);
 		for (int vert = 0; vert < positionCount; vert++) {
 			projected[vert] = camera.worldToScreen(modelToWorldTransform.transformPoint(positionBuffer[vert]));
 		}
 		for (int partIndex = 0; partIndex < this->partBuffer.length(); partIndex++) {
-			this->partBuffer[partIndex].render(commandQueue, targetImage, depthBuffer, modelToWorldTransform, camera, this->filter, projected);
+			this->partBuffer[partIndex].render(commandQueue, targetImage, depthBuffer, modelToWorldTransform, camera, this->filter, projected.getUnsafe());
 		}
 	}
 }
@@ -203,12 +204,12 @@ void ModelImpl::renderDepth(ImageF32& depthBuffer, const Transform3D &modelToWor
 	if (camera.isBoxSeen(this->minBound, this->maxBound, modelToWorldTransform)) {
 		// Transform and project all vertices
 		int positionCount = positionBuffer.length();
-		ProjectedPoint projected[positionCount]; // TODO: Only use stack memory with VLA when the number of points is resonable
+		VirtualStackAllocation<ProjectedPoint> projected(positionCount);
 		for (int vert = 0; vert < positionCount; vert++) {
 			projected[vert] = camera.worldToScreen(modelToWorldTransform.transformPoint(positionBuffer[vert]));
 		}
 		for (int partIndex = 0; partIndex < this->partBuffer.length(); partIndex++) {
-			this->partBuffer[partIndex].renderDepth(depthBuffer, modelToWorldTransform, camera, projected);
+			this->partBuffer[partIndex].renderDepth(depthBuffer, modelToWorldTransform, camera, projected.getUnsafe());
 		}
 	}
 }

+ 2 - 2
Source/DFPSR/render/renderCore.cpp

@@ -173,7 +173,7 @@ public:
 Visibility dsr::getTriangleVisibility(const ITriangle2D &triangle, const Camera &camera, bool clipFrustum) {
 	static const int cornerCount = 3;
 	int planeCount = camera.getFrustumPlaneCount(clipFrustum);
-	bool outside[cornerCount * planeCount];
+	VirtualStackAllocation<bool> outside(cornerCount * planeCount);
 	// Check which corners are outside of the different planes
 	int offset = 0;
 	for (int c = 0; c < cornerCount; c++) {
@@ -438,7 +438,7 @@ void CommandQueue::execute(const IRect &clipBound, int jobCount) const {
 			}
 		}
 	} else {
-		std::function<void()> jobs[jobCount];
+		VirtualStackAllocation<std::function<void()>> jobs(jobCount);
 		int y1 = clipBound.top();
 		for (int j = 0; j < jobCount; j++) {
 			int y2 = clipBound.top() + ((clipBound.bottom() * (j + 1)) / jobCount);

+ 1 - 1
Source/tools/builder/buildProject.bat

@@ -27,7 +27,7 @@ set BUILDER_EXECUTABLE=%BUILDER_FOLDER%builder.exe
 echo BUILDER_EXECUTABLE = %BUILDER_EXECUTABLE%
 set DFPSR_LIBRARY=%BUILDER_FOLDER%..\..\DFPSR
 echo DFPSR_LIBRARY = %DFPSR_LIBRARY%
-set BUILDER_SOURCE=%BUILDER_FOLDER%\code\main.cpp %BUILDER_FOLDER%\code\Machine.cpp %BUILDER_FOLDER%\code\generator.cpp %BUILDER_FOLDER%\code\analyzer.cpp %BUILDER_FOLDER%\code\expression.cpp %DFPSR_LIBRARY%\collection\collections.cpp %DFPSR_LIBRARY%\api\fileAPI.cpp %DFPSR_LIBRARY%\api\bufferAPI.cpp %DFPSR_LIBRARY%\api\stringAPI.cpp %DFPSR_LIBRARY%\api\timeAPI.cpp %DFPSR_LIBRARY%\base\SafePointer.cpp
+set BUILDER_SOURCE=%BUILDER_FOLDER%\code\main.cpp %BUILDER_FOLDER%\code\Machine.cpp %BUILDER_FOLDER%\code\generator.cpp %BUILDER_FOLDER%\code\analyzer.cpp %BUILDER_FOLDER%\code\expression.cpp %DFPSR_LIBRARY%\collection\collections.cpp %DFPSR_LIBRARY%\api\fileAPI.cpp %DFPSR_LIBRARY%\api\bufferAPI.cpp %DFPSR_LIBRARY%\api\stringAPI.cpp %DFPSR_LIBRARY%\api\timeAPI.cpp %DFPSR_LIBRARY%\base\SafePointer.cpp %DFPSR_LIBRARY%\base\virtualStack.cpp
 echo BUILDER_SOURCE = %BUILDER_SOURCE%
 
 echo Change CPP_COMPILER_FOLDER and CPP_COMPILER_PATH in %BUILDER_FOLDER%\buildProject.bat if you are not using %CPP_COMPILER_PATH% as your compiler.

+ 1 - 1
Source/tools/builder/buildProject.sh

@@ -41,7 +41,7 @@ if [ -e "${BUILDER_EXECUTABLE}" ]; then
 else
 	echo "Building the Builder build system for first time use."
 	LIBRARY_PATH="$(realpath ${BUILDER_FOLDER}/../../DFPSR)"
-	SOURCE_CODE="${BUILDER_FOLDER}/code/main.cpp ${BUILDER_FOLDER}/code/Machine.cpp ${BUILDER_FOLDER}/code/generator.cpp ${BUILDER_FOLDER}/code/analyzer.cpp ${BUILDER_FOLDER}/code/expression.cpp ${LIBRARY_PATH}/collection/collections.cpp ${LIBRARY_PATH}/api/fileAPI.cpp ${LIBRARY_PATH}/api/bufferAPI.cpp ${LIBRARY_PATH}/api/stringAPI.cpp ${LIBRARY_PATH}/api/timeAPI.cpp ${LIBRARY_PATH}/base/SafePointer.cpp"
+	SOURCE_CODE="${BUILDER_FOLDER}/code/main.cpp ${BUILDER_FOLDER}/code/Machine.cpp ${BUILDER_FOLDER}/code/generator.cpp ${BUILDER_FOLDER}/code/analyzer.cpp ${BUILDER_FOLDER}/code/expression.cpp ${LIBRARY_PATH}/collection/collections.cpp ${LIBRARY_PATH}/api/fileAPI.cpp ${LIBRARY_PATH}/api/bufferAPI.cpp ${LIBRARY_PATH}/api/stringAPI.cpp ${LIBRARY_PATH}/api/timeAPI.cpp ${LIBRARY_PATH}/base/SafePointer.cpp ${LIBRARY_PATH}/base/virtualStack.cpp"
 	"${CPP_COMPILER_PATH}" -o "${BUILDER_EXECUTABLE}" ${SOURCE_CODE} -std=c++14 -lstdc++
 	if [ $? -eq 0 ]; then
 		echo "Completed building the Builder build system."