
Implemented a real tokenizer for Builder.

David Piuva 3 years ago
parent
commit
8efa0c1079

+ 155 - 82
Source/tools/builder/code/Machine.cpp

@@ -9,6 +9,59 @@ using namespace dsr;
 #define INTEGER_EXPR(FIRST_TOKEN, LAST_TOKEN) expression_interpretAsInteger(STRING_EXPR(FIRST_TOKEN, LAST_TOKEN))
 #define PATH_EXPR(FIRST_TOKEN, LAST_TOKEN) file_getTheoreticalAbsolutePath(STRING_EXPR(FIRST_TOKEN, LAST_TOKEN), fromPath)
 
+static bool isUnique(const List<String> &list) {
+	for (int i = 0; i < list.length() - 1; i++) {
+		for (int j = i + 1; j < list.length(); j++) {
+			if (string_match(list[i], list[j])) {
+				return false;
+			}
+		}
+	}
+	return true;
+}
+
+static bool isUnique(const List<Flag> &list) {
+	for (int i = 0; i < list.length() - 1; i++) {
+		for (int j = i + 1; j < list.length(); j++) {
+			if (string_match(list[i].key, list[j].key)) {
+				return false;
+			}
+		}
+	}
+	return true;
+}
+
+void printSettings(const Machine &settings) {
+	printText(U"    Project name: ", settings.projectName, U"\n");
+	for (int64_t i = 0; i < settings.compilerFlags.length(); i++) {
+		printText(U"    Compiler flag ", settings.compilerFlags[i], U"\n");
+	}
+	for (int64_t i = 0; i < settings.linkerFlags.length(); i++) {
+		printText(U"    Linker flag ", settings.linkerFlags[i], U"\n");
+	}
+	for (int64_t i = 0; i < settings.variables.length(); i++) {
+		printText(U"    Variable ", settings.variables[i].key, U" = ", settings.variables[i].value, U"\n");
+	}
+}
+
+void validateSettings(const Machine &settings, const dsr::ReadableString &eventDescription) {
+	if (!isUnique(settings.compilerFlags)) {
+		printText(U"Duplicate compiler flags:\n");
+		printSettings(settings);
+		throwError(U"Found duplicate compiler flags ", eventDescription, U"!\n");
+	}
+	if (!isUnique(settings.linkerFlags)) {
+		printText(U"Duplicate linker flags:\n");
+		printSettings(settings);
+		throwError(U"Found duplicate linker flags ", eventDescription, U"!\n");
+	}
+	if (!isUnique(settings.variables)) {
+		printText(U"Duplicate variables:\n");
+		printSettings(settings);
+		throwError(U"Found duplicate variables ", eventDescription, U"!\n");
+	}
+}
+
 int64_t findFlag(const Machine &target, const dsr::ReadableString &key) {
 	for (int64_t f = 0; f < target.variables.length(); f++) {
 		if (string_caseInsensitiveMatch(key, target.variables[f].key)) {
@@ -48,21 +101,19 @@ void assignValue(Machine &target, const dsr::ReadableString &key, const dsr::Rea
 	}
 }
 
-static void flushToken(List<String> &targetTokens, String &currentToken) {
-	if (string_length(currentToken) > 0) {
-		targetTokens.push(currentToken);
-		currentToken = U"";
+static String evaluateExpression(Machine &target, const List<String> &tokens, int64_t startTokenIndex, int64_t endTokenIndex) {
+	for (int64_t t = startTokenIndex; t <= endTokenIndex; t++) {
+		if (string_match(tokens[t], U"\n")) {
+			throwError(U"Found a linebreak inside of an expression!");
+		}
 	}
-}
-
-static String evaluateExpression(Machine &target, List<String> &tokens, int64_t startTokenIndex, int64_t endTokenIndex) {
 	return expression_evaluate(tokens, startTokenIndex, endTokenIndex, [&target](ReadableString identifier) -> String {
 		return getFlag(target, identifier, U"");
 	});
 }
 
 // Copy inherited variables from parent to child.
-static void inheritMachine(Machine &child, const Machine &parent) {
+void inheritMachine(Machine &child, const Machine &parent) {
 	for (int64_t v = 0; v < parent.variables.length(); v++) {
 		String key = string_upperCase(parent.variables[v].key);
 		if (parent.variables[v].inherited) {
@@ -71,30 +122,48 @@ static void inheritMachine(Machine &child, const Machine &parent) {
 	}
 }
 
-static void interpretLine(SessionContext &output, Machine &target, List<String> &tokens, const dsr::ReadableString &fromPath) {
-	if (tokens.length() > 0) {
+static bool validIdentifier(const dsr::ReadableString &identifier) {
+	DsrChar first = identifier[0];
+	if (!((U'a' <= first && first <= U'z') || (U'A' <= first && first <= U'Z'))) {
+		return false;
+	}
+	for (int i = 1; i < string_length(identifier); i++) {
+		DsrChar current = identifier[i];
+		if (!((U'a' <= current && current <= U'z') || (U'A' <= current && current <= U'Z') || (U'0' <= current && current <= U'9'))) {
+			return false;
+		}
+	}
+	return true;
+}
+
+static void interpretLine(Machine &target, const List<String> &tokens, int64_t startTokenIndex, int64_t endTokenIndex, const dsr::ReadableString &fromPath) {
+	// Automatically clamp to safe bounds.
+	if (startTokenIndex < 0) startTokenIndex = 0;
+	if (endTokenIndex >= tokens.length()) endTokenIndex = tokens.length() - 1;
+	int64_t tokenCount = endTokenIndex - startTokenIndex + 1;
+	if (tokenCount > 0) {
 		bool activeLine = target.activeStackDepth >= target.currentStackDepth;
 		/*
 		printText(activeLine ? U"interpret:" : U"ignore:");
-		for (int64_t t = 0; t < tokens.length(); t++) {
+		for (int64_t t = startTokenIndex; t <= endTokenIndex; t++) {
 			printText(U" [", tokens[t], U"]");
 		}
 		printText(U"\n");
 		*/
-		ReadableString first = expression_getToken(tokens, 0);
-		ReadableString second = expression_getToken(tokens, 1);
+		ReadableString first = expression_getToken(tokens, startTokenIndex, U"");
+		ReadableString second = expression_getToken(tokens, startTokenIndex + 1, U"");
 		if (activeLine) {
 			// TODO: Implement elseif and else cases using a list as a virtual stack,
 			//       to remember at which layer the else cases have already been consumed by a true evaluation.
 			// TODO: Remember at which depth the script entered, so that importing something can't leave the rest inside of a dangling if or else by accident.
 			if (string_caseInsensitiveMatch(first, U"import")) {
 				// Get path relative to importing script's path.
-				String importPath = PATH_EXPR(1, tokens.length() - 1);
-				evaluateScript(output, target, importPath);
-				if (tokens.length() > 2) { printText(U"Unused tokens after import!\n");}
+				String importPath = PATH_EXPR(startTokenIndex + 1, endTokenIndex);
+				evaluateScript(target, importPath);
+				validateSettings(target, U"in target after importing a project head\n");
 			} else if (string_caseInsensitiveMatch(first, U"if")) {
 				// Being if statement
-				bool active = INTEGER_EXPR(1, tokens.length() - 1);
+				bool active = INTEGER_EXPR(startTokenIndex + 1, endTokenIndex);
 				if (active) {
 					target.activeStackDepth++;
 				}
@@ -105,44 +174,73 @@ static void interpretLine(SessionContext &output, Machine &target, List<String>
 				target.activeStackDepth = target.currentStackDepth;
 			} else if (string_caseInsensitiveMatch(first, U"crawl")) {
 				// The right hand expression is evaluated into a path relative to the build script and used as the root for searching for source code.
-				target.crawlOrigins.push(PATH_EXPR(1, tokens.length() - 1));
+				target.crawlOrigins.push(PATH_EXPR(startTokenIndex + 1, endTokenIndex));
+				validateSettings(target, U"in target after listing a crawl origin\n");
 			} else if (string_caseInsensitiveMatch(first, U"build")) {
 				// Build one or more other projects from a project file or folder path, as dependencies.
 				//   Having the same external project built twice during the same session is not allowed.
 				// Evaluate arguments recursively, but let the analyzer do the work.
-				Machine childSettings;
-				inheritMachine(childSettings, target);
 				String projectPath = file_getTheoreticalAbsolutePath(expression_unwrapIfNeeded(second), fromPath); // Use the second token as the folder path.
-				argumentsToSettings(childSettings, tokens, 2); // Send all tokens after the second token as input arguments to buildProjects.
+				// The arguments may be for a whole folder of projects, so each project still needs to clone its own settings.
+				Machine sharedInputFlags(file_getPathlessName(projectPath));
+				validateSettings(target, U"in the parent about to build a child project (build in interpretLine)");
+				inheritMachine(sharedInputFlags, target);
+				validateSettings(sharedInputFlags, U"in the parent after inheriting settings for a build child (build in interpretLine)");
+				validateSettings(sharedInputFlags, U"in the child after inheriting settings as a build child (build in interpretLine)");
+				argumentsToSettings(sharedInputFlags, tokens, startTokenIndex + 2, endTokenIndex); // Send all tokens after the second token as input arguments to buildProjects.
+				validateSettings(sharedInputFlags, U"in the child after parsing arguments (build in interpretLine)");
 				printText("Building ", second, " from ", fromPath, " which is ", projectPath, "\n");
 				target.otherProjectPaths.push(projectPath);
-				target.otherProjectSettings.push(childSettings);
+				target.otherProjectSettings.push(sharedInputFlags);
+				validateSettings(target, U"in target after listing a child project\n");
 			} else if (string_caseInsensitiveMatch(first, U"link")) {
-				// Only the path name itself is needed, so any redundant -l prefixes will be stripped away.
-				String libraryName = STRING_EXPR(1, tokens.length() - 1);
+				// Only the library name itself is needed, because the -l prefix can be added automatically.
+				String libraryName = STRING_EXPR(startTokenIndex + 1, endTokenIndex);
 				if (libraryName[0] == U'-' && (libraryName[1] == U'l' || libraryName[1] == U'L')) {
-					libraryName = string_after(libraryName, 2);
+					target.linkerFlags.push(libraryName);
+				} else {
+					target.linkerFlags.push(string_combine(U"-l", libraryName));
 				}
-				target.linkerFlags.push(libraryName);
+				validateSettings(target, U"in target after adding a linker flag\n");
+			} else if (string_caseInsensitiveMatch(first, U"linkerflag")) {
+				// For passing a linker flag through as written, without adding any prefix.
+				target.linkerFlags.push(STRING_EXPR(startTokenIndex + 1, endTokenIndex));
+				validateSettings(target, U"in target after adding a linker flag\n");
 			} else if (string_caseInsensitiveMatch(first, U"compilerflag")) {
-				target.compilerFlags.push(STRING_EXPR(1, tokens.length() - 1));
+				target.compilerFlags.push(STRING_EXPR(startTokenIndex + 1, endTokenIndex));
+				validateSettings(target, U"in target after adding a compiler flag\n");
 			} else if (string_caseInsensitiveMatch(first, U"message")) {
 				// Print a message while evaluating the build script.
 				//   This is not done while actually compiling, so it will not know if compilation and linking worked or not.
-				printText(STRING_EXPR(1, tokens.length() - 1));
+				printText(STRING_EXPR(startTokenIndex + 1, endTokenIndex));
 			} else {
-				if (tokens.length() == 1) {
+				if (tokenCount == 1) {
 					// Mentioning an identifier without assigning anything will assign it to one as a boolean flag.
-					assignValue(target, first, U"1", false);
+					if (validIdentifier(first)) {
+						assignValue(target, first, U"1", false);
+					} else {
+						throwError(U"The token ", first, " is not a valid identifier for implicit assignment to one.\n");
+					}
+					validateSettings(target, U"in target after implicitly assigning a value to a variable\n");
 				} else if (string_match(second, U"=")) {
 					// TODO: Create in-place math and string operations with different types of assignments.
 					//       Maybe use a different syntax beginning with a keyword?
 					// TODO: Look for the assignment operator dynamically if references to collection elements are allowed as l-value expressions.
 					// Using an equality sign replaces any previous value of the variable.
-					assignValue(target, first, STRING_EXPR(2, tokens.length() - 1), false);
+					if (validIdentifier(first)) {
+						assignValue(target, first, STRING_EXPR(startTokenIndex + 2, endTokenIndex), false);
+					} else {
+						throwError(U"The token ", first, " is not a valid identifier for assignments.\n");
+					}
+					validateSettings(target, U"in target after explicitly assigning a value to a variable\n");
 				} else {
-					// TODO: Give better error messages.
-					printText(U"  Ignored unrecognized statement!\n");
+					String errorMessage = U"Failed to parse statement of tokens:";
+					for (int64_t t = startTokenIndex; t <= endTokenIndex; t++) {
+						string_append(errorMessage, U" ", string_mangleQuote(tokens[t]));
+					}
+					string_append(errorMessage, U"\n");
+					throwError(errorMessage);
 				}
 			}
 		} else {
@@ -153,66 +251,41 @@ static void interpretLine(SessionContext &output, Machine &target, List<String>
 			}
 		}
 	}
-	tokens.clear();
 }
 
-void evaluateScript(SessionContext &output, Machine &target, const ReadableString &scriptPath) {
+void evaluateScript(Machine &target, const ReadableString &scriptPath) {
+	//printText(U"Evaluating script at ", scriptPath, U"\n");
+	//printSettings(target);
 	if (file_getEntryType(scriptPath) != EntryType::File) {
 		printText(U"The script path ", scriptPath, U" does not exist!\n");
 	}
-	String projectContent = string_load(scriptPath);
 	// Each new script being imported will have its own simulated current path for accessing files and such.
 	String projectFolderPath = file_getAbsoluteParentFolder(scriptPath);
-	String currentToken;
-	List<String> currentLine; // Keep it fast and simple by only remembering tokens for the current line.
-	bool quoted = false;
-	bool commented = false;
-	for (int64_t i = 0; i <= string_length(projectContent); i++) {
-		DsrChar c = projectContent[i];
-		// Treat end of file as a linebreak to simplify tokenization rules.
-		if (c == U'\0') c == U'\n';
-		// The null terminator does not really exist in projectContent,
-		//   but dsr::String returns a null character safely when requesting a character out of bound,
-		//   which allow interpreting the last line without duplicating code.
-		if (c == U'\n' || c == U'\0') {
-			// Comment removing everything else.
-			flushToken(currentLine, currentToken);
-			interpretLine(output, target, currentLine, projectFolderPath);
-			commented = false; // Automatically end comments at end of line.
-			quoted = false; // Automatically end quotes at end of line.
-		} else if (c == U'\"') {
-			quoted = !quoted;
-			string_appendChar(currentToken, c);
-		} else if (c == U'#') {
-			// Comment removing everything else until a new line comes.
-			flushToken(currentLine, currentToken);
-			interpretLine(output, target, currentLine, projectFolderPath);
-			commented = true;
-		} else if (!commented) {
-			if (quoted) {
-				// Insert character into quote.
-				string_appendChar(currentToken, c);
-			} else {
-				// TODO: Do the tokenization in the expression module to get the correct symbols.
-				if (c == U'(' || c == U')' || c == U'[' || c == U']' || c == U'{' || c == U'}' || c == U'=' || c == U'.' || c == U',' || c == U'|' || c == U'!' || c == U'&' || c == U'+' || c == U'-' || c == U'*' || c == U'/' || c == U'\\') {
-					// Atomic token of a single character
-					flushToken(currentLine, currentToken);
-					string_appendChar(currentToken, c);
-					flushToken(currentLine, currentToken);
-				} else if (c == U' ' || c == U'\t') {
-					// Whitespace
-					flushToken(currentLine, currentToken);
-				} else {
-					// Insert unquoted character into token.
-					string_appendChar(currentToken, c);
-				}
-			}
+	// Tokenize the document to handle string literals.
+	String projectContent = string_load(scriptPath);
+	List<String> tokens;
+	expression_tokenize(tokens, projectContent);
+	// Insert an extra linebreak at the end to avoid special cases for the last line.
+	tokens.push(U"\n");
+	// Segment tokens into logical lines and interpret one at a time.
+	int64_t startTokenIndex = 0;
+	for (int64_t t = 0; t < tokens.length(); t++) {
+		if (string_match(tokens[t], U"\n")) {
+			interpretLine(target, tokens, startTokenIndex, t - 1, projectFolderPath);
+			startTokenIndex = t + 1;
 		}
 	}
+	//printText(U"Evaluated script at ", scriptPath, U"\n");
+	//printSettings(target);
 }
 
-void argumentsToSettings(Machine &settings, const List<String> &arguments, int64_t firstArgument) {
-	for (int64_t a = firstArgument; a < arguments.length(); a++) {
+void argumentsToSettings(Machine &settings, const List<String> &arguments, int64_t firstArgument, int64_t lastArgument) {
+	//printText(U"argumentsToSettings:");
+	//for (int64_t a = firstArgument; a <= lastArgument; a++) {
+	//	printText(U" ", arguments[a]);
+	//}
+	//printText(U"\n");
+	for (int64_t a = firstArgument; a <= lastArgument; a++) {
 		String argument = arguments[a];
 		int64_t assignmentIndex = string_findFirst(argument, U'=');
 		if (assignmentIndex == -1) {

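The rewritten evaluateScript above tokenizes the whole script with expression_tokenize and then cuts the token stream at linebreak tokens. A minimal standalone sketch of that segmentation step, with std::string and std::vector standing in for the dsr containers and a hypothetical handleLine callback:

#include <cstddef>
#include <functional>
#include <string>
#include <vector>

// Walk a flat token list and hand each logical line (an inclusive token range)
// to a callback whenever a "\n" token is reached, mirroring the loop at the
// end of evaluateScript. Empty lines are skipped here; the real interpretLine
// ignores them after clamping the range.
static void forEachLogicalLine(const std::vector<std::string> &tokens,
                               const std::function<void(std::size_t, std::size_t)> &handleLine) {
	std::size_t start = 0;
	for (std::size_t t = 0; t < tokens.size(); t++) {
		if (tokens[t] == "\n") {
			if (t > start) handleLine(start, t - 1);
			start = t + 1;
		}
	}
	// evaluateScript pushes one extra "\n" token before this loop,
	// so the last line never needs a special case.
}
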
+ 6 - 2
Source/tools/builder/code/Machine.h

@@ -19,8 +19,12 @@ void assignValue(Machine &target, const dsr::ReadableString &key, const dsr::Rea
 
 // Modifies the flags in target, while listing source files to context, using the script in scriptPath.
 // Recursively including other scripts using the script's folder as the origin for relative paths.
-void evaluateScript(SessionContext &output, Machine &target, const ReadableString &scriptPath);
+void evaluateScript(Machine &target, const ReadableString &scriptPath);
 
-void argumentsToSettings(Machine &settings, const List<String> &arguments, int64_t firstArgument);
+void inheritMachine(Machine &child, const Machine &parent);
+void argumentsToSettings(Machine &settings, const List<String> &arguments, int64_t firstArgument, int64_t lastArgument);
+
+void printSettings(const Machine &settings);
+void validateSettings(const Machine &settings, const dsr::ReadableString &eventDescription);
 
 #endif

+ 32 - 20
Source/tools/builder/code/analyzer.cpp

@@ -4,7 +4,7 @@
 
 using namespace dsr;
 
-static Extension extensionFromString(const ReadableString& extensionName) {
+static Extension extensionFromString(ReadableString extensionName) {
 	String upperName = string_upperCase(string_removeOuterWhiteSpace(extensionName));
 	Extension result = Extension::Unknown;
 	if (string_match(upperName, U"H")) {
@@ -19,7 +19,7 @@ static Extension extensionFromString(const ReadableString& extensionName) {
 	return result;
 }
 
-static uint64_t checksum(const ReadableString& text) {
+static uint64_t checksum(ReadableString text) {
 	uint64_t a = 0x8C2A03D4;
 	uint64_t b = 0xF42B1583;
 	uint64_t c = 0xA6815E74;
@@ -48,7 +48,7 @@ static uint64_t checksum(const Buffer& buffer) {
 	return d;
 }
 
-static int64_t findDependency(ProjectContext &context, const ReadableString& findPath) {
+static int64_t findDependency(ProjectContext &context, ReadableString findPath) {
 	for (int64_t d = 0; d < context.dependencies.length(); d++) {
 		if (string_match(context.dependencies[d].path, findPath)) {
 			return d;
@@ -76,7 +76,7 @@ void resolveDependencies(ProjectContext &context) {
 	}
 }
 
-static String findSourceFile(const ReadableString& headerPath, bool acceptC, bool acceptCpp) {
+static String findSourceFile(ReadableString headerPath, bool acceptC, bool acceptCpp) {
 	if (file_hasExtension(headerPath)) {
 		ReadableString extensionlessPath = file_getExtensionless(headerPath);
 		String cPath = extensionlessPath + U".c";
@@ -97,7 +97,7 @@ static void flushToken(List<String> &target, String &currentToken) {
 	}
 }
 
-static void tokenize(List<String> &target, const ReadableString& line) {
+static void tokenize(List<String> &target, ReadableString line) {
 	String currentToken;
 	for (int64_t i = 0; i < string_length(line); i++) {
 		DsrChar c = line[i];
@@ -129,7 +129,7 @@ static void tokenize(List<String> &target, const ReadableString& line) {
 	List<Dependency> analysisCache;
 #endif
 
-void analyzeFile(Dependency &result, const ReadableString& absolutePath, Extension extension) {
+void analyzeFile(Dependency &result, ReadableString absolutePath, Extension extension) {
 	#ifdef CACHED_ANALYSIS
 		// Check if the file has already been analyzed.
 		for (int c = 0; c < analysisCache.length(); c++) {
@@ -183,7 +183,7 @@ void analyzeFile(Dependency &result, const ReadableString& absolutePath, Extensi
 	});
 }
 
-void analyzeFromFile(ProjectContext &context, const ReadableString& absolutePath) {
+void analyzeFromFile(ProjectContext &context, ReadableString absolutePath) {
 	if (findDependency(context, absolutePath) != -1) {
 		// Already analyzed the current entry. Abort to prevent duplicate dependencies.
 		return;
@@ -262,6 +262,7 @@ static int64_t findObject(SessionContext &source, uint64_t identityChecksum) {
 }
 
 void gatherBuildInstructions(SessionContext &output, ProjectContext &context, Machine &settings, ReadableString programPath) {
+	validateSettings(settings, string_combine(U"in settings at the beginning of gatherBuildInstructions, for ", programPath, U"\n"));
 	// The compiler is often a global alias, so the user must supply either an alias or an absolute path.
 	ReadableString compilerName = getFlag(settings, U"Compiler", U"g++"); // Assume g++ as the compiler if not specified.
 	ReadableString compileFrom = getFlag(settings, U"CompileFrom", U"");
@@ -300,6 +301,7 @@ void gatherBuildInstructions(SessionContext &output, ProjectContext &context, Ma
 	ReadableString optimizationLevel = getFlag(settings, U"Optimization", U"2");
 		printText(U"Building with optimization level ", optimizationLevel, U".\n");
 	settings.compilerFlags.push(string_combine(U"-O", optimizationLevel));
+	validateSettings(settings, string_combine(U"in settings after adding flags from settings in gatherBuildInstructions, for ", programPath, U"\n"));
 
 	// Convert lists of linker and compiler flags into strings.
 	// TODO: Give a warning if two contradictory flags are used, such as optimization levels and language versions.
@@ -308,11 +310,13 @@ void gatherBuildInstructions(SessionContext &output, ProjectContext &context, Ma
 	//       This would allow calling the compiler directly when given a folder path for temporary files instead of a script path.
 	String generatedCompilerFlags;
 	for (int64_t i = 0; i < settings.compilerFlags.length(); i++) {
+		printText(U"Build script gave compiler flag:", settings.compilerFlags[i], U"\n");
 		string_append(generatedCompilerFlags, " ", settings.compilerFlags[i]);
 	}
 	String linkerFlags;
 	for (int64_t i = 0; i < settings.linkerFlags.length(); i++) {
-		string_append(linkerFlags, " -l", settings.linkerFlags[i]);
+		printText(U"Build script gave linker flag:", settings.linkerFlags[i], U"\n");
+		string_append(linkerFlags, " ", settings.linkerFlags[i]);
 	}
 	printText(U"Generating build instructions for ", programPath, U" using settings:\n");
 	printText(U"  Compiler flags:", generatedCompilerFlags, U"\n");
@@ -355,11 +359,12 @@ void gatherBuildInstructions(SessionContext &output, ProjectContext &context, Ma
 		bool executeResult = getFlagAsInteger(settings, U"Supressed") == 0;
 		output.linkerSteps.pushConstruct(compilerName, compileFrom, programPath, settings.linkerFlags, sourceObjectIndices, executeResult);
 	} else {
-		printText(U"Filed to find any source code to compile when building ", programPath, U".\n");
+		printText(U"Failed to find any source code to compile when building ", programPath, U".\n");
 	}
+	validateSettings(settings, string_combine(U"in settings at the end of gatherBuildInstructions, for ", programPath, U"\n"));
 }
 
-static void crawlSource(ProjectContext &context, const dsr::ReadableString &absolutePath) {
+static void crawlSource(ProjectContext &context, ReadableString absolutePath) {
 	EntryType pathType = file_getEntryType(absolutePath);
 	if (pathType == EntryType::File) {
 		printText(U"Crawling for source from ", absolutePath, U".\n");
@@ -372,11 +377,14 @@ static void crawlSource(ProjectContext &context, const dsr::ReadableString &abso
 	}
 }
 
-void build(SessionContext &output, const ReadableString &projectPath, Machine &settings);
+void build(SessionContext &output, ReadableString projectPath, Machine &settings);
 
 static List<String> initializedProjects;
 // Using a project file path and input arguments.
-void buildProject(SessionContext &output, const ReadableString &projectFilePath, Machine settings) {
+void buildProject(SessionContext &output, ReadableString projectFilePath, Machine &sharedsettings) {
+	Machine settings(file_getPathlessName(projectFilePath));
+	inheritMachine(settings, sharedsettings);
+	validateSettings(settings, string_combine(U"in settings after inheriting settings from caller, for ", projectFilePath, U"\n"));
 	printText("Building project at ", projectFilePath, "\n");
 	// Check if this project has begun building previously during this session.
 	String absolutePath = file_getAbsolutePath(projectFilePath);
@@ -391,7 +399,8 @@ void buildProject(SessionContext &output, const ReadableString &projectFilePath,
 	// Evaluate compiler settings while searching for source code mentioned in the project and imported headers.
 	printText(U"Executing project file from ", projectFilePath, U".\n");
 	ProjectContext context;
-	evaluateScript(output, settings, projectFilePath);
+	evaluateScript(settings, projectFilePath);
+	validateSettings(settings, string_combine(U"in settings after evaluateScript in buildProject, for ", projectFilePath, U"\n"));
 	// Find out where things are located.
 	String projectPath = file_getAbsoluteParentFolder(projectFilePath);
 	// Get the project's name.
@@ -407,6 +416,7 @@ void buildProject(SessionContext &output, const ReadableString &projectFilePath,
 	for (int64_t b = 0; b < settings.otherProjectPaths.length(); b++) {
 		build(output, settings.otherProjectPaths[b], settings.otherProjectSettings[b]);
 	}
+	validateSettings(settings, string_combine(U"in settings after building other projects in buildProject, for ", projectFilePath, U"\n"));
 	// If the SkipIfBinaryExists flag is given, we will abort as soon as we have handled its external BuildProjects requests and confirmed that the application exists.
 	if (getFlagAsInteger(settings, U"SkipIfBinaryExists") && file_getEntryType(fullProgramPath) == EntryType::File) {
 		// SkipIfBinaryExists was active and the binary exists, so abort here to avoid redundant work.
@@ -417,30 +427,32 @@ void buildProject(SessionContext &output, const ReadableString &projectFilePath,
 	for (int64_t o = 0; o < settings.crawlOrigins.length(); o++) {
 		crawlSource(context, settings.crawlOrigins[o]);
 	}
+	validateSettings(settings, string_combine(U"in settings after crawling source in buildProject, for ", projectFilePath, U"\n"));
 	// Once we are done finding all source files, we can resolve the dependencies to create a graph connected by indices.
 	resolveDependencies(context);
 	if (getFlagAsInteger(settings, U"ListDependencies")) {
 		printDependencies(context);
 	}
 	gatherBuildInstructions(output, context, settings, fullProgramPath);
+	validateSettings(settings, string_combine(U"in settings after gathering build instructions in buildProject, for ", projectFilePath, U"\n"));
 }
 
 // Using a folder path and input arguments for all projects.
-void buildProjects(SessionContext &output, const ReadableString &projectFolderPath, Machine &settings) {
+void buildProjects(SessionContext &output, ReadableString projectFolderPath, Machine &sharedsettings) {
 	printText("Building all projects in ", projectFolderPath, "\n");
-	file_getFolderContent(projectFolderPath, [&settings, &output](const ReadableString& entryPath, const ReadableString& entryName, EntryType entryType) {
+	file_getFolderContent(projectFolderPath, [&sharedsettings, &output](const ReadableString& entryPath, const ReadableString& entryName, EntryType entryType) {
 		if (entryType == EntryType::Folder) {
-			buildProjects(output, entryPath, settings);
+			buildProjects(output, entryPath, sharedsettings);
 		} else if (entryType == EntryType::File) {
 			ReadableString extension = string_upperCase(file_getExtension(entryName));
 			if (string_match(extension, U"DSRPROJ")) {
-				buildProject(output, entryPath, settings);
+				buildProject(output, entryPath, sharedsettings);
 			}
 		}
 	});
 }
 
-void build(SessionContext &output, const ReadableString &projectPath, Machine &settings) {
+void build(SessionContext &output, ReadableString projectPath, Machine &sharedsettings) {
 	EntryType entryType = file_getEntryType(projectPath);
 	printText("Building anything at ", projectPath, " which is ", entryType, "\n");
 	if (entryType == EntryType::File) {
@@ -449,9 +461,9 @@ void build(SessionContext &output, const ReadableString &projectPath, Machine &s
 			printText(U"Can't use the Build keyword with a file that is not a project!\n");
 		} else {
 			// Build the given project
-			buildProject(output, projectPath, settings);
+			buildProject(output, projectPath, sharedsettings);
 		}
 	} else if (entryType == EntryType::Folder) {
-		buildProjects(output, projectPath, settings);
+		buildProjects(output, projectPath, sharedsettings);
 	}
 }

+ 4 - 4
Source/tools/builder/code/analyzer.h

@@ -8,7 +8,7 @@
 using namespace dsr;
 
 // Analyze using calls from the machine
-void analyzeFromFile(ProjectContext &context, const ReadableString& entryPath);
+void analyzeFromFile(ProjectContext &context, ReadableString entryPath);
 // Call from main when done analyzing source files
 void resolveDependencies(ProjectContext &context);
 
@@ -16,14 +16,14 @@ void resolveDependencies(ProjectContext &context);
 void printDependencies(ProjectContext &context);
 
 // Build anything in projectPath.
-void build(SessionContext &output, const ReadableString &projectPath, Machine &settings);
+void build(SessionContext &output, ReadableString projectPath, Machine &sharedsettings);
 
 // Build the project in projectFilePath.
 // Settings must be taken by value to prevent side-effects from spilling over between different scripts.
-void buildProject(SessionContext &output, const ReadableString &projectFilePath, Machine settings);
+void buildProject(SessionContext &output, ReadableString projectFilePath, Machine &sharedsettings);
 
 // Build all projects in projectFolderPath.
-void buildProjects(SessionContext &output, const ReadableString &projectFolderPath, Machine &settings);
+void buildProjects(SessionContext &output, ReadableString projectFolderPath, Machine &sharedsettings);
 
 void gatherBuildInstructions(SessionContext &output, ProjectContext &context, Machine &settings, ReadableString programPath);
 

+ 4 - 2
Source/tools/builder/code/builderTypes.h

@@ -16,6 +16,7 @@ struct Flag {
 };
 
 struct Machine {
+	String projectName;
 	List<Flag> variables;
 	List<String> compilerFlags;
 	List<String> linkerFlags;
@@ -25,6 +26,7 @@ struct Machine {
 	// When activeStackDepth < currentStackDepth, we are skipping false cases.
 	int64_t currentStackDepth = 0; // How many scopes we are inside of, from the root script including all the others.
 	int64_t activeStackDepth = 0;
+	Machine(const ReadableString &projectName) : projectName(projectName) {}
 };
 
 enum class Extension {
@@ -50,8 +52,8 @@ struct Connection {
 struct Dependency {
 	String path;
 	Extension extension;
-	uint64_t contentChecksum;
-	bool visited; // Used to avoid infinite loops while traversing dependencies.
+	uint64_t contentChecksum = 0;
+	bool visited = false; // Used to avoid infinite loops while traversing dependencies.
 	List<Connection> links; // Depends on having these linked after compiling.
 	List<Connection> includes; // Depends on having these included in pre-processing.
 	Dependency(const ReadableString& path, Extension extension)

+ 218 - 33
Source/tools/builder/code/expression.cpp

@@ -7,11 +7,11 @@ using namespace dsr;
 POIndex::POIndex() {}
 POIndex::POIndex(int16_t precedenceIndex, int16_t operationIndex) : precedenceIndex(precedenceIndex), operationIndex(operationIndex) {}
 
-Operation::Operation(int16_t symbolIndex, std::function<dsr::String(dsr::ReadableString, dsr::ReadableString)> action)
+Operation::Operation(int16_t symbolIndex, std::function<String(ReadableString, ReadableString)> action)
 : symbolIndex(symbolIndex), action(action) {
 }
 
-static int16_t addOperation(ExpressionSyntax &targetSyntax, int16_t symbolIndex, std::function<dsr::String(dsr::ReadableString, dsr::ReadableString)> action) {
+static int16_t addOperation(ExpressionSyntax &targetSyntax, int16_t symbolIndex, std::function<String(ReadableString, ReadableString)> action) {
 	int16_t precedenceIndex = targetSyntax.precedences.length() - 1;
 	int16_t operationIndex = targetSyntax.precedences.last().operations.length();
 	// TODO: Only allow assigning a symbol once per prefix, infix and postfix.
@@ -23,18 +23,18 @@ static int16_t addOperation(ExpressionSyntax &targetSyntax, int16_t symbolIndex,
 Precedence::Precedence(Notation notation, Associativity associativity)
 : notation(notation), associativity(associativity) {}
 
-Symbol::Symbol(const dsr::ReadableString &token, bool atomic, int32_t depthOffset)
-: token(token), atomic(atomic), depthOffset(depthOffset) {}
+Symbol::Symbol(const ReadableString &token, SymbolType symbolType, int32_t depthOffset, DsrChar endsWith, DsrChar escapes)
+: token(token), symbolType(symbolType), depthOffset(depthOffset), endsWith(endsWith), escapes(escapes) {}
 
-ReadableString expression_getToken(const List<String> &tokens, int64_t index) {
+ReadableString expression_getToken(const List<String> &tokens, int64_t index, const ReadableString &outside) {
 	if (0 <= index && index < tokens.length()) {
 		return tokens[index];
 	} else {
-		return U"";
+		return outside;
 	}
 }
 
-int64_t expression_interpretAsInteger(const dsr::ReadableString &value) {
+int64_t expression_interpretAsInteger(const ReadableString &value) {
 	if (string_length(value) == 0) {
 		return 0;
 	} else {
@@ -42,7 +42,7 @@ int64_t expression_interpretAsInteger(const dsr::ReadableString &value) {
 	}
 }
 
-String expression_unwrapIfNeeded(const dsr::ReadableString &text) {
+String expression_unwrapIfNeeded(const ReadableString &text) {
 	if (text[0] == U'\"') {
 		return string_unmangleQuote(text);
 	} else {
@@ -50,29 +50,41 @@ String expression_unwrapIfNeeded(const dsr::ReadableString &text) {
 	}
 }
 
-static int16_t createSymbol(ExpressionSyntax &targetSyntax, const dsr::ReadableString &token, bool atomic, int32_t depthOffset) {
-	targetSyntax.symbols.pushConstruct(token, atomic, depthOffset);
-	return targetSyntax.symbols.length() - 1;
+static int16_t createSymbol(ExpressionSyntax &targetSyntax, const ReadableString &token, SymbolType symbolType, int32_t depthOffset, DsrChar endsWith, DsrChar escapes) {
+	int64_t oldCount = targetSyntax.symbols.length();
+	if (oldCount >= 32767) throwError(U"Can't declare more than 32767 symbols in a syntax, because they are referenced using 16-bit integers!\n");
+	if (string_length(token) < 1) throwError(U"Can't declare a symbol without any characters, because the empty symbol exists between every character!\n");
+	if (symbolType != SymbolType::Keyword) {
+		if (targetSyntax.keywordCount > 0) throwError(U"Can't declare atomic symbols after the first keyword!\n");
+		if (targetSyntax.atomicCount > 0 && string_length(targetSyntax.symbols[oldCount - 1].token) < string_length(token)) {
+			throwError(U"Each following atomic token must be shorter than or equal in length to the previous atomic token, so that longest match first can be applied!\n");
+		}
+		targetSyntax.atomicCount++;
+	} else {
+		targetSyntax.keywordCount++;
+	}
+	targetSyntax.symbols.pushConstruct(token, symbolType, depthOffset, endsWith, escapes);
+	return (int16_t)oldCount;
 }
-#define CREATE_KEYWORD(TOKEN) createSymbol(*this, TOKEN, false, 0);
-#define CREATE_ATOMIC(TOKEN) createSymbol(*this, TOKEN, true, 0);
-#define CREATE_LEFT(TOKEN) createSymbol(*this, TOKEN, true, 1);
-#define CREATE_RIGHT(TOKEN) createSymbol(*this, TOKEN, true, -1);
 
+#define CREATE_KEYWORD(TOKEN) createSymbol(*this, TOKEN, SymbolType::Keyword, 0, -1, -1);
+#define CREATE_ATOMIC(TOKEN) createSymbol(*this, TOKEN, SymbolType::Atomic, 0, -1, -1);
+#define CREATE_LEFT(TOKEN) createSymbol(*this, TOKEN, SymbolType::Atomic, 1, -1, -1);
+#define CREATE_RIGHT(TOKEN) createSymbol(*this, TOKEN, SymbolType::Atomic, -1, -1, -1);
+#define CREATE_LITERAL(START_TOKEN, END_CHAR, ESCAPE_CHAR) createSymbol(*this, START_TOKEN, SymbolType::Atomic, 0, END_CHAR, ESCAPE_CHAR);
+#define CREATE_VOID(TOKEN) createSymbol(*this, TOKEN, SymbolType::Nothing, 0, -1, -1);
+#define CREATE_COMMENT(TOKEN, END_CHAR, ESCAPE_CHAR) createSymbol(*this, TOKEN, SymbolType::Nothing, 0, END_CHAR, ESCAPE_CHAR);
+
+// TODO: Create a way to enter symbols, keywords and operations from the outside to define custom syntax.
+//       * Using a file or list of symbols is the easiest way to enter them by sorting automatically, but makes it hard to connect the indices with anything useful.
+//       * Using multiple calls to an API makes it difficult to sort atomic symbols automatically based on length.
 ExpressionSyntax::ExpressionSyntax() {
 	// Symbols must be entered with longest match first, so that they can be used for tokenization.
-	// Keywords
-	int16_t token_string_match = CREATE_KEYWORD(U"matches");
-	int16_t token_logical_and = CREATE_KEYWORD(U"and");
-	int16_t token_logical_xor = CREATE_KEYWORD(U"xor");
-	int16_t token_logical_or = CREATE_KEYWORD(U"or");
 	// Length 2 symbols
-	int16_t token_lesserEqual = CREATE_ATOMIC(U"<=");
-	int16_t token_greaterEqual = CREATE_ATOMIC(U">=");
-	int16_t token_equal = CREATE_ATOMIC(U"==");
-	int16_t token_notEqual = CREATE_ATOMIC(U"!=");
-	int16_t token_leftArrow = CREATE_ATOMIC(U"<-");
-	int16_t token_rightArrow = CREATE_ATOMIC(U"->");
+	int16_t token_lesserEqual = CREATE_ATOMIC(U"<="); // Allowed because both < and = are infix operations, which can not end up on the left or right sides.
+	int16_t token_greaterEqual = CREATE_ATOMIC(U">="); // Allowed because both > and = are infix operations, which can not end up on the left or right sides.
+	int16_t token_equal = CREATE_ATOMIC(U"=="); // Allowed because = is an infix operation, which can not end up on the left or right sides.
+	int16_t token_notEqual = CREATE_ATOMIC(U"!="); // Allowed because ! is a prefix and would not end up on the left side of an assignment.
 	// Length 1 symbols
 	int16_t token_plus = CREATE_ATOMIC(U"+");
 	int16_t token_minus = CREATE_ATOMIC(U"-");
@@ -90,7 +102,24 @@ ExpressionSyntax::ExpressionSyntax() {
 	int16_t token_rightParen = CREATE_RIGHT(U")");
 	int16_t token_rightBracket = CREATE_RIGHT(U"]");
 	int16_t token_rightCurl = CREATE_RIGHT(U"}");
+	// Breaking
+	int16_t token_lineBreak = CREATE_ATOMIC(U"\n");
+	// Nothing
+	CREATE_VOID(U" ");
+	CREATE_VOID(U"\t");
+	CREATE_VOID(U"\v");
+	CREATE_VOID(U"\f");
+	CREATE_VOID(U"\r"); // \r\n becomes \n, \n\r becomes \n, and \n remains the same. Strings that only use \r to break lines need to be converted into \n linebreaks before use.
+	// Special tokens
+	int16_t token_comment = CREATE_COMMENT(U"#", U'\n', -1); // # will begin a comment until the end of the line, without any escape character.
+	int16_t token_doubleQuote = CREATE_LITERAL(U"\"", U'\"', U'\\'); // " will begin a literal until the next " not preceded by \.
+	// Keywords that are used in expressions
+	int16_t token_logical_and = CREATE_KEYWORD(U"and");
+	int16_t token_logical_or = CREATE_KEYWORD(U"or");
+	int16_t token_logical_xor = CREATE_KEYWORD(U"xor");
+	int16_t token_string_match = CREATE_KEYWORD(U"matches");
 	// Unidentified tokens are treated as identifiers or values with index -1.
+	// Unlisted keywords can still be tokenized and used for statements, just not used to perform operations in expressions.
 
 	// Each symbol can be tied once to prefix, once to infix and once to postfix.
 	this->precedences.pushConstruct(Notation::Prefix, Associativity::RightToLeft);
@@ -181,7 +210,6 @@ struct TokenInfo {
 	: depth(depth), symbolIndex(symbolIndex) {}
 };
 
-/*
 static String debugTokens(const List<TokenInfo> &info, int64_t infoStart, const List<String> &tokens, int64_t startTokenIndex, int64_t endTokenIndex) {
 	String result;
 	for (int64_t t = startTokenIndex; t <= endTokenIndex; t++) {
@@ -201,17 +229,26 @@ static String debugTokens(const List<TokenInfo> &info, int64_t infoStart, const
 	}
 	return result;
 }
-*/
 
+static String debugTokens(const List<String> &tokens) {
+	String result;
+	for (int64_t t = 0; t < tokens.length(); t++) {
+		if (t > 0) {
+			string_appendChar(result, U' ');
+		}
+		string_append(result, U"[", tokens[t], U"]");
+	}
+	return result;
+}
 static int16_t identifySymbol(const ReadableString &token, const ExpressionSyntax &syntax) {
 	for (int64_t s = 0; s < syntax.symbols.length(); s++) {
-		if (syntax.symbols[s].atomic) {
-			if (string_match(token, syntax.symbols[s].token)) {
+		if (syntax.symbols[s].symbolType == SymbolType::Keyword) {
+			// TODO: Make case insensitive optional for keywords.
+			if (string_caseInsensitiveMatch(token, syntax.symbols[s].token)) {
 				return s;
 			}
 		} else {
-			// TODO: Make case insensitive optional for keywords.
-			if (string_caseInsensitiveMatch(token, syntax.symbols[s].token)) {
+			if (string_match(token, syntax.symbols[s].token)) {
 				return s;
 			}
 		}
@@ -252,10 +289,11 @@ static bool validRightmostToken(int16_t symbolIndex, const ExpressionSyntax &syn
 static String expression_evaluate_helper(const List<TokenInfo> &info, int64_t infoStart, int64_t currentDepth, const List<String> &tokens, int64_t startTokenIndex, int64_t endTokenIndex, const ExpressionSyntax &syntax, std::function<String(ReadableString)> identifierEvaluation) {
 	//printText(U"Evaluate: ", debugTokens(info, infoStart, tokens, startTokenIndex, endTokenIndex), U"\n");
 	if (startTokenIndex == endTokenIndex) {
-		ReadableString first = expression_getToken(tokens, startTokenIndex);
+		ReadableString first = expression_getToken(tokens, startTokenIndex, U"");
 		if (string_isInteger(first)) {
 			return first;
 		} else if (first[0] == U'\"') {
+			// TODO: Let the caller unwrap strings.
 			return string_unmangleQuote(first);
 		} else {
 			// Identifier defaulting to empty.
@@ -335,6 +373,7 @@ static String expression_evaluate_helper(const List<TokenInfo> &info, int64_t in
 				opIndex += opStep;
 			}
 		}
+		// TODO: Let the caller create a pattern matching operation for these combinations using longest match first.
 		if (string_match(tokens[startTokenIndex], U"(") && string_match(tokens[endTokenIndex], U")")) {
 			//printText(U"Unwrapping ()\n");
 			return expression_evaluate_helper(info, infoStart, currentDepth + 1, tokens, startTokenIndex + 1, endTokenIndex - 1, syntax, identifierEvaluation);
@@ -373,6 +412,83 @@ String expression_evaluate(const List<String> &tokens, std::function<String(Read
 	return expression_evaluate(tokens, 0, tokens.length() - 1, defaultSyntax, identifierEvaluation);
 }
 
+// Atomic symbols are always case sensitive.
+static bool matchAtomicFrom(const ReadableString &sourceText, int64_t location, const ReadableString &symbol) {
+	for (int64_t l = 0; l < string_length(symbol); l++) {
+		if (sourceText[location + l] != symbol[l]) {
+			return false; // No match if a character deviated.
+		}
+	}
+	return true; // Match if we found no contradicting characters.
+}
+
+void expression_tokenize(List<String> &targetTokens, const ReadableString &sourceText, const ExpressionSyntax &syntax) {
+	//printText(U"expression_tokenize(", sourceText, U")\n");
+	int64_t i = 0;
+	int64_t keywordStart = 0;
+	int64_t sourceLength = string_length(sourceText);
+	while (i < sourceLength) {
+		bool foundSymbol = false;
+		for (int64_t s = 0; s < syntax.atomicCount; s++) {
+			String startToken = syntax.symbols[s].token;
+			if (matchAtomicFrom(sourceText, i, startToken)) {
+				if (keywordStart < i) targetTokens.push(string_exclusiveRange(sourceText, keywordStart, i)); // Consume any previous keyword.
+				int64_t startTokenLength = string_length(startToken);
+				int64_t startIndex = i;
+				int64_t exclusiveEndIndex = i + startTokenLength;
+				DsrChar endsWith = syntax.symbols[s].endsWith;
+				DsrChar escapes = syntax.symbols[s].escapes;
+				i += startTokenLength;
+				if (endsWith != -1) {
+					// Find the end if the token is continuing.
+					int64_t j;
+					for (j = i; j < sourceLength; j++) {
+						if (sourceText[j] == endsWith) {
+							// Include the last character before ending
+							j++;
+							break;
+						} else if (sourceText[j] == escapes) {
+							// Jump past the next character when an escape character is met.
+							j++;
+						}
+					}
+					exclusiveEndIndex = j;
+				}
+				if (syntax.symbols[s].symbolType != SymbolType::Nothing) {
+					// Include the token unless the symbol produces nothing (whitespace or comments).
+					targetTokens.push(string_exclusiveRange(sourceText, startIndex, exclusiveEndIndex));
+				}
+				i = exclusiveEndIndex;
+				// Done identifying the symbol.
+				foundSymbol = true;
+				keywordStart = i;
+				break;
+			}
+		}
+		if (!foundSymbol) {
+			i++;
+		}
+	}
+	if (keywordStart < i) targetTokens.push(string_exclusiveRange(sourceText, keywordStart, i)); // Consume any last keyword.
+	//printText(U"expression_tokenize finished with ", targetTokens.length(), " tokens\n");
+}
+
+void expression_tokenize(List<String> &targetTokens, const ReadableString &sourceText) {
+	expression_tokenize(targetTokens, sourceText, defaultSyntax);
+}
+
+List<String> expression_tokenize(const ReadableString &sourceText, const ExpressionSyntax &syntax) {
+	List<String> result;
+	expression_tokenize(result, sourceText, syntax);
+	return result;
+}
+
+List<String> expression_tokenize(const ReadableString &sourceText) {
+	return expression_tokenize(sourceText, defaultSyntax);
+}
+
 // -------- Regression tests --------
 
 template<typename TYPE>
@@ -400,6 +516,22 @@ static void expectResult(int64_t &errorCount, const ReadableString &result, cons
 	}
 }
 
+static void expectResult(int64_t &errorCount, const List<String> &result, const List<String> &expected) {
+	if (result.length() != expected.length()) {
+		printText(U"    - Failed\n    ", debugTokens(expected), U" with unexpected\n    ", debugTokens(result), U" of different token count\n");
+		errorCount++;
+		return;
+	}
+	for (int64_t t = 0; t < expected.length(); t++) {
+		if (!string_match(expected[t], result[t])) {
+			printText(U"    - Failed\n    ", debugTokens(expected), U" with unexpected\n    ", debugTokens(result), U"\n");
+			errorCount++;
+			return;
+		}
+	}
+	printText(U"* Passed ", debugTokens(expected), U"\n");
+}
+
 void expression_runRegressionTests() {
 	std::function<String(ReadableString)> context = [](ReadableString identifier) -> String {
 		if (string_caseInsensitiveMatch(identifier, U"x")) {
@@ -418,6 +550,20 @@ void expression_runRegressionTests() {
 		if (validRightmostToken(s, defaultSyntax)) printText(U"  Can be rightmost\n");
 	}*/
 	int64_t ec = 0;
+	// Tokenize
+	printText(U"Tokenize test\n");
+	expectResult(ec, expression_tokenize(U"0  "), combineTokens(U"0"));
+	expectResult(ec, expression_tokenize(U"first line\nsecond line"), combineTokens(U"first", U"line", U"\n", U"second", U"line"));
+	expectResult(ec, expression_tokenize(U"#A comment\nfirst line\nsecond line"), combineTokens(U"first", U"line", U"\n", U"second", U"line"));
+	expectResult(ec, expression_tokenize(U"5+(7-8)"), combineTokens(U"5", U"+", U"(", U"7", U"-", U"8", U")"));
+	expectResult(ec, expression_tokenize(U"identifier keyword"), combineTokens(U"identifier", U"keyword"));
+	expectResult(ec, expression_tokenize(U"identifier+keyword"), combineTokens(U"identifier", U"+", U"keyword"));
+	expectResult(ec, expression_tokenize(U"\t\tidentifier +  keyword "), combineTokens(U"identifier", U"+", U"keyword"));
+	expectResult(ec, expression_tokenize(U"\" My string content \" \t+ \"My other string\""), combineTokens(U"\" My string content \"", U"+", U"\"My other string\""));
+	expectResult(ec, expression_tokenize(U"\" My string content\n \" \t+ \"My other\n string\""), combineTokens(U"\" My string content\n \"", U"+", U"\"My other\n string\""));
+	expectResult(ec, expression_tokenize(U"  \" My string content\n \"   # Comment \n + \"My other\n string\"  "), combineTokens(U"\" My string content\n \"", U"+", U"\"My other\n string\""));
+	// Evaluate from tokens
+	printText(U"Evaluate from tokens test\n");
 	expectResult(ec, expression_evaluate(combineTokens(U""), context), U"<ERROR:Unresolved identifier>");
 	expectResult(ec, expression_evaluate(combineTokens(U"0"), context), U"0");
 	expectResult(ec, expression_evaluate(combineTokens(U"(", U"19", U")"), context), U"19");
@@ -456,5 +602,44 @@ void expression_runRegressionTests() {
 	expectResult(ec, expression_evaluate(combineTokens(U"-47", U"{"), context), U"<ERROR:Unbalanced expression depth>");
 	expectResult(ec, expression_evaluate(combineTokens(U"645", U"}"), context), U"<ERROR:Negative expression depth>");
 	expectResult(ec, expression_evaluate(combineTokens(U"5", U")", U"+", U"(", U"-7"), context), U"<ERROR:Negative expression depth>");
+	// Tokenize and evaluate
+	printText(U"Tokenize and evaluate test\n");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"0  "), context), U"0");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"(19)"), context), U"19");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"( 2+4)"), context), U"6");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"3"), context), U"3");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"- 5"), context), U"-5");
+	expectResult(ec, expression_evaluate(expression_tokenize(U" -32"), context), U"-32");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"3+ 6"), context), U"9");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"x\t"), context), U"5");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"doorCount"), context), U"48");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"temperature"), context), U"-18");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"nonsense"), context), U"<ERROR:Unresolved identifier>");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"6*2+4"), context), U"16");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"4+ 6*2"), context), U"16");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"4+(6* 2)"), context), U"16");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"(4+6)*2"), context), U"20");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"5+- 7"), context), U"-2");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"5+(-7)"), context), U"-2");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"5+(-7)"), context), U"-2");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"5+-7"), context), U"-2");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"5--7 "), context), U"12");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"5&-7"), context), U"5-7");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"(6+8)/(9-2)"), context), U"2");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"(6+8)*(9-2)"), context), U"98");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"	&-7"), context), U"<ERROR:Invalid expression>");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"(-   7"), context), U"<ERROR:Unbalanced expression depth>");
+	expectResult(ec, expression_evaluate(expression_tokenize(U")3"), context), U"<ERROR:Negative expression depth>");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"[8"), context), U"<ERROR:Unbalanced expression depth>");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"]  65"), context), U"<ERROR:Negative expression depth>");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"{12"), context), U"<ERROR:Unbalanced expression depth>");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"}0"), context), U"<ERROR:Negative expression depth>");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"12("), context), U"<ERROR:Unbalanced expression depth>");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"2)"), context), U"<ERROR:Negative expression depth>");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"-5["), context), U"<ERROR:Unbalanced expression depth>");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"6]"), context), U"<ERROR:Negative expression depth>");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"-47 {"), context), U"<ERROR:Unbalanced expression depth>");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"645}"), context), U"<ERROR:Negative expression depth>");
+	expectResult(ec, expression_evaluate(expression_tokenize(U"5)+(-7"), context), U"<ERROR:Negative expression depth>");
 	printText(U"Completed regression tests of expressions with ", ec, U" errors in total.\n");
 }
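
Since tokenization now lives in the expression module, evaluation can start directly from raw text. A minimal usage sketch based on the regression tests above; the identifier callback is a placeholder that maps every name to an empty string:

#include "expression.h"

using namespace dsr;

// Tokenize a small expression with extra whitespace and a trailing comment,
// then evaluate it. Whitespace and the comment are dropped by the tokenizer,
// leaving ( 4 + 6 ) * 2, which evaluates to U"20".
static String tokenizeAndEvaluateExample() {
	List<String> tokens = expression_tokenize(U"(4 + 6) * 2 # doubled sum");
	return expression_evaluate(tokens, [](ReadableString identifier) -> String {
		return U""; // Unresolved identifiers default to empty in this sketch.
	});
}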

+ 20 - 5
Source/tools/builder/code/expression.h

@@ -7,8 +7,6 @@
 // The expression module is a slow but generic system for evaluating expressions where all data is stored as strings for simplicity.
 //   No decimal numbers allowed, because it requires both human readable syntax and full determinism without precision loss.
 
-// TODO: Move tokenization from Machine.cpp to expression.cpp
-
 enum Notation {
 	Prefix = 0,
 	Infix = 1,
@@ -40,23 +38,33 @@ struct POIndex {
 	POIndex(int16_t precedenceIndex, int16_t operationIndex);
 };
 
+enum class SymbolType {
+	Nothing, // Whitespace does not produce any tokens, but counts as atomic.
+	Atomic, // Will separate even when directly connected to other tokens. These should not contain regular characters, to prevent cutting up identifiers.
+	Keyword // The remainder between atomic symbols and whitespace. Two keywords in a row need to be separated by something else.
+};
+
 struct Symbol {
 	dsr::String token;
-	bool atomic; // Atomic symbols can affect tokenization, the other keywords have to be separated by whitespace or other symbols.
+	SymbolType symbolType;
 	POIndex operations[3]; // prefix, infix and postfix
 	int32_t depthOffset;
-	Symbol(const dsr::ReadableString &token, bool atomic, int32_t depthOffset = 0);
+	dsr::DsrChar endsWith = -1; // If endsWith is not -1, the token will consume everything until the endsWith character not preceded by escapes is found.
+	dsr::DsrChar escapes = -1;
+	Symbol(const dsr::ReadableString &token, SymbolType symbolType, int32_t depthOffset, dsr::DsrChar endsWith, dsr::DsrChar escapes);
 };
 
 struct ExpressionSyntax {
 	dsr::List<Symbol> symbols;
 	dsr::List<Precedence> precedences;
+	int16_t atomicCount = 0;
+	int16_t keywordCount = 0;
 	ExpressionSyntax();
 };
 
 dsr::String expression_unwrapIfNeeded(const dsr::ReadableString &text);
 
-dsr::ReadableString expression_getToken(const dsr::List<dsr::String> &tokens, int64_t index);
+dsr::ReadableString expression_getToken(const dsr::List<dsr::String> &tokens, int64_t index, const dsr::ReadableString &outside);
 
 int64_t expression_interpretAsInteger(const dsr::ReadableString &value);
 
@@ -64,6 +72,13 @@ dsr::String expression_evaluate(const dsr::List<dsr::String> &tokens, std::funct
 dsr::String expression_evaluate(const dsr::List<dsr::String> &tokens, int64_t startTokenIndex, int64_t endTokenIndex, std::function<dsr::String(dsr::ReadableString)> identifierEvaluation);
 dsr::String expression_evaluate(const dsr::List<dsr::String> &tokens, int64_t startTokenIndex, int64_t endTokenIndex, const ExpressionSyntax &syntax, std::function<dsr::String(dsr::ReadableString)> identifierEvaluation);
 
+// Tokenizing into pure lists of strings is inefficient redundant work,
+//   but a lot more reusable than a list of custom types hard-coded for a specific parser.
+void expression_tokenize(dsr::List<dsr::String> &targetTokens, const dsr::ReadableString &sourceText, const ExpressionSyntax &syntax);
+void expression_tokenize(dsr::List<dsr::String> &targetTokens, const dsr::ReadableString &sourceText);
+dsr::List<dsr::String> expression_tokenize(const dsr::ReadableString &sourceText, const ExpressionSyntax &syntax);
+dsr::List<dsr::String> expression_tokenize(const dsr::ReadableString &sourceText);
+
 void expression_runRegressionTests();
 
 #endif

+ 4 - 3
Source/tools/builder/code/generator.cpp

@@ -40,6 +40,7 @@ static void setCompilationFolder(String &generatedCode, ScriptLanguage language,
 			}
 		}
 	}
+	currentPath = newPath;
 }
 
 void generateCompilationScript(SessionContext &input, const ReadableString &scriptPath) {
@@ -91,9 +92,9 @@ void generateCompilationScript(SessionContext &input, const ReadableString &scri
 		setCompilationFolder(generatedCode, language, currentPath, linkingStep->compileFrom);
 		String linkerFlags;
 		for (int64_t lib = 0; lib < linkingStep->linkerFlags.length(); lib++) {
-			String library = linkingStep->linkerFlags[lib];
-			string_append(linkerFlags, " -l", library);
-			printText(U"\t\t* ", library, U" library\n");
+			String linkerFlag = linkingStep->linkerFlags[lib];
+			string_append(linkerFlags, " ", linkerFlag);
+			printText(U"\t\t* ", linkerFlag, U" linker flag\n");
 		}
 		// Generate a list of object paths from indices.
 		String allObjects;

+ 10 - 2
Source/tools/builder/code/main.cpp

@@ -49,6 +49,12 @@ Project files:
 			Build "../myFolder" SkipIfBinaryExists
 		* Add file.cpp and other implementations found through includes into the list of source code to compile and link.
 			Crawl "folder/file.cpp"
+		* Add a linker flag as is for direct control
+			LinkerFlag -lLibrary
+		* Add a linker flag with automatic prefix for future proofing
+			Link Library
+		* Add a compiler flag as is
+			CompilerFlag -DMACRO
 	Systems:
 		* Linux
 			Set to non-zero on Linux or similar operating systems.
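
As a rough standalone sketch of how the Link keyword documented above differs from LinkerFlag (mirroring the branch in interpretLine; std::string is used for brevity and the helper name is made up):

#include <string>

// Link adds the -l prefix unless the argument already starts with -l or -L,
// while LinkerFlag passes its argument through unchanged.
static std::string linkFlagFor(const std::string &libraryName) {
	if (libraryName.size() >= 2 && libraryName[0] == '-'
	 && (libraryName[1] == 'l' || libraryName[1] == 'L')) {
		return libraryName;        // Already a full flag, e.g. "-lpthread".
	}
	return "-l" + libraryName;     // Plain name, e.g. "pthread" -> "-lpthread".
}
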
@@ -92,8 +98,9 @@ void dsrMain(List<String> args) {
 		// Calling builder with the extra arguments will interpret them as variables and mark them as inherited, so that they are passed on to any other projects build from the project file.
 		// Other values can be assigned using an equality sign.
 		//   Avoid spaces around the equality sign, because quotes are already used for string arguments in assignments.
-		Machine settings;
-		argumentsToSettings(settings, args, 3);
+		Machine settings(file_getPathlessName(projectPath));
+		argumentsToSettings(settings, args, 3, args.length() - 1);
+		validateSettings(settings, U"in settings after getting application arguments (in main)");
 		// Generate build instructions.
 		String executableExtension;
 		if (getFlagAsInteger(settings, U"Windows")) {
@@ -101,6 +108,7 @@ void dsrMain(List<String> args) {
 		}
 		SessionContext buildContext = SessionContext(tempFolder, executableExtension);
 		build(buildContext, projectPath, settings);
+		validateSettings(settings, U"in settings after executing the root build script (in main)");
 		// Generate a script to execute.
 		// TODO: Store compiler flags in groups of lists to allow taking them directly as program arguments when calling the compiler directly.
 		generateCompilationScript(buildContext, scriptPath);