generator.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455
  1. 
  2. #include "generator.h"
  3. using namespace dsr;
  4. static uint64_t checksum(const ReadableString& text) {
  5. uint64_t a = 0x8C2A03D4;
  6. uint64_t b = 0xF42B1583;
  7. uint64_t c = 0xA6815E74;
  8. uint64_t d = 0;
  9. for (int i = 0; i < string_length(text); i++) {
  10. a = (b * c + ((i * 3756 + 2654) & 58043)) & 0xFFFFFFFF;
  11. b = (231 + text[i] * (a & 154) + c * 867 + 28294061) & 0xFFFFFFFF;
  12. c = (a ^ b ^ (text[i] * 1543217521)) & 0xFFFFFFFF;
  13. d = d ^ (a << 32) ^ b ^ (c << 16);
  14. }
  15. return d;
  16. }
  17. static uint64_t checksum(const Buffer& buffer) {
  18. SafePointer<uint8_t> data = buffer_getSafeData<uint8_t>(buffer, "checksum input buffer");
  19. uint64_t a = 0x8C2A03D4;
  20. uint64_t b = 0xF42B1583;
  21. uint64_t c = 0xA6815E74;
  22. uint64_t d = 0;
  23. for (int i = 0; i < buffer_getSize(buffer); i++) {
  24. a = (b * c + ((i * 3756 + 2654) & 58043)) & 0xFFFFFFFF;
  25. b = (231 + data[i] * (a & 154) + c * 867 + 28294061) & 0xFFFFFFFF;
  26. c = (a ^ b ^ (data[i] * 1543217521)) & 0xFFFFFFFF;
  27. d = d ^ (a << 32) ^ b ^ (c << 16);
  28. }
  29. return d;
  30. }
  31. enum class ScriptLanguage {
  32. Unknown,
  33. Batch,
  34. Bash
  35. };
  36. struct Connection {
  37. String path;
  38. int64_t lineNumber = -1;
  39. int64_t dependencyIndex = -1;
  40. Connection(const ReadableString& path)
  41. : path(path) {}
  42. Connection(const ReadableString& path, int64_t lineNumber)
  43. : path(path), lineNumber(lineNumber) {}
  44. };
  45. enum class Extension {
  46. Unknown, H, Hpp, C, Cpp
  47. };
  48. static Extension extensionFromString(const ReadableString& extensionName) {
  49. String upperName = string_upperCase(string_removeOuterWhiteSpace(extensionName));
  50. Extension result = Extension::Unknown;
  51. if (string_match(upperName, U"H")) {
  52. result = Extension::H;
  53. } else if (string_match(upperName, U"HPP")) {
  54. result = Extension::Hpp;
  55. } else if (string_match(upperName, U"C")) {
  56. result = Extension::C;
  57. } else if (string_match(upperName, U"CPP")) {
  58. result = Extension::Cpp;
  59. }
  60. return result;
  61. }
  62. struct Dependency {
  63. String path;
  64. Extension extension;
  65. uint64_t contentChecksum;
  66. bool visited; // Used to avoid infinite loops while traversing dependencies.
  67. List<Connection> links; // Depends on having these linked after compiling.
  68. List<Connection> includes; // Depends on having these included in pre-processing.
  69. Dependency(const ReadableString& path, Extension extension, uint64_t contentChecksum)
  70. : path(path), extension(extension), contentChecksum(contentChecksum) {}
  71. };
  72. List<Dependency> dependencies;
  73. static int64_t findDependency(const ReadableString& findPath);
  74. static void resolveConnection(Connection &connection);
  75. static void resolveDependency(Dependency &dependency);
  76. static String findSourceFile(const ReadableString& headerPath, bool acceptC, bool acceptCpp);
  77. static void flushToken(List<String> &target, String &currentToken);
  78. static void tokenize(List<String> &target, const ReadableString& line);
  79. static void interpretPreprocessing(int64_t parentIndex, const List<String> &tokens, const ReadableString &parentFolder, int64_t lineNumber);
  80. static void interpretPreprocessing(int64_t parentIndex, const List<String> &tokens, const ReadableString &parentFolder, int64_t lineNumber);
  81. static void analyzeCode(int64_t parentIndex, String content, const ReadableString &parentFolder);
  82. static int64_t findDependency(const ReadableString& findPath) {
  83. for (int d = 0; d < dependencies.length(); d++) {
  84. if (string_match(dependencies[d].path, findPath)) {
  85. return d;
  86. }
  87. }
  88. return -1;
  89. }
  90. static void resolveConnection(Connection &connection) {
  91. connection.dependencyIndex = findDependency(connection.path);
  92. }
  93. static void resolveDependency(Dependency &dependency) {
  94. for (int l = 0; l < dependency.links.length(); l++) {
  95. resolveConnection(dependency.links[l]);
  96. }
  97. for (int i = 0; i < dependency.includes.length(); i++) {
  98. resolveConnection(dependency.includes[i]);
  99. }
  100. }
  101. void resolveDependencies() {
  102. for (int d = 0; d < dependencies.length(); d++) {
  103. resolveDependency(dependencies[d]);
  104. }
  105. }
  106. static String findSourceFile(const ReadableString& headerPath, bool acceptC, bool acceptCpp) {
  107. int lastDotIndex = string_findLast(headerPath, U'.');
  108. if (lastDotIndex != -1) {
  109. ReadableString extensionlessPath = string_removeOuterWhiteSpace(string_before(headerPath, lastDotIndex));
  110. String cPath = extensionlessPath + U".c";
  111. String cppPath = extensionlessPath + U".cpp";
  112. if (acceptC && file_getEntryType(cPath) == EntryType::File) {
  113. return cPath;
  114. } else if (acceptCpp && file_getEntryType(cppPath) == EntryType::File) {
  115. return cppPath;
  116. }
  117. }
  118. return U"";
  119. }
  120. static void flushToken(List<String> &target, String &currentToken) {
  121. if (string_length(currentToken) > 0) {
  122. target.push(currentToken);
  123. currentToken = U"";
  124. }
  125. }
  126. static void tokenize(List<String> &target, const ReadableString& line) {
  127. String currentToken;
  128. for (int i = 0; i < string_length(line); i++) {
  129. DsrChar c = line[i];
  130. DsrChar nextC = line[i + 1];
  131. if (c == U'#' && nextC == U'#') {
  132. // Appending tokens using ##
  133. i++;
  134. } else if (c == U'#' || c == U'(' || c == U')' || c == U'[' || c == U']' || c == U'{' || c == U'}') {
  135. // Atomic token of a single character
  136. flushToken(target, currentToken);
  137. string_appendChar(currentToken, c);
  138. flushToken(target, currentToken);
  139. } else if (c == U' ' || c == U'\t') {
  140. // Whitespace
  141. flushToken(target, currentToken);
  142. } else {
  143. string_appendChar(currentToken, c);
  144. }
  145. }
  146. flushToken(target, currentToken);
  147. }
  148. static void interpretPreprocessing(int64_t parentIndex, const List<String> &tokens, const ReadableString &parentFolder, int64_t lineNumber) {
  149. if (tokens.length() >= 3) {
  150. if (string_match(tokens[1], U"include")) {
  151. if (tokens[2][0] == U'\"') {
  152. String relativePath = string_unmangleQuote(tokens[2]);
  153. String absolutePath = file_getTheoreticalAbsolutePath(relativePath, parentFolder, LOCAL_PATH_SYNTAX);
  154. dependencies[parentIndex].includes.pushConstruct(absolutePath, lineNumber);
  155. analyzeFromFile(absolutePath);
  156. }
  157. }
  158. }
  159. }
  160. static void analyzeCode(int64_t parentIndex, String content, const ReadableString &parentFolder) {
  161. List<String> tokens;
  162. bool continuingLine = false;
  163. int64_t lineNumber = 0;
  164. string_split_callback(content, U'\n', true, [&parentIndex, &parentFolder, &tokens, &continuingLine, &lineNumber](ReadableString line) {
  165. lineNumber++;
  166. if (line[0] == U'#' || continuingLine) {
  167. tokenize(tokens, line);
  168. // Continuing pre-processing line using \ at the end.
  169. continuingLine = line[string_length(line) - 1] == U'\\';
  170. } else {
  171. continuingLine = false;
  172. }
  173. if (!continuingLine && tokens.length() > 0) {
  174. interpretPreprocessing(parentIndex, tokens, parentFolder, lineNumber);
  175. tokens.clear();
  176. }
  177. });
  178. }
  179. void analyzeFromFile(const ReadableString& absolutePath) {
  180. if (findDependency(absolutePath) != -1) {
  181. // Already analyzed the current entry. Abort to prevent duplicate dependencies.
  182. return;
  183. }
  184. int lastDotIndex = string_findLast(absolutePath, U'.');
  185. if (lastDotIndex != -1) {
  186. Extension extension = extensionFromString(string_after(absolutePath, lastDotIndex));
  187. if (extension != Extension::Unknown) {
  188. // The old length will be the new dependency's index.
  189. int64_t parentIndex = dependencies.length();
  190. // Get the file's binary content.
  191. Buffer fileBuffer = file_loadBuffer(absolutePath);
  192. // Get the checksum
  193. uint64_t contentChecksum = checksum(fileBuffer);
  194. dependencies.pushConstruct(absolutePath, extension, contentChecksum);
  195. if (extension == Extension::H || extension == Extension::Hpp) {
  196. // The current file is a header, so look for an implementation with the corresponding name.
  197. String sourcePath = findSourceFile(absolutePath, extension == Extension::H, true);
  198. // If found:
  199. if (string_length(sourcePath) > 0) {
  200. // Remember that anything using the header will have to link with the implementation.
  201. dependencies[parentIndex].links.pushConstruct(sourcePath);
  202. // Look for included headers in the implementation file.
  203. analyzeFromFile(sourcePath);
  204. }
  205. }
  206. // Interpret the file's content.
  207. analyzeCode(parentIndex, string_loadFromMemory(fileBuffer), file_getRelativeParentFolder(absolutePath));
  208. }
  209. }
  210. }
  211. static void debugPrintDependencyList(const List<Connection> &connnections, const ReadableString verb) {
  212. for (int c = 0; c < connnections.length(); c++) {
  213. int64_t lineNumber = connnections[c].lineNumber;
  214. if (lineNumber != -1) {
  215. printText(U" @", lineNumber, U"\t");
  216. } else {
  217. printText(U" \t");
  218. }
  219. printText(U" ", verb, U" ", file_getPathlessName(connnections[c].path), U"\n");
  220. }
  221. }
  222. void printDependencies() {
  223. for (int d = 0; d < dependencies.length(); d++) {
  224. printText(U"* ", file_getPathlessName(dependencies[d].path), U"\n");
  225. debugPrintDependencyList(dependencies[d].includes, U"including");
  226. debugPrintDependencyList(dependencies[d].links, U"linking");
  227. }
  228. }
  229. static ScriptLanguage identifyLanguage(const ReadableString filename) {
  230. String scriptExtension = string_upperCase(file_getExtension(filename));
  231. if (string_match(scriptExtension, U"BAT")) {
  232. return ScriptLanguage::Batch;
  233. } else if (string_match(scriptExtension, U"SH")) {
  234. return ScriptLanguage::Bash;
  235. } else {
  236. throwError(U"Could not identify the scripting language of ", filename, U". Use *.bat or *.sh.\n");
  237. return ScriptLanguage::Unknown;
  238. }
  239. }
  240. static void script_printMessage(String &output, ScriptLanguage language, const ReadableString message) {
  241. if (language == ScriptLanguage::Batch) {
  242. string_append(output, U"echo ", message, U"\n");
  243. } else if (language == ScriptLanguage::Bash) {
  244. string_append(output, U"echo ", message, U"\n");
  245. }
  246. }
  247. static void script_executeLocalBinary(String &output, ScriptLanguage language, const ReadableString code) {
  248. if (language == ScriptLanguage::Batch) {
  249. string_append(output, code, ".exe\n");
  250. } else if (language == ScriptLanguage::Bash) {
  251. string_append(output, file_combinePaths(U".", code), U";\n");
  252. }
  253. }
  254. static void traverserHeaderChecksums(uint64_t &target, int64_t dependencyIndex) {
  255. // Use checksums from headers
  256. for (int h = 0; h < dependencies[dependencyIndex].includes.length(); h++) {
  257. int64_t includedIndex = dependencies[dependencyIndex].includes[h].dependencyIndex;
  258. if (!dependencies[includedIndex].visited) {
  259. //printText(U" traverserHeaderChecksums(", includedIndex, U") ", dependencies[includedIndex].path, "\n");
  260. // Bitwise exclusive or is both order independent and entropy preserving for non-repeated content.
  261. target = target ^ dependencies[includedIndex].contentChecksum;
  262. // Just have to make sure that the same checksum is not used twice.
  263. dependencies[includedIndex].visited = true;
  264. // Use checksums from headers recursively
  265. traverserHeaderChecksums(target, includedIndex);
  266. }
  267. }
  268. }
  269. static uint64_t getCombinedChecksum(int64_t dependencyIndex) {
  270. //printText(U"getCombinedChecksum(", dependencyIndex, U") ", dependencies[dependencyIndex].path, "\n");
  271. for (int d = 0; d < dependencies.length(); d++) {
  272. dependencies[d].visited = false;
  273. }
  274. dependencies[dependencyIndex].visited = true;
  275. uint64_t result = dependencies[dependencyIndex].contentChecksum;
  276. traverserHeaderChecksums(result, dependencyIndex);
  277. return result;
  278. }
  279. struct SourceObject {
  280. uint64_t identityChecksum = 0; // Identification number for the object's name.
  281. uint64_t combinedChecksum = 0; // Combined content of the source file and all included headers recursively.
  282. String sourcePath, objectPath;
  283. SourceObject(const ReadableString& sourcePath, const ReadableString& tempFolder, const ReadableString& identity, int64_t dependencyIndex)
  284. : identityChecksum(checksum(identity)), combinedChecksum(getCombinedChecksum(dependencyIndex)), sourcePath(sourcePath) {
  285. // By making the content checksum a part of the name, one can switch back to an older version without having to recompile everything again.
  286. // Just need to clean the temporary folder once in a while because old versions can take a lot of space.
  287. this->objectPath = file_combinePaths(tempFolder, string_combine(U"dfpsr_", this->identityChecksum, U"_", this->combinedChecksum, U".o"));
  288. }
  289. };
  290. void generateCompilationScript(const Machine &settings, const ReadableString& projectPath) {
  291. ReadableString scriptPath = getFlag(settings, U"ScriptPath", U"");
  292. ReadableString tempFolder = file_getAbsoluteParentFolder(scriptPath);
  293. if (string_length(scriptPath) == 0) {
  294. printText(U"No script path was given, skipping script generation\n");
  295. return;
  296. }
  297. ScriptLanguage language = identifyLanguage(scriptPath);
  298. scriptPath = file_getTheoreticalAbsolutePath(scriptPath, projectPath);
  299. // The compiler is often a global alias, so the user must supply either an alias or an absolute path.
  300. ReadableString compilerName = getFlag(settings, U"Compiler", U"g++"); // Assume g++ as the compiler if not specified.
  301. ReadableString compileFrom = getFlag(settings, U"CompileFrom", U"");
  302. // Check if the build system was asked to run the compiler from a specific folder.
  303. bool changePath = (string_length(compileFrom) > 0);
  304. if (changePath) {
  305. printText(U"Using ", compilerName, " as the compiler executed from ", compileFrom, ".\n");
  306. } else {
  307. printText(U"Using ", compilerName, " as the compiler from the current directory.\n");
  308. }
  309. // Convert lists of linker and compiler flags into strings.
  310. // TODO: Give a warning if two contradictory flags are used, such as optimization levels and language versions.
  311. // TODO: Make sure that no spaces are inside of the flags, because that can mess up detection of pre-existing and contradictory arguments.
  312. String compilerFlags;
  313. for (int i = 0; i < settings.compilerFlags.length(); i++) {
  314. string_append(compilerFlags, " ", settings.compilerFlags[i]);
  315. }
  316. // TODO: Warn if -DNDEBUG, -DDEBUG, or optimization levels are given directly.
  317. // Using the variables instead is both more flexible by accepting input arguments
  318. // and keeping the same format to better reuse compiled objects.
  319. ReadableString debugMode = getFlag(settings, U"Debug", U"0");
  320. if (string_match(debugMode, U"0")) {
  321. printText(U"Building with release mode.\n");
  322. string_append(compilerFlags, " -DNDEBUG");
  323. } else {
  324. printText(U"Building with debug mode.\n");
  325. string_append(compilerFlags, " -DDEBUG");
  326. }
  327. ReadableString optimizationLevel = getFlag(settings, U"Optimization", U"2");
  328. printText(U"Building with optimization level ", optimizationLevel, U".\n");
  329. string_append(compilerFlags, " -O", optimizationLevel);
  330. String linkerFlags;
  331. for (int i = 0; i < settings.linkerFlags.length(); i++) {
  332. string_append(linkerFlags, " -l", settings.linkerFlags[i]);
  333. }
  334. // Interpret ProgramPath relative to the project path.
  335. ReadableString programPath = getFlag(settings, U"ProgramPath", language == ScriptLanguage::Batch ? U"program.exe" : U"program");
  336. programPath = file_getTheoreticalAbsolutePath(programPath, projectPath);
  337. String output;
  338. if (language == ScriptLanguage::Batch) {
  339. string_append(output, U"@echo off\n\n");
  340. } else if (language == ScriptLanguage::Bash) {
  341. string_append(output, U"#!/bin/bash\n\n");
  342. } else {
  343. printText(U"The type of script could not be identified for ", scriptPath, U"!\nUse *.bat for Batch or *.sh for Bash.\n");
  344. return;
  345. }
  346. List<SourceObject> sourceObjects;
  347. bool hasSourceCode = false;
  348. bool needCppCompiler = false;
  349. for (int d = 0; d < dependencies.length(); d++) {
  350. Extension extension = dependencies[d].extension;
  351. if (extension == Extension::Cpp) {
  352. needCppCompiler = true;
  353. }
  354. if (extension == Extension::C || extension == Extension::Cpp) {
  355. // Dependency paths are already absolute from the recursive search.
  356. String sourcePath = dependencies[d].path;
  357. String identity = string_combine(sourcePath, compilerFlags);
  358. sourceObjects.pushConstruct(sourcePath, tempFolder, identity, d);
  359. if (file_getEntryType(sourcePath) != EntryType::File) {
  360. throwError(U"The source file ", sourcePath, U" could not be found!\n");
  361. } else {
  362. hasSourceCode = true;
  363. }
  364. }
  365. }
  366. if (hasSourceCode) {
  367. // TODO: Give a warning if a known C compiler incapable of handling C++ is given C++ source code when needCppCompiler is true.
  368. if (changePath) {
  369. // Go into the requested folder.
  370. if (language == ScriptLanguage::Batch) {
  371. string_append(output, "pushd ", compileFrom, "\n");
  372. } else if (language == ScriptLanguage::Bash) {
  373. string_append(output, U"(cd ", compileFrom, ";\n");
  374. }
  375. }
  376. String allObjects;
  377. for (int i = 0; i < sourceObjects.length(); i++) {
  378. if (language == ScriptLanguage::Batch) {
  379. string_append(output, U"if exist ", sourceObjects[i].objectPath, U" (\n");
  380. } else if (language == ScriptLanguage::Bash) {
  381. string_append(output, U"if [ -e \"", sourceObjects[i].objectPath, U"\" ]; then\n");
  382. }
  383. script_printMessage(output, language, string_combine(U"Reusing ", sourceObjects[i].sourcePath, U" ID:", sourceObjects[i].identityChecksum, U"."));
  384. if (language == ScriptLanguage::Batch) {
  385. string_append(output, U") else (\n");
  386. } else if (language == ScriptLanguage::Bash) {
  387. string_append(output, U"else\n");
  388. }
  389. script_printMessage(output, language, string_combine(U"Compiling ", sourceObjects[i].sourcePath, U" ID:", sourceObjects[i].identityChecksum, U" with ", compilerFlags, U"."));
  390. string_append(output, compilerName, compilerFlags, U" -c ", sourceObjects[i].sourcePath, U" -o ", sourceObjects[i].objectPath, U"\n");
  391. if (language == ScriptLanguage::Batch) {
  392. string_append(output, ")\n");
  393. } else if (language == ScriptLanguage::Bash) {
  394. string_append(output, U"fi\n");
  395. }
  396. // Remember each object name for linking.
  397. string_append(allObjects, U" ", sourceObjects[i].objectPath);
  398. }
  399. script_printMessage(output, language, string_combine(U"Linking with ", linkerFlags, U"."));
  400. string_append(output, compilerName, allObjects, linkerFlags, U" -o ", programPath, U"\n");
  401. if (changePath) {
  402. // Get back to the previous folder.
  403. if (language == ScriptLanguage::Batch) {
  404. string_append(output, "popd\n");
  405. } else if (language == ScriptLanguage::Bash) {
  406. string_append(output, U")\n");
  407. }
  408. }
  409. script_printMessage(output, language, U"Done compiling.");
  410. script_printMessage(output, language, string_combine(U"Starting ", programPath));
  411. script_executeLocalBinary(output, language, programPath);
  412. script_printMessage(output, language, U"The program terminated.");
  413. if (language == ScriptLanguage::Batch) {
  414. // Windows might close the window before you have time to read the results or error messages of a CLI application, so pause at the end.
  415. string_append(output, U"pause\n");
  416. }
  417. if (language == ScriptLanguage::Batch) {
  418. string_save(scriptPath, output);
  419. } else if (language == ScriptLanguage::Bash) {
  420. string_save(scriptPath, output, CharacterEncoding::BOM_UTF8, LineEncoding::Lf);
  421. }
  422. } else {
  423. printText("Filed to find any source code to compile.\n");
  424. }
  425. }