expression.cpp 36 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645
  1. 
  2. #include "expression.h"
  3. #include "../../../DFPSR/api/stringAPI.h"
  4. using namespace dsr;
  5. POIndex::POIndex() {}
  6. POIndex::POIndex(int16_t precedenceIndex, int16_t operationIndex) : precedenceIndex(precedenceIndex), operationIndex(operationIndex) {}
  7. Operation::Operation(int16_t symbolIndex, std::function<String(ReadableString, ReadableString)> action)
  8. : symbolIndex(symbolIndex), action(action) {
  9. }
  10. static int16_t addOperation(ExpressionSyntax &targetSyntax, int16_t symbolIndex, std::function<String(ReadableString, ReadableString)> action) {
  11. int16_t precedenceIndex = targetSyntax.precedences.length() - 1;
  12. int16_t operationIndex = targetSyntax.precedences.last().operations.length();
  13. // TODO: Only allow assigning a symbol once per prefix, infix and postfix.
  14. targetSyntax.symbols[symbolIndex].operations[targetSyntax.precedences.last().notation] = POIndex(precedenceIndex, operationIndex);
  15. targetSyntax.precedences.last().operations.pushConstruct(symbolIndex, action);
  16. return operationIndex;
  17. }
  18. Precedence::Precedence(Notation notation, Associativity associativity)
  19. : notation(notation), associativity(associativity) {}
  20. Symbol::Symbol(const ReadableString &token, SymbolType symbolType, int32_t depthOffset, DsrChar endsWith, DsrChar escapes)
  21. : token(token), symbolType(symbolType), depthOffset(depthOffset), endsWith(endsWith), escapes(escapes) {}
  22. ReadableString expression_getToken(const List<String> &tokens, int64_t index, const ReadableString &outside) {
  23. if (0 <= index && index < tokens.length()) {
  24. return tokens[index];
  25. } else {
  26. return outside;
  27. }
  28. }
  29. int64_t expression_interpretAsInteger(const ReadableString &value) {
  30. if (string_length(value) == 0) {
  31. return 0;
  32. } else {
  33. return string_toInteger(value);
  34. }
  35. }
  36. String expression_unwrapIfNeeded(const ReadableString &text) {
  37. if (text[0] == U'\"') {
  38. return string_unmangleQuote(text);
  39. } else {
  40. return text;
  41. }
  42. }
  43. static int16_t createSymbol(ExpressionSyntax &targetSyntax, const ReadableString &token, SymbolType symbolType, int32_t depthOffset, DsrChar endsWith, DsrChar escapes) {
  44. int64_t oldCount = targetSyntax.symbols.length();
  45. if (oldCount >= 32767) throwError(U"Can't declare more than 32767 symbols in a syntax, because they are referenced using 16-bit integers!\n");
  46. if (string_length(token) < 1) throwError(U"Can't declare a symbol without any characters, because the empty symbol exists between every character!\n");
  47. if (symbolType != SymbolType::Keyword) {
  48. if (targetSyntax.keywordCount > 0) throwError(U"Can't declare atomic symbols after the first keyword!\n");
  49. if (targetSyntax.atomicCount > 0 && string_length(targetSyntax.symbols[oldCount - 1].token) < string_length(token)) {
  50. throwError(U"Each following atomic token must be shorter or equal to the previous atomic token, so that longest match first can be applied!\n");
  51. }
  52. targetSyntax.atomicCount++;
  53. } else {
  54. targetSyntax.keywordCount++;
  55. }
  56. targetSyntax.symbols.pushConstruct(token, symbolType, depthOffset, endsWith, escapes);
  57. return (int16_t)oldCount;
  58. }
  59. #define CREATE_KEYWORD(TOKEN) createSymbol(*this, TOKEN, SymbolType::Keyword, 0, -1, -1);
  60. #define CREATE_ATOMIC(TOKEN) createSymbol(*this, TOKEN, SymbolType::Atomic, 0, -1, -1);
  61. #define CREATE_LEFT(TOKEN) createSymbol(*this, TOKEN, SymbolType::Atomic, 1, -1, -1);
  62. #define CREATE_RIGHT(TOKEN) createSymbol(*this, TOKEN, SymbolType::Atomic, -1, -1, -1);
  63. #define CREATE_LITERAL(START_TOKEN, END_CHAR, ESCAPE_CHAR) createSymbol(*this, START_TOKEN, SymbolType::Atomic, 0, END_CHAR, ESCAPE_CHAR);
  64. #define CREATE_VOID(TOKEN) createSymbol(*this, TOKEN, SymbolType::Nothing, 0, -1, -1);
  65. #define CREATE_COMMENT(TOKEN, END_CHAR, ESCAPE_CHAR) createSymbol(*this, TOKEN, SymbolType::Nothing, 0, END_CHAR, ESCAPE_CHAR);
  66. // TODO: Create a way to enter symbols, keywords and operations from the outside to define custom syntax.
  67. // * Using a file or list of symbols is the easiest way to enter them by sorting automatically, but makes it hard to connect the indices with anything useful.
  68. // * Using multiple calls to an API makes it difficult to sort atomic symbols automatically based on length.
  69. ExpressionSyntax::ExpressionSyntax() {
  70. // Symbols must be entered with longest match first, so that they can be used for tokenization.
  71. // Length 2 symbols
  72. int16_t token_lesserEqual = CREATE_ATOMIC(U"<="); // Allowed because both < and = are infix operations, which can not end up on the left or right sides.
  73. int16_t token_greaterEqual = CREATE_ATOMIC(U">="); // Allowed because both > and = are infix operations, which can not end up on the left or right sides.
  74. int16_t token_equal = CREATE_ATOMIC(U"=="); // Allowed because = is an infix operation, which can not end up on the left or right sides.
  75. int16_t token_notEqual = CREATE_ATOMIC(U"!="); // Allowed because ! is a prefix and would not end up on the left side of an assignment.
  76. // Length 1 symbols
  77. int16_t token_plus = CREATE_ATOMIC(U"+");
  78. int16_t token_minus = CREATE_ATOMIC(U"-");
  79. int16_t token_star = CREATE_ATOMIC(U"*");
  80. int16_t token_forwardSlash = CREATE_ATOMIC(U"/");
  81. int16_t token_backSlash = CREATE_ATOMIC(U"\\");
  82. int16_t token_exclamation = CREATE_ATOMIC(U"!");
  83. int16_t token_lesser = CREATE_ATOMIC(U"<");
  84. int16_t token_greater = CREATE_ATOMIC(U">");
  85. int16_t token_ampersand = CREATE_ATOMIC(U"&");
  86. // TODO: Connect scopes to each other for matching
  87. int16_t token_leftParen = CREATE_LEFT(U"(");
  88. int16_t token_leftBracket = CREATE_LEFT(U"[");
  89. int16_t token_leftCurl = CREATE_LEFT(U"{");
  90. int16_t token_rightParen = CREATE_RIGHT(U")");
  91. int16_t token_rightBracket = CREATE_RIGHT(U"]");
  92. int16_t token_rightCurl = CREATE_RIGHT(U"}");
  93. // Breaking
  94. int16_t token_lineBreak = CREATE_ATOMIC(U"\n");
  95. // Nothing
  96. CREATE_VOID(U" ");
  97. CREATE_VOID(U"\t");
  98. CREATE_VOID(U"\v");
  99. CREATE_VOID(U"\f");
  100. CREATE_VOID(U"\r"); // \r\n becomes \n, \n\r becomes \n and \n remains the same. String only using \r to break lines need to be converted into \n linebreaks before use.
  101. // Special tokens
  102. int16_t token_comment = CREATE_COMMENT(U"#", U'\n', -1); // # will begin a comment until the end of the line, without any escape character.
  103. int16_t token_doubleQuote = CREATE_LITERAL(U"\"", U'\"', U'\\'); // " will begin a literal until the next " not preceded by \.
  104. // Keywords that are used in expressions
  105. int16_t token_logical_and = CREATE_KEYWORD(U"and");
  106. int16_t token_logical_or = CREATE_KEYWORD(U"or");
  107. int16_t token_logical_xor = CREATE_KEYWORD(U"xor");
  108. int16_t token_string_match = CREATE_KEYWORD(U"matches");
  109. // Unidentified tokens are treated as identifiers or values with index -1.
  110. // Unlisted keywords can still be tokenized and used for statements, just not used to perform operations in expressions.
  111. // Each symbol can be tied once to prefix, once to infix and once to postfix.
  112. this->precedences.pushConstruct(Notation::Prefix, Associativity::RightToLeft);
  113. // Unary negation
  114. addOperation(*this, token_minus, [](ReadableString lhs, ReadableString rhs) -> String {
  115. return string_combine(-expression_interpretAsInteger(rhs));
  116. });
  117. // Unary logical not
  118. addOperation(*this, token_exclamation, [](ReadableString lhs, ReadableString rhs) -> String {
  119. return string_combine((!expression_interpretAsInteger(rhs)) ? 1 : 0);
  120. });
  121. this->precedences.pushConstruct(Notation::Infix, Associativity::LeftToRight);
  122. // Infix integer multiplication
  123. addOperation(*this, token_star, [](ReadableString lhs, ReadableString rhs) -> String {
  124. return string_combine(expression_interpretAsInteger(lhs) * expression_interpretAsInteger(rhs));
  125. });
  126. // Infix integer division
  127. addOperation(*this, token_forwardSlash, [](ReadableString lhs, ReadableString rhs) -> String {
  128. return string_combine(expression_interpretAsInteger(lhs) / expression_interpretAsInteger(rhs));
  129. });
  130. this->precedences.pushConstruct(Notation::Infix, Associativity::LeftToRight);
  131. // Infix integer addition
  132. addOperation(*this, token_plus, [](ReadableString lhs, ReadableString rhs) -> String {
  133. return string_combine(expression_interpretAsInteger(lhs) + expression_interpretAsInteger(rhs));
  134. });
  135. // Infix integer subtraction
  136. addOperation(*this, token_minus, [](ReadableString lhs, ReadableString rhs) -> String {
  137. return string_combine(expression_interpretAsInteger(lhs) - expression_interpretAsInteger(rhs));
  138. });
  139. this->precedences.pushConstruct(Notation::Infix, Associativity::LeftToRight);
  140. // Infix integer lesser than comparison
  141. addOperation(*this, token_lesser, [](ReadableString lhs, ReadableString rhs) -> String {
  142. return string_combine((expression_interpretAsInteger(lhs) < expression_interpretAsInteger(rhs)) ? 1 : 0);
  143. });
  144. // Infix integer greater than comparison
  145. addOperation(*this, token_greater, [](ReadableString lhs, ReadableString rhs) -> String {
  146. return string_combine((expression_interpretAsInteger(lhs) > expression_interpretAsInteger(rhs)) ? 1 : 0);
  147. });
  148. // Infix integer lesser than or equal to comparison
  149. addOperation(*this, token_lesserEqual, [](ReadableString lhs, ReadableString rhs) -> String {
  150. return string_combine((expression_interpretAsInteger(lhs) <= expression_interpretAsInteger(rhs)) ? 1 : 0);
  151. });
  152. // Infix integer greater than or equal to comparison
  153. addOperation(*this, token_greaterEqual, [](ReadableString lhs, ReadableString rhs) -> String {
  154. return string_combine((expression_interpretAsInteger(lhs) >= expression_interpretAsInteger(rhs)) ? 1 : 0);
  155. });
  156. this->precedences.pushConstruct(Notation::Infix, Associativity::LeftToRight);
  157. // Infix case sensitive string match
  158. addOperation(*this, token_string_match, [](ReadableString lhs, ReadableString rhs) -> String {
  159. return string_combine(string_match(lhs, rhs) ? 1 : 0);
  160. });
  161. // Infix integer equal to comparison
  162. addOperation(*this, token_equal, [](ReadableString lhs, ReadableString rhs) -> String {
  163. return string_combine((expression_interpretAsInteger(lhs) == expression_interpretAsInteger(rhs)) ? 1 : 0);
  164. });
  165. // Infix integer not equal to comparison
  166. addOperation(*this, token_notEqual, [](ReadableString lhs, ReadableString rhs) -> String {
  167. return string_combine((expression_interpretAsInteger(lhs) != expression_interpretAsInteger(rhs)) ? 1 : 0);
  168. });
  169. this->precedences.pushConstruct(Notation::Infix, Associativity::LeftToRight);
  170. // Infix logical and
  171. addOperation(*this, token_logical_and, [](ReadableString lhs, ReadableString rhs) -> String {
  172. return string_combine((expression_interpretAsInteger(lhs) && expression_interpretAsInteger(rhs)) ? 1 : 0);
  173. });
  174. this->precedences.pushConstruct(Notation::Infix, Associativity::LeftToRight);
  175. // Infix logical inclusive or
  176. addOperation(*this, token_logical_or, [](ReadableString lhs, ReadableString rhs) -> String {
  177. return string_combine((expression_interpretAsInteger(lhs) || expression_interpretAsInteger(rhs)) ? 1 : 0);
  178. });
  179. // Infix logical exclusive or
  180. addOperation(*this, token_logical_xor, [](ReadableString lhs, ReadableString rhs) -> String {
  181. return string_combine((!expression_interpretAsInteger(lhs) != !expression_interpretAsInteger(rhs)) ? 1 : 0);
  182. });
  183. this->precedences.pushConstruct(Notation::Infix, Associativity::LeftToRight);
  184. // Infix string concatenation
  185. addOperation(*this, token_ampersand, [](ReadableString lhs, ReadableString rhs) -> String {
  186. return string_combine(lhs, rhs);
  187. });
  188. }
  189. ExpressionSyntax defaultSyntax;
  190. struct TokenInfo {
  191. int32_t depth = -1;
  192. int16_t symbolIndex = -1;
  193. TokenInfo() {}
  194. TokenInfo(int32_t depth, int16_t symbolIndex)
  195. : depth(depth), symbolIndex(symbolIndex) {}
  196. };
  197. static String debugTokens(const List<TokenInfo> &info, int64_t infoStart, const List<String> &tokens, int64_t startTokenIndex, int64_t endTokenIndex) {
  198. String result;
  199. for (int64_t t = startTokenIndex; t <= endTokenIndex; t++) {
  200. int64_t infoIndex = t - infoStart;
  201. if (t > startTokenIndex) {
  202. string_appendChar(result, U' ');
  203. }
  204. string_append(result, tokens[t]);
  205. }
  206. string_append(result, U" : ");
  207. for (int64_t t = startTokenIndex; t <= endTokenIndex; t++) {
  208. int64_t infoIndex = t - infoStart;
  209. if (t > startTokenIndex) {
  210. string_appendChar(result, U' ');
  211. }
  212. string_append(result, "[", info[infoIndex].depth, ",", info[infoIndex].symbolIndex, ",", tokens[t], "]");
  213. }
  214. return result;
  215. }
  216. static String debugTokens(const List<String> &tokens) {
  217. String result;
  218. for (int64_t t = 0; t < tokens.length(); t++) {
  219. if (t > 0) {
  220. string_appendChar(result, U' ');
  221. }
  222. string_append(result, U"[", tokens[t], U"]");
  223. }
  224. return result;
  225. }
  226. static int16_t identifySymbol(const ReadableString &token, const ExpressionSyntax &syntax) {
  227. for (int64_t s = 0; s < syntax.symbols.length(); s++) {
  228. if (syntax.symbols[s].symbolType == SymbolType::Keyword) {
  229. // TODO: Make case insensitive optional for keywords.
  230. if (string_caseInsensitiveMatch(token, syntax.symbols[s].token)) {
  231. return s;
  232. }
  233. } else {
  234. if (string_match(token, syntax.symbols[s].token)) {
  235. return s;
  236. }
  237. }
  238. }
  239. return -1; // Pattern to resolve later.
  240. }
  241. // Returns true iff the symbol can be at the leftmost side of a sub-expression.
  242. static bool validLeftmostSymbol(const Symbol &symbol) {
  243. if (symbol.depthOffset > 0) {
  244. return true; // ( [ { as the left side of a right hand side
  245. } else {
  246. return symbol.operations[Notation::Prefix].operationIndex != -1; // Accept prefix operations on the rightmost side
  247. }
  248. }
  249. // Returns true iff the symbol can be at the rightmost side of a sub-expression.
  250. static bool validRightmostSymbol(const Symbol &symbol) {
  251. if (symbol.depthOffset < 0) {
  252. return true; // Accept ) ] } as the right side of a left hand side
  253. } else {
  254. return symbol.operations[Notation::Postfix].operationIndex != -1; // Accept postfix operations on the rightmost side
  255. }
  256. }
  257. // Returns true iff the symbol can be at the leftmost side of a sub-expression.
  258. static bool validLeftmostToken(int16_t symbolIndex, const ExpressionSyntax &syntax) {
  259. return symbolIndex < 0 || validLeftmostSymbol(syntax.symbols[symbolIndex]);
  260. }
  261. // Returns true iff the symbol can be at the rightmost side of a sub-expression.
  262. static bool validRightmostToken(int16_t symbolIndex, const ExpressionSyntax &syntax) {
  263. return symbolIndex < 0 || validRightmostSymbol(syntax.symbols[symbolIndex]);
  264. }
  265. // info is a list of additional information starting with info[0] at tokens[startTokenIndex]
  266. // infoStart is the startTokenIndex of the root evaluation call
  267. static String expression_evaluate_helper(const List<TokenInfo> &info, int64_t infoStart, int64_t currentDepth, const List<String> &tokens, int64_t startTokenIndex, int64_t endTokenIndex, const ExpressionSyntax &syntax, std::function<String(ReadableString)> identifierEvaluation) {
  268. //printText(U"Evaluate: ", debugTokens(info, infoStart, tokens, startTokenIndex, endTokenIndex), U"\n");
  269. if (startTokenIndex == endTokenIndex) {
  270. ReadableString first = expression_getToken(tokens, startTokenIndex, U"");
  271. if (string_isInteger(first)) {
  272. return first;
  273. } else if (first[0] == U'\"') {
  274. // TODO: Let the caller unwrap strings.
  275. return string_unmangleQuote(first);
  276. } else {
  277. // Identifier defaulting to empty.
  278. return identifierEvaluation(first);
  279. }
  280. } else {
  281. // Find the outmost operation using recursive descent parsing, in which precedence and direction when going down is reversed relative to order of evaluation when going up.
  282. for (int64_t p = syntax.precedences.length() - 1; p >= 0; p--) {
  283. const Precedence *precedence = &(syntax.precedences[p]);
  284. int64_t leftScanBound = 0;
  285. int64_t rightScanBound = 0;
  286. if (precedence->notation == Notation::Prefix) {
  287. // A prefix can only be used at the start of the current sub-expression
  288. leftScanBound = startTokenIndex;
  289. rightScanBound = startTokenIndex;
  290. //printText("precendence = ", p, U" (prefix)\n");
  291. } else if (precedence->notation == Notation::Infix) {
  292. // Skip ends when looking for infix operations
  293. leftScanBound = startTokenIndex + 1;
  294. rightScanBound = endTokenIndex - 1;
  295. //printText("precendence = ", p, U" (infix)\n");
  296. } else if (precedence->notation == Notation::Postfix) {
  297. // A postfix can only be used at the end of the current sub-expression
  298. leftScanBound = endTokenIndex;
  299. rightScanBound = endTokenIndex;
  300. //printText("precendence = ", p, U" (postfix)\n");
  301. }
  302. int64_t opStep = (precedence->associativity == Associativity::LeftToRight) ? -1 : 1;
  303. int64_t opIndex = (precedence->associativity == Associativity::LeftToRight) ? rightScanBound : leftScanBound;
  304. int64_t stepCount = 1 + rightScanBound - leftScanBound;
  305. for (int64_t i = 0; i < stepCount; i++) {
  306. int64_t infoIndex = opIndex - infoStart;
  307. TokenInfo leftInfo = (opIndex <= startTokenIndex) ? TokenInfo() : info[infoIndex - 1];
  308. TokenInfo currentInfo = info[infoIndex];
  309. TokenInfo rightInfo = (opIndex >= endTokenIndex) ? TokenInfo() : info[infoIndex + 1];
  310. // Only match outmost at currentDepth.
  311. if (currentInfo.depth == currentDepth && currentInfo.symbolIndex > -1) {
  312. // If the current symbol is has an operation in the same notation and precedence, then grab that operation index.
  313. const Symbol *currentSymbol = &(syntax.symbols[currentInfo.symbolIndex]);
  314. if (currentSymbol->operations[precedence->notation].precedenceIndex == p) {
  315. // Resolve the common types of ambiguity that can quickly be resolved and let the other cases fail if the syntax is too ambiguous.
  316. bool validLeft = validRightmostToken(leftInfo.symbolIndex, syntax);
  317. bool validRight = validLeftmostToken(rightInfo.symbolIndex, syntax);
  318. bool valid = true;
  319. if (precedence->notation == Notation::Prefix) {
  320. if (!validRight) valid = false;
  321. } else if (precedence->notation == Notation::Infix) {
  322. if (!validLeft) valid = false;
  323. if (!validRight) valid = false;
  324. } else if (precedence->notation == Notation::Postfix) {
  325. if (!validLeft) valid = false;
  326. }
  327. if (valid) {
  328. const Operation *operation = &(precedence->operations[currentSymbol->operations[precedence->notation].operationIndex]);
  329. String lhs = (precedence->notation == Notation::Prefix) ? U"" : expression_evaluate_helper(info, infoStart, currentDepth, tokens, startTokenIndex, opIndex - 1, syntax, identifierEvaluation);
  330. String rhs = (precedence->notation == Notation::Postfix) ? U"" : expression_evaluate_helper(info, infoStart, currentDepth, tokens, opIndex + 1, endTokenIndex, syntax, identifierEvaluation);
  331. /*
  332. printText(U"Applied ", currentSymbol->token, "\n");
  333. printText(U" currentDepth = ", currentDepth, U"\n");
  334. printText(U" lhs = ", lhs, U"\n");
  335. printText(U" rhs = ", rhs, U"\n");
  336. printText(U" startTokenIndex = ", startTokenIndex, U"\n");
  337. printText(U" leftScanBound = ", leftScanBound, U"\n");
  338. printText(U" rightScanBound = ", rightScanBound, U"\n");
  339. printText(U" endTokenIndex = ", endTokenIndex, U"\n");
  340. printText(U" opStep = ", opStep, U"\n");
  341. printText(U" opIndex = ", opIndex, U"\n");
  342. printText(U" stepCount = ", stepCount, U"\n");
  343. printText(U" notation = ", precedence->notation, U"\n");
  344. printText(U" validLeft(", leftInfo.symbolIndex, U") = ", validLeft, U"\n");
  345. printText(U" validRight(", rightInfo.symbolIndex, U") = ", validRight, U"\n");
  346. */
  347. return operation->action(lhs, rhs);
  348. }
  349. }
  350. }
  351. opIndex += opStep;
  352. }
  353. }
  354. // TODO: Let the caller create a pattern matching operation for these combinations using longest match first.
  355. if (string_match(tokens[startTokenIndex], U"(") && string_match(tokens[endTokenIndex], U")")) {
  356. //printText(U"Unwrapping ()\n");
  357. return expression_evaluate_helper(info, infoStart, currentDepth + 1, tokens, startTokenIndex + 1, endTokenIndex - 1, syntax, identifierEvaluation);
  358. }
  359. }
  360. return U"<ERROR:Invalid expression>";
  361. }
  362. String expression_evaluate(const List<String> &tokens, int64_t startTokenIndex, int64_t endTokenIndex, const ExpressionSyntax &syntax, std::function<String(ReadableString)> identifierEvaluation) {
  363. // Scan the whole expression once in the beginning and write useful information into a separate list.
  364. // This allow handling tokens as plain lists of strings while still being able to number what they are.
  365. int32_t depth = 0;
  366. List<TokenInfo> info;
  367. for (int64_t opIndex = startTokenIndex; opIndex <= endTokenIndex; opIndex++) {
  368. String currentToken = tokens[opIndex];
  369. int16_t symbolIndex = identifySymbol(currentToken, syntax);
  370. int32_t depthOffet = (symbolIndex == -1) ? 0 : syntax.symbols[symbolIndex].depthOffset;
  371. if (depthOffet < 0) { // ) ] }
  372. depth += depthOffet;
  373. if (depth < 0) return U"<ERROR:Negative expression depth>";
  374. }
  375. info.pushConstruct(depth, symbolIndex);
  376. if (depthOffet > 0) { // ( [ {
  377. depth += depthOffet;
  378. }
  379. }
  380. if (depth != 0) return U"<ERROR:Unbalanced expression depth>";
  381. return expression_evaluate_helper(info, startTokenIndex, 0, tokens, startTokenIndex, endTokenIndex, syntax, identifierEvaluation);
  382. }
  383. String expression_evaluate(const List<String> &tokens, int64_t startTokenIndex, int64_t endTokenIndex, std::function<String(ReadableString)> identifierEvaluation) {
  384. return expression_evaluate(tokens, startTokenIndex, endTokenIndex, defaultSyntax, identifierEvaluation);
  385. }
  386. String expression_evaluate(const List<String> &tokens, std::function<String(ReadableString)> identifierEvaluation) {
  387. return expression_evaluate(tokens, 0, tokens.length() - 1, defaultSyntax, identifierEvaluation);
  388. }
  389. // Atomic symbols are always case sensitive.
  390. static bool matchAtomicFrom(const ReadableString &sourceText, int64_t location, const ReadableString &symbol) {
  391. for (int64_t l = 0; l < string_length(symbol); l++) {
  392. if (sourceText[location + l] != symbol[l]) {
  393. return false; // No match if a character deviated.
  394. }
  395. }
  396. return true; // Match if we found no contradicting characters.
  397. }
  398. void expression_tokenize(List<String> &targetTokens, const ReadableString &sourceText, const ExpressionSyntax &syntax) {
  399. //printText(U"expression_tokenize(", sourceText, U")\n");
  400. int64_t i = 0;
  401. int64_t keywordStart = 0;
  402. int64_t sourceLength = string_length(sourceText);
  403. while (i < sourceLength) {
  404. bool foundSymbol = false;
  405. for (int64_t s = 0; s < syntax.atomicCount; s++) {
  406. String startToken = syntax.symbols[s].token;
  407. if (matchAtomicFrom(sourceText, i, startToken)) {
  408. if (keywordStart < i) targetTokens.push(string_exclusiveRange(sourceText, keywordStart, i)); // Consume any previous keyword.
  409. int64_t startTokenLength = string_length(startToken);
  410. int64_t startIndex = i;
  411. int64_t exclusiveEndIndex = i + string_length(startToken);
  412. DsrChar endsWith = syntax.symbols[s].endsWith;
  413. DsrChar escapes = syntax.symbols[s].escapes;
  414. i += string_length(startToken);
  415. if (endsWith != -1) {
  416. // Find the end if the token is continuing.
  417. int64_t j;
  418. for (j = i; j < sourceLength; j++) {
  419. if (sourceText[j] == endsWith) {
  420. // Include the last character before ending
  421. j++;
  422. break;
  423. } else if (sourceText[j] == escapes) {
  424. // Jump past the next character when an escape character is met.
  425. j++;
  426. }
  427. }
  428. exclusiveEndIndex = j;
  429. }
  430. if (syntax.symbols[s].symbolType != SymbolType::Nothing) {
  431. // Include the token if it's not whitespace.
  432. targetTokens.push(string_exclusiveRange(sourceText, startIndex, exclusiveEndIndex));
  433. }
  434. i = exclusiveEndIndex;
  435. // Done identifying the symbol.
  436. foundSymbol = true;
  437. keywordStart = i;
  438. break;
  439. }
  440. }
  441. if (!foundSymbol) {
  442. i++;
  443. }
  444. }
  445. if (keywordStart < i) targetTokens.push(string_exclusiveRange(sourceText, keywordStart, i)); // Consume any last keyword.
  446. //printText(U"expression_tokenize finished with ", targetTokens.length(), " tokens\n");
  447. }
  448. void expression_tokenize(List<String> &targetTokens, const ReadableString &sourceText) {
  449. expression_tokenize(targetTokens, sourceText, defaultSyntax);
  450. }
  451. List<String> expression_tokenize(const ReadableString &sourceText, const ExpressionSyntax &syntax) {
  452. List<String> result;
  453. expression_tokenize(result, sourceText, syntax);
  454. return result;
  455. }
  456. List<String> expression_tokenize(const ReadableString &sourceText) {
  457. List<String> result;
  458. expression_tokenize(result, sourceText);
  459. return expression_tokenize(sourceText, defaultSyntax);
  460. }
  461. // -------- Regression tests --------
  462. template<typename TYPE>
  463. inline void appendToken(List<String>& target, const TYPE head) {
  464. target.push(head);
  465. }
  466. template<typename HEAD, typename... TAIL>
  467. inline void appendToken(List<String>& target, const HEAD head, TAIL... tail) {
  468. appendToken(target, head);
  469. appendToken(target, tail...);
  470. }
  471. template<typename... ARGS>
  472. inline List<String> combineTokens(ARGS... args) {
  473. List<String> result;
  474. appendToken(result, args...);
  475. return result;
  476. }
  477. static void expectResult(int64_t &errorCount, const ReadableString &result, const ReadableString &expected) {
  478. if (string_match(result, expected)) {
  479. printText(U"* Passed ", expected, U"\n");
  480. } else {
  481. printText(U" - Failed ", expected, U" with unexpected ", result, U"\n");
  482. errorCount++;
  483. }
  484. }
  485. static void expectResult(int64_t &errorCount, const List<String> &result, const List<String> &expected) {
  486. if (result.length() != expected.length()) {
  487. printText(U" - Failed\n ", debugTokens(expected), U" with unexpected\n ", debugTokens(result), U" of different token count\n");
  488. errorCount++;
  489. return;
  490. }
  491. for (int64_t t = 0; t < expected.length(); t++) {
  492. if (!string_match(expected[t], result[t])) {
  493. printText(U" - Failed\n ", debugTokens(expected), U" with unexpected\n ", debugTokens(result), U"\n");
  494. errorCount++;
  495. return;
  496. }
  497. }
  498. printText(U"* Passed ", debugTokens(expected), U"\n");
  499. }
  500. void expression_runRegressionTests() {
  501. std::function<String(ReadableString)> context = [](ReadableString identifier) -> String {
  502. if (string_caseInsensitiveMatch(identifier, U"x")) {
  503. return U"5";
  504. } else if (string_caseInsensitiveMatch(identifier, U"doorCount")) {
  505. return U"48";
  506. } else if (string_caseInsensitiveMatch(identifier, U"temperature")) {
  507. return U"-18";
  508. } else {
  509. return U"<ERROR:Unresolved identifier>";
  510. }
  511. };
  512. /*for (int64_t s = 0; s < defaultSyntax.symbols.length(); s++) {
  513. printText(U"Symbol ", defaultSyntax.symbols[s].token, U"\n");
  514. if (validLeftmostToken(s, defaultSyntax)) printText(U" Can be leftmost\n");
  515. if (validRightmostToken(s, defaultSyntax)) printText(U" Can be rightmost\n");
  516. }*/
  517. int64_t ec = 0;
  518. // Tokenize
  519. printText(U"Tokenize test\n");
  520. expectResult(ec, expression_tokenize(U"0 "), combineTokens(U"0"));
  521. expectResult(ec, expression_tokenize(U"first line\nsecond line"), combineTokens(U"first", U"line", U"\n", U"second", U"line"));
  522. expectResult(ec, expression_tokenize(U"#A comment\nfirst line\nsecond line"), combineTokens(U"first", U"line", U"\n", U"second", U"line"));
  523. expectResult(ec, expression_tokenize(U"5+(7-8)"), combineTokens(U"5", U"+", U"(", U"7", U"-", U"8", U")"));
  524. expectResult(ec, expression_tokenize(U"identifier keyword"), combineTokens(U"identifier", U"keyword"));
  525. expectResult(ec, expression_tokenize(U"identifier+keyword"), combineTokens(U"identifier", U"+", U"keyword"));
  526. expectResult(ec, expression_tokenize(U"\t\tidentifier + keyword "), combineTokens(U"identifier", U"+", U"keyword"));
  527. expectResult(ec, expression_tokenize(U"\" My string content \" \t+ \"My other string\""), combineTokens(U"\" My string content \"", U"+", U"\"My other string\""));
  528. expectResult(ec, expression_tokenize(U"\" My string content\n \" \t+ \"My other\n string\""), combineTokens(U"\" My string content\n \"", U"+", U"\"My other\n string\""));
  529. expectResult(ec, expression_tokenize(U" \" My string content\n \" # Comment \n + \"My other\n string\" "), combineTokens(U"\" My string content\n \"", U"+", U"\"My other\n string\""));
  530. // Evaluate from tokens
  531. printText(U"Evaluate from tokens test\n");
  532. expectResult(ec, expression_evaluate(combineTokens(U""), context), U"<ERROR:Unresolved identifier>");
  533. expectResult(ec, expression_evaluate(combineTokens(U"0"), context), U"0");
  534. expectResult(ec, expression_evaluate(combineTokens(U"(", U"19", U")"), context), U"19");
  535. expectResult(ec, expression_evaluate(combineTokens(U"(", U"2", U"+", U"4", U")"), context), U"6");
  536. expectResult(ec, expression_evaluate(combineTokens(U"3"), context), U"3");
  537. expectResult(ec, expression_evaluate(combineTokens(U"-5"), context), U"-5");
  538. expectResult(ec, expression_evaluate(combineTokens(U"-", U"32"), context), U"-32");
  539. expectResult(ec, expression_evaluate(combineTokens(U"3", U"+", U"6"), context), U"9");
  540. expectResult(ec, expression_evaluate(combineTokens(U"x"), context), U"5");
  541. expectResult(ec, expression_evaluate(combineTokens(U"doorCount"), context), U"48");
  542. expectResult(ec, expression_evaluate(combineTokens(U"temperature"), context), U"-18");
  543. expectResult(ec, expression_evaluate(combineTokens(U"nonsense"), context), U"<ERROR:Unresolved identifier>");
  544. expectResult(ec, expression_evaluate(combineTokens(U"6", U"*", U"2", U"+", U"4"), context), U"16");
  545. expectResult(ec, expression_evaluate(combineTokens(U"4", U"+", U"6", U"*", U"2"), context), U"16");
  546. expectResult(ec, expression_evaluate(combineTokens(U"4", U"+", U"(", U"6", U"*", U"2", U")"), context), U"16");
  547. expectResult(ec, expression_evaluate(combineTokens(U"(", U"4", U"+", U"6", U")", U"*", U"2"), context), U"20");
  548. expectResult(ec, expression_evaluate(combineTokens(U"5", U"+", U"-", U"7"), context), U"-2");
  549. expectResult(ec, expression_evaluate(combineTokens(U"5", U"+", U"(", U"-", U"7", U")"), context), U"-2");
  550. expectResult(ec, expression_evaluate(combineTokens(U"5", U"+", U"(", U"-7", U")"), context), U"-2");
  551. expectResult(ec, expression_evaluate(combineTokens(U"5", U"+", U"-7"), context), U"-2");
  552. expectResult(ec, expression_evaluate(combineTokens(U"5", U"-", U"-", U"7"), context), U"12");
  553. expectResult(ec, expression_evaluate(combineTokens(U"5", U"&", U"-", U"7"), context), U"5-7");
  554. expectResult(ec, expression_evaluate(combineTokens(U"(", U"6", U"+", U"8", U")", U"/", U"(", U"9", U"-", U"2", U")"), context), U"2");
  555. expectResult(ec, expression_evaluate(combineTokens(U"(", U"6", U"+", U"8", U")", U"*", U"(", U"9", U"-", U"2", U")"), context), U"98");
  556. expectResult(ec, expression_evaluate(combineTokens(U"&", U"-", U"7"), context), U"<ERROR:Invalid expression>");
  557. expectResult(ec, expression_evaluate(combineTokens(U"(", U"-7"), context), U"<ERROR:Unbalanced expression depth>");
  558. expectResult(ec, expression_evaluate(combineTokens(U")", U"3"), context), U"<ERROR:Negative expression depth>");
  559. expectResult(ec, expression_evaluate(combineTokens(U"[", U"8"), context), U"<ERROR:Unbalanced expression depth>");
  560. expectResult(ec, expression_evaluate(combineTokens(U"]", U"65"), context), U"<ERROR:Negative expression depth>");
  561. expectResult(ec, expression_evaluate(combineTokens(U"{", U"12"), context), U"<ERROR:Unbalanced expression depth>");
  562. expectResult(ec, expression_evaluate(combineTokens(U"}", U"0"), context), U"<ERROR:Negative expression depth>");
  563. expectResult(ec, expression_evaluate(combineTokens(U"12", U"("), context), U"<ERROR:Unbalanced expression depth>");
  564. expectResult(ec, expression_evaluate(combineTokens(U"2", U")"), context), U"<ERROR:Negative expression depth>");
  565. expectResult(ec, expression_evaluate(combineTokens(U"-5", U"["), context), U"<ERROR:Unbalanced expression depth>");
  566. expectResult(ec, expression_evaluate(combineTokens(U"6", U"]"), context), U"<ERROR:Negative expression depth>");
  567. expectResult(ec, expression_evaluate(combineTokens(U"-47", U"{"), context), U"<ERROR:Unbalanced expression depth>");
  568. expectResult(ec, expression_evaluate(combineTokens(U"645", U"}"), context), U"<ERROR:Negative expression depth>");
  569. expectResult(ec, expression_evaluate(combineTokens(U"5", U")", U"+", U"(", U"-7"), context), U"<ERROR:Negative expression depth>");
  570. // Tokenize and evaluate
  571. printText(U"Tokenize and evaluate test\n");
  572. expectResult(ec, expression_evaluate(expression_tokenize(U"0 "), context), U"0");
  573. expectResult(ec, expression_evaluate(expression_tokenize(U"(19)"), context), U"19");
  574. expectResult(ec, expression_evaluate(expression_tokenize(U"( 2+4)"), context), U"6");
  575. expectResult(ec, expression_evaluate(expression_tokenize(U"3"), context), U"3");
  576. expectResult(ec, expression_evaluate(expression_tokenize(U"- 5"), context), U"-5");
  577. expectResult(ec, expression_evaluate(expression_tokenize(U" -32"), context), U"-32");
  578. expectResult(ec, expression_evaluate(expression_tokenize(U"3+ 6"), context), U"9");
  579. expectResult(ec, expression_evaluate(expression_tokenize(U"x\t"), context), U"5");
  580. expectResult(ec, expression_evaluate(expression_tokenize(U"doorCount"), context), U"48");
  581. expectResult(ec, expression_evaluate(expression_tokenize(U"temperature"), context), U"-18");
  582. expectResult(ec, expression_evaluate(expression_tokenize(U"nonsense"), context), U"<ERROR:Unresolved identifier>");
  583. expectResult(ec, expression_evaluate(expression_tokenize(U"6*2+4"), context), U"16");
  584. expectResult(ec, expression_evaluate(expression_tokenize(U"4+ 6*2"), context), U"16");
  585. expectResult(ec, expression_evaluate(expression_tokenize(U"4+(6* 2)"), context), U"16");
  586. expectResult(ec, expression_evaluate(expression_tokenize(U"(4+6)*2"), context), U"20");
  587. expectResult(ec, expression_evaluate(expression_tokenize(U"5+- 7"), context), U"-2");
  588. expectResult(ec, expression_evaluate(expression_tokenize(U"5+(-7)"), context), U"-2");
  589. expectResult(ec, expression_evaluate(expression_tokenize(U"5+(-7)"), context), U"-2");
  590. expectResult(ec, expression_evaluate(expression_tokenize(U"5+-7"), context), U"-2");
  591. expectResult(ec, expression_evaluate(expression_tokenize(U"5--7 "), context), U"12");
  592. expectResult(ec, expression_evaluate(expression_tokenize(U"5&-7"), context), U"5-7");
  593. expectResult(ec, expression_evaluate(expression_tokenize(U"(6+8)/(9-2)"), context), U"2");
  594. expectResult(ec, expression_evaluate(expression_tokenize(U"(6+8)*(9-2)"), context), U"98");
  595. expectResult(ec, expression_evaluate(expression_tokenize(U" &-7"), context), U"<ERROR:Invalid expression>");
  596. expectResult(ec, expression_evaluate(expression_tokenize(U"(- 7"), context), U"<ERROR:Unbalanced expression depth>");
  597. expectResult(ec, expression_evaluate(expression_tokenize(U")3"), context), U"<ERROR:Negative expression depth>");
  598. expectResult(ec, expression_evaluate(expression_tokenize(U"[8"), context), U"<ERROR:Unbalanced expression depth>");
  599. expectResult(ec, expression_evaluate(expression_tokenize(U"] 65"), context), U"<ERROR:Negative expression depth>");
  600. expectResult(ec, expression_evaluate(expression_tokenize(U"{12"), context), U"<ERROR:Unbalanced expression depth>");
  601. expectResult(ec, expression_evaluate(expression_tokenize(U"}0"), context), U"<ERROR:Negative expression depth>");
  602. expectResult(ec, expression_evaluate(expression_tokenize(U"12("), context), U"<ERROR:Unbalanced expression depth>");
  603. expectResult(ec, expression_evaluate(expression_tokenize(U"2)"), context), U"<ERROR:Negative expression depth>");
  604. expectResult(ec, expression_evaluate(expression_tokenize(U"-5["), context), U"<ERROR:Unbalanced expression depth>");
  605. expectResult(ec, expression_evaluate(expression_tokenize(U"6]"), context), U"<ERROR:Negative expression depth>");
  606. expectResult(ec, expression_evaluate(expression_tokenize(U"-47 {"), context), U"<ERROR:Unbalanced expression depth>");
  607. expectResult(ec, expression_evaluate(expression_tokenize(U"645}"), context), U"<ERROR:Negative expression depth>");
  608. expectResult(ec, expression_evaluate(expression_tokenize(U"5)+(-7"), context), U"<ERROR:Negative expression depth>");
  609. printText(U"Completed regression tests of expressions with ", ec, U" errors in total.\n");
  610. }