// Lexer.cs
  1. /*
  2. The MIT License (MIT)
  3. Copyright (c) 2015-2017 Secret Lab Pty. Ltd. and Yarn Spinner contributors.
  4. Permission is hereby granted, free of charge, to any person obtaining a copy
  5. of this software and associated documentation files (the "Software"), to deal
  6. in the Software without restriction, including without limitation the rights
  7. to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8. copies of the Software, and to permit persons to whom the Software is
  9. furnished to do so, subject to the following conditions:
  10. The above copyright notice and this permission notice shall be included in all
  11. copies or substantial portions of the Software.
  12. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  13. IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  14. FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  15. AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  16. LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  17. OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  18. SOFTWARE.
  19. */
  20. using System;
  21. using System.Text.RegularExpressions;
  22. using System.Collections.Generic;
  23. namespace Yarn {
  24. internal class TokeniserException : InvalidOperationException {
  25. public int lineNumber;
  26. public int columnNumber;
  27. public TokeniserException (string message) : base (message) {}
  28. public TokeniserException (int lineNumber, int columnNumber, string message)
  29. : base(string.Format ("{0}:{1}: {2}", lineNumber, columnNumber, message))
  30. {
  31. this.lineNumber = lineNumber;
  32. this.columnNumber = columnNumber;
  33. }
  34. public static TokeniserException ExpectedTokensFromState (int lineNumber, int columnNumber, Lexer.LexerState state) {
  35. var names = new List<string> ();
  36. foreach (var tokenRule in state.tokenRules) {
  37. names.Add (tokenRule.type.ToString ());
  38. }
  39. string nameList;
  40. if (names.Count > 1) {
  41. nameList = String.Join (", ", names.ToArray (), 0, names.Count - 1);
  42. nameList += ", or " + names [names.Count - 1];
  43. } else {
  44. nameList = names [0];
  45. }
  46. var message = string.Format ("Expected {0}", nameList);
  47. return new TokeniserException (lineNumber, columnNumber, message);
  48. }
  49. }
  50. // save some typing, we deal with lists of tokens a LOT
  51. internal class TokenList : List<Token> {
  52. // quick constructor to make it easier to create
  53. // TokenLists with a list of tokens
  54. public TokenList (params Token[] tokens) : base()
  55. {
  56. AddRange(tokens);
  57. }
  58. }
// Every kind of token the Lexer can emit.
internal enum TokenType {

    // Special tokens
    Whitespace,
    Indent,
    Dedent,
    EndOfLine,
    EndOfInput,

    // Numbers. Everybody loves a number
    Number,

    // Strings. Everybody also loves a string
    String,

    // '#'
    TagMarker,

    // Command syntax ("<<foo>>")
    BeginCommand,
    EndCommand,

    // Variables ("$foo")
    Variable,

    // Shortcut syntax ("->")
    ShortcutOption,

    // Option syntax ("[[Let's go here|Destination]]")
    OptionStart, // [[
    OptionDelimit, // |
    OptionEnd, // ]]

    // Command types (specially recognised command word)
    If,
    ElseIf,
    Else,
    EndIf,
    Set,

    // Boolean values
    True,
    False,

    // The null value
    Null,

    // Parentheses
    LeftParen,
    RightParen,

    // Parameter delimiters
    Comma,

    // Operators
    EqualTo, // ==, eq, is
    GreaterThan, // >, gt
    GreaterThanOrEqualTo, // >=, gte
    LessThan, // <, lt
    LessThanOrEqualTo, // <=, lte
    NotEqualTo, // !=, neq

    // Logical operators
    Or, // ||, or
    And, // &&, and
    Xor, // ^, xor
    Not, // !, not

    // this guy's special because '=' can mean either 'equal to'
    // or 'becomes' depending on context
    EqualToOrAssign, // =, to

    UnaryMinus, // -; this is differentiated from Minus
                // when parsing expressions

    Add, // +
    Minus, // -
    Multiply, // *
    Divide, // /
    Modulo, // %

    AddAssign, // +=
    MinusAssign, // -=
    MultiplyAssign, // *=
    DivideAssign, // /=

    Comment, // a run of text that we ignore
    Identifier, // a single word (used for functions)
    Text // a run of text until we hit other syntax
}
  129. // A parsed token.
  130. internal class Token {
  131. // The token itself
  132. public TokenType type;
  133. public string value; // optional
  134. // Where we found this token
  135. public int lineNumber;
  136. public int columnNumber;
  137. public string context;
  138. public bool delimitsText = false;
  139. // If this is a function in an expression, this is the number
  140. // of parameters that were encountered
  141. public int parameterCount;
  142. // The state that the lexer was in when this token was emitted
  143. public string lexerState;
  144. public Token(TokenType type, Lexer.LexerState lexerState, int lineNumber = -1, int columnNumber = -1, string value=null) {
  145. this.type = type;
  146. this.value = value;
  147. this.lineNumber = lineNumber;
  148. this.columnNumber = columnNumber;
  149. this.lexerState = lexerState.name;
  150. }
  151. public override string ToString() {
  152. if (this.value != null) {
  153. return string.Format("{0} ({1}) at {2}:{3} (state: {4})", type.ToString(), value.ToString(), lineNumber, columnNumber, lexerState);
  154. } else {
  155. return string.Format ("{0} at {1}:{2} (state: {3})", type, lineNumber, columnNumber, lexerState);
  156. }
  157. }
  158. }
  159. internal class Lexer {
  160. internal class LexerState {
  161. public string name;
  162. private Dictionary<TokenType, string> patterns;
  163. public LexerState (Dictionary<TokenType, string> patterns)
  164. {
  165. this.patterns = patterns;
  166. }
  167. public List<TokenRule> tokenRules = new List<TokenRule>();
  168. public TokenRule AddTransition(TokenType type, string entersState = null, bool delimitsText = false) {
  169. var pattern = string.Format (@"\G{0}", patterns [type]);
  170. var rule = new TokenRule (type, new Regex(pattern), entersState, delimitsText);
  171. tokenRules.Add(rule);
  172. return rule;
  173. }
  174. // A "text" rule matches everything that it possibly can, up to ANY of
  175. // the rules that already exist.
  176. public TokenRule AddTextRule (TokenType type, string entersState = null)
  177. {
  178. if (containsTextRule) {
  179. throw new InvalidOperationException ("State already contains a text rule");
  180. }
  181. var delimiterRules = new List<string>();
  182. foreach (var otherRule in tokenRules) {
  183. if (otherRule.delimitsText == true)
  184. delimiterRules.Add (string.Format ("({0})", otherRule.regex.ToString().Substring(2)));
  185. }
  186. // Create a regex that matches all text up to but not including
  187. // any of the delimiter rules
  188. var pattern = string.Format (@"\G((?!{0}).)*",
  189. string.Join ("|", delimiterRules.ToArray()));
  190. var rule = AddTransition(type, entersState);
  191. rule.regex = new Regex (pattern);
  192. rule.isTextRule = true;
  193. return rule;
  194. }
  195. public bool containsTextRule {
  196. get {
  197. foreach (var rule in tokenRules) {
  198. if (rule.isTextRule)
  199. return true;
  200. }
  201. return false;
  202. }
  203. }
  204. public bool setTrackNextIndentation = false;
  205. }
  206. internal class TokenRule {
  207. public Regex regex = null;
  208. // set to null if it should stay in the same state
  209. public string entersState;
  210. public TokenType type;
  211. public bool isTextRule = false;
  212. public bool delimitsText = false;
  213. public TokenRule (TokenType type, Regex regex, string entersState = null, bool delimitsText = false)
  214. {
  215. this.regex = regex;
  216. this.entersState = entersState;
  217. this.type = type;
  218. this.delimitsText = delimitsText;
  219. }
  220. public override string ToString ()
  221. {
  222. return string.Format (string.Format ("[TokenRule: {0} - {1}]", type, this.regex));
  223. }
  224. }
  225. // Single-line comments. If this is encountered at any point, the rest of the line is skipped.
  226. const string LINE_COMMENT = "//";
  227. Dictionary<string, LexerState> states;
  228. LexerState defaultState;
  229. LexerState currentState;
  230. // tracks indentation levels, and whether an
  231. // indent token was emitted for each level
  232. Stack<KeyValuePair<int,bool>> indentationStack;
  233. bool shouldTrackNextIndentation;
  234. public Lexer ()
  235. {
  236. CreateStates ();
  237. }
  238. void CreateStates ()
  239. {
  240. var patterns = new Dictionary<TokenType, string> ();
  241. patterns[TokenType.Text] = ".*";
  242. patterns[TokenType.Number] = @"\-?[0-9]+(\.[0-9+])?";
  243. patterns[TokenType.String] = @"""([^""\\]*(?:\\.[^""\\]*)*)""";
  244. patterns[TokenType.TagMarker] = @"\#";
  245. patterns[TokenType.LeftParen] = @"\(";
  246. patterns[TokenType.RightParen] = @"\)";
  247. patterns[TokenType.EqualTo] = @"(==|is(?!\w)|eq(?!\w))";
  248. patterns[TokenType.EqualToOrAssign] = @"(=|to(?!\w))";
  249. patterns[TokenType.NotEqualTo] = @"(\!=|neq(?!\w))";
  250. patterns[TokenType.GreaterThanOrEqualTo] = @"(\>=|gte(?!\w))";
  251. patterns[TokenType.GreaterThan] = @"(\>|gt(?!\w))";
  252. patterns[TokenType.LessThanOrEqualTo] = @"(\<=|lte(?!\w))";
  253. patterns[TokenType.LessThan] = @"(\<|lt(?!\w))";
  254. patterns[TokenType.AddAssign] = @"\+=";
  255. patterns[TokenType.MinusAssign] = @"\-=";
  256. patterns[TokenType.MultiplyAssign] = @"\*=";
  257. patterns[TokenType.DivideAssign] = @"\/=";
  258. patterns[TokenType.Add] = @"\+";
  259. patterns[TokenType.Minus] = @"\-";
  260. patterns[TokenType.Multiply] = @"\*";
  261. patterns[TokenType.Divide] = @"\/";
  262. patterns[TokenType.Modulo] = @"\%";
  263. patterns[TokenType.And] = @"(\&\&|and(?!\w))";
  264. patterns[TokenType.Or] = @"(\|\||or(?!\w))";
  265. patterns[TokenType.Xor] = @"(\^|xor(?!\w))";
  266. patterns[TokenType.Not] = @"(\!|not(?!\w))";
  267. patterns[TokenType.Variable] = @"\$([A-Za-z0-9_\.])+";
  268. patterns[TokenType.Comma] = @",";
  269. patterns[TokenType.True] = @"true(?!\w)";
  270. patterns[TokenType.False] = @"false(?!\w)";
  271. patterns[TokenType.Null] = @"null(?!\w)";
  272. patterns[TokenType.BeginCommand] = @"\<\<";
  273. patterns[TokenType.EndCommand] = @"\>\>";
  274. patterns[TokenType.OptionStart] = @"\[\[";
  275. patterns[TokenType.OptionEnd] = @"\]\]";
  276. patterns[TokenType.OptionDelimit] = @"\|";
  277. patterns[TokenType.Identifier] = @"[a-zA-Z0-9_:\.]+";
  278. patterns[TokenType.If] = @"if(?!\w)";
  279. patterns[TokenType.Else] = @"else(?!\w)";
  280. patterns[TokenType.ElseIf] = @"elseif(?!\w)";
  281. patterns[TokenType.EndIf] = @"endif(?!\w)";
  282. patterns[TokenType.Set] = @"set(?!\w)";
  283. patterns[TokenType.ShortcutOption] = @"\-\>";
  284. states = new Dictionary<string, LexerState> ();
  285. states ["base"] = new LexerState (patterns);
  286. states ["base"].AddTransition(TokenType.BeginCommand, "command", delimitsText:true);
  287. states ["base"].AddTransition(TokenType.OptionStart, "link", delimitsText:true);
  288. states ["base"].AddTransition(TokenType.ShortcutOption, "shortcut-option");
  289. states ["base"].AddTransition (TokenType.TagMarker, "tag", delimitsText: true);
  290. states ["base"].AddTextRule (TokenType.Text);
  291. states ["tag"] = new LexerState (patterns);
  292. states ["tag"].AddTransition (TokenType.Identifier, "base");
  293. states ["shortcut-option"] = new LexerState (patterns);
  294. states ["shortcut-option"].setTrackNextIndentation = true;
  295. states ["shortcut-option"].AddTransition (TokenType.BeginCommand, "expression", delimitsText: true);
  296. states ["shortcut-option"].AddTransition (TokenType.TagMarker, "shortcut-option-tag", delimitsText: true);
  297. states ["shortcut-option"].AddTextRule (TokenType.Text, "base");
  298. states ["shortcut-option-tag"] = new LexerState (patterns);
  299. states ["shortcut-option-tag"].AddTransition (TokenType.Identifier, "shortcut-option");
  300. states ["command"] = new LexerState (patterns);
  301. states ["command"].AddTransition (TokenType.If, "expression");
  302. states ["command"].AddTransition (TokenType.Else);
  303. states ["command"].AddTransition (TokenType.ElseIf, "expression");
  304. states ["command"].AddTransition (TokenType.EndIf);
  305. states ["command"].AddTransition (TokenType.Set, "assignment");
  306. states ["command"].AddTransition (TokenType.EndCommand, "base", delimitsText: true);
  307. states ["command"].AddTransition (TokenType.Identifier, "command-or-expression");
  308. states ["command"].AddTextRule (TokenType.Text);
  309. states ["command-or-expression"] = new LexerState (patterns);
  310. states ["command-or-expression"].AddTransition (TokenType.LeftParen, "expression");
  311. states ["command-or-expression"].AddTransition (TokenType.EndCommand, "base", delimitsText:true);
  312. states ["command-or-expression"].AddTextRule (TokenType.Text);
  313. states ["assignment"] = new LexerState (patterns);
  314. states ["assignment"].AddTransition(TokenType.Variable);
  315. states ["assignment"].AddTransition(TokenType.EqualToOrAssign, "expression");
  316. states ["assignment"].AddTransition(TokenType.AddAssign, "expression");
  317. states ["assignment"].AddTransition(TokenType.MinusAssign, "expression");
  318. states ["assignment"].AddTransition(TokenType.MultiplyAssign, "expression");
  319. states ["assignment"].AddTransition(TokenType.DivideAssign, "expression");
  320. states ["expression"] = new LexerState (patterns);
  321. states ["expression"].AddTransition(TokenType.EndCommand, "base");
  322. states ["expression"].AddTransition(TokenType.Number);
  323. states ["expression"].AddTransition(TokenType.String);
  324. states ["expression"].AddTransition(TokenType.LeftParen);
  325. states ["expression"].AddTransition(TokenType.RightParen);
  326. states ["expression"].AddTransition(TokenType.EqualTo);
  327. states ["expression"].AddTransition(TokenType.EqualToOrAssign);
  328. states ["expression"].AddTransition(TokenType.NotEqualTo);
  329. states ["expression"].AddTransition(TokenType.GreaterThanOrEqualTo);
  330. states ["expression"].AddTransition(TokenType.GreaterThan);
  331. states ["expression"].AddTransition(TokenType.LessThanOrEqualTo);
  332. states ["expression"].AddTransition(TokenType.LessThan);
  333. states ["expression"].AddTransition(TokenType.Add);
  334. states ["expression"].AddTransition(TokenType.Minus);
  335. states ["expression"].AddTransition(TokenType.Multiply);
  336. states ["expression"].AddTransition(TokenType.Divide);
  337. states ["expression"].AddTransition (TokenType.Modulo);
  338. states ["expression"].AddTransition(TokenType.And);
  339. states ["expression"].AddTransition(TokenType.Or);
  340. states ["expression"].AddTransition(TokenType.Xor);
  341. states ["expression"].AddTransition(TokenType.Not);
  342. states ["expression"].AddTransition(TokenType.Variable);
  343. states ["expression"].AddTransition(TokenType.Comma);
  344. states ["expression"].AddTransition(TokenType.True);
  345. states ["expression"].AddTransition(TokenType.False);
  346. states ["expression"].AddTransition(TokenType.Null);
  347. states ["expression"].AddTransition(TokenType.Identifier);
  348. states ["link"] = new LexerState (patterns);
  349. states ["link"].AddTransition (TokenType.OptionEnd, "base", delimitsText:true);
  350. states ["link"].AddTransition (TokenType.OptionDelimit, "link-destination", delimitsText:true);
  351. states ["link"].AddTextRule (TokenType.Text);
  352. states ["link-destination"] = new LexerState (patterns);
  353. states ["link-destination"].AddTransition (TokenType.Identifier);
  354. states ["link-destination"].AddTransition (TokenType.OptionEnd, "base");
  355. defaultState = states ["base"];
  356. // Make all states aware of their names
  357. foreach (KeyValuePair<string, LexerState> entry in states) {
  358. entry.Value.name = entry.Key;
  359. }
  360. }
  361. public TokenList Tokenise (string title, string text)
  362. {
  363. // Do some initial setup
  364. indentationStack = new Stack<KeyValuePair<int,bool>> ();
  365. indentationStack.Push (new KeyValuePair<int, bool>(0, false));
  366. shouldTrackNextIndentation = false;
  367. var tokens = new TokenList();
  368. currentState = defaultState;
  369. // Parse each line
  370. var lines = new List<string>(text.Split ('\n'));
  371. // Add a blank line to ensure that we end with zero indentation
  372. lines.Add("");
  373. int lineNumber = 1;
  374. foreach (var line in lines) {
  375. tokens.AddRange (this.TokeniseLine (line, lineNumber));
  376. lineNumber++;
  377. }
  378. var endOfInput = new Token (TokenType.EndOfInput, currentState, lineNumber, 0);
  379. tokens.Add (endOfInput);
  380. return tokens;
  381. }
  382. TokenList TokeniseLine (string line, int lineNumber)
  383. {
  384. var lineTokens = new Stack<Token> ();
  385. // Replace tabs with four spaces
  386. line = line.Replace ("\t", " ");
  387. // Strip out \r's
  388. line = line.Replace("\r", "");
  389. // Record the indentation level if the previous state wants us to
  390. var thisIndentation = LineIndentation (line);
  391. var previousIndentation = indentationStack.Peek ();
  392. if (shouldTrackNextIndentation && thisIndentation > previousIndentation.Key) {
  393. // If we are more indented than before, emit an
  394. // indent token and record this new indent level
  395. indentationStack.Push (new KeyValuePair<int, bool>(thisIndentation, true));
  396. var indent = new Token (TokenType.Indent, currentState, lineNumber, previousIndentation.Key);
  397. indent.value = "".PadLeft (thisIndentation - previousIndentation.Key);
  398. shouldTrackNextIndentation = false;
  399. lineTokens.Push (indent);
  400. } else if (thisIndentation < previousIndentation.Key) {
  401. // If we are less indented, emit a dedent for every
  402. // indentation level that we passed on the way back to 0 that also
  403. // emitted an indentation token.
  404. // at the same time, remove those indent levels from the stack
  405. while (thisIndentation < indentationStack.Peek ().Key) {
  406. var topLevel = indentationStack.Pop ();
  407. if (topLevel.Value) {
  408. var dedent = new Token (TokenType.Dedent, currentState, lineNumber, 0);
  409. lineTokens.Push (dedent);
  410. }
  411. }
  412. }
  413. // Now that we're past any initial indentation, start
  414. // finding tokens.
  415. int columnNumber = thisIndentation;
  416. var whitespace = new Regex (@"\s*");
  417. while (columnNumber < line.Length) {
  418. // If we're about to hit a line comment, abort processing line
  419. // immediately
  420. if (line.Substring(columnNumber).StartsWith(LINE_COMMENT)) {
  421. break;
  422. }
  423. var matched = false;
  424. foreach (var rule in currentState.tokenRules) {
  425. var match = rule.regex.Match (line, columnNumber);
  426. if (match.Success == false || match.Length == 0)
  427. continue;
  428. string tokenText;
  429. if (rule.type == TokenType.Text) {
  430. // if this is text, then back up to the most recent text
  431. // delimiting token, and treat everything from there as
  432. // the text.
  433. // we do this because we don't want this:
  434. // <<flip Harley3 +1>>
  435. // to get matched as this:
  436. // BeginCommand Identifier("flip") Text("Harley3 +1") EndCommand
  437. // instead, we want to match it as this:
  438. // BeginCommand Text("flip Harley3 +1") EndCommand
  439. int textStartIndex = thisIndentation;
  440. if (lineTokens.Count > 0) {
  441. while (lineTokens.Peek().type == TokenType.Identifier) {
  442. lineTokens.Pop ();
  443. }
  444. var startDelimiterToken = lineTokens.Peek ();
  445. textStartIndex = startDelimiterToken.columnNumber;
  446. if (startDelimiterToken.type == TokenType.Indent)
  447. textStartIndex += startDelimiterToken.value.Length;
  448. if (startDelimiterToken.type == TokenType.Dedent)
  449. textStartIndex = thisIndentation;
  450. }
  451. columnNumber = textStartIndex;
  452. var textEndIndex = match.Index + match.Length;
  453. tokenText = line.Substring (textStartIndex, textEndIndex-textStartIndex);
  454. } else {
  455. tokenText = match.Value;
  456. }
  457. columnNumber += tokenText.Length;
  458. // If this was a string, lop off the quotes at the start and
  459. // end, and un-escape the quotes and slashes
  460. if (rule.type == TokenType.String) {
  461. tokenText = tokenText.Substring (1, tokenText.Length - 2);
  462. tokenText = tokenText.Replace (@"\\", @"\");
  463. tokenText = tokenText.Replace (@"\""", @"""");
  464. }
  465. var token = new Token (rule.type, currentState, lineNumber, columnNumber, tokenText);
  466. token.delimitsText = rule.delimitsText;
  467. lineTokens.Push (token);
  468. if (rule.entersState != null) {
  469. if (states.ContainsKey(rule.entersState) == false) {
  470. throw new TokeniserException (lineNumber, columnNumber, "Unknown tokeniser state " + rule.entersState);
  471. }
  472. EnterState (states [rule.entersState]);
  473. if (shouldTrackNextIndentation == true) {
  474. if (indentationStack.Peek().Key < thisIndentation) {
  475. indentationStack.Push (new KeyValuePair<int, bool>(thisIndentation, false));
  476. }
  477. }
  478. }
  479. matched = true;
  480. break;
  481. }
  482. if (matched == false) {
  483. throw TokeniserException.ExpectedTokensFromState (lineNumber, columnNumber, currentState);
  484. }
  485. // consume any lingering whitespace before the next token
  486. var lastWhitespace = whitespace.Match(line, columnNumber);
  487. if (lastWhitespace != null) {
  488. columnNumber += lastWhitespace.Length;
  489. }
  490. }
  491. var listToReturn = new TokenList (lineTokens.ToArray ());
  492. listToReturn.Reverse ();
  493. return listToReturn;
  494. }
  495. int LineIndentation(string line)
  496. {
  497. var initialIndentRegex = new Regex (@"^(\s*)");
  498. var match = initialIndentRegex.Match (line);
  499. if (match == null || match.Groups [0] == null) {
  500. return 0;
  501. }
  502. return match.Groups [0].Length;
  503. }
  504. void EnterState(LexerState state) {
  505. currentState = state;
  506. if (currentState.setTrackNextIndentation)
  507. shouldTrackNextIndentation = true;
  508. }
  509. }
  510. }