/*
The MIT License (MIT)

Copyright (c) 2015-2017 Secret Lab Pty. Ltd. and Yarn Spinner contributors.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
using System;
using System.Text.RegularExpressions;
using System.Collections.Generic;

namespace Yarn {
    // Thrown when the tokeniser cannot make progress, for example when no
    // rule in the current lexer state matches the input.
    internal class TokeniserException : InvalidOperationException {

        public int lineNumber;
        public int columnNumber;

        public TokeniserException (string message) : base (message) {}

        public TokeniserException (int lineNumber, int columnNumber, string message)
            : base (string.Format ("{0}:{1}: {2}", lineNumber, columnNumber, message))
        {
            this.lineNumber = lineNumber;
            this.columnNumber = columnNumber;
        }

        // Builds an exception whose message lists every token type that the
        // given state could have accepted, e.g.
        // "4:12: Expected Number, String, or Identifier".
        public static TokeniserException ExpectedTokensFromState (int lineNumber, int columnNumber, Lexer.LexerState state) {
            var names = new List<string> ();
            foreach (var tokenRule in state.tokenRules) {
                names.Add (tokenRule.type.ToString ());
            }

            string nameList;

            if (names.Count > 1) {
                nameList = String.Join (", ", names.ToArray (), 0, names.Count - 1);
                nameList += ", or " + names [names.Count - 1];
            } else {
                nameList = names [0];
            }

            var message = string.Format ("Expected {0}", nameList);

            return new TokeniserException (lineNumber, columnNumber, message);
        }
    }
    // Save some typing; we deal with lists of tokens a LOT
    internal class TokenList : List<Token> {

        // Quick constructor to make it easier to create
        // TokenLists with a list of tokens
        public TokenList (params Token[] tokens) : base ()
        {
            AddRange (tokens);
        }
    }
    internal enum TokenType {

        // Special tokens
        Whitespace,
        Indent,
        Dedent,
        EndOfLine,
        EndOfInput,

        // Numbers. Everybody loves a number
        Number,

        // Strings. Everybody also loves a string
        String,

        // '#'
        TagMarker,

        // Command syntax ("<<foo>>")
        BeginCommand,
        EndCommand,

        // Variables ("$foo")
        Variable,

        // Shortcut syntax ("->")
        ShortcutOption,

        // Option syntax ("[[Let's go here|Destination]]")
        OptionStart,   // [[
        OptionDelimit, // |
        OptionEnd,     // ]]

        // Command types (specially recognised command word)
        If,
        ElseIf,
        Else,
        EndIf,
        Set,

        // Boolean values
        True,
        False,

        // The null value
        Null,

        // Parentheses
        LeftParen,
        RightParen,

        // Parameter delimiters
        Comma,

        // Operators
        EqualTo,              // ==, eq, is
        GreaterThan,          // >, gt
        GreaterThanOrEqualTo, // >=, gte
        LessThan,             // <, lt
        LessThanOrEqualTo,    // <=, lte
        NotEqualTo,           // !=, neq

        // Logical operators
        Or,  // ||, or
        And, // &&, and
        Xor, // ^, xor
        Not, // !, not

        // This guy's special because '=' can mean either 'equal to'
        // or 'becomes' depending on context
        EqualToOrAssign, // =, to

        UnaryMinus, // -; this is differentiated from Minus
                    // when parsing expressions

        Add,      // +
        Minus,    // -
        Multiply, // *
        Divide,   // /
        Modulo,   // %

        AddAssign,      // +=
        MinusAssign,    // -=
        MultiplyAssign, // *=
        DivideAssign,   // /=

        Comment,    // a run of text that we ignore
        Identifier, // a single word (used for functions)

        Text        // a run of text until we hit other syntax
    }
    // A parsed token.
    internal class Token {

        // The token itself
        public TokenType type;
        public string value; // optional

        // Where we found this token
        public int lineNumber;
        public int columnNumber;
        public string context;

        // True if the rule that produced this token delimits free-running text
        public bool delimitsText = false;

        // If this is a function in an expression, this is the number
        // of parameters that were encountered
        public int parameterCount;

        // The state that the lexer was in when this token was emitted
        public string lexerState;

        public Token (TokenType type, Lexer.LexerState lexerState, int lineNumber = -1, int columnNumber = -1, string value = null) {
            this.type = type;
            this.value = value;
            this.lineNumber = lineNumber;
            this.columnNumber = columnNumber;
            this.lexerState = lexerState.name;
        }

        // Produces something like "Number (5) at 3:14 (state: expression)"
        public override string ToString () {
            if (this.value != null) {
                return string.Format ("{0} ({1}) at {2}:{3} (state: {4})", type, value, lineNumber, columnNumber, lexerState);
            } else {
                return string.Format ("{0} at {1}:{2} (state: {3})", type, lineNumber, columnNumber, lexerState);
            }
        }
    }
    internal class Lexer {

        // A lexer state: a named collection of token rules. The lexer is a
        // small state machine; each matched rule can optionally move it into
        // another state.
        internal class LexerState {

            public string name;

            private Dictionary<TokenType, string> patterns;

            public LexerState (Dictionary<TokenType, string> patterns)
            {
                this.patterns = patterns;
            }

            public List<TokenRule> tokenRules = new List<TokenRule> ();

            public TokenRule AddTransition (TokenType type, string entersState = null, bool delimitsText = false) {
                // Anchor the pattern so it only matches at the current position
                var pattern = string.Format (@"\G{0}", patterns [type]);
                var rule = new TokenRule (type, new Regex (pattern), entersState, delimitsText);
                tokenRules.Add (rule);
                return rule;
            }

            // A "text" rule matches everything that it possibly can, up to ANY of
            // the text-delimiting rules that already exist in this state.
            public TokenRule AddTextRule (TokenType type, string entersState = null)
            {
                if (containsTextRule) {
                    throw new InvalidOperationException ("State already contains a text rule");
                }

                var delimiterRules = new List<string> ();

                foreach (var otherRule in tokenRules) {
                    if (otherRule.delimitsText == true)
                        // Substring(2) strips the leading \G anchor that
                        // AddTransition prepended to the pattern
                        delimiterRules.Add (string.Format ("({0})", otherRule.regex.ToString ().Substring (2)));
                }

                // Create a regex that matches all text up to but not including
                // any of the delimiter rules
                var pattern = string.Format (@"\G((?!{0}).)*",
                    string.Join ("|", delimiterRules.ToArray ()));

                var rule = AddTransition (type, entersState);
                rule.regex = new Regex (pattern);
                rule.isTextRule = true;
                return rule;
            }

            public bool containsTextRule {
                get {
                    foreach (var rule in tokenRules) {
                        if (rule.isTextRule)
                            return true;
                    }
                    return false;
                }
            }

            // When true, the line following the one that put us in this state
            // may begin an indented block (used for shortcut options)
            public bool setTrackNextIndentation = false;
        }
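
        // As a rough illustration of AddTextRule: in the "base" state defined
        // below, the delimiting rules are BeginCommand ("<<"), OptionStart ("[[")
        // and TagMarker ("#"), so the generated text pattern should come out as
        // something like @"\G((?!(\<\<)|(\[\[)|(\#)).)*" - that is, consume
        // characters until any of the delimiters could match.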
        internal class TokenRule {

            public Regex regex = null;

            // The state to enter after this rule matches;
            // null means stay in the same state
            public string entersState;

            public TokenType type;
            public bool isTextRule = false;
            public bool delimitsText = false;

            public TokenRule (TokenType type, Regex regex, string entersState = null, bool delimitsText = false)
            {
                this.regex = regex;
                this.entersState = entersState;
                this.type = type;
                this.delimitsText = delimitsText;
            }

            public override string ToString ()
            {
                return string.Format ("[TokenRule: {0} - {1}]", type, regex);
            }
        }
        // Single-line comments. If this is encountered at any point, the rest of the line is skipped.
        const string LINE_COMMENT = "//";

        Dictionary<string, LexerState> states;

        LexerState defaultState;
        LexerState currentState;

        // Tracks indentation levels, and whether an
        // indent token was emitted for each level
        Stack<KeyValuePair<int, bool>> indentationStack;
        bool shouldTrackNextIndentation;

        public Lexer ()
        {
            CreateStates ();
        }
        void CreateStates ()
        {
            var patterns = new Dictionary<TokenType, string> ();

            patterns[TokenType.Text] = ".*";

            patterns[TokenType.Number] = @"\-?[0-9]+(\.[0-9]+)?";
            patterns[TokenType.String] = @"""([^""\\]*(?:\\.[^""\\]*)*)""";
            patterns[TokenType.TagMarker] = @"\#";
            patterns[TokenType.LeftParen] = @"\(";
            patterns[TokenType.RightParen] = @"\)";

            // Word-style operators use (?!\w) so that, for example, "is"
            // doesn't match the start of a longer word
            patterns[TokenType.EqualTo] = @"(==|is(?!\w)|eq(?!\w))";
            patterns[TokenType.EqualToOrAssign] = @"(=|to(?!\w))";
            patterns[TokenType.NotEqualTo] = @"(\!=|neq(?!\w))";
            patterns[TokenType.GreaterThanOrEqualTo] = @"(\>=|gte(?!\w))";
            patterns[TokenType.GreaterThan] = @"(\>|gt(?!\w))";
            patterns[TokenType.LessThanOrEqualTo] = @"(\<=|lte(?!\w))";
            patterns[TokenType.LessThan] = @"(\<|lt(?!\w))";

            patterns[TokenType.AddAssign] = @"\+=";
            patterns[TokenType.MinusAssign] = @"\-=";
            patterns[TokenType.MultiplyAssign] = @"\*=";
            patterns[TokenType.DivideAssign] = @"\/=";

            patterns[TokenType.Add] = @"\+";
            patterns[TokenType.Minus] = @"\-";
            patterns[TokenType.Multiply] = @"\*";
            patterns[TokenType.Divide] = @"\/";
            patterns[TokenType.Modulo] = @"\%";

            patterns[TokenType.And] = @"(\&\&|and(?!\w))";
            patterns[TokenType.Or] = @"(\|\||or(?!\w))";
            patterns[TokenType.Xor] = @"(\^|xor(?!\w))";
            patterns[TokenType.Not] = @"(\!|not(?!\w))";

            patterns[TokenType.Variable] = @"\$([A-Za-z0-9_\.])+";
            patterns[TokenType.Comma] = @",";
            patterns[TokenType.True] = @"true(?!\w)";
            patterns[TokenType.False] = @"false(?!\w)";
            patterns[TokenType.Null] = @"null(?!\w)";

            patterns[TokenType.BeginCommand] = @"\<\<";
            patterns[TokenType.EndCommand] = @"\>\>";
            patterns[TokenType.OptionStart] = @"\[\[";
            patterns[TokenType.OptionEnd] = @"\]\]";
            patterns[TokenType.OptionDelimit] = @"\|";
            patterns[TokenType.Identifier] = @"[a-zA-Z0-9_:\.]+";

            patterns[TokenType.If] = @"if(?!\w)";
            patterns[TokenType.Else] = @"else(?!\w)";
            patterns[TokenType.ElseIf] = @"elseif(?!\w)";
            patterns[TokenType.EndIf] = @"endif(?!\w)";
            patterns[TokenType.Set] = @"set(?!\w)";

            patterns[TokenType.ShortcutOption] = @"\-\>";
            states = new Dictionary<string, LexerState> ();

            // "base": ordinary dialogue lines. Commands, options, shortcut
            // options and hashtags each move us to a more specific state;
            // everything else is free text.
            states ["base"] = new LexerState (patterns);
            states ["base"].AddTransition (TokenType.BeginCommand, "command", delimitsText: true);
            states ["base"].AddTransition (TokenType.OptionStart, "link", delimitsText: true);
            states ["base"].AddTransition (TokenType.ShortcutOption, "shortcut-option");
            states ["base"].AddTransition (TokenType.TagMarker, "tag", delimitsText: true);
            states ["base"].AddTextRule (TokenType.Text);

            // "tag": the identifier after a '#'
            states ["tag"] = new LexerState (patterns);
            states ["tag"].AddTransition (TokenType.Identifier, "base");

            // "shortcut-option": the text of a "->" option, optionally followed
            // by a condition ("<<...>>") or a hashtag; the lines after it may
            // form an indented block
            states ["shortcut-option"] = new LexerState (patterns);
            states ["shortcut-option"].setTrackNextIndentation = true;
            states ["shortcut-option"].AddTransition (TokenType.BeginCommand, "expression", delimitsText: true);
            states ["shortcut-option"].AddTransition (TokenType.TagMarker, "shortcut-option-tag", delimitsText: true);
            states ["shortcut-option"].AddTextRule (TokenType.Text, "base");

            // "shortcut-option-tag": the identifier after a '#' inside a shortcut option
            states ["shortcut-option-tag"] = new LexerState (patterns);
            states ["shortcut-option-tag"].AddTransition (TokenType.Identifier, "shortcut-option");

            // "command": the inside of "<<...>>"; keywords route to the
            // expression or assignment states, while other identifiers might be
            // either a client command or a function call
            states ["command"] = new LexerState (patterns);
            states ["command"].AddTransition (TokenType.If, "expression");
            states ["command"].AddTransition (TokenType.Else);
            states ["command"].AddTransition (TokenType.ElseIf, "expression");
            states ["command"].AddTransition (TokenType.EndIf);
            states ["command"].AddTransition (TokenType.Set, "assignment");
            states ["command"].AddTransition (TokenType.EndCommand, "base", delimitsText: true);
            states ["command"].AddTransition (TokenType.Identifier, "command-or-expression");
            states ["command"].AddTextRule (TokenType.Text);

            // "command-or-expression": after an identifier inside a command, a
            // '(' means we're lexing an expression (a function call); otherwise
            // the rest is plain command text
            states ["command-or-expression"] = new LexerState (patterns);
            states ["command-or-expression"].AddTransition (TokenType.LeftParen, "expression");
            states ["command-or-expression"].AddTransition (TokenType.EndCommand, "base", delimitsText: true);
            states ["command-or-expression"].AddTextRule (TokenType.Text);

            // "assignment": "<<set ...>>" - a variable, an assignment operator,
            // and then an expression
            states ["assignment"] = new LexerState (patterns);
            states ["assignment"].AddTransition (TokenType.Variable);
            states ["assignment"].AddTransition (TokenType.EqualToOrAssign, "expression");
            states ["assignment"].AddTransition (TokenType.AddAssign, "expression");
            states ["assignment"].AddTransition (TokenType.MinusAssign, "expression");
            states ["assignment"].AddTransition (TokenType.MultiplyAssign, "expression");
            states ["assignment"].AddTransition (TokenType.DivideAssign, "expression");

            // "expression": values, variables and operators; ">>" returns to base
            states ["expression"] = new LexerState (patterns);
            states ["expression"].AddTransition (TokenType.EndCommand, "base");
            states ["expression"].AddTransition (TokenType.Number);
            states ["expression"].AddTransition (TokenType.String);
            states ["expression"].AddTransition (TokenType.LeftParen);
            states ["expression"].AddTransition (TokenType.RightParen);
            states ["expression"].AddTransition (TokenType.EqualTo);
            states ["expression"].AddTransition (TokenType.EqualToOrAssign);
            states ["expression"].AddTransition (TokenType.NotEqualTo);
            states ["expression"].AddTransition (TokenType.GreaterThanOrEqualTo);
            states ["expression"].AddTransition (TokenType.GreaterThan);
            states ["expression"].AddTransition (TokenType.LessThanOrEqualTo);
            states ["expression"].AddTransition (TokenType.LessThan);
            states ["expression"].AddTransition (TokenType.Add);
            states ["expression"].AddTransition (TokenType.Minus);
            states ["expression"].AddTransition (TokenType.Multiply);
            states ["expression"].AddTransition (TokenType.Divide);
            states ["expression"].AddTransition (TokenType.Modulo);
            states ["expression"].AddTransition (TokenType.And);
            states ["expression"].AddTransition (TokenType.Or);
            states ["expression"].AddTransition (TokenType.Xor);
            states ["expression"].AddTransition (TokenType.Not);
            states ["expression"].AddTransition (TokenType.Variable);
            states ["expression"].AddTransition (TokenType.Comma);
            states ["expression"].AddTransition (TokenType.True);
            states ["expression"].AddTransition (TokenType.False);
            states ["expression"].AddTransition (TokenType.Null);
            states ["expression"].AddTransition (TokenType.Identifier);

            // "link": the inside of "[[...]]"; '|' separates the displayed text
            // from the destination node
            states ["link"] = new LexerState (patterns);
            states ["link"].AddTransition (TokenType.OptionEnd, "base", delimitsText: true);
            states ["link"].AddTransition (TokenType.OptionDelimit, "link-destination", delimitsText: true);
            states ["link"].AddTextRule (TokenType.Text);

            // "link-destination": the node name after the '|'
            states ["link-destination"] = new LexerState (patterns);
            states ["link-destination"].AddTransition (TokenType.Identifier);
            states ["link-destination"].AddTransition (TokenType.OptionEnd, "base");

            defaultState = states ["base"];

            // Make all states aware of their names
            foreach (KeyValuePair<string, LexerState> entry in states) {
                entry.Value.name = entry.Key;
            }
        }
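
        // A rough worked example of the states above: the line
        // "<<set $gold to 5>>" should lex as BeginCommand, Set,
        // Variable ("$gold"), EqualToOrAssign ("to"), Number ("5"), EndCommand,
        // with the lexer moving base -> command -> assignment -> expression -> base.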
        public TokenList Tokenise (string title, string text)
        {
            // Do some initial setup
            indentationStack = new Stack<KeyValuePair<int, bool>> ();
            indentationStack.Push (new KeyValuePair<int, bool> (0, false));
            shouldTrackNextIndentation = false;

            var tokens = new TokenList ();

            currentState = defaultState;

            // Tokenise each line
            var lines = new List<string> (text.Split ('\n'));

            // Add a blank line to ensure that we end with zero indentation
            lines.Add ("");

            int lineNumber = 1;

            foreach (var line in lines) {
                tokens.AddRange (this.TokeniseLine (line, lineNumber));
                lineNumber++;
            }

            var endOfInput = new Token (TokenType.EndOfInput, currentState, lineNumber, 0);
            tokens.Add (endOfInput);

            return tokens;
        }
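
        // A sketch of how the indentation handling below plays out for
        // shortcut options, traced by hand against the rules above:
        //
        //     -> Yes
        //         You said yes!
        //     -> No
        //
        // "-> Yes" enters the shortcut-option state, which sets
        // shouldTrackNextIndentation; the more deeply indented line that
        // follows should emit an Indent token, and "-> No", back at the
        // original level, should emit the matching Dedent before its own tokens.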
        TokenList TokeniseLine (string line, int lineNumber)
        {
            var lineTokens = new Stack<Token> ();

            // Replace tabs with four spaces
            line = line.Replace ("\t", "    ");

            // Strip out \r's
            line = line.Replace ("\r", "");

            // Record the indentation level if the previous state wants us to
            var thisIndentation = LineIndentation (line);
            var previousIndentation = indentationStack.Peek ();

            if (shouldTrackNextIndentation && thisIndentation > previousIndentation.Key) {
                // If we are more indented than before, emit an
                // indent token and record this new indent level
                indentationStack.Push (new KeyValuePair<int, bool> (thisIndentation, true));

                var indent = new Token (TokenType.Indent, currentState, lineNumber, previousIndentation.Key);
                indent.value = "".PadLeft (thisIndentation - previousIndentation.Key);

                shouldTrackNextIndentation = false;

                lineTokens.Push (indent);
            } else if (thisIndentation < previousIndentation.Key) {
                // If we are less indented, emit a dedent for every
                // indentation level that we passed on the way back to 0 that also
                // emitted an indent token; at the same time, remove those
                // indent levels from the stack
                while (thisIndentation < indentationStack.Peek ().Key) {
                    var topLevel = indentationStack.Pop ();
                    if (topLevel.Value) {
                        var dedent = new Token (TokenType.Dedent, currentState, lineNumber, 0);
                        lineTokens.Push (dedent);
                    }
                }
            }

            // Now that we're past any initial indentation, start
            // finding tokens.
            int columnNumber = thisIndentation;

            var whitespace = new Regex (@"\s*");

            while (columnNumber < line.Length) {

                // If we're about to hit a line comment, abort processing the
                // line immediately
                if (line.Substring (columnNumber).StartsWith (LINE_COMMENT)) {
                    break;
                }

                var matched = false;

                foreach (var rule in currentState.tokenRules) {

                    var match = rule.regex.Match (line, columnNumber);

                    if (match.Success == false || match.Length == 0)
                        continue;

                    string tokenText;

                    if (rule.type == TokenType.Text) {
                        // If this is text, then back up to the most recent text
                        // delimiting token, and treat everything from there as
                        // the text. We do this because we don't want this:
                        //     <<flip Harley3 +1>>
                        // to get matched as this:
                        //     BeginCommand Identifier("flip") Text("Harley3 +1") EndCommand
                        // instead, we want to match it as this:
                        //     BeginCommand Text("flip Harley3 +1") EndCommand

                        int textStartIndex = thisIndentation;

                        if (lineTokens.Count > 0) {
                            while (lineTokens.Peek ().type == TokenType.Identifier) {
                                lineTokens.Pop ();
                            }

                            var startDelimiterToken = lineTokens.Peek ();
                            textStartIndex = startDelimiterToken.columnNumber;
                            if (startDelimiterToken.type == TokenType.Indent)
                                textStartIndex += startDelimiterToken.value.Length;
                            if (startDelimiterToken.type == TokenType.Dedent)
                                textStartIndex = thisIndentation;
                        }

                        columnNumber = textStartIndex;

                        var textEndIndex = match.Index + match.Length;
                        tokenText = line.Substring (textStartIndex, textEndIndex - textStartIndex);
                    } else {
                        tokenText = match.Value;
                    }

                    columnNumber += tokenText.Length;

                    // If this was a string, lop off the quotes at the start and
                    // end, and un-escape the quotes and slashes
                    if (rule.type == TokenType.String) {
                        tokenText = tokenText.Substring (1, tokenText.Length - 2);
                        tokenText = tokenText.Replace (@"\\", @"\");
                        tokenText = tokenText.Replace (@"\""", @"""");
                    }

                    var token = new Token (rule.type, currentState, lineNumber, columnNumber, tokenText);
                    token.delimitsText = rule.delimitsText;

                    lineTokens.Push (token);

                    if (rule.entersState != null) {
                        if (states.ContainsKey (rule.entersState) == false) {
                            throw new TokeniserException (lineNumber, columnNumber, "Unknown tokeniser state " + rule.entersState);
                        }

                        EnterState (states [rule.entersState]);

                        if (shouldTrackNextIndentation == true) {
                            if (indentationStack.Peek ().Key < thisIndentation) {
                                indentationStack.Push (new KeyValuePair<int, bool> (thisIndentation, false));
                            }
                        }
                    }

                    matched = true;
                    break;
                }

                if (matched == false) {
                    throw TokeniserException.ExpectedTokensFromState (lineNumber, columnNumber, currentState);
                }

                // Consume any lingering whitespace before the next token
                var lastWhitespace = whitespace.Match (line, columnNumber);
                if (lastWhitespace.Success) {
                    columnNumber += lastWhitespace.Length;
                }
            }

            // lineTokens is a stack, so reverse it to get the tokens in the
            // order they appeared on the line
            var listToReturn = new TokenList (lineTokens.ToArray ());
            listToReturn.Reverse ();

            return listToReturn;
        }
        // Returns the number of leading whitespace characters on the line
        int LineIndentation (string line)
        {
            var initialIndentRegex = new Regex (@"^(\s*)");
            var match = initialIndentRegex.Match (line);

            if (match.Success == false) {
                return 0;
            }

            return match.Groups [0].Length;
        }
        void EnterState (LexerState state) {
            currentState = state;
            if (currentState.setTrackNextIndentation)
                shouldTrackNextIndentation = true;
        }
    }
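
    // Usage sketch: Lexer and TokenList are internal, so a caller would live
    // elsewhere inside this assembly. The class name, node title and script
    // text below are made up for illustration and are not part of Yarn
    // Spinner itself; the script should tokenise cleanly against the states
    // defined above.
    internal static class LexerUsageSketch {
        internal static void PrintTokens () {
            var lexer = new Lexer ();
            TokenList tokens = lexer.Tokenise ("ExampleNode",
                "<<if $seen is true>>\n" +
                "You again!\n" +
                "<<endif>>\n" +
                "[[Leave|Exit]]");

            foreach (var token in tokens) {
                // Each token prints as something like
                // "BeginCommand (<<) at 1:2 (state: base)"
                Console.WriteLine (token);
            }
        }
    }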
}