12 years ago · 8bb8a26cd8
--- a/SquiLu/samples/coroutines2.nut
+++ b/SquiLu/samples/coroutines2.nut
@@ -0,0 +1,60 @@
 
				+local function _tokenizer(str) {
			
 
				+	local yield2 = suspend;
			
 
				+	local i = 0
			
 
				+	local i1,i2
			
 
				+	local find = function(pat) {
			
 
				+		local result = [0,0];
			
 
				+		//local rc = str.find_lua(pat,result,i)
			
 
				+//print(str, pat, rc, result[0], result[1])
			
 
				+		if(str.find_lua(pat,result,i) >= 0) {
			
 
				+			i1 = result[0]
			
 
				+			i2 = result[1]
			
 
				+			return true;
			
 
				+		}
			
 
				+		return false;
			
 
				+	}
			
 
				+	local token = function() {
			
 
				+//print(str, i, i1, i2)
			
 
				+		return str.slice(i,i2+1)
			
 
				+	}
			
 
				+	while (true) {
			
 
				+		if ( find("^%s+") ) {
			
 
				+			//-- ignore
			
 
				+		} else if( find( "^[%+%-]*%d+") ) {
			
 
				+			local ilast = i
			
 
				+			i = i2+1 //-- just after the sequence of digits
			
 
				+			//-- fractional part?
			
 
				+			local result = [0,0];
			
 
				+			if (str.find_lua("^%.%d+",result, i) >= 0) {
			
 
				+				i2 = result[1]
			
 
				+				i = i2+1
			
 
				+			}
			
 
				+			//-- exponent part?
			
 
				+			if (str.find_lua("^[eE][%+%-]*%d+", result, i) >= 0) {
			
 
				+				i2 = result[1]
			
 
				+			}
			
 
				+			i = ilast
			
 
				+			yield2(["number",str.slice(i,i2+1).tofloat()])
			
 
				+		} else if (find( "^[_%a][_%w]*")) {
			
 
				+			yield2(["iden",str.slice(i,i2+1)])
			
 
				+		} else if (find ([==[^"[^"]*"]==]) || find ("^'[^']*'") ) {
			
 
				+			//-- strip the quotes
			
 
				+			yield2(["string",str.slice(i+1,i2)])
			
 
				+		} else { //-- any other character
			
 
				+			if (i >= str.len()) return null;
			
 
				+			local ch = str[i].tochar()
			
 
				+			i2 = i
			
 
				+			yield2([ch,ch])
			
 
				+		}
			
 
				+		i = i2+1
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+//for(var i=0; i < 10000; ++i) {
			
 
				+	local T = ::newthread(_tokenizer)
			
 
				+	var result = T.call([==[hello !-20.2e2 +10e1 "dolly"]==]);
			
 
				+	while( result ) {
			
 
				+		print(result[0], result[1]);
			
 
				+		result = T.wakeup()
			
 
				+	}
			
 
				+//}
			
--- a/SquiLu/samples/lexer.nut
+++ b/SquiLu/samples/lexer.nut
@@ -0,0 +1,250 @@
 
				+class Lexer {
			
 
				+	pos = 0;
			
 
				+	line = 0;
			
 
				+	buf = null;
			
 
				+	buflen = 0;
			
 
				+
			
 
				+	// Operator table, mapping operator -> token name
			
 
				+	static optable = {
			
 
				+		"+":  "PLUS",
			
 
				+		"-":  "MINUS",
			
 
				+		"*":  "MULTIPLY",
			
 
				+		".":  "PERIOD",
			
 
				+		"\\": "BACKSLASH",
			
 
				+		":":  "COLON",
			
 
				+		"%":  "PERCENT",
			
 
				+		"|":  "PIPE",
			
 
				+		"!":  "EXCLAMATION",
			
 
				+		"?":  "QUESTION",
			
 
				+		"#":  "POUND",
			
 
				+		"&":  "AMPERSAND",
			
 
				+		";":  "SEMI",
			
 
				+		",":  "COMMA",
			
 
				+		"(":  "L_PAREN",
			
 
				+		")":  "R_PAREN",
			
 
				+		"<":  "L_ANG",
			
 
				+		">":  "R_ANG",
			
 
				+		"{":  "L_BRACE",
			
 
				+		"}":  "R_BRACE",
			
 
				+		"[":  "L_BRACKET",
			
 
				+		"]":  "R_BRACKET",
			
 
				+		"=":  "EQUALS"
			
 
				+	};
			
 
				+
			
 
				+
			
 
				+    // Initialize the Lexer's buffer. This resets the lexer's internal
			
 
				+    // state and subsequent tokens will be returned starting with the
			
 
				+    // beginning of the new buffer.
			
 
				+    function input(buf)
			
 
				+    {
			
 
				+        this.pos = 0;
			
 
				+	this.line = 0;
			
 
				+        this.buf = buf;
			
 
				+        this.buflen = buf.len();
			
 
				+    }
			
 
				+
			
 
				+    // Get the next token from the current buffer. A token is an object with
			
 
				+    // the following properties:
			
 
				+    // - name: name of the pattern that this token matched (taken from rules).
			
 
				+    // - value: actual string value of the token.
			
 
				+    // - pos: offset in the current buffer where the token starts.
			
 
				+    //
			
 
				+    // If there are no more tokens in the buffer, returns null. In case of
			
 
				+    // an error throws Error.
			
 
				+    function token()
			
 
				+    {
			
 
				+        var _isnewline = function(c)
			
 
				+        {
			
 
				+            return c === '\r' || c === '\n';
			
 
				+        }
			
 
				+
			
 
				+        var _isdigit = function (c)
			
 
				+        {
			
 
				+            return c >= '0' && c <= '9';
			
 
				+        }
			
 
				+
			
 
				+        var _isalpha = function (c)
			
 
				+        {
			
 
				+            return (c >= 'a' && c <= 'z') ||
			
 
				+                   (c >= 'A' && c <= 'Z') ||
			
 
				+                   c === '_' || c === '$';
			
 
				+        }
			
 
				+
			
 
				+        var _isalphanum = function(c)
			
 
				+        {
			
 
				+            return (c >= 'a' && c <= 'z') ||
			
 
				+                   (c >= 'A' && c <= 'Z') ||
			
 
				+                   (c >= '0' && c <= '9') ||
			
 
				+                   c === '_' || c === '$';
			
 
				+        }
			
 
				+
			
 
				+        var _process_number = function()
			
 
				+        {
			
 
				+            var endpos = this.pos + 1;
			
 
				+            while (endpos < this.buflen &&
			
 
				+                    _isdigit(this.buf[endpos]))
			
 
				+            {
			
 
				+                endpos++;
			
 
				+            }
			
 
				+
			
 
				+            var tok = {
			
 
				+		name: "NUMBER",
			
 
				+		value:  this.buf.slice(this.pos, endpos),
			
 
				+		pos: this.pos, line: this.line
			
 
				+            };
			
 
				+            this.pos = endpos;
			
 
				+            return tok;
			
 
				+        }
			
 
				+
			
 
				+        var _process_comment = function ()
			
 
				+        {
			
 
				+            var endpos = this.pos + 2;
			
 
				+            // Skip until the end of the line
			
 
				+            var c = this.buf[this.pos + 2];
			
 
				+            while (endpos < this.buflen &&
			
 
				+                    !_isnewline(this.buf[endpos]))
			
 
				+            {
			
 
				+                endpos++;
			
 
				+            }
			
 
				+
			
 
				+            var tok = {
			
 
				+		name: "COMMENT",
			
 
				+		value: this.buf.slice(this.pos, endpos),
			
 
				+		pos: this.pos, line: this.line
			
 
				+            };
			
 
				+            this.pos = endpos + 1;
			
 
				+	    ++this.line;
			
 
				+            return tok;
			
 
				+        }
			
 
				+
			
 
				+        var _process_identifier = function()
			
 
				+        {
			
 
				+            var endpos = this.pos + 1;
			
 
				+            while (endpos < this.buflen &&
			
 
				+                    _isalphanum(this.buf[endpos]))
			
 
				+            {
			
 
				+                endpos++;
			
 
				+            }
			
 
				+
			
 
				+            var tok = {
			
 
				+		name: "IDENTIFIER",
			
 
				+		value: this.buf.slice(this.pos, endpos),
			
 
				+		pos:  this.pos, line: this.line
			
 
				+            };
			
 
				+            this.pos = endpos;
			
 
				+            return tok;
			
 
				+        }
			
 
				+
			
 
				+        var _process_quote = function(quote)
			
 
				+        {
			
 
				+            // this.pos points at the opening quote. Find the ending quote.
			
 
				+            var end_index = this.buf.find(quote, this.pos + 1);
			
 
				+	    while((this.buf[end_index-1] === '\\') && (this.buf[end_index-2] !== '\\')  ){
			
 
				+		end_index = this.buf.find(quote, end_index + 1);
			
 
				+	    }
			
 
				+
			
 
				+            if (end_index === -1)
			
 
				+            {
			
 
				+                throw ("Unterminated quote at " + this.pos);
			
 
				+            }
			
 
				+            else
			
 
				+            {
			
 
				+                var tok = {
			
 
				+			name: "QUOTE",
			
 
				+			value: this.buf.slice(this.pos, end_index + 1),
			
 
				+			pos: this.pos, line: this.line
			
 
				+                };
			
 
				+                this.pos = end_index + 1;
			
 
				+                return tok;
			
 
				+            }
			
 
				+        }
			
 
				+
			
 
				+        var _skipnontokens = function()
			
 
				+        {
			
 
				+            while (this.pos < this.buflen)
			
 
				+            {
			
 
				+                var c = this.buf[this.pos];
			
 
				+                if (c == ' ' || c == '\t')
			
 
				+                {
			
 
				+                    ++this.pos;
			
 
				+                }
			
 
				+                else if (c == '\r' || c == '\n')
			
 
				+                {
			
 
				+                    ++this.pos;
			
 
				+		    ++this.line;
			
 
				+                }
			
 
				+                else
			
 
				+                {
			
 
				+                    break;
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+
			
 
				+        _skipnontokens();
			
 
				+        if (this.pos >= this.buflen)
			
 
				+        {
			
 
				+            return null;
			
 
				+        }
			
 
				+
			
 
				+        // The char at this.pos is part of a real token. Figure out which.
			
 
				+        var c = this.buf[this.pos];
			
 
				+
			
 
				+        // '/' is treated specially, because it starts a comment if followed by
			
 
				+        // another '/'. If not followed by another '/', it's the DIVIDE
			
 
				+        // operator.
			
 
				+        if (c === '/')
			
 
				+        {
			
 
				+            var next_c = this.buf[this.pos + 1];
			
 
				+            if (next_c === '/')
			
 
				+            {
			
 
				+                return _process_comment();
			
 
				+            }
			
 
				+            else
			
 
				+            {
			
 
				+                return {name: "DIVIDE", value: '/', pos: this.pos++, line: this.line};
			
 
				+            }
			
 
				+        }
			
 
				+        else
			
 
				+        {
			
 
				+            // Look it up in the table of operators
			
 
				+            var op = this.optable.get(c.tochar(), false);
			
 
				+            if (op)
			
 
				+            {
			
 
				+                return {name: op, value: c, pos: this.pos++, line: this.line};
			
 
				+            }
			
 
				+            else
			
 
				+            {
			
 
				+                // Not an operator - so it's the beginning of another token.
			
 
				+                if (_isalpha(c))
			
 
				+                {
			
 
				+                    return _process_identifier();
			
 
				+                }
			
 
				+                else if (_isdigit(c))
			
 
				+                {
			
 
				+                    return _process_number();
			
 
				+                }
			
 
				+                else if (c === '"' || c === '\'')
			
 
				+                {
			
 
				+                    return _process_quote(c.tochar());
			
 
				+                }
			
 
				+                else
			
 
				+                {
			
 
				+                    throw ("Token error at " + this.pos);
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+var txt = "var lex = new Lexer(23, \"dad\");";
			
 
				+var lex = new Lexer();
			
 
				+var fd = file("lexer.nut", "r");
			
 
				+txt = fd.read(fd.len());
			
 
				+fd.close();
			
 
				+
			
 
				+lex.input(txt);
			
 
				+var tok =  lex.token();
			
 
				+while(tok) {
			
 
				+	print(tok.name, tok.value, tok.line+1, tok.pos);
			
 
				+	tok = lex.token();
			
 
				+}