Browse Source

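Moved string unescaping and long-string adjustment from the expression stage (LiteralExpression) to the lexer stage (new LexerUtils class)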

Xanathar 10 years ago
parent
commit
3ff78656a8

+ 1 - 0
src/MoonSharp.Interpreter/MoonSharp.Interpreter.csproj

@@ -248,6 +248,7 @@
     <Compile Include="Properties\AssemblyInfo.cs" />
     <Compile Include="Properties\AssemblyInfo.cs" />
     <Compile Include="Tree\Expressions\ExprListExpression.cs" />
     <Compile Include="Tree\Expressions\ExprListExpression.cs" />
     <Compile Include="Tree\Expressions\IndexExpression.cs" />
     <Compile Include="Tree\Expressions\IndexExpression.cs" />
+    <Compile Include="Tree\Lexer\LexerUtils.cs" />
     <Compile Include="Tree\Statements\EmptyStatement.cs" />
     <Compile Include="Tree\Statements\EmptyStatement.cs" />
     <Compile Include="Tree\Statements\FunctionCallStatement.cs" />
     <Compile Include="Tree\Statements\FunctionCallStatement.cs" />
     <Compile Include="Tree\Statements\FunctionDefinitionStatement.cs" />
     <Compile Include="Tree\Statements\FunctionDefinitionStatement.cs" />

+ 2 - 194
src/MoonSharp.Interpreter/Tree/Expressions/LiteralExpression.cs

@@ -47,10 +47,10 @@ namespace MoonSharp.Interpreter.Tree.Expressions
 					TryParse(t.Text, s => ParseHexFloat(s));
 					TryParse(t.Text, s => ParseHexFloat(s));
 					break;
 					break;
 				case TokenType.String:
 				case TokenType.String:
-					m_Value = DynValue.NewString(NormalizeNormStr(t.Text, false)).AsReadOnly();
+					m_Value = DynValue.NewString(t.Text).AsReadOnly();
 					break;
 					break;
 				case TokenType.String_Long:
 				case TokenType.String_Long:
-					m_Value = DynValue.NewString(NormalizeLongStr(t.Text)).AsReadOnly();
+					m_Value = DynValue.NewString(t.Text).AsReadOnly();
 					break;
 					break;
 				case TokenType.True:
 				case TokenType.True:
 					m_Value = DynValue.True;
 					m_Value = DynValue.True;
@@ -69,198 +69,6 @@ namespace MoonSharp.Interpreter.Tree.Expressions
 				throw new SyntaxErrorException("unknown number format near '{0}'", t.Text);
 				throw new SyntaxErrorException("unknown number format near '{0}'", t.Text);
 		}
 		}
 
 
-		private string NormalizeNormStr(string str, bool cutPrefix)
-		{
-			if (cutPrefix) // ANTLR ONLY -- TO REMOVE
-				str = str.Substring(1, str.Length - 2); // removes "/'
-
-			if (!str.Contains('\\'))
-				return str;
-
-			StringBuilder sb = new StringBuilder();
-
-			bool escape = false;
-			bool hex = false;
-			int unicode_state = 0;
-			string hexprefix = "";
-			string val = "";
-			bool zmode = false;
-
-			foreach (char c in str)
-			{
-			redo:
-				if (escape)
-				{
-					if (val.Length == 0 && !hex && unicode_state == 0)
-					{
-						if (c == 'a') { sb.Append('\a'); escape = false; zmode = false; }
-						else if (c == '\r') { }  // this makes \\r\n -> \\n
-						else if (c == '\n') { sb.Append('\n'); escape = false; }  
-						else if (c == 'b') { sb.Append('\b'); escape = false; }
-						else if (c == 'f') { sb.Append('\f'); escape = false; }
-						else if (c == 'n') { sb.Append('\n'); escape = false; }
-						else if (c == 'r') { sb.Append('\r'); escape = false; }
-						else if (c == 't') { sb.Append('\t'); escape = false; }
-						else if (c == 'v') { sb.Append('\v'); escape = false; }
-						else if (c == '\\') { sb.Append('\\'); escape = false; zmode = false; }
-						else if (c == '"') { sb.Append('\"'); escape = false; zmode = false; }
-						else if (c == '\'') { sb.Append('\''); escape = false; zmode = false; }
-						else if (c == '[') { sb.Append('['); escape = false; zmode = false; }
-						else if (c == ']') { sb.Append(']'); escape = false; zmode = false; }
-						else if (c == 'x') { hex = true; }
-						else if (c == 'u') { unicode_state = 1; }
-						else if (c == 'z') { zmode = true; escape = false; }
-						else if (char.IsDigit(c)) { val = val + c; }
-						else throw new SyntaxErrorException("invalid escape sequence near '\\{0}'", c);
-					}
-					else
-					{
-						if (unicode_state == 1)
-						{
-							if (c != '{')
-								throw new SyntaxErrorException("'{' expected near '\\u'");
-
-							unicode_state = 2;
-						}
-						else if (unicode_state == 2)
-						{
-							if (c == '}')
-							{
-								int i = int.Parse(val, NumberStyles.HexNumber, CultureInfo.InvariantCulture);
-								sb.Append(char.ConvertFromUtf32(i));
-								unicode_state = 0;
-								val = string.Empty;
-								escape = false;
-							}
-							else if (val.Length >= 8)
-							{
-								throw new SyntaxErrorException("'}' missing, or unicode code point too large after '\\u'");
-							}
-							else
-							{
-								val += c;
-							}
-						}
-						else if (hex)
-						{
-							if (IsHexDigit(c))
-							{
-								val += c;
-								if (val.Length == 2)
-								{
-									int i = int.Parse(val, NumberStyles.HexNumber, CultureInfo.InvariantCulture);
-									sb.Append(char.ConvertFromUtf32(i));
-									zmode = false; 
-									escape = false;
-								}
-							}
-							else
-							{
-								throw new SyntaxErrorException("hexadecimal digit expected near '\\{0}{1}{2}'", hexprefix, val, c);
-							}
-						}
-						else if (val.Length > 0)
-						{
-							if (char.IsDigit(c))
-							{
-								val = val + c;
-							}
-
-							if (val.Length == 3 || !char.IsDigit(c))
-							{
-								int i = int.Parse(val, CultureInfo.InvariantCulture);
-
-								if (i > 255) 
-									throw new SyntaxErrorException("decimal escape too large near '\\{0}'", val);
-
-								sb.Append(char.ConvertFromUtf32(i));
-
-								zmode = false;
-								escape = false;
-
-								if (!char.IsDigit(c))
-									goto redo;
-							}
-						}
-					}
-				}
-				else
-				{
-					if (c == '\\')
-					{
-						escape = true;
-						hex = false;
-						val = "";
-					}
-					else
-					{
-						if (!zmode || !char.IsWhiteSpace(c))
-						{
-							sb.Append(c);
-							zmode = false;
-						}
-					}
-				}
-			}
-
-			if (escape && !hex && val.Length > 0)
-			{
-				int i = int.Parse(val, CultureInfo.InvariantCulture);
-				sb.Append(char.ConvertFromUtf32(i));
-				escape = false;
-			}
-
-			if (escape)
-			{
-				throw new SyntaxErrorException("unfinished string near '\"{0}\"'", sb.ToString());
-			}
-
-			return sb.ToString();
-		}
-
-		private bool IsHexDigit(char c)
-		{
-			return (char.IsDigit(c)) || ("AaBbCcDdEeFf".Contains(c));
-		}
-
-		private string NormalizeLongStr(string str)
-		{
-			if (str.StartsWith("\r\n"))
-				str = str.Substring(2);
-			else if (str.StartsWith("\n"))
-				str = str.Substring(1);
-
-			return str;
-		}
-
-		private string NormalizeLongStr_ANTLR(string str)
-		{
-			int lenOfPrefix = 0;
-			int squareBracketsFound = 0;
-			str = str.Trim();
-
-			for (int i = 0; i < str.Length; i++)
-			{
-				char c = str[i];
-				if (c == '[')
-					++squareBracketsFound;
-
-				++lenOfPrefix;
-
-				if (squareBracketsFound == 2)
-					break;
-			}
-
-			str = str.Substring(lenOfPrefix, str.Length - lenOfPrefix * 2);
-
-			if (str.StartsWith("\r\n"))
-				str = str.Substring(2);
-			else if (str.StartsWith("\n"))
-				str = str.Substring(1);
-
-			return str;
-		}
-
 		private void TryParse(string txt, Func<string, double> parser)
 		private void TryParse(string txt, Func<string, double> parser)
 		{
 		{
 			double val = parser(txt);
 			double val = parser(txt);

+ 3 - 10
src/MoonSharp.Interpreter/Tree/Lexer/Lexer.cs

@@ -289,7 +289,7 @@ namespace MoonSharp.Interpreter.Tree
 					for (int i = 0; i < end_pattern.Length; i++)
 					for (int i = 0; i < end_pattern.Length; i++)
 						CursorCharNext();
 						CursorCharNext();
 
 
-					return text.ToString();
+					return LexerUtils.AdjustLuaLongString(text.ToString());
 				}
 				}
 				else
 				else
 				{
 				{
@@ -346,7 +346,7 @@ namespace MoonSharp.Interpreter.Tree
 					dotAdded = true;
 					dotAdded = true;
 					text.Append(c);
 					text.Append(c);
 				}
 				}
-				else if (Char_IsHexDigit(c) && isHex && !exponentPart)
+				else if (LexerUtils.CharIsHexDigit(c) && isHex && !exponentPart)
 				{
 				{
 					text.Append(c);
 					text.Append(c);
 				}
 				}
@@ -373,13 +373,6 @@ namespace MoonSharp.Interpreter.Tree
 			return CreateToken(numberType, fromLine, fromCol, text.ToString());
 			return CreateToken(numberType, fromLine, fromCol, text.ToString());
 		}
 		}
 
 
-		private bool Char_IsHexDigit(char c)
-		{
-			return char.IsDigit(c) ||
-				c == 'a' || c == 'b' || c == 'c' || c == 'd' || c == 'e' || c == 'f' ||
-				c == 'A' || c == 'B' || c == 'C' || c == 'D' || c == 'E' || c == 'F';
-		}
-
 		private Token CreateSingleCharToken(TokenType tokenType, int fromLine, int fromCol)
 		private Token CreateSingleCharToken(TokenType tokenType, int fromLine, int fromCol)
 		{
 		{
 			char c = CursorChar();
 			char c = CursorChar();
@@ -456,7 +449,7 @@ namespace MoonSharp.Interpreter.Tree
 				else if (c == separator)
 				else if (c == separator)
 				{
 				{
 					CursorCharNext();
 					CursorCharNext();
-					return CreateToken(TokenType.String, fromLine, fromCol, text.ToString());
+					return CreateToken(TokenType.String, fromLine, fromCol, LexerUtils.UnescapeLuaString(text.ToString()));
 				}
 				}
 				else
 				else
 				{
 				{

+ 174 - 0
src/MoonSharp.Interpreter/Tree/Lexer/LexerUtils.cs

@@ -0,0 +1,174 @@
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using System.Text;
+
+namespace MoonSharp.Interpreter.Tree
+{
+	/// <summary>
+	/// Stateless helpers used by the lexer to classify characters and to turn the raw text
+	/// of Lua string tokens into the string value they denote.
+	/// </summary>
+	internal static class LexerUtils
+	{
+		/// <summary>
+		/// Tells whether <paramref name="c"/> is an ASCII hexadecimal digit (0-9, a-f, A-F).
+		/// </summary>
+		/// <remarks>
+		/// char.IsDigit is deliberately avoided: it also accepts non-ASCII decimal digits,
+		/// which the numeric parsers used on the collected text do not understand.
+		/// </remarks>
+		/// <param name="c">The character to test.</param>
+		/// <returns>true if the character is a hexadecimal digit; otherwise false.</returns>
+		public static bool CharIsHexDigit(char c)
+		{
+			return (c >= '0' && c <= '9') ||
+				(c >= 'a' && c <= 'f') ||
+				(c >= 'A' && c <= 'F');
+		}
+
+		/// <summary>
+		/// Tells whether <paramref name="c"/> is an ASCII decimal digit (0-9).
+		/// </summary>
+		private static bool CharIsAsciiDigit(char c)
+		{
+			return c >= '0' && c <= '9';
+		}
+
+		/// <summary>
+		/// Removes a single leading line break ("\r\n" or "\n") from the content of a long string.
+		/// </summary>
+		/// <param name="str">The text found between the long brackets.</param>
+		/// <returns>The text without its first line break, or the text unchanged if it does not start with one.</returns>
+		public static string AdjustLuaLongString(string str)
+		{
+			// Ordinal comparison: a culture-sensitive StartsWith may skip ignorable characters
+			// and report a match even when the first char is not the line break itself.
+			if (str.StartsWith("\r\n", StringComparison.Ordinal))
+				str = str.Substring(2);
+			else if (str.StartsWith("\n", StringComparison.Ordinal))
+				str = str.Substring(1);
+
+			return str;
+		}
+
+		/// <summary>
+		/// Maps the character following a backslash to the character it stands for,
+		/// for the escapes that are complete after a single character.
+		/// </summary>
+		/// <param name="c">The character following the backslash.</param>
+		/// <param name="unescaped">Receives the resulting character when the method returns true.</param>
+		/// <returns>true if <paramref name="c"/> is a single-character escape; otherwise false.</returns>
+		private static bool TryGetSimpleEscape(char c, out char unescaped)
+		{
+			switch (c)
+			{
+				case 'a': unescaped = '\a'; return true;
+				case 'b': unescaped = '\b'; return true;
+				case 'f': unescaped = '\f'; return true;
+				case 'n': unescaped = '\n'; return true;
+				case 'r': unescaped = '\r'; return true;
+				case 't': unescaped = '\t'; return true;
+				case 'v': unescaped = '\v'; return true;
+				case '\n': // backslash followed by a line break yields a line break
+				case '\\':
+				case '"':
+				case '\'':
+				case '[':
+				case ']':
+					unescaped = c;
+					return true;
+				default:
+					unescaped = '\0';
+					return false;
+			}
+		}
+
+		/// <summary>
+		/// Resolves the backslash escape sequences contained in the text of a quoted string.
+		/// </summary>
+		/// <remarks>
+		/// Handles single-character escapes, an escaped line break, \z (skip the following
+		/// run of whitespace), \xXX (two hexadecimal digits), \ddd (up to three decimal digits,
+		/// at most 255) and \u{XXX} (hexadecimal code point).
+		/// </remarks>
+		/// <param name="str">The string content, without the surrounding quotes.</param>
+		/// <returns>The string with every escape sequence replaced by the character(s) it denotes.</returns>
+		/// <exception cref="SyntaxErrorException">An escape sequence is unknown, malformed, out of range or unfinished.</exception>
+		public static string UnescapeLuaString(string str)
+		{
+			// Fast path: nothing to unescape.
+			if (str.IndexOf('\\') < 0)
+				return str;
+
+			StringBuilder sb = new StringBuilder(str.Length);
+
+			bool escape = false;    // inside an escape sequence (a backslash has been consumed)
+			bool hex = false;       // inside \xXX
+			int unicodeState = 0;   // 0: not in \u, 1: '{' expected, 2: reading digits up to '}'
+			string val = "";        // digits collected so far for \ddd, \xXX or \u{XXX}
+			bool zmode = false;     // after \z: skip whitespace until the next significant char
+
+			foreach (char c in str)
+			{
+			redo:
+				if (escape)
+				{
+					if (val.Length == 0 && !hex && unicodeState == 0)
+					{
+						// First character after the backslash: decides the kind of escape.
+						if (c == '\r')
+						{
+							// Stay in escape mode so that backslash + "\r\n" ends up as a single '\n'.
+						}
+						else if (c == 'x') { hex = true; }
+						else if (c == 'u') { unicodeState = 1; }
+						else if (c == 'z') { zmode = true; escape = false; }
+						else if (CharIsAsciiDigit(c)) { val = val + c; }
+						else
+						{
+							char unescaped;
+
+							if (!TryGetSimpleEscape(c, out unescaped))
+								throw new SyntaxErrorException("invalid escape sequence near '\\{0}'", c);
+
+							sb.Append(unescaped);
+							escape = false;
+							// Any produced character ends a pending \z whitespace skip.
+							zmode = false;
+						}
+					}
+					else if (unicodeState == 1)
+					{
+						// Braces are passed as arguments so the format string holds no literal brace.
+						if (c != '{')
+							throw new SyntaxErrorException("'{0}' expected near '\\u'", "{");
+
+						unicodeState = 2;
+					}
+					else if (unicodeState == 2)
+					{
+						if (c == '}')
+						{
+							if (val.Length == 0)
+								throw new SyntaxErrorException("hexadecimal digit expected near '\\u{0}'", "{}");
+
+							// At most 8 hex digits are collected, so the value always fits a uint.
+							uint codePoint = uint.Parse(val, NumberStyles.HexNumber, CultureInfo.InvariantCulture);
+
+							// char.ConvertFromUtf32 rejects surrogates and values above 0x10FFFF.
+							if (codePoint > 0x10FFFF || (codePoint >= 0xD800 && codePoint <= 0xDFFF))
+								throw new SyntaxErrorException("unicode code point out of range near '\\u{0}{1}{2}'", "{", val, "}");
+
+							sb.Append(char.ConvertFromUtf32((int)codePoint));
+							unicodeState = 0;
+							val = string.Empty;
+							zmode = false;
+							escape = false;
+						}
+						else if (val.Length >= 8)
+						{
+							throw new SyntaxErrorException("'{0}' missing, or unicode code point too large after '\\u'", "}");
+						}
+						else if (!CharIsHexDigit(c))
+						{
+							throw new SyntaxErrorException("hexadecimal digit expected near '\\u{0}{1}{2}'", "{", val, c);
+						}
+						else
+						{
+							val += c;
+						}
+					}
+					else if (hex)
+					{
+						if (!CharIsHexDigit(c))
+							throw new SyntaxErrorException("hexadecimal digit expected near '\\x{0}{1}'", val, c);
+
+						val += c;
+
+						if (val.Length == 2)
+						{
+							int i = int.Parse(val, NumberStyles.HexNumber, CultureInfo.InvariantCulture);
+							sb.Append((char)i);
+							hex = false;
+							val = string.Empty;
+							zmode = false;
+							escape = false;
+						}
+					}
+					else if (val.Length > 0)
+					{
+						// Decimal escape: one to three digits, ended by the third digit
+						// or by the first character that is not a digit.
+						bool isDigit = CharIsAsciiDigit(c);
+
+						if (isDigit)
+							val = val + c;
+
+						if (val.Length == 3 || !isDigit)
+						{
+							int i = int.Parse(val, CultureInfo.InvariantCulture);
+
+							if (i > 255)
+								throw new SyntaxErrorException("decimal escape too large near '\\{0}'", val);
+
+							sb.Append((char)i);
+
+							val = string.Empty;
+							zmode = false;
+							escape = false;
+
+							// The terminating character is not part of the escape: process it again as plain text.
+							if (!isDigit)
+								goto redo;
+						}
+					}
+				}
+				else if (c == '\\')
+				{
+					escape = true;
+					hex = false;
+					val = "";
+				}
+				else if (!zmode || !char.IsWhiteSpace(c))
+				{
+					sb.Append(c);
+					zmode = false;
+				}
+			}
+
+			// A decimal escape with fewer than three digits may be ended by the end of the string.
+			// The unicodeState check keeps an unterminated \u{... from being misread as a decimal escape.
+			if (escape && !hex && unicodeState == 0 && val.Length > 0)
+			{
+				int i = int.Parse(val, CultureInfo.InvariantCulture);
+				sb.Append((char)i);
+				escape = false;
+			}
+
+			if (escape)
+			{
+				throw new SyntaxErrorException("unfinished string near '\"{0}\"'", sb.ToString());
+			}
+
+			return sb.ToString();
+		}
+	}
+}