Browse Source

Added support for string interpolation. Added support for escape sequences in strings. Fixed several issues in the Pratt parser. Introduced some new assert statements. Raised version to 0.2.6.

Marco Bambini 8 years ago
parent
commit
072006b0c0

+ 19 - 8
src/compiler/gravity_ast.c

@@ -264,11 +264,12 @@ bool gnode_is_equal (gnode_t *node1, gnode_t *node2) {
 		gnode_literal_expr_t *e1 = (gnode_literal_expr_t *)node1;
 		gnode_literal_expr_t *e2 = (gnode_literal_expr_t *)node2;
 		if (e1->type != e2->type) return false;
-		// LITERAL_STRING, LITERAL_FLOAT, LITERAL_INT, LITERAL_BOOL
+		// LITERAL_STRING, LITERAL_FLOAT, LITERAL_INT, LITERAL_BOOL, LITERAL_STRING_INTERPOLATED
 		if (e1->type == LITERAL_BOOL) return (e1->value.n64 == e2->value.n64);
 		if (e1->type == LITERAL_INT) return (e1->value.n64 == e2->value.n64);
 		if (e1->type == LITERAL_FLOAT) return (e1->value.d == e2->value.d);
 		if (e1->type == LITERAL_STRING) return (strcmp(e1->value.str, e2->value.str)==0);
+		// there is no way to check node equality for a LITERAL_STRING_INTERPOLATED at compile time
 	}
 	return false;
 }
@@ -298,7 +299,7 @@ bool gnode_is_literal_string (gnode_t *node) {
 bool gnode_is_literal_number (gnode_t *node) {
 	if (gnode_is_literal(node) == false) return false;
 	gnode_literal_expr_t *_node = (gnode_literal_expr_t *)node;
-	return (_node->type != LITERAL_STRING);
+	return (_node->type != LITERAL_STRING && _node->type != LITERAL_STRING_INTERPOLATED);
 }
 
 gnode_t *gnode_binary_expr_create (gtoken_t op, gnode_t *left, gnode_t *right) {
@@ -343,6 +344,7 @@ gnode_t *gnode_identifier_expr_create (gtoken_s token, const char *identifier, c
 
 void gnode_literal_dump (gnode_literal_expr_t *node, char *buffer, int buffersize) {
 	switch (node->type) {
+		case LITERAL_STRING_INTERPOLATED: snprintf(buffer, buffersize, "INTERPOLATED: %d", (uint32_t)gnode_array_size(node->value.r)); break;
 		case LITERAL_STRING: snprintf(buffer, buffersize, "STRING: %.*s", node->len, node->value.str); break;
 		case LITERAL_FLOAT: snprintf(buffer, buffersize, "FLOAT: %.2f", node->value.d); break;
 		case LITERAL_INT: snprintf(buffer, buffersize, "INT: %" PRId64, (int64_t)node->value.n64); break;
@@ -363,22 +365,27 @@ static gnode_t *gnode_literal_value_expr_create (gtoken_s token, gliteral_t type
 		case LITERAL_FLOAT: node->value.d = d; node->len = (d < FLT_MAX) ? 32 : 64; break;
 		case LITERAL_INT: node->value.n64 = n64; node->len = (n64 < 2147483647) ? 32 : 64; break;
 		case LITERAL_BOOL: node->value.n64 = n64; node->len = 32; break;
+		case LITERAL_STRING_INTERPOLATED: break;
 		default: assert(0); // should never reach this point
 	}
 	
 	return (gnode_t *)node;
 }
 
-gnode_t *gnode_literal_string_expr_create (gtoken_s token, const char *s, uint32_t len) {
-	gnode_literal_expr_t *node = (gnode_literal_expr_t *)gnode_literal_value_expr_create(token, LITERAL_STRING, NULL, 0, 0);
+gnode_t *gnode_string_interpolation_create (gtoken_s token, gnode_r *r) {	// builds a LITERAL_STRING_INTERPOLATED literal node whose value is the node array r
+	gnode_literal_expr_t *node = (gnode_literal_expr_t *)gnode_literal_value_expr_create(token, LITERAL_STRING_INTERPOLATED, NULL, 0, 0);	// string/number arguments are unused for this literal type
+	node->value.r = r;	// the array pointer is stored as-is (not copied); free_literal_expr later visits its items and frees the array
+	return (gnode_t *)node;
+}
 	
-	node->len = len;
-	node->value.str = (char *)mem_alloc(len+1);
+gnode_t *gnode_literal_string_expr_create (gtoken_s token, char *s, uint32_t len, bool allocated) {
+	gnode_literal_expr_t *node = (gnode_literal_expr_t *)gnode_literal_value_expr_create(token, LITERAL_STRING, NULL, 0, 0);
 	
-	if (token.escaped) {
-		node->value.str = string_unescape(s, &len, node->value.str);
 		node->len = len;
+	if (allocated) {
+		node->value.str = s;
 	} else {
+		node->value.str = (char *)mem_alloc(len+1);
 		memcpy((void *)node->value.str, (const void *)s, len);
 	}
 	
@@ -658,6 +665,10 @@ static void free_literal_expr (gvisitor_t *self, gnode_literal_expr_t *node) {
 	#pragma unused(self)
 	CHECK_REFCOUNT(node);
 	if (node->type == LITERAL_STRING) mem_free((void *)node->value.str);
+	else if (node->type == LITERAL_STRING_INTERPOLATED) {
+		gnode_array_each(node->value.r, {visit(val);})
+		gnode_array_free(node->value.r);
+	}
 	mem_free((void *)node);
 }
 

+ 5 - 3
src/compiler/gravity_ast.h

@@ -198,12 +198,13 @@ typedef struct {
 
 typedef struct {
 	gnode_t				base;				// LITERAL
-	gliteral_t			type;				// LITERAL_STRING, LITERAL_FLOAT, LITERAL_INT, LITERAL_BOOL
+	gliteral_t			type;				// LITERAL_STRING, LITERAL_FLOAT, LITERAL_INT, LITERAL_BOOL, LITERAL_STRING_INTERPOLATED
 	uint32_t			len;				// used only for TYPE_STRING
 	union {
 		char			*str;				// LITERAL_STRING
 		double			d;					// LITERAL_FLOAT
 		int64_t			n64;				// LITERAL_INT or LITERAL_BOOL
+		gnode_r			*r;					// LITERAL_STRING_INTERPOLATED
 	} value;
 } gnode_literal_expr_t;
 
@@ -239,7 +240,7 @@ typedef struct {
 
 typedef struct {
 	gnode_t				base;				// LIST_EXPR
-	bool				ismap;				// flag to check if the node represents a map (otehrwise it is a list)
+	bool				ismap;				// flag to check if the node represents a map (otherwise it is a list)
 	gnode_r				*list1;				// node items (cannot use a symtable here because order is mandatory in array)
 	gnode_r				*list2;				// used only in case of map
 } gnode_list_expr_t;
@@ -263,7 +264,8 @@ gnode_t *gnode_binary_expr_create (gtoken_t op, gnode_t *left, gnode_t *right);
 gnode_t *gnode_unary_expr_create (gtoken_t op, gnode_t *expr);
 gnode_t *gnode_file_expr_create (gtoken_s token, cstring_r *list);
 gnode_t *gnode_identifier_expr_create (gtoken_s token, const char *identifier, const char *identifier2);
-gnode_t *gnode_literal_string_expr_create (gtoken_s token, const char *s, uint32_t len);
+gnode_t *gnode_string_interpolation_create (gtoken_s token, gnode_r *r);
+gnode_t *gnode_literal_string_expr_create (gtoken_s token, char *s, uint32_t len, bool allocated);
 gnode_t *gnode_literal_float_expr_create (gtoken_s token, double f);
 gnode_t *gnode_literal_int_expr_create (gtoken_s token, int64_t n);
 gnode_t *gnode_literal_bool_expr_create (gtoken_s token, int32_t n);

+ 52 - 3
src/compiler/gravity_codegen.c

@@ -58,7 +58,6 @@ typedef struct codegen_t codegen_t;
 #endif
 
 // MARK: -
-
 static void report_error (gvisitor_t *self, gnode_t *node, const char *format, ...) {
 	// increment internal error counter
 	++self->nerr;
@@ -128,9 +127,10 @@ static opcode_t token2opcode (gtoken_t op) {
 		
 		default: assert(0); break;  // should never reach this point
 	}
-
+	
+	// should never reach this point
 	assert(0);
-	return NOT; // huehue, geddit?
+	return NOT;
 }
 
 #if 0
@@ -472,6 +472,7 @@ static void visit_loop_for_stmt (gvisitor_t *self, gnode_loop_stmt_t *node) {
 	
 	uint32_t $expr = ircode_register_push_temp(code);			// ++TEMP => 1
 	uint32_t $value = ircode_register_push_temp(code);			// ++TEMP => 2
+	
 	// $expr and $value are temporary registers that must not be cleared by ircode_register_clear_temps
 	// in visit_compound_statement, so mark them to skip clear
 	ircode_register_set_skip_clear(code, $expr);
@@ -637,6 +638,8 @@ static void store_declaration (gvisitor_t *self, gravity_object_t *obj, bool is_
 		
 		// if it is a function then generate a CLOSURE opcode instead of LOADK
 		if (OBJECT_ISA_FUNCTION(obj)) {
+			assert(node);
+			
 			gravity_function_t *f = (gravity_function_t *)obj;
 			uint32_t regnum = ircode_register_push_temp(code);
 			ircode_add(code, CLOSURE, regnum, index, 0);
@@ -1106,8 +1109,10 @@ static void visit_binary_expr (gvisitor_t *self, gnode_binary_expr_t *node) {
 	// assignment is right associative
 	if (node->op == TOK_OP_ASSIGN) {
 		CODEGEN_COUNT_REGISTERS(n1);
+		
 		visit(node->right);
 		visit(node->left);	// left expression can be: IDENTIFIER, FILE, POSTIFIX (not a call)
+		
 		CODEGEN_COUNT_REGISTERS(n2);
 		CODEGEN_ASSERT_REGISTERS(n1, n2, 0);
 		return;
@@ -1412,6 +1417,50 @@ static void visit_literal_expr (gvisitor_t *self, gnode_literal_expr_t *node) {
 			DEBUG_CODEGEN("visit_literal_expr (bool) %lld", node->value.n64);
 		} break;
 			
+		case LITERAL_STRING_INTERPOLATED: {
+			// codegen for string interpolation is like a list.join()
+			
+			gnode_list_expr_t *list = (gnode_list_expr_t *)gnode_list_expr_create(node->base.token, node->value.r, NULL, false);
+			visit((gnode_t *)list);
+			
+			// list
+			uint32_t listreg = ircode_register_last(code);
+			
+			// LOADK
+			uint16_t index = gravity_function_cpool_add(GET_VM(), context_function, VALUE_FROM_CSTRING(NULL, "join"));
+			ircode_add_constant(code, index);
+			uint32_t temp1 = ircode_register_last(code);
+			
+			// LOAD
+			ircode_add(code, LOAD, temp1, listreg, temp1);
+			
+			// temp1+1 register used for parameter passing
+			uint32_t temp2 = ircode_register_push_temp(code);
+			
+			// MOVE
+			ircode_add(code, MOVE, temp2, listreg, 0);
+			
+			// CALL
+			ircode_add(code, CALL, listreg, temp1, 1);
+			
+			// cleanup
+			mem_free(list);
+			ircode_register_pop(code);	// temp2
+			ircode_register_pop(code);	// temp1
+			
+			/*
+			 
+			 00012	LOADK 6 4
+			 00013	LOAD 6 4 6
+			 00014	MOVE 7 6
+			 00015	MOVE 8 4
+			 00016	CALL 6 7 1
+			 
+			 */
+			
+			break;
+		}
+			
 		default: assert(0);
 	}
 	

+ 0 - 3
src/compiler/gravity_lexer.c

@@ -60,7 +60,6 @@ typedef enum {
 #define INC_TOKUTF8LEN			++lexer->token.length
 #define INC_TOKLEN				INC_TOKBYTES; INC_TOKUTF8LEN
 #define DEC_TOKLEN				--lexer->token.bytes; --lexer->token.length
-#define SET_TOKESCAPED(value)	lexer->token.escaped = value
 #define SET_TOKTYPE(t)			lexer->token.type = t
 
 #define LEXER_CALL_CALLBACK()	if ((lexer->peeking == false) && (lexer->delegate) && (lexer->delegate->parser_callback)) {	\
@@ -340,7 +339,6 @@ static gtoken_t lexer_scan_string(gravity_lexer_t *lexer) {
 	// no memory allocation here
 	c = NEXT;					// save escaped character
 	TOKEN_RESET;				// save offset
-	SET_TOKESCAPED(false);		// set escaped flag to false
 	
 	while ((c2 = (unsigned char)PEEK_CURRENT) != c) {
 		if (IS_EOF) {return lexer_error(lexer, "Unexpected EOF inside a string literal");}
@@ -348,7 +346,6 @@ static gtoken_t lexer_scan_string(gravity_lexer_t *lexer) {
 		
 		// handle escaped characters
 		if (c2 == '\\') {
-			SET_TOKESCAPED(true);
 			INC_OFFSET_POSITION;
 			INC_OFFSET_POSITION;
 			INC_TOKLEN;

+ 0 - 1
src/compiler/gravity_lexer.h

@@ -53,7 +53,6 @@ void				gravity_lexer_token_dump (gtoken_s token);
 void				gravity_lexer_skip_line (gravity_lexer_t *lexer);
 uint32_t			gravity_lexer_lineno (gravity_lexer_t *lexer);
 
-
 #if GRAVITY_LEXER_DEGUB
 void				gravity_lexer_debug (gravity_lexer_t *lexer);
 #endif

+ 130 - 23
src/compiler/gravity_parser.c

@@ -578,6 +578,118 @@ report_node:
 	return NULL;
 }
 
+// Converts the raw text of a string literal (s, len bytes, exactly as returned by the lexer) into an AST node.
+// Escape sequences (\n, \t, \xHH, \uXXXX, \UXXXXXXXX, ...) are decoded into a newly allocated buffer and
+// each \(expr) interpolation is parsed with a temporary sub-lexer pushed on top of parser->lexer.
+// Returns a LITERAL_STRING node when no interpolation was found, otherwise a LITERAL_STRING_INTERPOLATED
+// node whose array holds the decoded literal chunks and the parsed expressions in source order.
+// A node is returned even after an error has been reported, so every buffer allocated here ends up
+// either owned by a node or explicitly released.
+static gnode_t *parse_analyze_literal_string (gravity_parser_t *parser, gtoken_s token, const char *s, uint32_t len) {
+	// used in string interpolation (created lazily at the first \( found)
+	gnode_r *r = NULL;
+	
+	// analyze s (of length len) for escaped characters or for interpolations
+	// len+1 bytes are always enough because no escape decodes to more bytes than it occupies in s
+	char *buffer = mem_alloc(len+1);
+	uint32_t length = 0;
+	
+	for (uint32_t i=0; i<len;) {
+		int c = s[i];
+		if (c == '\\') {
+			// handle escape sequence here
+			if (i+1 >= len) {REPORT_ERROR(token, "Unexpected EOF inside a string literal"); goto return_string;}
+			switch (s[i+1]) {
+				case '\'': c = '\''; ++i; break;
+				case '"':  c = '"'; ++i; break;
+				case '\\': c = '\\'; ++i; break;
+				case 'a': c = '\a'; ++i; break;
+				case 'b': c = '\b'; ++i; break;
+				case 'f': c = '\f'; ++i; break;
+				case 'n': c = '\n'; ++i; break;
+				case 'r': c = '\r'; ++i; break;
+				case 't': c = '\t'; ++i; break;
+				case 'v': c = '\v'; ++i; break;
+				case 'x': {
+					// two hex digits sequence
+					// \xHH
+					if (i+1+2 >= len) {REPORT_ERROR(token, "Unexpected EOF inside a string literal"); goto return_string;}
+					// setup a local buffer assuming the next two characters are hex
+					char b[3] = {s[i+2], s[i+3], 0};
+					// convert from base 16 (FF is at maximum 255)
+					c = (int)strtoul(b, NULL, 16);
+					buffer[length] = c;
+					// skip \x (2 characters) plus the 2 hex digits
+					i+=2+2; ++length;
+					continue;
+				}
+				case 'u':  {
+					// 4 digits unicode sequence
+					// \uXXXX
+					if (i+1+4 >= len) {REPORT_ERROR(token, "Unexpected EOF inside a string literal"); goto return_string;}
+					// setup a local buffer assuming the next four characters are hex
+					char b[5] = {s[i+2], s[i+3], s[i+4], s[i+5], 0};
+					// convert from base 16 (FFFF is at maximum 65535)
+					uint32_t n = (uint32_t)strtoul(b, NULL, 16);
+					length += utf8_encode(&buffer[length], n);
+					i+=2+4;
+					continue;
+				}
+				case 'U':  {
+					// 8 digits unicode sequence
+					// \UXXXXXXXX
+					if (i+1+8 >= len) {REPORT_ERROR(token, "Unexpected EOF inside a string literal"); goto return_string;}
+					// setup a local buffer assuming the next eight characters are hex
+					char b[9] = {s[i+2], s[i+3], s[i+4], s[i+5], s[i+6], s[i+7], s[i+8], s[i+9], 0};
+					// convert from base 16 (FFFFFFFF is at maximum 4294967295)
+					uint32_t n = (uint32_t)strtoul(b, NULL, 16);
+					length += utf8_encode(&buffer[length], n);
+					i+=2+8;
+					continue;
+				}
+				case '(': {
+					// string interpolation case
+					i+=2; // skip \ and (
+					uint32_t j=i;
+					// count nested parentheses so that \(f(x)) is closed by the matching ) and not by the first one
+					uint32_t nesting = 0;
+					bool subfound = false;
+					while (i<len) {
+						if (s[i] == '(') ++nesting;
+						else if (s[i] == ')') {
+							if (nesting == 0) subfound = true;
+							else --nesting;
+						}
+						++i;
+						if (subfound) break;
+					}
+					if (!subfound) {REPORT_ERROR(token, "Malformed interpolation string not closed by )"); goto return_string;}
+					
+					// i has already been advanced past the closing ) so sublen includes it
+					uint32_t sublen = i - j;
+					
+					// create a new temp lexer
+					gravity_lexer_t	*sublexer = gravity_lexer_create(&s[j], sublen, 0, true);
+					marray_push(gravity_lexer_t*, *parser->lexer, sublexer);
+					
+					// parse interpolated expression
+					gnode_t *subnode = parse_expression(parser);
+					
+					// free temp lexer BEFORE checking subnode, otherwise a failed parse would leave
+					// the temp lexer on top of the parser stack (and leak it)
+					marray_pop(*parser->lexer);
+					gravity_lexer_free(sublexer);
+					
+					if (!subnode) goto return_string;
+					
+					// add pending literal text (if any) and the expression to r
+					if (!r) r = gnode_array_create();
+					if (length) {
+						// buffer ownership is transferred to the new node so a new buffer is needed;
+						// when nothing was written the current buffer is simply reused
+						gnode_array_push(r, gnode_literal_string_expr_create(token, buffer, length, true));
+						buffer = mem_alloc(len+1);
+						length = 0;
+					}
+					gnode_array_push(r, subnode);
+					
+					continue;
+				}
+				default:
+					// ignore unknown sequence (the backslash is copied as is)
+					break;
+			}
+			
+		}
+		buffer[length] = c;
+		++i; ++length;
+	}
+	
+return_string:
+	// return a node (even in case of error) so its memory will be automatically freed
+	if (r) {
+		// interpolation mode: append the literal text that follows the last interpolation,
+		// or release the buffer when there is nothing left to append
+		if (length) gnode_array_push(r, gnode_literal_string_expr_create(token, buffer, length, true));
+		else mem_free(buffer);
+		return gnode_string_interpolation_create(token, r);
+	}
+	return gnode_literal_string_expr_create(token, buffer, length, true);
+}
+
 static gnode_t *parse_literal_expression (gravity_parser_t *parser) {
 	DEBUG_PARSER("parse_literal_expression");
 	DECLARE_LEXER;
@@ -589,7 +701,10 @@ static gnode_t *parse_literal_expression (gravity_parser_t *parser) {
 		uint32_t len = 0;
 		const char *value = token_string(token, &len);
 		DEBUG_PARSER("STRING: %.*s", len, value);
-		return gnode_literal_string_expr_create(token, value, len);
+		// run the string analyzer because the string is returned as is from the lexer,
+		// but a string can contain escape sequences and interpolations that
+		// need to be processed
+		return parse_analyze_literal_string(parser, token, value, len);
 	}
 	
 	if (type == TOK_KEY_TRUE || type == TOK_KEY_FALSE) {
@@ -743,32 +858,37 @@ static gnode_t *parse_precedence(gravity_parser_t *parser, prec_level precedence
 	DEBUG_PARSER("parse_precedence (level %d)", precedence);
 	DECLARE_LEXER;
 	
-	// peek next
+	// peek next and check for EOF
 	gtoken_t type = gravity_lexer_peek(lexer);
 	if (type == TOK_EOF) return NULL;
 	
 	parse_func prefix = rules[type].prefix;
 	if (prefix == NULL) {
-		// gravity_lexer_token reports the latest succesfully scanned token but since we need to report
-		// an error for a peeked token then we force reporting "next" token
-		REPORT_ERROR(gravity_lexer_token_next(lexer), "Expected expression but found %s.", token_name(type));
+		// we need to consume next token because error was triggered in peek
+		gravity_lexer_next(lexer);
+		REPORT_ERROR(gravity_lexer_token(lexer), "Expected expression but found %s.", token_name(type));
 		return NULL;
 	}
+	
+	// execute prefix callback
 	gnode_t *node = prefix(parser);
 	
+	// peek next and check for EOF
 	gtoken_t peek = gravity_lexer_peek(lexer);
-	if (peek == TOK_EOF) return NULL;
+	if (peek == TOK_EOF) return node;
 	
 	while (precedence < rules[peek].precedence) {
 		gtoken_t tok = gravity_lexer_next(lexer);
 		grammar_rule *rule = &rules[tok];
 		
+		// execute infix callback
 		parser->current_token = tok;
 		parser->current_node = node;
 		node = rule->infix(parser);
 		
+		// peek next and check for EOF
 		peek = gravity_lexer_peek(lexer);
-		if (peek == TOK_EOF) return NULL;
+		if (peek == TOK_EOF) break;
 	}
 	
 	return node;
@@ -776,22 +896,7 @@ static gnode_t *parse_precedence(gravity_parser_t *parser, prec_level precedence
 
 static gnode_t *parse_expression (gravity_parser_t *parser) {
 	DEBUG_PARSER("parse_expression");
-	DECLARE_LEXER;
-	
-	// parse_expression is the default case called when no other case in parse_statament can be resolved
-	// due to some syntax errors an infinte loop condition can be verified
-	gtoken_s tok1 = gravity_lexer_token(lexer);
-	
-	gnode_t *expr = parse_precedence(parser, PREC_LOWEST);
-	
-	// expr is NULL means than an error condition has been encountered (and a potential infite loop)
-	if (expr == NULL) {
-		gtoken_s tok2 = gravity_lexer_token(lexer);
-		// if current token is equal to the token saved before the recursion than skip token in order to avoid infinite loop
-		if (token_identical(&tok1, &tok2)) gravity_lexer_next(lexer);
-	}
-	
-	return expr;
+	return parse_precedence(parser, PREC_LOWEST);
 }
 
 static gnode_t *parse_unary (gravity_parser_t *parser) {
@@ -1978,8 +2083,10 @@ static gnode_t *parse_declaration_statement (gravity_parser_t *parser) {
 static gnode_t *parse_import_statement (gravity_parser_t *parser) {
 	#pragma unused(parser)
 	DEBUG_PARSER("parse_import_statement");
+	DECLARE_LEXER;
 	
 	// import is a syntactic sugar for System.import
+	gravity_lexer_next(lexer);
 	return NULL;
 }
 

+ 6 - 0
src/compiler/gravity_semacheck2.c

@@ -1012,6 +1012,12 @@ static void visit_literal_expr (gvisitor_t *self, gnode_literal_expr_t *node) {
 	DEBUG_SEMANTIC("visit_literal_expr %s", value);
 	DEBUG_SEMANTIC("end visit_literal_expr");
 	#endif
+	
+	if (node->type == LITERAL_STRING_INTERPOLATED) {
+		gnode_array_each(node->value.r, {
+			visit(val);
+		});
+	}
 }
 
 static void visit_identifier_expr (gvisitor_t *self, gnode_identifier_expr_t *node) {

+ 3 - 7
src/compiler/gravity_token.c

@@ -199,6 +199,7 @@ const char *token_literal_name (gliteral_t value) {
 	else if (value == LITERAL_FLOAT) return "FLOAT";
 	else if (value == LITERAL_INT) return "INTEGER";
 	else if (value == LITERAL_BOOL) return "BOOLEAN";
+	else if (value == LITERAL_STRING_INTERPOLATED) return "STRING INTERPOLATED";
 	return "N/A";
 }
 
@@ -213,6 +214,8 @@ bool token_isvariable_declaration (gtoken_t token) {
 }
 
 bool token_isstatement (gtoken_t token) {
+	if (token == TOK_EOF) return false;
+	
 	// label_statement (case, default)
 	// expression_statement ('+' | '-' | '!' | 'not' | new | raise | file | isPrimaryExpression)
 	// flow_statement (if, select)
@@ -340,10 +343,3 @@ bool token_iserror (gtoken_t token) {
 bool token_iseof (gtoken_t token) {
 	return (token == TOK_EOF);
 }
-
-bool token_identical (gtoken_s *t1, gtoken_s *t2) {
-	// we can't use memcmp to compare structs for equality due to potential random padding characters between field in structs
-	return ((t1->type == t2->type) && (t1->lineno == t2->lineno) && (t1->colno == t2->colno) && (t1->position == t2->position) &&
-			(t1->bytes == t2->bytes) && (t1->length == t2->length) && (t1->fileid == t2->fileid) && (t1->escaped == t2->escaped) &&
-			(memcmp(t1->value, t2->value, t1->bytes) == 0));
-}

+ 3 - 5
src/compiler/gravity_token.h

@@ -90,7 +90,7 @@ typedef enum {
 } gtoken_t;
 
 typedef enum {
-	LITERAL_STRING, LITERAL_FLOAT, LITERAL_INT, LITERAL_BOOL
+	LITERAL_STRING, LITERAL_FLOAT, LITERAL_INT, LITERAL_BOOL, LITERAL_STRING_INTERPOLATED
 } gliteral_t;
 
 struct gtoken_s {
@@ -101,13 +101,12 @@ struct gtoken_s {
 	uint32_t			bytes;		// token length in bytes
 	uint32_t			length;		// token length (UTF-8)
 	uint32_t			fileid;		// token file id
-	bool				escaped;	// if true then string_unescape is called when token is finalized
 	const char			*value;		// token value (not null terminated)
 };
 typedef struct gtoken_s	gtoken_s;
 
-#define NO_TOKEN				(gtoken_s){0,0,0,0,0,0,0,0,NULL}
-#define UNDEF_TOKEN				(gtoken_s){TOK_KEY_UNDEFINED,0,0,0,0,0,0,0,NULL}
+#define NO_TOKEN				(gtoken_s){0,0,0,0,0,0,0,NULL}
+#define UNDEF_TOKEN				(gtoken_s){TOK_KEY_UNDEFINED,0,0,0,0,0,0,NULL}
 #define TOKEN_BYTES(_tok)		_tok.bytes
 #define TOKEN_VALUE(_tok)		_tok.value
 
@@ -139,6 +138,5 @@ bool			token_isaccess_specifier (gtoken_t token);
 bool			token_isstorage_specifier (gtoken_t token);
 bool			token_isprimary_expression (gtoken_t token);
 bool			token_isexpression_statement (gtoken_t token);
-bool			token_identical (gtoken_s *token1, gtoken_s *token2);
 
 #endif

+ 1 - 0
src/runtime/gravity_vm.c

@@ -1806,6 +1806,7 @@ gravity_closure_t *gravity_vm_loadbuffer (gravity_vm *vm, const char *buffer, si
 	
 abort_load:
 	report_runtime_error(vm, GRAVITY_ERROR_RUNTIME, "%s", "Unable to parse JSON executable file.");
+	marray_destroy(objects);
 	if (json) json_value_free(json);
 	gravity_gc_setenabled(vm, true);
 	return NULL;

+ 1 - 1
src/shared/gravity_value.c

@@ -640,7 +640,7 @@ abort_conversion:
 
 void gravity_function_dump (gravity_function_t *f, code_dump_function codef) {
 	printf("Function: %s\n", (f->identifier) ? f->identifier : "$anon");
-	printf("Params:%d Locals:%d Temp:%d Upvalues:%d\n", f->nparams, f->nlocals, f->ntemps, f->nupvalues);
+	printf("Params:%d Locals:%d Temp:%d Upvalues:%d Tag:%d xdata:%p\n", f->nparams, f->nlocals, f->ntemps, f->nupvalues, f->tag, f->xdata);
 	
 	if (f->tag == EXEC_TYPE_NATIVE) {
 		if (marray_size(f->cpool)) printf("======= CPOOL =======\n");

+ 2 - 2
src/shared/gravity_value.h

@@ -66,8 +66,8 @@
 extern "C" {
 #endif
 
-#define GRAVITY_VERSION						"0.2.5"
-#define GRAVITY_VERSION_NUMBER				0x000205
+#define GRAVITY_VERSION						"0.2.6"
+#define GRAVITY_VERSION_NUMBER				0x000206
 #define GRAVITY_BUILD_DATE					__DATE__
 
 #define GRAVITY_ENABLE_DOUBLE				1			// if 1 enable gravity_float_t to be a double (instead of a float)

+ 1 - 0
src/utils/gravity_json.c

@@ -130,6 +130,7 @@ static void json_write_raw (json_t *json, const char *buffer, size_t len, bool e
 }
 
 static void json_write_escaped (json_t *json, const char *buffer, size_t len, bool escape, bool is_pretty) {
+	if (!len) return;
 	char	*new_buffer = mem_alloc (len*2);
 	size_t	j = 0;
 	assert(new_buffer);

+ 52 - 28
src/utils/gravity_utils.c

@@ -335,34 +335,6 @@ const char *string_ndup (const char *s1, size_t n) {
 	return s;
 }
 
-char *string_unescape (const char *s1, uint32_t *s1len, char *buffer) {
-	uint32_t len = *s1len;
-	uint32_t orig_len = len;
-	
-	for (uint32_t i=0, j=0; i<orig_len; ++i, ++j) {
-		char c = s1[i];
-		if ((c == '\\') && (i+1<orig_len)) {
-			c = s1[i+1];
-			switch (c) {
-				case '"':
-				case '\\':
-				case '\b':
-				case '\f':
-				case '\n':
-				case '\r':
-				case '\t':
-					++i; --len; break;
-				default:
-					c = s1[i]; break;
-			}
-		}
-		buffer[j] = c;
-	}
-	
-	*s1len = len;
-	return buffer;
-}
-
 // From: http://stackoverflow.com/questions/198199/how-do-you-reverse-a-string-in-place-in-c-or-c
 void string_reverse (char *p) {
 	char *q = p;
@@ -406,6 +378,58 @@ inline uint32_t utf8_charbytes (const char *s, uint32_t i) {
 	return 0;
 }
 
+uint32_t utf8_nbytes (uint32_t n) {	// returns the number of bytes needed to encode value n in UTF-8 (same thresholds used by utf8_encode)
+	if (n <= 0x7f) return 1;		// up to 127
+	if (n <= 0x7ff) return 2;		// up to 2047
+	if (n <= 0xffff) return 3;		// up to 65535
+	if (n <= 0x10ffff) return 4;	// up to 1114111
+
+	return 0;	// n is greater than 0x10ffff: not encodable
+}
+
+// Writes the UTF-8 encoding of value into buffer and returns the number of bytes written (1 to 4), or 0 without writing anything when value > 0x10ffff; no terminator is appended. From: https://github.com/munificent/wren/blob/master/src/vm/wren_utils.c
+uint32_t utf8_encode(char *buffer, uint32_t value) {
+	char *bytes = buffer;	// write cursor inside buffer (the caller must provide room for up to 4 bytes)
+	
+	if (value <= 0x7f) {
+		// single byte (i.e. fits in ASCII): 0xxxxxxx.
+		*bytes = value & 0x7f;
+		return 1;
+	}
+	
+	if (value <= 0x7ff) {
+		// two byte sequence: 110xxxxx 10xxxxxx (top 5 bits first, then low 6 bits).
+		*bytes = 0xc0 | ((value & 0x7c0) >> 6);
+		++bytes;
+		*bytes = 0x80 | (value & 0x3f);
+		return 2;
+	}
+	
+	if (value <= 0xffff) {
+		// three byte sequence: 1110xxxx 10xxxxxx 10xxxxxx (4 + 6 + 6 bits).
+		*bytes = 0xe0 | ((value & 0xf000) >> 12);
+		++bytes;
+		*bytes = 0x80 | ((value & 0xfc0) >> 6);
+		++bytes;
+		*bytes = 0x80 | (value & 0x3f);
+		return 3;
+	}
+	
+	if (value <= 0x10ffff) {
+		// four byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx (3 + 6 + 6 + 6 bits).
+		*bytes = 0xf0 | ((value & 0x1c0000) >> 18);
+		++bytes;
+		*bytes = 0x80 | ((value & 0x3f000) >> 12);
+		++bytes;
+		*bytes = 0x80 | ((value & 0xfc0) >> 6);
+		++bytes;
+		*bytes = 0x80 | (value & 0x3f);
+		return 4;
+	}
+	
+	return 0;	// value is greater than 0x10ffff: nothing is written
+}
+
 uint32_t utf8_len (const char *s, uint32_t nbytes) {
 	if (nbytes == 0) nbytes = (uint32_t)strlen(s);
 	

+ 2 - 1
src/utils/gravity_utils.h

@@ -48,12 +48,13 @@ int			string_casencmp (const char *s1, const char *s2, size_t n);
 int			string_cmp (const char *s1, const char *s2);
 const char	*string_dup (const char *s1);
 const char	*string_ndup (const char *s1, size_t n);
-char		*string_unescape (const char *s1, uint32_t *s1len, char *buffer);
 void		string_reverse (char *p);
 uint32_t	string_size (const char *p);
 
 // UTF-8
 uint32_t	utf8_charbytes (const char *s, uint32_t i);
+uint32_t	utf8_nbytes (uint32_t n);
+uint32_t	utf8_encode(char *buffer, uint32_t value);
 uint32_t	utf8_len (const char *s, uint32_t nbytes);
 void		utf8_reverse (char *p);
 

+ 24 - 0
test/string_escaped.gravity

@@ -0,0 +1,24 @@
+#unittest {
+	name: "String escaping cases.";
+	error: NONE;
+	result: true;
+};
+
+func main() {
+	// hex escape
+	var s1 = "Hello\x20World";
+	var s2 = "Hello World";
+	var b1 = (s1 == s2);
+	
+	// c-like escape
+	var s3 = "Hello\tWorld";
+	var s4 = "Hello	World";
+	var b2 = (s3 == s4);
+	
+	// UTF-8 4 characters escape
+	var s5 = "Hello \U0001F601 World";
+	var s6 = "Hello 😁 World";
+	var b3 = (s5 == s6);
+	
+	return (b1 && b2 && b3)
+}

+ 19 - 0
test/string_interpolation.gravity

@@ -0,0 +1,19 @@
+#unittest {
+	name: "String interpolation.";
+	error: NONE;
+	result: true;
+};
+
+func main() {
+	var a = 10;
+	var b = "World";
+	var c = false;
+	
+	// string interpolation
+	var s1 = "Hello \(b) is 5 == \(a)? \(c)";
+	
+	// static string
+	var s2 = "Hello World is 5 == 10? false";
+	
+	return s1 == s2;
+}