Browse Source

String support

gingerBill 9 years ago
parent
commit
2aaef48c5c
16 changed files with 738 additions and 247 deletions
  1. 8 0
      examples/basic.odin
  2. 57 0
      examples/test.ll
  3. 2 4
      examples/test.odin
  4. 3 2
      run.bat
  5. 17 20
      src/checker/checker.cpp
  6. 3 2
      src/checker/expr.cpp
  7. 10 12
      src/checker/stmt.cpp
  8. 4 4
      src/codegen/codegen.cpp
  9. 36 12
      src/codegen/print_llvm.cpp
  10. 229 94
      src/codegen/ssa.cpp
  11. 5 75
      src/common.cpp
  12. 39 1
      src/gb/gb.h
  13. 1 1
      src/main.cpp
  14. 1 4
      src/parser.cpp
  15. 262 0
      src/string.cpp
  16. 61 16
      src/tokenizer.cpp

+ 8 - 0
examples/basic.odin

@@ -0,0 +1,8 @@
+putchar :: proc(c : i32) -> i32 #foreign
+
+print_string :: proc(s : string) {
+	for i := 0; i < len(s); i++ {
+		c := cast(i32)s[i];
+		putchar(c);
+	}
+}

+ 57 - 0
examples/test.ll

@@ -0,0 +1,57 @@
+define void @main() {
+"entry - 0":
+	%0 = getelementptr inbounds [13 x i8], [13 x i8]* @.str0, i64 0, i64 0
+	%1 = alloca {i8*, i64}, align 8 
+	store {i8*, i64} zeroinitializer, {i8*, i64}* %1
+	%2 = getelementptr inbounds {i8*, i64}, {i8*, i64}* %1, i64 0, i32 0
+	%3 = getelementptr inbounds {i8*, i64}, {i8*, i64}* %1, i64 0, i32 1
+	store i8* %0, i8** %2
+	store i64 13, i64* %3
+	%4 = load {i8*, i64}, {i8*, i64}* %1
+	call void @print_string({i8*, i64} %4)
+	ret void
+}
+
+declare i32 @putchar(i32 %c) 
+define void @print_string({i8*, i64} %s) {
+"entry - 0":
+	%0 = alloca {i8*, i64}, align 8 ; s
+	store {i8*, i64} zeroinitializer, {i8*, i64}* %0
+	store {i8*, i64} %s, {i8*, i64}* %0
+	%1 = alloca i64, align 8 ; i
+	store i64 zeroinitializer, i64* %1
+	store i64 0, i64* %1
+	br label %"for.loop - 2"
+
+"for.body - 1":
+	%2 = alloca i32, align 4 ; c
+	store i32 zeroinitializer, i32* %2
+	%3 = load i64, i64* %1
+	%4 = getelementptr inbounds {i8*, i64}, {i8*, i64}* %0, i64 0, i32 0
+	%5 = load i8*, i8** %4
+	%6 = getelementptr i8, i8* %5, i64 %3
+	%7 = load i8, i8* %6
+	%8 = zext i8 %7 to i32
+	store i32 %8, i32* %2
+	%9 = load i32, i32* %2
+	%10 = call i32 @putchar(i32 %9)
+	br label %"for.post - 3"
+
+"for.loop - 2":
+	%11 = load i64, i64* %1
+	%12 = getelementptr inbounds {i8*, i64}, {i8*, i64}* %0, i64 0, i32 1
+	%13 = load i64, i64* %12
+	%14 = icmp slt i64 %11, %13
+	br i1 %14, label %"for.body - 1", label %"for.done - 4"
+
+"for.post - 3":
+	%15 = load i64, i64* %1
+	%16 = add i64 %15, 1
+	store i64 %16, i64* %1
+	br label %"for.loop - 2"
+
+"for.done - 4":
+	ret void
+}
+
+@.str0 = global [13 x i8] c"Hello\2C\20\E4\B8\96\E7\95\8C"

+ 2 - 4
examples/test.odin

@@ -1,7 +1,5 @@
-add :: proc(x, y : int) -> (int, int) {
-	return x+y, 1;
-}
+import "basic"
 
 main :: proc() {
-
+	print_string("Hello, 世界");
 }

+ 3 - 2
run.bat

@@ -2,7 +2,8 @@
 
 
 rem del "..\examples\test.bc"
-call ..\bin\odin.exe ..\examples/test.odin && lli ..\examples/test.ll
-rem call opt -mem2reg ..\examples/test.ll > ..\examples/test.bc
+call ..\bin\odin.exe ..\examples/test.odin && ..\misc\llvm-bin\lli.exe ..\examples/test.ll
+call ..\misc\llvm-bin\opt.exe -mem2reg ..\examples/test.ll > ..\examples/test.bc
+call llc ..\examples/test.bc
 rem call llvm-dis ..\examples/test.bc -o ..\examples/test.ll
 rem call clang ..\examples/test.c -O0 -S -emit-llvm -o ..\examples/test-c.ll

+ 17 - 20
src/checker/checker.cpp

@@ -219,6 +219,23 @@ void destroy_scope(Scope *scope) {
 	// NOTE(bill): No need to free scope as it "should" be allocated in an arena (except for the global scope)
 }
 
+void add_scope(Checker *c, AstNode *node, Scope *scope) {
+	GB_ASSERT(node != NULL);
+	GB_ASSERT(scope != NULL);
+	map_set(&c->info.scopes, hash_pointer(node), scope);
+}
+
+
+void check_open_scope(Checker *c, AstNode *stmt) {
+	GB_ASSERT(is_ast_node_stmt(stmt) || stmt->kind == AstNode_ProcType);
+	Scope *scope = make_scope(c->context.scope, c->allocator);
+	add_scope(c, stmt, scope);
+	c->context.scope = scope;
+}
+
+void check_close_scope(Checker *c) {
+	c->context.scope = c->context.scope->parent;
+}
 
 void scope_lookup_parent_entity(Scope *s, String name, Scope **scope, Entity **entity) {
 	u64 key = hash_string(name);
@@ -359,7 +376,6 @@ void init_checker(Checker *c, Parser *parser) {
 	c->sizes.word_size = 8;
 	c->sizes.max_align = 8;
 
-
 	gb_array_init(c->procedure_stack, a);
 	gb_array_init(c->procedures, a);
 
@@ -487,25 +503,6 @@ void check_procedure_later(Checker *c, AstFile *file, Token token, DeclInfo *dec
 	gb_array_append(c->procedures, info);
 }
 
-
-
-void add_scope(Checker *c, AstNode *node, Scope *scope) {
-	GB_ASSERT(node != NULL);
-	GB_ASSERT(scope != NULL);
-	map_set(&c->info.scopes, hash_pointer(node), scope);
-}
-
-
-void check_open_scope(Checker *c, AstNode *statement) {
-	Scope *scope = make_scope(c->context.scope, c->allocator);
-	add_scope(c, statement, scope);
-	c->context.scope = scope;
-}
-
-void check_close_scope(Checker *c) {
-	c->context.scope = c->context.scope->parent;
-}
-
 void check_add_deferred_stmt(Checker *c, AstNode *stmt) {
 	GB_ASSERT(stmt != NULL);
 	GB_ASSERT(is_ast_node_stmt(stmt));

+ 3 - 2
src/checker/expr.cpp

@@ -954,6 +954,7 @@ b32 check_index_value(Checker *c, AstNode *index_value, i64 max_count, i64 *valu
 }
 
 Entity *lookup_field(Type *type, AstNode *field_node, isize *index = NULL) {
+	GB_ASSERT(type != NULL);
 	GB_ASSERT(field_node->kind == AstNode_Ident);
 	type = get_base_type(type);
 	if (type->kind == Type_Pointer)
@@ -1192,7 +1193,7 @@ b32 check_builtin_procedure(Checker *c, Operand *operand, AstNode *call, i32 id)
 				if (is_type_string(t)) {
 					if (operand->mode == Addressing_Constant) {
 						mode = Addressing_Constant;
-						value = make_exact_value_integer(operand->value.value_string.len);
+						value = make_exact_value_integer(operand->value.value_string);
 					} else {
 						mode = Addressing_Value;
 					}
@@ -1683,7 +1684,6 @@ ExpressionKind check__expr_base(Checker *c, Operand *o, AstNode *node, Type *typ
 				if (o->mode == Addressing_Constant) {
 					max_count = o->value.value_string.len;
 				}
-				o->mode = Addressing_Value;
 				o->type = t_u8;
 			}
 			break;
@@ -1743,6 +1743,7 @@ ExpressionKind check__expr_base(Checker *c, Operand *o, AstNode *node, Type *typ
 				if (o->mode == Addressing_Constant) {
 					max_count = o->value.value_string.len;
 				}
+				o->type = t_string;
 				o->mode = Addressing_Value;
 			}
 			break;

+ 10 - 12
src/checker/stmt.cpp

@@ -408,11 +408,15 @@ void check_proc_decl(Checker *c, Entity *e, DeclInfo *d, b32 check_body_later) {
 	e->type = proc_type;
 	ast_node(pd, ProcDecl, d->proc_decl);
 
-#if 1
 	Scope *original_curr_scope = c->context.scope;
 	c->context.scope = c->global_scope;
 	check_open_scope(c, pd->type);
-#endif
+	defer ({
+		check_close_scope(c);
+		c->context.scope = original_curr_scope;
+	});
+
+
 	check_procedure_type(c, proc_type, pd->type);
 	b32 is_foreign   = false;
 	b32 is_inline    = false;
@@ -455,11 +459,6 @@ void check_proc_decl(Checker *c, Entity *e, DeclInfo *d, b32 check_body_later) {
 		}
 	}
 
-#if 1
-	check_close_scope(c);
-	c->context.scope = original_curr_scope;
-#endif
-
 }
 
 void check_var_decl(Checker *c, Entity *e, Entity **entities, isize entity_count, AstNode *type_expr, AstNode *init_expr) {
@@ -554,8 +553,7 @@ void check_stmt(Checker *c, AstNode *node, u32 flags) {
 	case_end;
 
 	case_ast_node(ids, IncDecStmt, node);
-		Token op = {};
-		op = ids->op;
+		Token op = ids->op;
 		switch (ids->op.kind) {
 		case Token_Increment:
 			op.kind = Token_Add;
@@ -717,9 +715,9 @@ void check_stmt(Checker *c, AstNode *node, u32 flags) {
 			result_count = proc_type->procedure.results->tuple.variable_count;
 		if (result_count != rs->result_count) {
 			error(&c->error_collector, rs->token, "Expected %td return %s, got %td",
-			            result_count,
-			            (result_count != 1 ? "values" : "value"),
-			            rs->result_count);
+			      result_count,
+			      (result_count != 1 ? "values" : "value"),
+			      rs->result_count);
 		} else if (result_count > 0) {
 			auto *tuple = &proc_type->procedure.results->tuple;
 			check_init_variables(c, tuple->variables, tuple->variable_count,

+ 4 - 4
src/codegen/codegen.cpp

@@ -1,5 +1,5 @@
 #include "ssa.cpp"
-#include "print.cpp"
+#include "print_llvm.cpp"
 
 struct ssaGen {
 	ssaModule module;
@@ -7,14 +7,14 @@ struct ssaGen {
 };
 
 b32 ssa_gen_init(ssaGen *s, Checker *c) {
-	if (c->error_collector.count != 0)
+	if (c->error_collector.count > 0)
 		return false;
 
 	gb_for_array(i, c->parser->files) {
 		AstFile *f = &c->parser->files[i];
-		if (f->error_collector.count != 0)
+		if (f->error_collector.count > 0)
 			return false;
-		if (f->tokenizer.error_count != 0)
+		if (f->tokenizer.error_count > 0)
 			return false;
 	}
 

+ 36 - 12
src/codegen/print.cpp → src/codegen/print_llvm.cpp

@@ -1,15 +1,26 @@
+#define SSA_PRINT_TO_STDOUT 0
+
 void ssa_fprintf(gbFile *f, char *fmt, ...) {
 	va_list va;
 	va_start(va, fmt);
 	gb_fprintf_va(f, fmt, va);
-#if 1
+#if SSA_PRINT_TO_STDOUT
 	gb_printf_va(fmt, va);
 #endif
 	va_end(va);
 }
 
+void ssa_file_write(gbFile *f, void *data, isize len) {
+	gb_file_write(f, data, len);
+#if SSA_PRINT_TO_STDOUT
+	gb_file_write(gb_file_get_standard(gbFileStandard_Output), data, len);
+#endif
+}
 
 b32 ssa_valid_char(u8 c) {
+	if (c >= 0x80)
+		return false;
+
 	if (gb_char_is_alphanumeric(c))
 		return true;
 
@@ -55,7 +66,7 @@ void ssa_print_escape_string(gbFile *f, String name) {
 		}
 	}
 
-	gb_file_write(f, buf, buf_len);
+	ssa_file_write(f, buf, buf_len);
 }
 
 
@@ -92,8 +103,8 @@ void ssa_print_type(gbFile *f, BaseTypeSizes s, Type *t) {
 		case Basic_f64:    ssa_fprintf(f, "double");                  break;
 		case Basic_rawptr: ssa_fprintf(f, "void*");                   break;
 		case Basic_string: ssa_fprintf(f, "{i8*, i%lld}", word_bits); break;
-		case Basic_int:    ssa_fprintf(f, "i%lld", word_bits);        break;
 		case Basic_uint:   ssa_fprintf(f, "i%lld", word_bits);        break;
+		case Basic_int:    ssa_fprintf(f, "i%lld", word_bits);        break;
 		}
 		break;
 	case Type_Array:
@@ -109,7 +120,9 @@ void ssa_print_type(gbFile *f, BaseTypeSizes s, Type *t) {
 	case Type_Structure:
 		ssa_fprintf(f, "{");
 		for (isize i = 0; i < t->structure.field_count; i++) {
-			if (i > 0) ssa_fprintf(f, ", ");
+			if (i > 0) {
+				ssa_fprintf(f, ", ");
+			}
 			ssa_print_type(f, s, t->structure.fields[i]->type);
 		}
 		ssa_fprintf(f, "}");
@@ -137,13 +150,16 @@ void ssa_print_type(gbFile *f, BaseTypeSizes s, Type *t) {
 		}
 		break;
 	case Type_Procedure:
-		if (t->procedure.result_count == 0)
+		if (t->procedure.result_count == 0) {
 			ssa_fprintf(f, "void");
-		else
+		} else {
 			ssa_print_type(f, s, t->procedure.results);
+		}
 		ssa_fprintf(f, " (");
 		for (isize i = 0; i < t->procedure.param_count; i++) {
-			if (i > 0) ssa_fprintf(f, ", ");
+			if (i > 0) {
+				ssa_fprintf(f, ", ");
+			}
 			ssa_print_type(f, s, &t->procedure.params[i]);
 		}
 		ssa_fprintf(f, ")*");
@@ -158,11 +174,7 @@ void ssa_print_exact_value(gbFile *f, ssaModule *m, ExactValue value, Type *type
 		break;
 	case ExactValue_String: {
 		ssa_fprintf(f, "c\"");
-		// TODO(bill): Make unquote string function
-		String unquoted = value.value_string;
-		unquoted.text++;
-		unquoted.len -= 2;
-		ssa_print_escape_string(f, unquoted);
+		ssa_print_escape_string(f, value.value_string);
 		ssa_fprintf(f, "\"");
 	} break;
 	case ExactValue_Integer:
@@ -334,6 +346,18 @@ void ssa_print_instr(gbFile *f, ssaModule *m, ssaValue *value) {
 
 	} break;
 
+	case ssaInstr_Conv: {
+		auto *c = &instr->conv;
+		ssa_fprintf(f, "%%%d = %.*s ", value->id, LIT(ssa_conv_strings[c->kind]));
+		ssa_print_type(f, m->sizes, c->from);
+		ssa_fprintf(f, " ");
+		ssa_print_value(f, m, c->value, c->from);
+		ssa_fprintf(f, " to ");
+		ssa_print_type(f, m->sizes, c->to);
+		ssa_fprintf(f, "\n");
+
+	} break;
+
 	case ssaInstr_Unreachable: {
 		ssa_fprintf(f, "unreachable\n");
 	} break;

+ 229 - 94
src/codegen/ssa.cpp

@@ -59,7 +59,7 @@ struct ssaProcedure {
 	SSA_INSTR_KIND(Store), \
 	SSA_INSTR_KIND(Load), \
 	SSA_INSTR_KIND(GetElementPtr), \
-	SSA_INSTR_KIND(Convert), \
+	SSA_INSTR_KIND(Conv), \
 	SSA_INSTR_KIND(Br), \
 	SSA_INSTR_KIND(Ret), \
 	SSA_INSTR_KIND(Unreachable), \
@@ -79,20 +79,31 @@ String const ssa_instr_strings[] = {
 #undef SSA_INSTR_KIND
 };
 
-enum ssaConversionKind {
-	ssaConversion_Invalid,
-
-	ssaConversion_ZExt,
-	ssaConversion_FPExt,
-	ssaConversion_FPToUI,
-	ssaConversion_FPToSI,
-	ssaConversion_UIToFP,
-	ssaConversion_SIToFP,
-	ssaConversion_PtrToInt,
-	ssaConversion_IntToPtr,
-	ssaConversion_BitCast,
+#define SSA_CONV_KINDS \
+	SSA_CONV_KIND(Invalid), \
+	SSA_CONV_KIND(trunc), \
+	SSA_CONV_KIND(zext), \
+	SSA_CONV_KIND(fptrunc), \
+	SSA_CONV_KIND(fpext), \
+	SSA_CONV_KIND(fptoui), \
+	SSA_CONV_KIND(fptosi), \
+	SSA_CONV_KIND(uitofp), \
+	SSA_CONV_KIND(sitofp), \
+	SSA_CONV_KIND(ptrtoint), \
+	SSA_CONV_KIND(inttoptr), \
+	SSA_CONV_KIND(bitcast), \
+	SSA_CONV_KIND(Count)
+
+enum ssaConvKind {
+#define SSA_CONV_KIND(x) GB_JOIN2(ssaConv_, x)
+	SSA_CONV_KINDS
+#undef SSA_CONV_KIND
+};
 
-	ssaConversion_Count,
+String const ssa_conv_strings[] = {
+#define SSA_CONV_KIND(x) {cast(u8 *)#x, gb_size_of(#x)-1}
+	SSA_CONV_KINDS
+#undef SSA_CONV_KIND
 };
 
 struct ssaInstr {
@@ -124,10 +135,10 @@ struct ssaInstr {
 			b32       inbounds;
 		} get_element_ptr;
 		struct {
-			ssaConversionKind kind;
+			ssaConvKind kind;
 			ssaValue *value;
 			Type *from, *to;
-		} conversion;
+		} conv;
 		struct {
 			ssaValue *cond;
 			ssaBlock *true_block;
@@ -266,6 +277,8 @@ Type *ssa_instr_type(ssaInstr *instr) {
 		return instr->get_element_ptr.result_type;
 	case ssaInstr_BinaryOp:
 		return instr->binary_op.type;
+	case ssaInstr_Conv:
+		return instr->conv.to;
 	}
 	return NULL;
 }
@@ -287,6 +300,9 @@ void ssa_instr_set_type(ssaInstr *instr, Type *type) {
 	case ssaInstr_BinaryOp:
 		instr->binary_op.type = type;
 		break;
+	case ssaInstr_Conv:
+		instr->conv.to = type;
+		break;
 	}
 }
 
@@ -481,6 +497,18 @@ ssaValue *ssa_make_instr_call(ssaProcedure *p, ssaValue *value, ssaValue **args,
 	return v;
 }
 
+ssaValue *ssa_make_instr_conv(ssaProcedure *p, ssaConvKind kind, ssaValue *value, Type *from, Type *to) {
+	ssaValue *v = ssa_alloc_instr(p->module->allocator, ssaInstr_Conv);
+	v->instr.conv.kind = kind;
+	v->instr.conv.value = value;
+	v->instr.conv.from = from;
+	v->instr.conv.to = to;
+	if (p->curr_block) {
+		gb_array_append(p->curr_block->values, v);
+	}
+	return v;
+}
+
 
 
 
@@ -736,16 +764,73 @@ ssaValue *ssa_emit_conv(ssaProcedure *proc, ssaValue *value, Type *t) {
 	if (are_types_identical(t, src_type))
 		return value;
 
-	Type *dst = get_base_type(t);
 	Type *src = get_base_type(src_type);
+	Type *dst = get_base_type(t);
 
 	if (value->kind == ssaValue_Constant) {
 		if (dst->kind == Type_Basic)
 			return ssa_make_value_constant(proc->module->allocator, t, value->constant.value);
 	}
 
+	// integer -> integer
+	if (is_type_integer(src) && is_type_integer(dst)) {
+		i64 sz = basic_type_sizes[src->basic.kind];
+		i64 dz = basic_type_sizes[dst->basic.kind];
+		ssaConvKind kind = ssaConv_trunc;
+		if (dz >= sz) {
+			kind = ssaConv_zext;
+		}
+		return ssa_emit(proc, ssa_make_instr_conv(proc, kind, value, src, dst));
+	}
+
+	// float -> float
+	if (is_type_float(src) && is_type_float(dst)) {
+		i64 sz = basic_type_sizes[src->basic.kind];
+		i64 dz = basic_type_sizes[dst->basic.kind];
+		ssaConvKind kind = ssaConv_fptrunc;
+		if (dz >= sz) {
+			kind = ssaConv_fpext;
+		}
+		return ssa_emit(proc, ssa_make_instr_conv(proc, kind, value, src, dst));
+	}
+
+	// float -> integer
+	if (is_type_float(src) && is_type_integer(dst)) {
+		ssaConvKind kind = ssaConv_fptosi;
+		if (is_type_unsigned(dst)) {
+			kind = ssaConv_fptoui;
+		}
+		return ssa_emit(proc, ssa_make_instr_conv(proc, kind, value, src, dst));
+	}
+
+	// integer -> float
+	if (is_type_integer(src) && is_type_float(dst)) {
+		ssaConvKind kind = ssaConv_sitofp;
+		if (is_type_unsigned(dst)) {
+			kind = ssaConv_uitofp;
+		}
+		return ssa_emit(proc, ssa_make_instr_conv(proc, kind, value, src, dst));
+	}
+
+	// Pointer to int
+	if (is_type_pointer(src) && is_type_integer(dst)) {
+		return ssa_emit(proc, ssa_make_instr_conv(proc, ssaConv_ptrtoint, value, src, dst));
+	}
+
+	// int to Pointer
+	if (is_type_integer(src) && is_type_pointer(dst)) {
+		return ssa_emit(proc, ssa_make_instr_conv(proc, ssaConv_inttoptr, value, src, dst));
+	}
+
+	// Pointer to Pointer
+	if (is_type_pointer(src) && is_type_pointer(dst)) {
+		return ssa_emit(proc, ssa_make_instr_conv(proc, ssaConv_bitcast, value, src, dst));
+	}
+
 
 	GB_PANIC("TODO(bill): ssa_emit_conv");
+	GB_PANIC("TODO(bill): string -> []byte");
+	GB_PANIC("TODO(bill): []byte -> string");
 
 	return NULL;
 }
@@ -930,6 +1015,32 @@ ssaValue *ssa_emit_slice(ssaProcedure *proc, Type *slice_type, ssaValue *base, s
 	return ssa_emit_load(proc, slice);
 }
 
+ssaValue *ssa_emit_substring(ssaProcedure *proc, ssaValue *base, ssaValue *low, ssaValue *high) {
+	Type *bt = get_base_type(ssa_value_type(base));
+	GB_ASSERT(bt == t_string);
+	if (low == NULL) {
+		low = v_zero;
+	}
+	if (high == NULL) {
+		high = ssa_string_len(proc, base);
+	}
+
+	Token op_sub = {Token_Sub};
+	ssaValue *len = ssa_emit_arith(proc, op_sub, high, low, t_int);
+
+	ssaValue *elem = ssa_string_elem(proc, base);
+	elem = ssa_emit_ptr_offset(proc, elem, low);
+
+	ssaValue *str = ssa_add_local_generated(proc, t_string);
+	ssaValue *gep = NULL;
+	gep = ssa_emit_struct_gep(proc, str, v_zero32, ssa_value_type(elem));
+	ssa_emit_store(proc, gep, elem);
+
+	gep = ssa_emit_struct_gep(proc, str, v_one32, t_int);
+	ssa_emit_store(proc, gep, len);
+
+	return ssa_emit_load(proc, str);
+}
 
 
 ssaValue *ssa_add_global_string_array(ssaProcedure *proc, ExactValue value) {
@@ -944,13 +1055,11 @@ ssaValue *ssa_add_global_string_array(ssaProcedure *proc, ExactValue value) {
 	String name = make_string(str, len-1);
 	Token token = {Token_String};
 	token.string = name;
-	// TODO(bill): unquote function
-	Type *type = make_type_array(a, t_u8, value.value_string.len-2);
+	Type *type = make_type_array(a, t_u8, value.value_string.len);
 	Entity *entity = make_entity_constant(a, NULL, token, type, value);
 	ssaValue *v = ssa_make_value_constant(a, type, value);
 
 	ssaValue *g = ssa_make_value_global(a, entity, v);
-	g->global.is_constant = true;
 
 	map_set(&proc->module->values, hash_pointer(entity), g);
 	map_set(&proc->module->members, hash_string(name), g);
@@ -971,39 +1080,6 @@ ssaValue *ssa_emit_string(ssaProcedure *proc, ssaValue *elem, ssaValue *len) {
 	return ssa_emit_load(proc, str);
 }
 
-ssaValue *ssa_emit_call(ssaProcedure *proc, AstNode *expr, Type *result_type) {
-	ast_node(ce, CallExpr, expr);
-
-	ssaValue *value = ssa_build_expr(proc, ce->proc);
-	Type *proc_type_ = ssa_value_type(value);
-	GB_ASSERT(proc_type_->kind == Type_Procedure);
-	auto *type = &proc_type_->procedure;
-
-	isize arg_index = 0;
-	isize arg_count = type->param_count;
-	ssaValue **args = gb_alloc_array(proc->module->allocator, ssaValue *, arg_count);
-
-	for (AstNode *arg = ce->arg_list; arg != NULL; arg = arg->next) {
-		ssaValue *a = ssa_build_expr(proc, arg);
-		Type *at = ssa_value_type(a);
-		if (at->kind == Type_Tuple) {
-			GB_PANIC("TODO(bill): tuple call arguments");
-		} else {
-			args[arg_index++] = a;
-		}
-	}
-
-	for (isize i = 0; i < arg_count; i++) {
-		Entity *e = type->params->tuple.variables[i];
-		args[i] = ssa_emit_conv(proc, args[i], e->type);
-	}
-
-	ssaValue *call = ssa_make_instr_call(proc, value, args, arg_count, result_type);
-	return ssa_emit(proc, call);
-}
-
-
-
 
 ssaValue *ssa_build_single_expr(ssaProcedure *proc, AstNode *expr, TypeAndValue *tv) {
 	switch (expr->kind) {
@@ -1105,7 +1181,8 @@ ssaValue *ssa_build_single_expr(ssaProcedure *proc, AstNode *expr, TypeAndValue
 	case_end;
 
 	case_ast_node(ce, CastExpr, expr);
-		GB_PANIC("TODO(bill): ssa_build_single_expr CastExpr");
+		ssaValue *v = ssa_build_expr(proc, ce->expr);
+		return ssa_emit_conv(proc, v, tv->type);
 	case_end;
 
 	case_ast_node(ce, CallExpr, expr);
@@ -1113,33 +1190,90 @@ ssaValue *ssa_build_single_expr(ssaProcedure *proc, AstNode *expr, TypeAndValue
 		if (p->kind == AstNode_Ident) {
 			Entity **found = map_get(&proc->module->info->uses, hash_pointer(p));
 			if (found && (*found)->kind == Entity_Builtin) {
-				GB_PANIC("TODO(bill): CallExpr Builtin");
+				Entity *e = *found;
+				switch (e->builtin.id) {
+				case BuiltinProcedure_len: {
+					ssaValue *v = ssa_lvalue_address(ssa_build_addr(proc, ce->arg_list), proc);
+					Type *t = get_base_type(ssa_value_type(v));
+					if (t == t_string)
+						return ssa_string_len(proc, v);
+					else if (t->kind == Type_Slice)
+						return ssa_slice_len(proc, v);
+				} break;
+				case BuiltinProcedure_cap: {
+					ssaValue *v = ssa_lvalue_address(ssa_build_addr(proc, ce->arg_list), proc);
+					Type *t = get_base_type(ssa_value_type(v));
+					if (t == t_string)
+						return ssa_string_cap(proc, v);
+					else if (t->kind == Type_Slice)
+						return ssa_slice_cap(proc, v);
+				} break;
+				case BuiltinProcedure_copy: {
+					GB_PANIC("TODO(bill): BuiltinProcedure_copy");
+				} break;
+				case BuiltinProcedure_print: {
+					GB_PANIC("TODO(bill): BuiltinProcedure_print");
+				} break;
+				case BuiltinProcedure_println: {
+					GB_PANIC("TODO(bill): BuiltinProcedure_println");
+				} break;
+				}
 			}
 		}
 
-		return ssa_emit_call(proc, expr, tv->type);
+
+		// NOTE(bill): Regular call
+		ssaValue *value = ssa_build_expr(proc, ce->proc);
+		Type *proc_type_ = ssa_value_type(value);
+		GB_ASSERT(proc_type_->kind == Type_Procedure);
+		auto *type = &proc_type_->procedure;
+
+		isize arg_index = 0;
+		isize arg_count = type->param_count;
+		ssaValue **args = gb_alloc_array(proc->module->allocator, ssaValue *, arg_count);
+
+		for (AstNode *arg = ce->arg_list; arg != NULL; arg = arg->next) {
+			ssaValue *a = ssa_build_expr(proc, arg);
+			Type *at = ssa_value_type(a);
+			if (at->kind == Type_Tuple) {
+				GB_PANIC("TODO(bill): tuple call arguments");
+			} else {
+				args[arg_index++] = a;
+			}
+		}
+
+		for (isize i = 0; i < arg_count; i++) {
+			Entity *e = type->params->tuple.variables[i];
+			args[i] = ssa_emit_conv(proc, args[i], e->type);
+		}
+
+		ssaValue *call = ssa_make_instr_call(proc, value, args, arg_count, tv->type);
+		return ssa_emit(proc, call);
 	case_end;
 
 	case_ast_node(se, SliceExpr, expr);
-		ssaValue *base = NULL;
 		ssaValue *low  = NULL;
 		ssaValue *high = NULL;
 		ssaValue *max  = NULL;
-		switch (tv->type->kind) {
-		case Type_Slice:
-		case Type_Array:
-			base = ssa_lvalue_address(ssa_build_addr(proc, se->expr), proc);
-			break;
-		case Type_Basic:
-			GB_PANIC("SliceExpr Type_Basic");
-			break;
-		}
 
 		if (se->low  != NULL)    low  = ssa_build_expr(proc, se->low);
 		if (se->high != NULL)    high = ssa_build_expr(proc, se->high);
 		if (se->triple_indexed)  max  = ssa_build_expr(proc, se->max);
 
-		return ssa_emit_slice(proc, tv->type, base, low, high, max);
+		switch (tv->type->kind) {
+		case Type_Slice:
+		case Type_Array: {
+			ssaValue *base = ssa_lvalue_address(ssa_build_addr(proc, se->expr), proc);
+			return ssa_emit_slice(proc, tv->type, base, low, high, max);
+		} break;
+		case Type_Basic: {
+			// NOTE(bill): max is not needed
+			ssaValue *base = ssa_lvalue_address(ssa_build_addr(proc, se->expr), proc);
+			return ssa_emit_substring(proc, base, low, high);
+		} break;
+		}
+
+		GB_PANIC("Unknown slicable type");
 	case_end;
 
 	case_ast_node(ie, IndexExpr, expr);
@@ -1234,17 +1368,17 @@ ssaLvalue ssa_build_addr(ssaProcedure *proc, AstNode *expr) {
 			ssaValue *elem = ssa_slice_elem(proc, slice);
 			v = ssa_emit_ptr_offset(proc, elem, index);
 		} break;
-		case Type_Pointer: {
-			ssaValue *ptr = ssa_emit_load(proc, ssa_lvalue_address(ssa_build_addr(proc, ie->expr), proc));
-			ssaValue *index = ssa_emit_conv(proc, ssa_build_expr(proc, ie->index), t_int);
-			v = ssa_emit_ptr_offset(proc, ptr, index);
-		} break;
-		case Type_Basic: { // string
+		case Type_Basic: { // Basic_string
 			ssaValue *str = ssa_lvalue_address(ssa_build_addr(proc, ie->expr), proc);
 			ssaValue *index = ssa_emit_conv(proc, ssa_build_expr(proc, ie->index), t_int);
 			ssaValue *elem = ssa_string_elem(proc, str);
 			v = ssa_emit_ptr_offset(proc, elem, index);
 		} break;
+		case Type_Pointer: {
+			ssaValue *ptr = ssa_emit_load(proc, ssa_lvalue_address(ssa_build_addr(proc, ie->expr), proc));
+			ssaValue *index = ssa_emit_conv(proc, ssa_build_expr(proc, ie->index), t_int);
+			v = ssa_emit_ptr_offset(proc, ptr, index);
+		} break;
 		}
 
 		// NOTE(bill): lvalue address encodes the pointer, thus the deref
@@ -1318,12 +1452,12 @@ void ssa_build_stmt_list(ssaProcedure *proc, AstNode *list) {
 		ssa_build_stmt(proc, stmt);
 }
 
-void ssa_build_stmt(ssaProcedure *proc, AstNode *s) {
-	switch (s->kind) {
-	case_ast_node(bs, EmptyStmt, s);
+void ssa_build_stmt(ssaProcedure *proc, AstNode *node) {
+	switch (node->kind) {
+	case_ast_node(bs, EmptyStmt, node);
 	case_end;
 
-	case_ast_node(vd, VarDecl, s);
+	case_ast_node(vd, VarDecl, node);
 		if (vd->kind == Declaration_Mutable) {
 			if (vd->name_count == vd->value_count) { // 1:1 assigment
 				gbArray(ssaLvalue)  lvals;
@@ -1365,7 +1499,7 @@ void ssa_build_stmt(ssaProcedure *proc, AstNode *s) {
 		}
 	case_end;
 
-	case_ast_node(ids, IncDecStmt, s);
+	case_ast_node(ids, IncDecStmt, node);
 		Token op = ids->op;
 		if (op.kind == Token_Increment) {
 			op.kind = Token_Add;
@@ -1378,7 +1512,7 @@ void ssa_build_stmt(ssaProcedure *proc, AstNode *s) {
 
 	case_end;
 
-	case_ast_node(as, AssignStmt, s);
+	case_ast_node(as, AssignStmt, node);
 		switch (as->op.kind) {
 		case Token_Eq: {
 			gbArray(ssaLvalue) lvals;
@@ -1397,7 +1531,6 @@ void ssa_build_stmt(ssaProcedure *proc, AstNode *s) {
 
 			if (as->lhs_count == as->rhs_count) {
 				if (as->lhs_count == 1) {
-					AstNode *lhs = as->lhs_list;
 					AstNode *rhs = as->rhs_list;
 					ssaValue *init = ssa_build_expr(proc, rhs);
 					ssa_lvalue_store(lvals[0], proc, init);
@@ -1435,19 +1568,20 @@ void ssa_build_stmt(ssaProcedure *proc, AstNode *s) {
 		}
 	case_end;
 
-	case_ast_node(es, ExprStmt, s);
-		ssaValue *value = ssa_build_expr(proc, es->expr);
+	case_ast_node(es, ExprStmt, node);
+		// NOTE(bill): No need to use return value
+		ssa_build_expr(proc, es->expr);
 	case_end;
 
-	case_ast_node(bs, BlockStmt, s)
+	case_ast_node(bs, BlockStmt, node);
 		ssa_build_stmt_list(proc, bs->list);
 	case_end;
 
-	case_ast_node(bs, DeferStmt, s);
+	case_ast_node(bs, DeferStmt, node);
 		GB_PANIC("DeferStmt");
 	case_end;
 
-	case_ast_node(rs, ReturnStmt, s);
+	case_ast_node(rs, ReturnStmt, node);
 		ssaValue *v = NULL;
 		auto *return_type_tuple  = &proc->type->procedure.results->tuple;
 		isize return_count = proc->type->procedure.result_count;
@@ -1482,12 +1616,12 @@ void ssa_build_stmt(ssaProcedure *proc, AstNode *s) {
 
 	case_end;
 
-	case_ast_node(is, IfStmt, s);
+	case_ast_node(is, IfStmt, node);
 		if (is->init != NULL) {
 			ssa_build_stmt(proc, is->init);
 		}
-		ssaBlock *then = ssa_add_block(proc, s, make_string("if.then"));
-		ssaBlock *done = ssa__make_block(proc, s, make_string("if.done")); // NOTE(bill): Append later
+		ssaBlock *then = ssa_add_block(proc, node, make_string("if.then"));
+		ssaBlock *done = ssa__make_block(proc, node, make_string("if.done")); // NOTE(bill): Append later
 		ssaBlock *else_ = done;
 		if (is->else_stmt != NULL) {
 			else_ = ssa_add_block(proc, is->else_stmt, make_string("if.else"));
@@ -1507,21 +1641,21 @@ void ssa_build_stmt(ssaProcedure *proc, AstNode *s) {
 		proc->curr_block = done;
 	case_end;
 
-	case_ast_node(fs, ForStmt, s);
+	case_ast_node(fs, ForStmt, node);
 		if (fs->init != NULL) {
 			ssa_build_stmt(proc, fs->init);
 		}
-		ssaBlock *body = ssa_add_block(proc, s, make_string("for.body"));
-		ssaBlock *done = ssa__make_block(proc, s, make_string("for.done")); // NOTE(bill): Append later
+		ssaBlock *body = ssa_add_block(proc, node, make_string("for.body"));
+		ssaBlock *done = ssa__make_block(proc, node, make_string("for.done")); // NOTE(bill): Append later
 
 		ssaBlock *loop = body;
 
 		if (fs->cond != NULL) {
-			loop = ssa_add_block(proc, fs->cond, make_string("for.loop"));
+			loop = ssa_add_block(proc, node, make_string("for.loop"));
 		}
 		ssaBlock *cont = loop;
 		if (fs->post != NULL) {
-			cont = ssa_add_block(proc, fs->cond, make_string("for.post"));
+			cont = ssa_add_block(proc, node, make_string("for.post"));
 		}
 		ssa_emit_jump(proc, loop);
 		proc->curr_block = loop;
@@ -1545,7 +1679,7 @@ void ssa_build_stmt(ssaProcedure *proc, AstNode *s) {
 
 	case_end;
 
-	case_ast_node(bs, BranchStmt, s);
+	case_ast_node(bs, BranchStmt, node);
 		ssaBlock *block = NULL;
 		switch (bs->token.kind) {
 		#define BRANCH_GET_BLOCK(kind_) \
@@ -1557,6 +1691,7 @@ void ssa_build_stmt(ssaProcedure *proc, AstNode *s) {
 		BRANCH_GET_BLOCK(break);
 		BRANCH_GET_BLOCK(continue);
 		BRANCH_GET_BLOCK(fallthrough);
+		#undef BRANCH_GET_BLOCK
 		}
 		ssa_emit_jump(proc, block);
 		ssa_emit_unreachable(proc);

+ 5 - 75
src/common.cpp

@@ -2,78 +2,7 @@
 #define GB_IMPLEMENTATION
 #include "gb/gb.h"
 
-// NOTE(bill): Used for UTF-8 strings
-typedef struct String {
-	u8 *text;
-	isize len;
-} String;
-// NOTE(bill): used for printf style arguments
-#define LIT(x) (x).len, (x).text
-
-
-
-
-gb_inline String make_string(u8 *text, isize len) {
-	String s;
-	s.text = text;
-	if (len < 0)
-		len = gb_strlen(cast(char *)text);
-	s.len = len;
-	return s;
-}
-
-gb_inline String make_string(char *text) {
-	return make_string(cast(u8 *)cast(void *)text, gb_strlen(text));
-}
-
-gb_inline b32 are_strings_equal(String a, String b) {
-	if (a.len == b.len) {
-		return gb_memcompare(a.text, b.text, a.len) == 0;
-	}
-	return false;
-}
-
-gb_inline b32 are_strings_equal_ignore_case(String a, String b) {
-	if (a.len == b.len) {
-		for (isize i = 0; i < a.len; i++) {
-			char x = cast(char)a.text[i];
-			char y = cast(char)b.text[i];
-			if (gb_char_to_lower(x) != gb_char_to_lower(y))
-				return false;
-		}
-		return true;
-	}
-	return false;
-}
-
-
-gb_inline isize string_extension_position(String str) {
-	isize dot_pos = -1;
-	isize i = str.len;
-	b32 seen_dot = false;
-	while (i --> 0) {
-		if (str.text[i] == GB_PATH_SEPARATOR)
-			break;
-		if (str.text[i] == '.') {
-			dot_pos = i;
-			break;
-		}
-	}
-
-	return dot_pos;
-}
-
-gb_inline b32 string_has_extension(String str, String ext) {
-	if (str.len > ext.len+1) {
-		u8 *s = str.text+str.len - ext.len-1;
-		if (s[0] == '.') {
-			s++;
-			return gb_memcompare(s, ext.text, ext.len) == 0;
-		}
-		return false;
-	}
-	return false;
-}
+#include "string.cpp"
 
 // Hasing
 
@@ -86,7 +15,8 @@ gb_inline u64 hash_string(String s) {
 }
 
 gb_inline u64 hash_pointer(void *ptr) {
-	u64 p = cast(u64)cast(uintptr)ptr;
+	uintptr u = cast(uintptr)ptr;
+	u64 p = cast(u64)u;
 	return p;
 }
 
@@ -125,9 +55,9 @@ typedef struct MapFindResult {
 
 template <typename T>
 struct MapEntry {
-	u64 key;
+	u64   key;
 	isize next;
-	T value;
+	T     value;
 };
 
 template <typename T>

+ 39 - 1
src/gb/gb.h

@@ -1370,6 +1370,7 @@ GB_DEF u8 * gb_ucs2_to_utf8_buf(u16 const *str); // NOTE(bill): Uses locally per
 // NOTE(bill): Returns size of codepoint in bytes
 GB_DEF isize gb_utf8_decode        (u8 const *str, isize str_len, Rune *codepoint);
 GB_DEF isize gb_utf8_codepoint_size(u8 const *str, isize str_len);
+GB_DEF isize gb_utf8_encode_rune   (u8 buf[4], Rune r);
 
 ////////////////////////////////////////////////////////////////
 //
@@ -5871,7 +5872,7 @@ gb_inline i32 gb_hex_digit_to_int(char c) {
 		return c - 'a' + 10;
 	else if (gb_is_between(c, 'A', 'F'))
 		return c - 'A' + 10;
-	return 0;
+	return -1;
 }
 
 
@@ -6691,6 +6692,43 @@ isize gb_utf8_codepoint_size(u8 const *str, isize str_len) {
 	return i+1;
 }
 
+// Writes the UTF-8 encoding of `r` into `buf` and returns the number of bytes written.
+// Values above GB_RUNE_MAX and values in the surrogate range 0xd800-0xdfff are
+// replaced by GB_RUNE_INVALID and emitted with the three byte form.
+isize gb_utf8_encode_rune(u8 buf[4], Rune r) {
+	u32 value     = cast(u32)r;
+	u8  cont_mask = 0x3f; // payload bits carried by each continuation byte
+	b32 replace   = (value > GB_RUNE_MAX) || gb_is_between(value, 0xd800, 0xdfff);
+
+	// One byte: 0xxxxxxx
+	if (value <= (1<<7)-1) {
+		buf[0] = cast(u8)r;
+		return 1;
+	}
+	// Two bytes: 110xxxxx 10xxxxxx
+	if (value <= (1<<11)-1) {
+		buf[0] = 0xc0 | cast(u8)(r>>6);
+		buf[1] = 0x80 | (cast(u8)(r) & cont_mask);
+		return 2;
+	}
+
+	// Invalid or Surrogate range: encode the replacement value instead
+	if (replace) {
+		r = GB_RUNE_INVALID;
+	}
+
+	// Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
+	if (replace || value <= (1<<16)-1) {
+		buf[0] = 0xe0 | cast(u8)(r>>12);
+		buf[1] = 0x80 | (cast(u8)(r>>6) & cont_mask);
+		buf[2] = 0x80 | (cast(u8)(r) & cont_mask);
+		return 3;
+	}
+
+	// Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+	buf[0] = 0xf0 | cast(u8)(r>>18);
+	buf[1] = 0x80 | (cast(u8)(r>>12) & cont_mask);
+	buf[2] = 0x80 | (cast(u8)(r>>6) & cont_mask);
+	buf[3] = 0x80 | (cast(u8)(r) & cont_mask);
+	return 4;
+}
 
 
 

+ 1 - 1
src/main.cpp

@@ -33,7 +33,7 @@ int main(int argc, char **argv) {
 				check_parsed_files(&checker);
 
 				ssaGen ssa = {};
-				if (false && ssa_gen_init(&ssa, &checker)) {
+				if (ssa_gen_init(&ssa, &checker)) {
 					defer (ssa_gen_destroy(&ssa));
 
 					ssa_gen_code(&ssa);

+ 1 - 4
src/parser.cpp

@@ -2063,10 +2063,7 @@ void parse_file(Parser *p, AstFile *f) {
 		} else {
 			if (node->kind == AstNode_ImportDecl) {
 				auto *id = &node->ImportDecl;
-				String file = id->filepath.string;
-				String file_str = {};
-				if (file.text[0] == '"')
-					file_str = make_string(file.text+1, file.len-2);
+				String file_str = id->filepath.string;
 
 				char ext[] = ".odin";
 				isize ext_len = gb_size_of(ext)-1;

+ 262 - 0
src/string.cpp

@@ -0,0 +1,262 @@
+
+// NOTE(bill): Used for UTF-8 strings; a pointer/length pair, so always go through `len` rather than looking for a terminator
+typedef struct String {
+	u8 *text; // first byte of the string data
+	isize len; // length in bytes
+} String;
+// NOTE(bill): used for printf style arguments
+// Expands to TWO arguments, the length and then the text pointer, e.g. for a "%.*s" specifier.
+// The length is cast to int because a `*` precision is read from the varargs as an int,
+// and isize may be wider than that.
+#define LIT(x) ((int)(x).len), (x).text
+
+
+
+
+// Wraps `len` bytes starting at `text` in a String without copying them.
+// A negative `len` asks for the length to be measured with gb_strlen instead.
+gb_inline String make_string(u8 *text, isize len) {
+	isize byte_count = len;
+	if (byte_count < 0) {
+		byte_count = gb_strlen(cast(char *)text);
+	}
+
+	String result;
+	result.text = text;
+	result.len  = byte_count;
+	return result;
+}
+
+// Convenience overload for C strings: the length is whatever gb_strlen reports for `text`.
+gb_inline String make_string(char *text) {
+	isize text_len = gb_strlen(text);
+	u8 *bytes = cast(u8 *)cast(void *)text;
+	return make_string(bytes, text_len);
+}
+
+// True when both strings have the same length and identical bytes.
+gb_inline b32 are_strings_equal(String a, String b) {
+	if (a.len != b.len) {
+		return false;
+	}
+	return gb_memcompare(a.text, b.text, a.len) == 0;
+}
+
+// True when both strings have the same length and every pair of bytes matches
+// after being passed through gb_char_to_lower.
+gb_inline b32 are_strings_equal_ignore_case(String a, String b) {
+	if (a.len != b.len) {
+		return false;
+	}
+
+	isize count = a.len;
+	for (isize idx = 0; idx < count; idx++) {
+		char lhs = gb_char_to_lower(cast(char)a.text[idx]);
+		char rhs = gb_char_to_lower(cast(char)b.text[idx]);
+		if (lhs != rhs) {
+			return false;
+		}
+	}
+	return true;
+}
+
+
+// Returns the index of the '.' that starts the extension of the last path component,
+// or -1 when there is none. The scan runs backwards from the end of `str` and gives up
+// at the first GB_PATH_SEPARATOR, so dots in directory names are never reported.
+gb_inline isize string_extension_position(String str) {
+	for (isize i = str.len-1; i >= 0; i--) {
+		u8 c = str.text[i];
+		if (c == GB_PATH_SEPARATOR) {
+			break;
+		}
+		if (c == '.') {
+			return i;
+		}
+	}
+	return -1;
+}
+
+// True when `str` ends in '.' followed by exactly the bytes of `ext` (`ext` is given
+// without the dot). `str` must be strictly longer than that suffix, so a string that
+// consists of nothing but ".ext" does not match.
+gb_inline b32 string_has_extension(String str, String ext) {
+	isize suffix_len = ext.len + 1; // the '.' plus the extension itself
+	if (str.len <= suffix_len) {
+		return false;
+	}
+
+	u8 *dot = str.text + str.len - suffix_len;
+	if (dot[0] != '.') {
+		return false;
+	}
+	return gb_memcompare(dot+1, ext.text, ext.len) == 0;
+}
+
+// True when the byte `c` occurs anywhere in the first `s.len` bytes of `s`.
+b32 string_contains_char(String s, u8 c) {
+	isize idx = 0;
+	while (idx < s.len) {
+		if (s.text[idx] == c) {
+			return true;
+		}
+		idx++;
+	}
+	return false;
+}
+
+// Decodes the first character or escape sequence of `s` (the inside of a quoted literal).
+//
+//   quote       - delimiter of the literal being decoded. When it is ' or ", an unescaped
+//                 occurrence of it is an error, and only that delimiter may be escaped
+//                 (\' inside '...', \" inside "...").
+//   rune        - receives the decoded value. For octal and \x escapes this is a byte value.
+//   multi       - set to true only for \u and \U escapes; it is left untouched on every
+//                 other path (including unescaped non-ASCII input), so the caller must
+//                 initialise it before the call.
+//   tail_string - receives what is left of `s` after the consumed bytes.
+//
+// Returns false on empty input or a malformed escape; nothing is written to the
+// outputs in that case.
+b32 unquote_char(String s, u8 quote, Rune *rune, b32 *multi, String *tail_string) {
+	// Nothing to decode; this also keeps the s.text[0] reads below in bounds
+	if (s.len <= 0) {
+		return false;
+	}
+
+	if (s.text[0] == quote &&
+	    (quote == '\'' || quote == '"')) {
+		return false;
+	} else if (s.text[0] >= 0x80) {
+		// Unescaped non-ASCII input: hand back the decoded code point
+		Rune r = -1;
+		isize size = gb_utf8_decode(s.text, s.len, &r);
+		*rune = r;
+		*tail_string = make_string(s.text+size, s.len-size);
+		return true;
+	} else if (s.text[0] != '\\') {
+		// Plain ASCII character
+		*rune = s.text[0];
+		*tail_string = make_string(s.text+1, s.len-1);
+		return true;
+	}
+
+	// From here on `s` starts with a backslash, which needs at least one more byte
+	if (s.len <= 1) {
+		return false;
+	}
+	u8 c = s.text[1];
+	s = make_string(s.text+2, s.len-2);
+
+	switch (c) {
+	default: return false;
+
+	case 'a':  *rune = '\a'; break;
+	case 'b':  *rune = '\b'; break;
+	case 'f':  *rune = '\f'; break;
+	case 'n':  *rune = '\n'; break;
+	case 'r':  *rune = '\r'; break;
+	case 't':  *rune = '\t'; break;
+	case 'v':  *rune = '\v'; break;
+	case '\\': *rune = '\\'; break;
+
+
+	case '\'':
+	case '"':
+		// Only the delimiter of the current literal may be escaped
+		if (c != quote) {
+			return false;
+		}
+		*rune = c;
+		break;
+
+	case '0':
+	case '1':
+	case '2':
+	case '3':
+	case '4':
+	case '5':
+	case '6':
+	case '7': {
+		// Octal escape: exactly three digits in total, value at most 0xff
+		i32 r = c - '0';
+		if (s.len < 2) {
+			return false;
+		}
+		for (isize i = 0; i < 2; i++) {
+			i32 d = s.text[i] - '0';
+			if (d < 0 || d > 7) {
+				return false;
+			}
+			r = (r<<3) | d;
+		}
+		s = make_string(s.text+2, s.len-2);
+		if (r > 0xff) {
+			return false;
+		}
+		*rune = r;
+	} break;
+
+	case 'x':
+	case 'u':
+	case 'U': {
+		// Hexadecimal escape with a fixed number of digits per kind
+		isize n = 0;
+		switch (c) {
+		case 'x': n = 2; break;
+		case 'u': n = 4; break;
+		case 'U': n = 8; break;
+		}
+
+		Rune r = 0;
+		if (s.len < n) {
+			return false;
+		}
+		for (isize i = 0; i < n; i++) {
+			i32 d = gb_hex_digit_to_int(s.text[i]);
+			if (d < 0) {
+				return false;
+			}
+			r = (r<<4) | d;
+		}
+		s = make_string(s.text+n, s.len-n);
+		if (c == 'x') {
+			// \x yields a single byte value; `multi` stays as the caller set it
+			*rune = r;
+			break;
+		}
+		if (r > GB_RUNE_MAX) {
+			return false;
+		}
+		*rune = r;
+		*multi = true;
+	} break;
+	}
+	*tail_string = s;
+	return true;
+}
+
+
+// Strips the surrounding quotes from the literal in `*s_` and resolves its escape sequences.
+// Accepted delimiters are ` (raw: the content is taken verbatim and may not contain
+// another `), " (string) and ' (rune: the content must be exactly one character or escape).
+// A " or ' literal may not contain a newline byte.
+//
+// `*s_` is replaced by the unquoted text on success and left unchanged on failure.
+// The result says where the new text lives:
+// 0 == failure
+// 1 == original memory (a slice of the input, nothing was allocated)
+// 2 == new allocation (made from `a`; it is not freed here)
+i32 unquote_string(gbAllocator a, String *s_) {
+	GB_ASSERT(s_ != NULL);
+	String s = *s_;
+	isize n = s.len;
+	if (n < 2) {
+		return 0;
+	}
+	u8 quote = s.text[0];
+	if (quote != s.text[n-1]) {
+		return 0;
+	}
+	s.text += 1;
+	s.len -= 2;
+
+	if (quote == '`') {
+		if (string_contains_char(s, '`')) {
+			return 0;
+		}
+		*s_ = s;
+		return 1;
+	}
+	if (quote != '"' && quote != '\'') {
+		return 0;
+	}
+
+	if (string_contains_char(s, '\n')) {
+		return 0;
+	}
+
+	// Fast path: nothing to unescape, so the input bytes can be used as they are
+	if (!string_contains_char(s, '\\') && !string_contains_char(s, quote)) {
+		if (quote == '"') {
+			*s_ = s;
+			return 1;
+		} else if (quote == '\'') {
+			Rune r = GB_RUNE_INVALID;
+			isize size = gb_utf8_decode(s.text, s.len, &r);
+			if ((size == s.len) && (r != -1 || size != 1)) {
+				*s_ = s;
+				return 1;
+			}
+		}
+	}
+
+	// Every step below writes at most as many bytes as it consumes (escapes only
+	// shrink, unescaped bytes are copied 1:1), so this capacity cannot be exceeded.
+	u8 rune_temp[4] = {};
+	isize buf_len = 3*s.len / 2;
+	u8 *buf = gb_alloc_array(a, u8, buf_len);
+	isize len = 0;
+	while (s.len > 0) {
+		// A first byte >= 0x80 can only be unescaped UTF-8 ('\\' is ASCII).
+		// unquote_char does not set `multi` for it, so it is tracked here.
+		b32 is_raw_utf8 = s.text[0] >= 0x80;
+
+		String tail_string = {};
+		Rune r = 0;
+		b32 multi = false;
+		b32 success = unquote_char(s, quote, &r, &multi, &tail_string);
+		if (!success) {
+			gb_free(a, buf);
+			return 0;
+		}
+
+		isize consumed = tail_string.text - s.text;
+		if (consumed <= 0) {
+			// No progress was made; bail out rather than loop forever
+			gb_free(a, buf);
+			return 0;
+		}
+
+		if (is_raw_utf8) {
+			// Copy the source bytes verbatim. Writing cast(u8)r here would keep only
+			// the low byte of the code point and corrupt the character.
+			if (consumed > s.len) {
+				consumed = s.len;
+			}
+			gb_memcopy(buf+len, s.text, consumed);
+			len += consumed;
+		} else if (r < 0x80 || !multi) {
+			// ASCII, or a byte value from an octal/\x escape
+			buf[len++] = cast(u8)r;
+		} else {
+			// \u or \U escape: emit the code point as UTF-8
+			isize size = gb_utf8_encode_rune(rune_temp, r);
+			gb_memcopy(buf+len, rune_temp, size);
+			len += size;
+		}
+		s = tail_string;
+
+		// A rune literal holds exactly one character
+		if (quote == '\'' && s.len != 0) {
+			gb_free(a, buf);
+			return 0;
+		}
+	}
+	*s_ = make_string(buf, len);
+	return 2;
+}

+ 61 - 16
src/tokenizer.cpp

@@ -114,9 +114,7 @@ TOKEN_KIND(_KeywordBegin, "_KeywordBegin"), \
 	TOKEN_KIND(union, "union"), \
 	TOKEN_KIND(enum, "enum"), \
 TOKEN_KIND(_KeywordEnd, "_KeywordEnd"), \
-\
-	TOKEN_KIND(Count, ""), \
-
+	TOKEN_KIND(Count, "")
 
 enum TokenKind {
 #define TOKEN_KIND(e, s) GB_JOIN2(Token_, e)
@@ -168,6 +166,7 @@ struct ErrorCollector {
 };
 
 void error(ErrorCollector *ec, Token token, char *fmt, ...) {
+	ec->count++;
 	// NOTE(bill): Duplicate error, skip it
 	if (!token_pos_are_equal(ec->prev, token.pos)) {
 		ec->prev = token.pos;
@@ -180,7 +179,6 @@ void error(ErrorCollector *ec, Token token, char *fmt, ...) {
 		va_end(va);
 
 	}
-	ec->count++;
 }
 
 void warning(Token token, char *fmt, ...) {
@@ -266,6 +264,7 @@ struct Tokenizer {
 	isize line_count;
 
 	isize error_count;
+	gbArray(String) allocated_strings;
 };
 
 
@@ -342,6 +341,9 @@ TokenizerInitError init_tokenizer(Tokenizer *t, String fullpath) {
 		advance_to_next_rune(t);
 		if (t->curr_rune == GB_RUNE_BOM)
 			advance_to_next_rune(t); // Ignore BOM at file beginning
+
+		gb_array_init(t->allocated_strings, gb_heap_allocator());
+
 		return TokenizerInit_None;
 	}
 
@@ -360,12 +362,18 @@ TokenizerInitError init_tokenizer(Tokenizer *t, String fullpath) {
 
 	if (gb_file_size(&f) == 0)
 		return TokenizerInit_Empty;
+
+
 	return TokenizerInit_None;
 }
 
 gb_inline void destroy_tokenizer(Tokenizer *t) {
-	if (t->start != NULL)
+	if (t->start != NULL) {
 		gb_free(gb_heap_allocator(), t->start);
+	}
+	if (t->allocated_strings != NULL) {
+		gb_array_free(t->allocated_strings);
+	}
 }
 
 void tokenizer_skip_whitespace(Tokenizer *t) {
@@ -624,21 +632,48 @@ Token tokenizer_get_token(Tokenizer *t) {
 		case GB_RUNE_EOF:
 			token.kind = Token_EOF;
 			break;
+
+		case '`': // Raw String Literal
 		case '"': // String Literal
+		{
+			Rune quote = curr_rune;
 			token.kind = Token_String;
-			for (;;) {
-				Rune r = t->curr_rune;
-				if (r == '\n' || r < 0) {
-					tokenizer_err(t, "String literal not terminated");
-					break;
+			if (curr_rune == '"') {
+				for (;;) {
+					Rune r = t->curr_rune;
+					if (r == '\n' || r < 0) {
+						tokenizer_err(t, "String literal not terminated");
+						break;
+					}
+					advance_to_next_rune(t);
+					if (r == quote)
+						break;
+					if (r == '\\')
+						scan_escape(t, '"');
+				}
+			} else {
+				for (;;) {
+					Rune r = t->curr_rune;
+					if (r < 0) {
+						tokenizer_err(t, "String literal not terminated");
+						break;
+					}
+					advance_to_next_rune(t);
+					if (r == quote)
+						break;
 				}
-				advance_to_next_rune(t);
-				if (r == '"')
-					break;
-				if (r == '\\')
-					scan_escape(t, '"');
 			}
-			break;
+			token.string.len = t->curr - token.string.text;
+			i32 success = unquote_string(gb_heap_allocator(), &token.string);
+			if (success > 0) {
+				if (success == 2) {
+					gb_array_append(t->allocated_strings, token.string);
+				}
+				return token;
+			} else {
+				tokenizer_err(t, "Invalid string literal");
+			}
+		} break;
 
 		case '\'': { // Rune Literal
 			b32 valid = true;
@@ -663,6 +698,16 @@ Token tokenizer_get_token(Tokenizer *t) {
 
 			if (valid && len != 1)
 				tokenizer_err(t, "Illegal rune literal");
+			token.string.len = t->curr - token.string.text;
+			i32 success = unquote_string(gb_heap_allocator(), &token.string);
+			if (success > 0) {
+				if (success == 2) {
+					gb_array_append(t->allocated_strings, token.string);
+				}
+				return token;
+			} else {
+				tokenizer_err(t, "Invalid rune literal");
+			}
 		} break;
 
 		case '.':