@@ -135,6 +135,49 @@ String const token_strings[] = {
 };
 
 
+struct KeywordHashEntry {
+	u32       hash;
+	TokenKind kind;
+};
+
+enum {
+	KEYWORD_HASH_TABLE_COUNT = 1<<9,
+	KEYWORD_HASH_TABLE_MASK  = KEYWORD_HASH_TABLE_COUNT-1,
+};
+gb_global KeywordHashEntry keyword_hash_table[KEYWORD_HASH_TABLE_COUNT] = {};
+GB_STATIC_ASSERT(Token__KeywordEnd-Token__KeywordBegin <= gb_count_of(keyword_hash_table));
+
+gb_inline u32 keyword_hash(u8 const *text, isize len) {
+	return fnv32a(text, len);
+}
+void add_keyword_hash_entry(String const &s, TokenKind kind) {
+	u32 hash = keyword_hash(s.text, s.len);
+
+	// NOTE(bill): This is a bit of an empirical hack in order to speed things up
+	u32 index = hash & KEYWORD_HASH_TABLE_MASK;
+	KeywordHashEntry *entry = &keyword_hash_table[index];
+	GB_ASSERT_MSG(entry->kind == Token_Invalid, "Keyword hash table initialization collision: %.*s %.*s %08x %08x", LIT(s), LIT(token_strings[entry->kind]), hash, entry->hash);
+	entry->hash = hash;
+	entry->kind = kind;
+}
+void init_keyword_hash_table(void) {
+	for (i32 kind = Token__KeywordBegin+1; kind < Token__KeywordEnd; kind++) {
+		add_keyword_hash_entry(token_strings[kind], cast(TokenKind)kind);
+	}
+
+	static struct {
+		String    s;
+		TokenKind kind;
+	} const legacy_keywords[] = {
+		{str_lit("notin"), Token_not_in},
+	};
+
+	for (i32 i = 0; i < gb_count_of(legacy_keywords); i++) {
+		add_keyword_hash_entry(legacy_keywords[i].s, legacy_keywords[i].kind);
+	}
+}
+
+
 struct TokenPos {
 	String file;
 	isize  offset; // starting at 0
@@ -215,7 +258,8 @@ void end_error_block(void) {
 		u8 *text = gb_alloc_array(heap_allocator(), u8, n+1);
 		gb_memmove(text, global_error_collector.error_buffer.data, n);
 		text[n] = 0;
-		array_add(&global_error_collector.errors, make_string(text, n));
+		String s = {text, n};
+		array_add(&global_error_collector.errors, s);
 		global_error_collector.error_buffer.count = 0;
 
 		// gbFile *f = gb_file_get_standard(gbFileStandard_Error);
@@ -539,10 +583,11 @@ void advance_to_next_rune(Tokenizer *t) {
 			tokenizer_err(t, "Illegal character NUL");
 		} else if (rune >= 0x80) { // not ASCII
 			width = gb_utf8_decode(t->read_curr, t->end-t->read_curr, &rune);
-			if (rune == GB_RUNE_INVALID && width == 1)
+			if (rune == GB_RUNE_INVALID && width == 1) {
 				tokenizer_err(t, "Illegal UTF-8 encoding");
-			else if (rune == GB_RUNE_BOM && t->curr-t->start > 0)
+			} else if (rune == GB_RUNE_BOM && t->curr-t->start > 0) {
 				tokenizer_err(t, "Illegal byte order mark");
+			}
 		}
 		t->read_curr += width;
 		t->curr_rune = rune;
@@ -609,21 +654,13 @@ gb_inline void destroy_tokenizer(Tokenizer *t) {
 	array_free(&t->allocated_strings);
 }
 
-void tokenizer_skip_whitespace(Tokenizer *t) {
-	while (t->curr_rune == ' ' ||
-	       t->curr_rune == '\t' ||
-	       t->curr_rune == '\n' ||
-	       t->curr_rune == '\r') {
-		advance_to_next_rune(t);
-	}
-}
-
 gb_inline i32 digit_value(Rune r) {
-	if (gb_char_is_digit(cast(char)r)) {
+	switch (r) {
+	case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
 		return r - '0';
-	} else if (gb_is_between(cast(char)r, 'a', 'f')) {
+	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 		return r - 'a' + 10;
-	} else if (gb_is_between(cast(char)r, 'A', 'F')) {
+	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 		return r - 'A' + 10;
 	}
 	return 16; // NOTE(bill): Larger than highest possible
@@ -645,7 +682,7 @@ u8 peek_byte(Tokenizer *t, isize offset=0) {
 Token scan_number_to_token(Tokenizer *t, bool seen_decimal_point) {
 	Token token = {};
 	token.kind = Token_Integer;
-	token.string = make_string(t->curr, 1);
+	token.string = {t->curr, 1};
 	token.pos.file = t->fullpath;
 	token.pos.line = t->line_count;
 	token.pos.column = t->curr-t->line+1;
@@ -662,37 +699,43 @@ Token scan_number_to_token(Tokenizer *t, bool seen_decimal_point) {
 	if (t->curr_rune == '0') {
 		u8 *prev = t->curr;
 		advance_to_next_rune(t);
-		if (t->curr_rune == 'b') { // Binary
+		switch (t->curr_rune) {
+		case 'b': // Binary
 			advance_to_next_rune(t);
 			scan_mantissa(t, 2);
 			if (t->curr - prev <= 2) {
 				token.kind = Token_Invalid;
 			}
-		} else if (t->curr_rune == 'o') { // Octal
+			goto end;
+		case 'o': // Octal
 			advance_to_next_rune(t);
 			scan_mantissa(t, 8);
 			if (t->curr - prev <= 2) {
 				token.kind = Token_Invalid;
 			}
-		} else if (t->curr_rune == 'd') { // Decimal
+			goto end;
+		case 'd': // Decimal
 			advance_to_next_rune(t);
 			scan_mantissa(t, 10);
 			if (t->curr - prev <= 2) {
 				token.kind = Token_Invalid;
 			}
-		} else if (t->curr_rune == 'z') { // Dozenal
+			goto end;
+		case 'z': // Dozenal
 			advance_to_next_rune(t);
 			scan_mantissa(t, 12);
 			if (t->curr - prev <= 2) {
 				token.kind = Token_Invalid;
 			}
-		} else if (t->curr_rune == 'x') { // Hexadecimal
+			goto end;
+		case 'x': // Hexadecimal
 			advance_to_next_rune(t);
 			scan_mantissa(t, 16);
 			if (t->curr - prev <= 2) {
 				token.kind = Token_Invalid;
 			}
-		} else if (t->curr_rune == 'h') { // Hexadecimal Float
+			goto end;
+		case 'h': // Hexadecimal Float
 			token.kind = Token_Float;
 			advance_to_next_rune(t);
 			scan_mantissa(t, 16);
@@ -716,13 +759,11 @@ Token scan_number_to_token(Tokenizer *t, bool seen_decimal_point) {
 					break;
 				}
 			}
-
-		} else {
+			goto end;
+		default:
 			scan_mantissa(t, 10);
 			goto fraction;
 		}
-
-		goto end;
 	}
 
 	scan_mantissa(t, 10);
@@ -762,36 +803,47 @@ end:
 	return token;
 }
 
+
 bool scan_escape(Tokenizer *t) {
 	isize len = 0;
 	u32 base = 0, max = 0, x = 0;
 
 	Rune r = t->curr_rune;
-	if (r == 'a' ||
-	    r == 'b' ||
-	    r == 'e' ||
-	    r == 'f' ||
-	    r == 'n' ||
-	    r == 'r' ||
-	    r == 't' ||
-	    r == 'v' ||
-	    r == '\\' ||
-	    r == '\'' ||
-	    r == '\"') {
+	switch (r) {
+	case 'a':
+	case 'b':
+	case 'e':
+	case 'f':
+	case 'n':
+	case 'r':
+	case 't':
+	case 'v':
+	case '\\':
+	case '\'':
+	case '\"':
 		advance_to_next_rune(t);
 		return true;
-	} else if (gb_is_between(r, '0', '7')) {
+
+	case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
 		len = 3; base = 8; max = 255;
-	} else if (r == 'x') {
+		break;
+
+	case 'x':
 		advance_to_next_rune(t);
 		len = 2; base = 16; max = 255;
-	} else if (r == 'u') {
+		break;
+
+	case 'u':
 		advance_to_next_rune(t);
 		len = 4; base = 16; max = GB_RUNE_MAX;
-	} else if (r == 'U') {
+		break;
+
+	case 'U':
 		advance_to_next_rune(t);
 		len = 8; base = 16; max = GB_RUNE_MAX;
-	} else {
+		break;
+
+	default:
 		if (t->curr_rune < 0) {
 			tokenizer_err(t, "Escape sequence was not terminated");
 		} else {
@@ -871,10 +923,21 @@ gb_inline TokenKind token_kind_dub_eq(Tokenizer *t, Rune sing_rune, TokenKind si
 
 
 Token tokenizer_get_token(Tokenizer *t) {
-	tokenizer_skip_whitespace(t);
+	// Skip whitespace
+	for (;;) {
+		switch (t->curr_rune) {
+		case ' ':
+		case '\t':
+		case '\n':
+		case '\r':
+			advance_to_next_rune(t);
+			continue;
+		}
+		break;
+	}
 
 	Token token = {};
-	token.string = make_string(t->curr, 1);
+	token.string = {t->curr, 1};
 	token.pos.file = t->fullpath;
 	token.pos.line = t->line_count;
 	token.pos.offset = t->curr - t->start;
@@ -891,16 +954,15 @@ Token tokenizer_get_token(Tokenizer *t) {
 
 		// NOTE(bill): All keywords are > 1
 		if (token.string.len > 1) {
-			for (i32 k = Token__KeywordBegin+1; k < Token__KeywordEnd; k++) {
-				if (token.string == token_strings[k]) {
-					token.kind = cast(TokenKind)k;
-					break;
+			u32 hash = keyword_hash(token.string.text, token.string.len);
+			u32 index = hash & KEYWORD_HASH_TABLE_MASK;
+			KeywordHashEntry *entry = &keyword_hash_table[index];
+			if (entry->kind != Token_Invalid) {
+				String const &entry_text = token_strings[entry->kind];
+				if (str_eq(entry_text, token.string)) {
+					token.kind = entry->kind;
 				}
 			}
-
-			if (token.kind == Token_Ident && token.string == "notin") {
-				token.kind = Token_not_in;
-			}
 		}
 
 	} else if (gb_is_between(curr_rune, '0', '9')) {
@@ -1142,7 +1204,7 @@ Token tokenizer_get_token(Tokenizer *t) {
 	case '|': token.kind = token_kind_dub_eq(t, '|', Token_Or, Token_OrEq, Token_CmpOr, Token_CmpOrEq); break;
 
 	default:
-		if (curr_rune != GB_RUNE_BOM) {
+		if (curr_rune != GB_RUNE_BOM) {
 			u8 str[4] = {};
 			int len = cast(int)gb_utf8_encode_rune(str, curr_rune);
 			tokenizer_err(t, "Illegal character: %.*s (%d) ", len, str, curr_rune);