
Add packages "core:c/frontend/tokenizer" and "core:c/frontend/preprocessor"

gingerBill 4 years ago
parent
commit
31c4a9d770

+ 25 - 0
core/c/frontend/preprocessor/const_expr.odin

@@ -0,0 +1,25 @@
+package c_frontend_preprocess
+
+import "core:c/frontend/tokenizer"
+
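+// const_expr evaluates the constant expression of an #if/#elif directive.
+// For now this is only a stub: it unwraps a single literal token and
+// treats anything else as 0; a complete version needs a mini expression
+// parser (see the TODO below).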
+const_expr :: proc(rest: ^^Token, tok: ^Token) -> i64 {
+	// TODO(bill): Handle const_expr correctly
+	// This is effectively a mini-parser
+
+	assert(rest != nil);
+	assert(tok != nil);
+	rest^ = tokenizer.new_eof(tok);
+	switch v in tok.val {
+	case i64:
+		return v;
+	case f64:
+		return i64(v);
+	case string:
+		return 0;
+	case []u16:
+		// TODO
+	case []u32:
+		// TODO
+	}
+	return 0;
+}

+ 1498 - 0
core/c/frontend/preprocessor/preprocess.odin

@@ -0,0 +1,1498 @@
+package c_frontend_preprocess
+
+import "../tokenizer"
+
+import "core:strings"
+import "core:strconv"
+import "core:path/filepath"
+import "core:unicode/utf8"
+import "core:unicode/utf16"
+import "core:os"
+
+@(private)
+Tokenizer :: tokenizer.Tokenizer;
+@(private)
+Token :: tokenizer.Token;
+
+Error_Handler :: tokenizer.Error_Handler;
+
+Macro_Param :: struct {
+	next: ^Macro_Param,
+	name: string,
+}
+
+Macro_Arg :: struct {
+	next: ^Macro_Arg,
+	name: string,
+	tok: ^Token,
+	is_va_args: bool,
+}
+
+Macro_Kind :: enum u8 {
+	Function_Like,
+	Value_Like,
+}
+
+Macro_Handler :: #type proc(^Preprocessor, ^Token) -> ^Token;
+
+Macro :: struct {
+	name: string,
+	kind: Macro_Kind,
+	params: ^Macro_Param,
+	va_args_name: string,
+	body: ^Token,
+	handler: Macro_Handler,
+}
+
+Cond_Incl_State :: enum u8 {
+	In_Then,
+	In_Elif,
+	In_Else,
+}
+
+Cond_Incl :: struct {
+	next: ^Cond_Incl,
+	tok:  ^Token,
+	state:    Cond_Incl_State,
+	included: bool,
+}
+
+Pragma_Handler :: #type proc(^Preprocessor, ^Token);
+
+Preprocessor :: struct {
+	// Lookup tables
+	macros:         map[string]^Macro,
+	pragma_once:    map[string]bool,
+	include_guards: map[string]string,
+	filepath_cache: map[string]string,
+
+	// Include path data
+	include_paths: []string,
+
+	// Counter for __COUNTER__ macro
+	counter: i64,
+
+	// Include information
+	cond_incl: ^Cond_Incl,
+	include_level: int,
+	include_next_index: int,
+
+	wide_char_size: int,
+
+	// Mutable data
+	err:  Error_Handler,
+	warn: Error_Handler,
+	pragma_handler: Pragma_Handler,
+	error_count:   int,
+	warning_count: int,
+}
+
+MAX_INCLUDE_LEVEL :: 1024;
+
+error :: proc(cpp: ^Preprocessor, tok: ^Token, msg: string, args: ..any) {
+	if cpp.err != nil {
+		cpp.err(tok.pos, msg, ..args);
+	}
+	cpp.error_count += 1;
+}
+
+warn :: proc(cpp: ^Preprocessor, tok: ^Token, msg: string, args: ..any) {
+	if cpp.warn != nil {
+		cpp.warn(tok.pos, msg, ..args);
+	}
+	cpp.warning_count += 1;
+}
+
+is_hash :: proc(tok: ^Token) -> bool {
+	return tok.at_bol && tok.lit == "#";
+}
+
+skip_line :: proc(cpp: ^Preprocessor, tok: ^Token) -> ^Token {
+	tok := tok;
+	if tok.at_bol {
+		return tok;
+	}
+	warn(cpp, tok, "extra token");
+	for !tok.at_bol {
+		tok = tok.next;
+	}
+	return tok;
+}
+
+
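+// append_token concatenates two token lists: every token of `a` is
+// copied and `b` is chained onto the copy. It is used, for example, to
+// splice an included file's tokens in front of the tokens that follow
+// the #include line.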
+append_token :: proc(a, b: ^Token) -> ^Token {
+	if a.kind == .EOF {
+		return b;
+	}
+
+	head: Token;
+	curr := &head;
+
+	for tok := a; tok.kind != .EOF; tok = tok.next {
+		curr.next = tokenizer.copy_token(tok);
+		curr = curr.next;
+	}
+	curr.next = b;
+	return head.next;
+}
+
+
+is_hex_digit :: proc(x: byte) -> bool {
+	switch x {
+	case '0'..'9', 'a'..'f', 'A'..'F':
+		return true;
+	}
+	return false;
+}
+from_hex :: proc(x: byte) -> i32 {
+	switch x {
+	case '0'..'9':
+		return i32(x) - '0';
+	case 'a'..'f':
+		return i32(x) - 'a' + 10;
+	case 'A'..'F':
+		return i32(x) - 'A' + 10;
+	}
+	return 16;
+}
+
+
+convert_pp_number :: proc(tok: ^Token) {
+	convert_pp_int :: proc(tok: ^Token) -> bool {
+		p := tok.lit;
+		base := 10;
+		if len(p) > 2 {
+			if strings.equal_fold(p[:2], "0x") && is_hex_digit(p[2]) {
+				p = p[2:];
+				base = 16;
+			} else if strings.equal_fold(p[:2], "0b") && (p[2] == '0' || p[2] == '1') {
+				p = p[2:];
+				base = 2;
+			}
+		}
+		if base == 10 && p[0] == '0' {
+			base = 8;
+		}
+
+
+		tok.val, _ = strconv.parse_i64_of_base(p, base);
+
+		l, u: int;
+
+		suf: [3]byte;
+		suf_n := 0;
+		i := len(p)-1;
+		suffix_loop: for /**/; i >= 0 && suf_n < len(suf); i -= 1 {
+			switch p[i] {
+			case 'l', 'L':
+				suf[suf_n] = 'l';
+				l += 1;
+				suf_n += 1;
+			case 'u', 'U':
+				suf[suf_n] = 'u';
+				u += 1;
+				suf_n += 1;
+			case:
+				// stop at the first non-suffix character
+				break suffix_loop;
+			}
+		}
+		if i >= 0 {
+			if !is_hex_digit(p[i]) && p[i] != '.' {
+				return false;
+			}
+		}
+		if u > 1 {
+			return false;
+		}
+
+		if l > 2 {
+			return false;
+		}
+
+		if u == 1 {
+			switch l {
+			case 0: tok.type_hint = .Unsigned_Int;
+			case 1: tok.type_hint = .Unsigned_Long;
+			case 2: tok.type_hint = .Unsigned_Long_Long;
+			}
+		} else {
+			switch l {
+			case 0: tok.type_hint = .Int;
+			case 1: tok.type_hint = .Long;
+			case 2: tok.type_hint = .Long_Long;
+			}
+		}
+		tok.kind = .Number;
+		return true;
+	}
+
+	if convert_pp_int(tok) {
+		return;
+	}
+
+	fval, _ := strconv.parse_f64(tok.lit);
+	tok.val = fval;
+
+	end := tok.lit[len(tok.lit)-1];
+	switch end {
+	case 'f', 'F':
+		tok.type_hint = .Float;
+	case 'l', 'L':
+		tok.type_hint = .Long_Double;
+	case:
+		tok.type_hint = .Double;
+	}
+	tok.kind = .Number;
+
+}
+
+convert_pp_char :: proc(tok: ^Token) {
+	assert(len(tok.lit) >= 2);
+	r, _, _, _ := unquote_char(tok.lit, tok.lit[0]);
+	tok.val = i64(r);
+
+	tok.type_hint = .Int;
+	switch tok.prefix {
+	case "u": tok.type_hint = .UTF_16;
+	case "U": tok.type_hint = .UTF_32;
+	case "L": tok.type_hint = .UTF_Wide;
+	}
+}
+
+wide_char_size :: proc(cpp: ^Preprocessor) -> int {
+	char_size := 4;
+	if cpp.wide_char_size > 0 {
+		char_size = clamp(cpp.wide_char_size, 1, 4);
+		assert(char_size & (char_size-1) == 0);
+	}
+	return char_size;
+}
+
+convert_pp_string :: proc(cpp: ^Preprocessor, tok: ^Token) {
+	assert(len(tok.lit) >= 2);
+	str, _, _ := unquote_string(tok.lit);
+	tok.val = str;
+
+	char_size := 1;
+
+	switch tok.prefix {
+	case "u8":
+		tok.type_hint = .UTF_8;
+		char_size = 1;
+	case "u":
+		tok.type_hint = .UTF_16;
+		char_size = 2;
+	case "U":
+		tok.type_hint = .UTF_32;
+		char_size = 4;
+	case "L":
+		tok.type_hint = .UTF_Wide;
+		char_size = wide_char_size(cpp);
+	}
+
+	switch char_size {
+	case 2:
+		n: int;
+		buf := make([]u16, len(str));
+		for c in str {
+			ch := c;
+			if ch < 0x10000 {
+				buf[n] = u16(ch);
+				n += 1;
+			} else {
+				ch -= 0x10000;
+				buf[n+0] = 0xd800 + u16((ch >> 10) & 0x3ff);
+				buf[n+1] = 0xdc00 + u16(ch & 0x3ff);
+				n += 2;
+			}
+		}
+		tok.val = buf[:n];
+	case 4:
+		n: int;
+		buf := make([]u32, len(str));
+		for ch in str {
+			buf[n] = u32(ch);
+			n += 1;
+		}
+		tok.val = buf[:n];
+	}
+
+}
+
+convert_pp_token :: proc(cpp: ^Preprocessor, t: ^Token, is_keyword: tokenizer.Is_Keyword_Proc) {
+	switch {
+	case t.kind == .Char:
+		convert_pp_char(t);
+	case t.kind == .String:
+		convert_pp_string(cpp, t);
+	case is_keyword != nil && is_keyword(t):
+		t.kind = .Keyword;
+	case t.kind == .PP_Number:
+		convert_pp_number(t);
+	}
+}
+convert_pp_tokens :: proc(cpp: ^Preprocessor, tok: ^Token, is_keyword: tokenizer.Is_Keyword_Proc) {
+	for t := tok; t != nil && t.kind != .EOF; t = t.next {
+		convert_pp_token(cpp, t, is_keyword);
+	}
+}
+
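+// join_adjacent_string_literals implements the C rule that neighbouring
+// string literals are merged into one, e.g. "foo" "bar" == "foobar".
+// When the literals have different prefixes (u8, u, U, L), the result
+// uses the widest element type among them.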
+join_adjacent_string_literals :: proc(cpp: ^Preprocessor, initial_tok: ^Token) {
+	for tok1 := initial_tok; tok1.kind != .EOF; /**/ {
+		if tok1.kind != .String || tok1.next.kind != .String {
+			tok1 = tok1.next;
+			continue;
+		}
+
+		type_hint := tokenizer.Token_Type_Hint.None;
+		char_size := 1;
+
+		start := tok1;
+		for t := tok1; t != nil && t.kind == .String; t = t.next {
+			if t.val == nil {
+				convert_pp_string(cpp, t);
+			}
+			tok1 = t.next;
+			if type_hint != t.type_hint {
+				if t.type_hint != .None && type_hint != .None {
+					error(cpp, t, "unsupported non-standard concatenation of string literals of different types");
+				}
+				prev_char_size := char_size;
+
+				#partial switch type_hint {
+				case .UTF_8:    char_size = max(char_size, 1);
+				case .UTF_16:   char_size = max(char_size, 2);
+				case .UTF_32:   char_size = max(char_size, 4);
+				case .UTF_Wide: char_size = max(char_size, wide_char_size(cpp));
+				}
+
+				if type_hint == .None || prev_char_size < char_size {
+					type_hint = t.type_hint;
+				}
+			}
+		}
+
+		// NOTE(bill): Verbose logic in order to correctly concatenate strings, even if they differ in type
+		max_len := 0;
+		switch char_size {
+		case 1:
+			for t := start; t != nil && t.kind == .String; t = t.next {
+				#partial switch v in t.val {
+				case string: max_len += len(v);
+				case []u16:  max_len += 2*len(v);
+				case []u32:  max_len += 4*len(v);
+				}
+			}
+			n := 0;
+			buf := make([]byte, max_len);
+			for t := start; t != nil && t.kind == .String; t = t.next {
+				#partial switch v in t.val {
+				case string:
+					n += copy(buf[n:], v);
+				case []u16:
+					for i := 0; i < len(v); /**/ {
+						c1 := v[i];
+						r: rune;
+						if !utf16.is_surrogate(rune(c1)) {
+							r = rune(c1);
+							i += 1;
+						} else if i+1 == len(v) {
+							r = utf16.REPLACEMENT_CHAR;
+							i += 1;
+						} else {
+							c2 := v[i+1];
+							i += 2;
+							r = utf16.decode_surrogate_pair(rune(c1), rune(c2));
+						}
+
+						b, w := utf8.encode_rune(r);
+						n += copy(buf[n:], b[:w]);
+					}
+				case []u32:
+					for r in v {
+						b, w := utf8.encode_rune(rune(r));
+						n += copy(buf[n:], b[:w]);
+					}
+				}
+			}
+
+			new_tok := tokenizer.copy_token(start);
+			new_tok.lit = "";
+			new_tok.val = string(buf[:n]);
+			new_tok.next = tok1;
+			new_tok.type_hint = type_hint;
+			start^ = new_tok^;
+		case 2:
+			for t := start; t != nil && t.kind == .String; t = t.next {
+				#partial switch v in t.val {
+				case string: max_len += len(v);
+				case []u16:  max_len += len(v);
+				case []u32:  max_len += 2*len(v);
+				}
+			}
+			n := 0;
+			buf := make([]u16, max_len);
+			for t := start; t != nil && t.kind == .String; t = t.next {
+				#partial switch v in t.val {
+				case string:
+					for r in v {
+						if r >= 0x10000 {
+							c1, c2 := utf16.encode_surrogate_pair(r);
+							buf[n+0] = u16(c1);
+							buf[n+1] = u16(c2);
+							n += 2;
+						} else {
+							buf[n] = u16(r);
+							n += 1;
+						}
+					}
+				case []u16:
+					n += copy(buf[n:], v);
+				case []u32:
+					for r in v {
+						if r >= 0x10000 {
+							c1, c2 := utf16.encode_surrogate_pair(rune(r));
+							buf[n+0] = u16(c1);
+							buf[n+1] = u16(c2);
+							n += 2;
+						} else {
+							buf[n] = u16(r);
+							n += 1;
+						}
+					}
+				}
+			}
+
+			new_tok := tokenizer.copy_token(start);
+			new_tok.lit = "";
+			new_tok.val = buf[:n];
+			new_tok.next = tok1;
+			new_tok.type_hint = type_hint;
+			start^ = new_tok^;
+		case 4:
+			for t := start; t != nil && t.kind == .String; t = t.next {
+				#partial switch v in t.val {
+				case string: max_len += len(v);
+				case []u16:  max_len += len(v);
+				case []u32:  max_len += len(v);
+				}
+			}
+			n := 0;
+			buf := make([]u32, max_len);
+			for t := start; t != nil && t.kind == .String; t = t.next {
+				#partial switch v in t.val {
+				case string:
+					for r in v {
+						buf[n] = u32(r);
+						n += 1;
+					}
+				case []u16:
+					for i := 0; i < len(v); /**/ {
+						c1 := v[i];
+						if !utf16.is_surrogate(rune(c1)) {
+							buf[n] = u32(c1);
+							n += 1;
+							i += 1;
+						} else if i+1 == len(v) {
+							buf[n] = utf16.REPLACEMENT_CHAR;
+							n += 1;
+							i += 1;
+						} else {
+							c2 := v[i+1];
+							i += 2;
+							r := utf16.decode_surrogate_pair(rune(c1), rune(c2));
+							buf[n] = u32(r);
+							n += 1;
+						}
+					}
+				case []u32:
+					n += copy(buf[n:], v);
+				}
+			}
+
+			new_tok := tokenizer.copy_token(start);
+			new_tok.lit = "";
+			new_tok.val = buf[:n];
+			new_tok.next = tok1;
+			new_tok.type_hint = type_hint;
+			start^ = new_tok^;
+		}
+	}
+}
+
+
+quote_string :: proc(s: string) -> []byte {
+	b := &strings.Builder{};
+	strings.init_builder(b, 0, len(s)+2);
+	strings.write_quoted_string(b, s, '"');
+	return b.buf[:];
+}
+
+
+_init_tokenizer_from_preprocessor :: proc(t: ^Tokenizer, cpp: ^Preprocessor) -> ^Tokenizer {
+	t.warn = cpp.warn;
+	t.err = cpp.err;
+	return t;
+}
+
+new_string_token :: proc(cpp: ^Preprocessor, str: string, tok: ^Token) -> ^Token {
+	assert(tok != nil);
+	assert(str != "");
+	t := _init_tokenizer_from_preprocessor(&Tokenizer{}, cpp);
+	src := quote_string(str);
+	return tokenizer.inline_tokenize(t, tok, src);
+}
+
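+// stringize implements the '#' operator: the tokens of a macro argument
+// are joined (with their original spacing) into a single string literal,
+// e.g. given '#define S(x) #x', 'S(a  b)' expands to "a b".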
+stringize :: proc(cpp: ^Preprocessor, hash, arg: ^Token) -> ^Token {
+	s := join_tokens(arg, nil);
+	return new_string_token(cpp, s, hash);
+}
+
+
+new_number_token :: proc(cpp: ^Preprocessor, i: i64, tok: ^Token) -> ^Token {
+	t := _init_tokenizer_from_preprocessor(&Tokenizer{}, cpp);
+	buf: [32]byte;
+	n := len(strconv.append_int(buf[:], i, 10));
+	src := make([]byte, n);
+	copy(src, buf[:n]);
+	return tokenizer.inline_tokenize(t, tok, src);
+}
+
+
+find_macro :: proc(cpp: ^Preprocessor, tok: ^Token) -> ^Macro {
+	if tok.kind != .Ident {
+		return nil;
+	}
+	return cpp.macros[tok.lit];
+}
+
+add_macro :: proc(cpp: ^Preprocessor, name: string, kind: Macro_Kind, body: ^Token) -> ^Macro {
+	m := new(Macro);
+	m.name = name;
+	m.kind = kind;
+	m.body = body;
+	cpp.macros[name] = m;
+	return m;
+}
+
+
+undef_macro :: proc(cpp: ^Preprocessor, name: string) {
+	delete_key(&cpp.macros, name);
+}
+
+add_builtin :: proc(cpp: ^Preprocessor, name: string, handler: Macro_Handler) -> ^Macro {
+	m := add_macro(cpp, name, .Value_Like, nil);
+	m.handler = handler;
+	return m;
+}
+
+
+skip :: proc(cpp: ^Preprocessor, tok: ^Token, op: string) -> ^Token {
+	if tok.lit != op {
+		error(cpp, tok, "expected '%q'", op);
+	}
+	return tok.next;
+}
+
+consume :: proc(rest: ^^Token, tok: ^Token, lit: string) -> bool {
+	if tok.lit == lit {
+		rest^ = tok.next;
+		return true;
+	}
+	rest^ = tok;
+	return false;
+}
+
+read_macro_params :: proc(cpp: ^Preprocessor, rest: ^^Token, tok: ^Token) -> (param: ^Macro_Param, va_args_name: string) {
+	head: Macro_Param;
+	curr := &head;
+
+	tok := tok;
+	for tok.lit != ")" && tok.kind != .EOF {
+		if curr != &head {
+			tok = skip(cpp, tok, ",");
+		}
+
+		if tok.lit == "..." {
+			va_args_name = "__VA_ARGS__";
+			rest^ = skip(cpp, tok.next, ")");
+			param = head.next;
+			return;
+		}
+
+		if tok.kind != .Ident {
+			error(cpp, tok, "expected an identifier");
+		}
+
+		if tok.next.lit == "..." {
+			va_args_name = tok.lit;
+			rest^ = skip(cpp, tok.next.next, ")");
+			param = head.next;
+			return;
+		}
+
+		m := new(Macro_Param);
+		m.name = tok.lit;
+		curr.next = m;
+		curr = curr.next;
+		tok = tok.next;
+	}
+
+
+	rest^ = tok.next;
+	param = head.next;
+	return;
+}
+
+copy_line :: proc(rest: ^^Token, tok: ^Token) -> ^Token {
+	head: Token;
+	curr := &head;
+
+	tok := tok;
+	for ; !tok.at_bol; tok = tok.next {
+		curr.next = tokenizer.copy_token(tok);
+		curr = curr.next;
+	}
+	curr.next = tokenizer.new_eof(tok);
+	rest^ = tok;
+	return head.next;
+}
+
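+// read_macro_definition parses a '#define'. A '(' that immediately
+// follows the macro name (with no space before it) makes the macro
+// function-like; otherwise it is value-like, which is why
+// '#define F(x)' and '#define F (x)' mean different things.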
+read_macro_definition :: proc(cpp: ^Preprocessor, rest: ^^Token, tok: ^Token) {
+	tok := tok;
+	if tok.kind != .Ident {
+		error(cpp, tok, "macro name must be an identifier");
+	}
+	name := tok.lit;
+	tok = tok.next;
+
+	if !tok.has_space && tok.lit == "(" {
+		params, va_args_name := read_macro_params(cpp, &tok, tok.next);
+
+		m := add_macro(cpp, name, .Function_Like, copy_line(rest, tok));
+		m.params = params;
+		m.va_args_name = va_args_name;
+	} else {
+		add_macro(cpp, name, .Value_Like, copy_line(rest, tok));
+	}
+}
+
+
+join_tokens :: proc(tok, end: ^Token) -> string {
+	n := 1;
+	for t := tok; t != end && t.kind != .EOF; t = t.next {
+		if t != tok && t.has_space {
+			n += 1;
+		}
+		n += len(t.lit);
+	}
+
+	buf := make([]byte, n);
+
+	pos := 0;
+	for t := tok; t != end && t.kind != .EOF; t = t.next {
+		if t != tok && t.has_space {
+			buf[pos] = ' ';
+			pos += 1;
+		}
+		copy(buf[pos:], t.lit);
+		pos += len(t.lit);
+	}
+
+	return string(buf[:pos]);
+}
+
+read_include_filename :: proc(cpp: ^Preprocessor, rest: ^^Token, tok: ^Token) -> (filename: string, is_quote: bool) {
+	tok := tok;
+
+	if tok.kind == .String {
+		rest^ = skip_line(cpp, tok.next);
+		filename = tok.lit[1:len(tok.lit)-1];
+		is_quote = true;
+		return;
+	}
+
+	if tok.lit == "<" {
+		start := tok;
+		for ; tok.kind != .EOF; tok = tok.next {
+			if tok.at_bol || tok.kind == .EOF {
+				error(cpp, tok, "expected '>'");
+			}
+			is_quote = false;
+			if tok.lit == ">" {
+				break;
+			}
+		}
+		rest^ = skip_line(cpp, tok.next);
+		filename = join_tokens(start.next, tok);
+		return;
+	}
+
+	if tok.kind == .Ident {
+		tok2 := preprocess_internal(cpp, copy_line(rest, tok));
+		return read_include_filename(cpp, &tok2, tok2);
+	}
+
+	error(cpp, tok, "expected a filename");
+	return;
+}
+
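+// skip_cond_incl skips the tokens of a failed conditional branch until
+// the #elif/#else/#endif belonging to the current level, stepping over
+// any nested #if/#ifdef/#ifndef blocks along the way.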
+skip_cond_incl :: proc(tok: ^Token) -> ^Token {
+	next_skip :: proc(tok: ^Token) -> ^Token {
+		tok := tok;
+		for tok.kind != .EOF {
+			if is_hash(tok) {
+				switch tok.next.lit {
+				case "if", "ifdef", "ifndef":
+					tok = next_skip(tok.next.next);
+					continue;
+
+				case "endif":
+					return tok.next.next;
+				}
+			}
+			tok = tok.next;
+		}
+		return tok;
+	}
+
+	tok := tok;
+
+	loop: for tok.kind != .EOF {
+		if is_hash(tok) {
+			switch tok.next.lit {
+			case "if", "ifdef", "ifndef":
+				tok = next_skip(tok.next.next);
+				continue loop;
+
+			case "elif", "else", "endif":
+				break loop;
+			}
+		}
+
+		tok = tok.next;
+	}
+	return tok;
+}
+
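+// check_for_include_guard detects the standard include guard pattern
+//
+//	#ifndef FOO_H
+//	#define FOO_H
+//	...
+//	#endif // at end of file
+//
+// and returns the guard macro so the file can be skipped when it is
+// included again while the macro is still defined.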
+check_for_include_guard :: proc(tok: ^Token) -> (guard: string, ok: bool) {
+	if !is_hash(tok) || tok.next.lit != "ifndef" {
+		return;
+	}
+	tok := tok;
+	tok = tok.next.next;
+
+	if tok.kind != .Ident {
+		return;
+	}
+
+	m := tok.lit;
+	tok = tok.next;
+
+	if !is_hash(tok) || tok.next.lit != "define" || tok.next.lit != "macro" {
+		return;
+	}
+
+	for tok.kind != .EOF {
+		if !is_hash(tok) {
+			tok = tok.next;
+			continue;
+		}
+
+		if tok.next.lit == "endif" && tok.next.next.kind == .EOF {
+			return m, true;
+		}
+
+		switch tok.next.lit {
+		case "if", "ifdef", "ifndef":
+			tok = skip_cond_incl(tok.next.next);
+		case:
+			tok = tok.next;
+		}
+	}
+	return;
+}
+
+include_file :: proc(cpp: ^Preprocessor, tok: ^Token, path: string, filename_tok: ^Token) -> ^Token {
+	if cpp.pragma_once[path] {
+		return tok;
+	}
+
+	guard_name, guard_name_found := cpp.include_guards[path];
+	if guard_name_found && cpp.macros[guard_name] != nil {
+		return tok;
+	}
+
+	if !os.exists(path) {
+		error(cpp, filename_tok, "%s: cannot open file", path);
+		return tok;
+	}
+
+	cpp.include_level += 1;
+	if cpp.include_level > MAX_INCLUDE_LEVEL {
+		error(cpp, tok, "exceeded maximum nest amount: %d", MAX_INCLUDE_LEVEL);
+		return tok;
+	}
+
+	t := _init_tokenizer_from_preprocessor(&Tokenizer{}, cpp);
+	tok2 := tokenizer.tokenize_file(t, path, /*file.id*/1);
+	if tok2 == nil {
+		error(cpp, filename_tok, "%s: cannot open file", path);
+	}
+	cpp.include_level -= 1;
+
+	guard_name, guard_name_found = check_for_include_guard(tok2);
+	if guard_name_found {
+		cpp.include_guards[path] = guard_name;
+	}
+
+	return append_token(tok2, tok);
+}
+
+find_arg :: proc(args: ^Macro_Arg, tok: ^Token) -> ^Macro_Arg {
+	for ap := args; ap != nil; ap = ap.next {
+		if tok.lit == ap.name {
+			return ap;
+		}
+	}
+	return nil;
+}
+
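+// paste implements the '##' operator: the literals of two tokens are
+// concatenated and re-tokenized, and the result must form exactly one
+// valid token, e.g. 'foo ## 1' becomes the identifier 'foo1'.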
+paste :: proc(cpp: ^Preprocessor, lhs, rhs: ^Token) -> ^Token {
+	buf := strings.concatenate({lhs.lit, rhs.lit});
+	t := _init_tokenizer_from_preprocessor(&Tokenizer{}, cpp);
+	tok := tokenizer.inline_tokenize(t, lhs, transmute([]byte)buf);
+	if tok.next.kind != .EOF {
+		error(cpp, lhs, "pasting forms '%s', an invalid token", buf);
+	}
+	return tok;
+}
+
+has_varargs :: proc(args: ^Macro_Arg) -> bool {
+	for ap := args; ap != nil; ap = ap.next {
+		if ap.name == "__VA_ARGS__" {
+			return ap.tok.kind != .EOF;
+		}
+	}
+	return false;
+}
+
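+// substitute_token expands a macro body against its arguments, handling
+// '#' (stringize), '##' (token pasting), the GNU ',##__VA_ARGS__' comma
+// deletion, and __VA_OPT__ before plain argument substitution.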
+substitute_token :: proc(cpp: ^Preprocessor, tok: ^Token, args: ^Macro_Arg) -> ^Token {
+	head: Token;
+	curr := &head;
+	tok := tok;
+	for tok.kind != .EOF {
+		if tok.lit == "#" {
+			arg := find_arg(args, tok.next);
+			if arg == nil {
+				error(cpp, tok.next, "'#' is not followed by a macro parameter");
+			}
+			arg_tok := arg.tok if arg != nil else tok.next;
+			curr.next = stringize(cpp, tok, arg_tok);
+			curr = curr.next;
+			tok = tok.next.next;
+			continue;
+		}
+
+		if tok.lit == "," && tok.next.lit == "##" {
+			if arg := find_arg(args, tok.next.next); arg != nil && arg.is_va_args {
+				if arg.tok.kind == .EOF {
+					tok = tok.next.next.next;
+				} else {
+					curr.next = tokenizer.copy_token(tok);
+					curr = curr.next;
+					tok = tok.next.next;
+				}
+				continue;
+			}
+		}
+
+		if tok.lit == "##" {
+			if curr == &head {
+				error(cpp, tok, "'##' cannot appear at start of macro expansion");
+			}
+			if tok.next.kind == .EOF {
+				error(cpp, tok, "'##' cannot appear at end of macro expansion");
+			}
+
+			if arg := find_arg(args, tok.next); arg != nil {
+				if arg.tok.kind != .EOF {
+					curr^ = paste(cpp, curr, arg.tok)^;
+					for t := arg.tok.next; t.kind != .EOF; t = t.next {
+						curr.next = tokenizer.copy_token(t);
+						curr = curr.next;
+					}
+				}
+				tok = tok.next.next;
+				continue;
+			}
+
+			curr^ = paste(cpp, curr, tok.next)^;
+			tok = tok.next.next;
+			continue;
+		}
+
+		arg := find_arg(args, tok);
+
+		if arg != nil && tok.next.lit == "##" {
+			rhs := tok.next.next;
+
+			if arg.tok.kind == .EOF {
+				args2 := find_arg(args, rhs);
+				if args2 != nil {
+					for t := args2.tok; t.kind != .EOF; t = t.next {
+						curr.next = tokenizer.copy_token(t);
+						curr = curr.next;
+					}
+				} else {
+					curr.next = tokenizer.copy_token(rhs);
+					curr = curr.next;
+				}
+				tok = rhs.next;
+				continue;
+			}
+
+			for t := arg.tok; t.kind != .EOF; t = t.next {
+				curr.next = tokenizer.copy_token(t);
+				curr = curr.next;
+			}
+			tok = tok.next;
+			continue;
+		}
+
+		if tok.lit == "__VA__OPT__" && tok.next.lit == "(" {
+			opt_arg := read_macro_arg_one(cpp, &tok, tok.next.next, true);
+			if has_varargs(args) {
+				for t := opt_arg.tok; t.kind != .EOF; t = t.next {
+					curr.next = t;
+					curr = curr.next;
+				}
+			}
+			tok = skip(cpp, tok, ")");
+			continue;
+		}
+
+		if arg != nil {
+			t := preprocess_internal(cpp, arg.tok);
+			t.at_bol = tok.at_bol;
+			t.has_space = tok.has_space;
+			for ; t.kind != .EOF; t = t.next {
+				curr.next = tokenizer.copy_token(t);
+				curr = curr.next;
+			}
+			tok = tok.next;
+			continue;
+		}
+
+		curr.next = tokenizer.copy_token(tok);
+		curr = curr.next;
+		tok = tok.next;
+		continue;
+	}
+
+	curr.next = tok;
+	return head.next;
+}
+
+read_macro_arg_one :: proc(cpp: ^Preprocessor, rest: ^^Token, tok: ^Token, read_rest: bool) -> ^Macro_Arg {
+	tok := tok;
+	head: Token;
+	curr := &head;
+	level := 0;
+	for {
+		if level == 0 && tok.lit == ")" {
+			break;
+		}
+		if level == 0 && !read_rest && tok.lit == "," {
+			break;
+		}
+
+		if tok.kind == .EOF {
+			error(cpp, tok, "premature end of input");
+		}
+
+		switch tok.lit {
+		case "(": level += 1;
+		case ")": level -= 1;
+		}
+
+		curr.next = tokenizer.copy_token(tok);
+		curr = curr.next;
+		tok = tok.next;
+	}
+	curr.next = tokenizer.new_eof(tok);
+
+	arg := new(Macro_Arg);
+	arg.tok = head.next;
+	rest^ = tok;
+	return arg;
+}
+
+read_macro_args :: proc(cpp: ^Preprocessor, rest: ^^Token, tok: ^Token, params: ^Macro_Param, va_args_name: string) -> ^Macro_Arg {
+	tok := tok;
+	start := tok;
+	tok = tok.next.next;
+
+	head: Macro_Arg;
+	curr := &head;
+
+	pp := params;
+	for ; pp != nil; pp = pp.next {
+		if curr != &head {
+			tok = skip(cpp, tok, ",");
+		}
+		curr.next = read_macro_arg_one(cpp, &tok, tok, false);
+		curr = curr.next;
+		curr.name = pp.name;
+	}
+
+	if va_args_name != "" {
+		arg: ^Macro_Arg;
+		if tok.lit == ")" {
+			arg = new(Macro_Arg);
+			arg.tok = tokenizer.new_eof(tok);
+		} else {
+			if pp != params {
+				tok = skip(cpp, tok, ",");
+			}
+			arg = read_macro_arg_one(cpp, &tok, tok, true);
+		}
+		arg.name = va_args_name;
+		arg.is_va_args = true;
+		curr.next = arg;
+		curr = curr.next;
+	} else if pp != nil {
+		error(cpp, start, "too many arguments");
+	}
+
+	skip(cpp, tok, ")");
+	rest^ = tok;
+	return head.next;
+}
+
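+// expand_macro expands the macro named by tok, if any. The token's hide
+// set suppresses re-expansion of a macro inside its own expansion, so
+// definitions such as '#define foo foo' terminate. A function-like macro
+// is only expanded when it is directly followed by '('.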
+expand_macro :: proc(cpp: ^Preprocessor, rest: ^^Token, tok: ^Token) -> bool {
+	if tokenizer.hide_set_contains(tok.hide_set, tok.lit) {
+		return false;
+	}
+	tok := tok;
+	m := find_macro(cpp, tok);
+	if m == nil {
+		return false;
+	}
+
+	if m.handler != nil {
+		rest^ = m.handler(cpp, tok);
+		rest^.next = tok.next;
+		return true;
+	}
+
+	if m.kind == .Value_Like {
+		hs := tokenizer.hide_set_union(tok.hide_set, tokenizer.new_hide_set(m.name));
+		body := tokenizer.add_hide_set(m.body, hs);
+		for t := body; t.kind != .EOF; t = t.next {
+			t.origin = tok;
+		}
+		rest^ = append_token(body, tok.next);
+		rest^.at_bol = tok.at_bol;
+		rest^.has_space = tok.has_space;
+		return true;
+	}
+
+	if tok.next.lit != "(" {
+		return false;
+	}
+
+	macro_token := tok;
+	args := read_macro_args(cpp, &tok, tok, m.params, m.va_args_name);
+	close_paren := tok;
+
+	hs := tokenizer.hide_set_intersection(macro_token.hide_set, close_paren.hide_set);
+	hs = tokenizer.hide_set_union(hs, tokenizer.new_hide_set(m.name));
+
+	body := substitute_token(cpp, m.body, args);
+	body = tokenizer.add_hide_set(body, hs);
+	for t := body; t.kind != .EOF; t = t.next {
+		t.origin = macro_token;
+	}
+	rest^ = append_token(body, tok.next);
+	rest^.at_bol = macro_token.at_bol;
+	rest^.has_space = macro_token.has_space;
+	return true;
+}
+
+search_include_next :: proc(cpp: ^Preprocessor, filename: string) -> (path: string, ok: bool) {
+	for ; cpp.include_next_index < len(cpp.include_paths); cpp.include_next_index += 1 {
+		tpath := filepath.join(elems={cpp.include_paths[cpp.include_next_index], filename}, allocator=context.temp_allocator);
+		if os.exists(tpath) {
+			return strings.clone(tpath), true;
+		}
+	}
+	return;
+}
+
+search_include_paths :: proc(cpp: ^Preprocessor, filename: string) -> (path: string, ok: bool) {
+	if filepath.is_abs(filename) {
+		return filename, true;
+	}
+
+	if path, ok = cpp.filepath_cache[filename]; ok {
+		return;
+	}
+
+	for include_path in cpp.include_paths {
+		tpath := filepath.join(elems={include_path, filename}, allocator=context.temp_allocator);
+		if os.exists(tpath) {
+			path, ok = strings.clone(tpath), true;
+			cpp.filepath_cache[filename] = path;
+			return;
+		}
+	}
+
+	return;
+}
+
+read_const_expr :: proc(cpp: ^Preprocessor, rest: ^^Token, tok: ^Token) -> ^Token {
+	tok := tok;
+	tok = copy_line(rest, tok);
+	head: Token;
+	curr := &head;
+	for tok.kind != .EOF {
+		if tok.lit == "defined" {
+			start := tok;
+			has_paren := consume(&tok, tok.next, "(");
+			if tok.kind != .Ident {
+				error(cpp, start, "macro name must be an identifier");
+			}
+			m := find_macro(cpp, tok);
+			tok = tok.next;
+
+			if has_paren {
+				tok = skip(cpp, tok, ")");
+			}
+
+			curr.next = new_number_token(cpp, 1 if m != nil else 0, start);
+			curr = curr.next;
+			continue;
+		}
+
+		curr.next = tok;
+		curr = curr.next;
+		tok = tok.next;
+	}
+
+	curr.next = tok;
+	return head.next;
+}
+
+eval_const_expr :: proc(cpp: ^Preprocessor, rest: ^^Token, tok: ^Token) -> (val: i64) {
+	tok := tok;
+	start := tok;
+	expr := read_const_expr(cpp, rest, tok.next);
+	expr = preprocess_internal(cpp, expr);
+
+	if expr.kind == .EOF {
+		error(cpp, start, "no expression");
+	}
+
+	for t := expr; t.kind != .EOF; t = t.next {
+		if t.kind == .Ident {
+			next := t.next;
+			t^ = new_number_token(cpp, 0, t)^;
+			t.next = next;
+		}
+	}
+
+	val = 1;
+	convert_pp_tokens(cpp, expr, tokenizer.default_is_keyword);
+
+	rest2: ^Token;
+	val = const_expr(&rest2, expr);
+	if rest2 != nil && rest2.kind != .EOF {
+		error(cpp, rest2, "extra token");
+	}
+	return;
+}
+
+push_cond_incl :: proc(cpp: ^Preprocessor, tok: ^Token, included: bool) -> ^Cond_Incl {
+	ci := new(Cond_Incl);
+	ci.next = cpp.cond_incl;
+	ci.state = .In_Then;
+	ci.tok = tok;
+	ci.included = included;
+	cpp.cond_incl = ci;
+	return ci;
+}
+
+read_line_marker:: proc(cpp: ^Preprocessor, rest: ^^Token, tok: ^Token) {
+	tok := tok;
+	start := tok;
+	tok = preprocess(cpp, copy_line(rest, tok));
+	if tok.kind != .Number {
+		error(cpp, tok, "invalid line marker");
+	}
+	ival, _ := tok.val.(i64);
+	start.file.line_delta = int(ival - i64(start.pos.line));
+	tok = tok.next;
+	if tok.kind == .EOF {
+		return;
+	}
+
+	if tok.kind != .String {
+		error(cpp, tok, "filename expected");
+	}
+	start.file.display_name = tok.lit;
+}
+
+preprocess_internal :: proc(cpp: ^Preprocessor, tok: ^Token) -> ^Token {
+	head: Token;
+	curr := &head;
+
+	tok := tok;
+	for tok != nil && tok.kind != .EOF {
+		if expand_macro(cpp, &tok, tok) {
+			continue;
+		}
+
+		if !is_hash(tok) {
+			if tok.file != nil {
+				tok.line_delta = tok.file.line_delta;
+			}
+			curr.next = tok;
+			curr = curr.next;
+			tok = tok.next;
+			continue;
+		}
+
+		start := tok;
+		tok = tok.next;
+
+		switch tok.lit {
+		case "include":
+			filename, is_quote := read_include_filename(cpp, &tok, tok.next);
+			is_absolute := filepath.is_abs(filename);
+			if is_absolute {
+				tok = include_file(cpp, tok, filename, start.next.next);
+				continue;
+			}
+
+			if is_quote {
+				dir := "";
+				if start.file != nil {
+					dir = filepath.dir(start.file.name);
+				}
+				path := filepath.join(dir, filename);
+				if os.exists(path) {
+					tok = include_file(cpp, tok, path, start.next.next);
+					continue;
+				}
+			}
+
+			path, ok := search_include_paths(cpp, filename);
+			if !ok {
+				path = filename;
+			}
+			tok = include_file(cpp, tok, path, start.next.next);
+			continue;
+
+		case "include_next":
+			filename, _ := read_include_filename(cpp, &tok, tok.next);
+			path, ok := search_include_next(cpp, filename);
+			if !ok {
+				path = filename;
+			}
+			tok = include_file(cpp, tok, path, start.next.next);
+			continue;
+
+		case "define":
+			read_macro_definition(cpp, &tok, tok.next);
+			continue;
+
+		case "undef":
+			tok = tok.next;
+			if tok.kind != .Ident {
+				error(cpp, tok, "macro name must be an identifier");
+			}
+			undef_macro(cpp, tok.lit);
+			tok = skip_line(cpp, tok.next);
+			continue;
+
+		case "if":
+			val := eval_const_expr(cpp, &tok, tok);
+			push_cond_incl(cpp, start, val != 0);
+			if val == 0 {
+				tok = skip_cond_incl(tok);
+			}
+			continue;
+
+		case "ifdef":
+			defined := find_macro(cpp, tok.next);
+			push_cond_incl(cpp, tok, defined != nil);
+			tok = skip_line(cpp, tok.next.next);
+			if defined == nil {
+				tok = skip_cond_incl(tok);
+			}
+			continue;
+
+		case "ifndef":
+			defined := find_macro(cpp, tok.next);
+			push_cond_incl(cpp, tok, defined == nil);
+			tok = skip_line(cpp, tok.next.next);
+			if defined != nil {
+				tok = skip_cond_incl(tok);
+			}
+			continue;
+
+		case "elif":
+			if cpp.cond_incl == nil || cpp.cond_incl.state == .In_Else {
+				error(cpp, start, "stray #elif");
+			}
+			if cpp.cond_incl != nil {
+				cpp.cond_incl.state = .In_Elif;
+			}
+
+			if (cpp.cond_incl != nil && !cpp.cond_incl.included) && eval_const_expr(cpp, &tok, tok) != 0 {
+				cpp.cond_incl.included = true;
+			} else {
+				tok = skip_cond_incl(tok);
+			}
+			continue;
+
+		case "else":
+			if cpp.cond_incl == nil || cpp.cond_incl.state == .In_Else {
+				error(cpp, start, "stray #else");
+			}
+			if cpp.cond_incl != nil {
+				cpp.cond_incl.state = .In_Else;
+			}
+			tok = skip_line(cpp, tok.next);
+
+			if cpp.cond_incl != nil && cpp.cond_incl.included {
+				tok = skip_cond_incl(tok);
+			}
+			continue;
+
+		case "endif":
+			if cpp.cond_incl == nil {
+				error(cpp, start, "stray #endif");
+			} else {
+				cpp.cond_incl = cpp.cond_incl.next;
+			}
+			tok = skip_line(cpp, tok.next);
+			continue;
+
+		case "line":
+			read_line_marker(cpp, &tok, tok.next);
+			continue;
+
+		case "pragma":
+			if tok.next.lit == "once" {
+				cpp.pragma_once[tok.pos.file] = true;
+				tok = skip_line(cpp, tok.next.next);
+				continue;
+			}
+
+			pragma_tok, pragma_end := tok, tok;
+
+			for tok != nil && tok.kind != .EOF {
+				pragma_end = tok;
+				tok = tok.next;
+				if tok.at_bol {
+					break;
+				}
+			}
+			pragma_end.next = tokenizer.new_eof(tok);
+			if cpp.pragma_handler != nil {
+				cpp.pragma_handler(cpp, pragma_tok.next);
+				continue;
+			}
+
+			continue;
+
+		case "error":
+			error(cpp, tok, "error");
+		}
+
+		if tok.kind == .PP_Number {
+			read_line_marker(cpp, &tok, tok);
+			continue;
+		}
+
+		if !tok.at_bol {
+			error(cpp, tok, "invalid preprocessor directive");
+		}
+	}
+
+	curr.next = tok;
+	return head.next;
+}
+
+
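+// preprocess runs the whole pipeline over a token list: directives and
+// macro expansion first, then keyword/number/char/string conversion,
+// joining of adjacent string literals, and finally application of the
+// #line deltas to the stored positions.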
+preprocess :: proc(cpp: ^Preprocessor, tok: ^Token) -> ^Token {
+	tok := tok;
+	tok = preprocess_internal(cpp, tok);
+	if cpp.cond_incl != nil {
+		error(cpp, tok, "unterminated conditional directive");
+	}
+	convert_pp_tokens(cpp, tok, tokenizer.default_is_keyword);
+	join_adjacent_string_literals(cpp, tok);
+	for t := tok; t != nil; t = t.next {
+		t.pos.line += t.line_delta;
+	}
+	return tok;
+}
+
+
+define_macro :: proc(cpp: ^Preprocessor, name, def: string) {
+	src := transmute([]byte)def;
+
+	file := new(tokenizer.File);
+	file.id = -1;
+	file.src = src;
+	file.name = "<built-in>";
+	file.display_name = file.name;
+
+
+	t := _init_tokenizer_from_preprocessor(&Tokenizer{}, cpp);
+	tok := tokenizer.tokenize(t, file);
+	add_macro(cpp, name, .Value_Like, tok);
+}
+
+
+file_macro :: proc(cpp: ^Preprocessor, tok: ^Token) -> ^Token {
+	tok := tok;
+	for tok.origin != nil {
+		tok = tok.origin;
+	}
+	return new_string_token(cpp, tok.file.display_name, tok);
+}
+line_macro :: proc(cpp: ^Preprocessor, tok: ^Token) -> ^Token {
+	tok := tok;
+	for tok.origin != nil {
+		tok = tok.origin;
+	}
+	i := i64(tok.pos.line + tok.file.line_delta);
+	return new_number_token(cpp, i, tok);
+}
+counter_macro :: proc(cpp: ^Preprocessor, tok: ^Token) -> ^Token {
+	i := cpp.counter;
+	cpp.counter += 1;
+	return new_number_token(cpp, i, tok);
+}
+
+init_default_macros :: proc(cpp: ^Preprocessor) {
+	define_macro(cpp, "__C99_MACRO_WITH_VA_ARGS", "1");
+	define_macro(cpp, "__alignof__", "_Alignof");
+	define_macro(cpp, "__const__", "const");
+	define_macro(cpp, "__inline__", "inline");
+	define_macro(cpp, "__signed__", "signed");
+	define_macro(cpp, "__typeof__", "typeof");
+	define_macro(cpp, "__volatile__", "volatile");
+
+	add_builtin(cpp, "__FILE__", file_macro);
+	add_builtin(cpp, "__LINE__", line_macro);
+	add_builtin(cpp, "__COUNTER__", counter_macro);
+}
+
+init_lookup_tables :: proc(cpp: ^Preprocessor, allocator := context.allocator) {
+	context.allocator = allocator;
+	reserve(&cpp.macros,         max(16, cap(cpp.macros)));
+	reserve(&cpp.pragma_once,    max(16, cap(cpp.pragma_once)));
+	reserve(&cpp.include_guards, max(16, cap(cpp.include_guards)));
+	reserve(&cpp.filepath_cache, max(16, cap(cpp.filepath_cache)));
+}

+ 154 - 0
core/c/frontend/preprocessor/unquote.odin

@@ -0,0 +1,154 @@
+package c_frontend_preprocess
+
+import "core:unicode/utf8"
+
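+// unquote_char decodes a single (possibly escaped) character from the
+// body of a char or string literal, e.g. `\x41` yields 'A'.
+// multiple_bytes reports whether the rune must be UTF-8 encoded rather
+// than emitted as one byte, and tail_string is the remaining input.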
+unquote_char :: proc(str: string, quote: byte) -> (r: rune, multiple_bytes: bool, tail_string: string, success: bool) {
+	hex_to_int :: proc(c: byte) -> int {
+		switch c {
+		case '0'..'9': return int(c-'0');
+		case 'a'..'f': return int(c-'a')+10;
+		case 'A'..'F': return int(c-'A')+10;
+		}
+		return -1;
+	}
+	w: int;
+
+	if str[0] == quote && quote == '"' {
+		return;
+	} else if str[0] >= 0x80 {
+		r, w = utf8.decode_rune_in_string(str);
+		return r, true, str[w:], true;
+	} else if str[0] != '\\' {
+		return rune(str[0]), false, str[1:], true;
+	}
+
+	if len(str) <= 1 {
+		return;
+	}
+	s := str;
+	c := s[1];
+	s = s[2:];
+
+	switch c {
+	case: r = rune(c);
+
+	case 'a':  r = '\a';
+	case 'b':  r = '\b';
+	case 'e':  r = '\e';
+	case 'f':  r = '\f';
+	case 'n':  r = '\n';
+	case 'r':  r = '\r';
+	case 't':  r = '\t';
+	case 'v':  r = '\v';
+	case '\\': r = '\\';
+
+	case '"':  r = '"';
+	case '\'': r = '\'';
+
+	case '0'..'7':
+		v := int(c-'0');
+		if len(s) < 2 {
+			return;
+		}
+		for i in 0..<len(s) {
+			d := int(s[i]-'0');
+			if d < 0 || d > 7 {
+				return;
+			}
+			v = (v<<3) | d;
+		}
+		s = s[2:];
+		if v > 0xff {
+			return;
+		}
+		r = rune(v);
+
+	case 'x', 'u', 'U':
+		count: int;
+		switch c {
+		case 'x': count = 2;
+		case 'u': count = 4;
+		case 'U': count = 8;
+		}
+
+		if len(s) < count {
+			return;
+		}
+
+		for i in 0..<count {
+			d := hex_to_int(s[i]);
+			if d < 0 {
+				return;
+			}
+			r = (r<<4) | rune(d);
+		}
+		s = s[count:];
+		if c == 'x' {
+			break;
+		}
+		if r > utf8.MAX_RUNE {
+			return;
+		}
+		multiple_bytes = true;
+	}
+
+	success = true;
+	tail_string = s;
+	return;
+}
+
+unquote_string :: proc(lit: string, allocator := context.allocator) -> (res: string, allocated, success: bool) {
+	contains_rune :: proc(s: string, r: rune) -> int {
+		for c, offset in s {
+			if c == r {
+				return offset;
+			}
+		}
+		return -1;
+	}
+
+	assert(len(lit) >= 2);
+
+	s := lit;
+	quote := '"';
+
+	if s == `""` {
+		return "", false, true;
+	}
+
+	if contains_rune(s, '\n') >= 0 {
+		return s, false, false;
+	}
+
+	if contains_rune(s, '\\') < 0 && contains_rune(s, quote) < 0 {
+		if quote == '"' {
+			return s, false, true;
+		}
+	}
+	s = s[1:len(s)-1];
+
+
+	buf_len := 3*len(s) / 2;
+	buf := make([]byte, buf_len, allocator);
+	offset := 0;
+	for len(s) > 0 {
+		r, multiple_bytes, tail_string, ok := unquote_char(s, byte(quote));
+		if !ok {
+			delete(buf);
+			return s, false, false;
+		}
+		s = tail_string;
+		if r < 0x80 || !multiple_bytes {
+			buf[offset] = byte(r);
+			offset += 1;
+		} else {
+			b, w := utf8.encode_rune(r);
+			copy(buf[offset:], b[:w]);
+			offset += w;
+		}
+	}
+
+	new_string := string(buf[:offset]);
+
+	return new_string, true, true;
+}

+ 35 - 0
core/c/frontend/tokenizer/doc.odin

@@ -0,0 +1,35 @@
+/*
+package demo
+
+import tokenizer "core:c/frontend/tokenizer"
+import preprocessor "core:c/frontend/preprocessor"
+import "core:fmt"
+import "core:path/filepath"
+
+main :: proc() {
+	t := &tokenizer.Tokenizer{};
+	tokenizer.init_defaults(t);
+
+	cpp := &preprocessor.Preprocessor{};
+	cpp.warn, cpp.err = t.warn, t.err;
+	preprocessor.init_lookup_tables(cpp);
+	preprocessor.init_default_macros(cpp);
+	cpp.include_paths = {"W:/Odin/core/c/frontend/include"};
+
+	// NOTE: match_path is a placeholder here; point it at a real C file.
+	match_path := "path/to/file.c";
+	tok := tokenizer.tokenize_file(t, match_path, 1);
+
+	tok = preprocessor.preprocess(cpp, tok);
+	if tok != nil {
+		for t := tok; t.kind != .EOF; t = t.next {
+			fmt.println(t.lit);
+		}
+	}
+
+	fmt.println("[Done]");
+}
+*/
+
+
+package c_frontend_tokenizer
+
+

+ 68 - 0
core/c/frontend/tokenizer/hide_set.odin

@@ -0,0 +1,68 @@
+package c_frontend_tokenizer
+
+// NOTE(bill): This is a really dumb approach for a hide set,
+// but it's really simple and probably fast enough in practice
+
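+// A token's hide set records which macro names have already been
+// expanded to produce that token; expansion of any name in the set is
+// suppressed, which is what makes recursive macro references terminate.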
+
+Hide_Set :: struct {
+	next: ^Hide_Set,
+	name: string,
+}
+
+
+new_hide_set :: proc(name: string) -> ^Hide_Set {
+	hs := new(Hide_Set);
+	hs.name = name;
+	return hs;
+}
+
+hide_set_contains :: proc(hs: ^Hide_Set, name: string) -> bool {
+	for h := hs; h != nil; h = h.next {
+		if h.name == name {
+			return true;
+		}
+	}
+	return false;
+}
+
+
+hide_set_union :: proc(a, b: ^Hide_Set) -> ^Hide_Set {
+	head: Hide_Set;
+	curr := &head;
+
+	for h := a; h != nil; h = h.next {
+		curr.next = new_hide_set(h.name);
+		curr = curr.next;
+	}
+	curr.next = b;
+	return head.next;
+}
+
+
+hide_set_intersection :: proc(a, b: ^Hide_Set) -> ^Hide_Set {
+	head: Hide_Set;
+	curr := &head;
+
+	for h := a; h != nil; h = h.next {
+		if hide_set_contains(b, h.name) {
+			curr.next = new_hide_set(h.name);
+			curr = curr.next;
+		}
+	}
+	return head.next;
+}
+
+
+add_hide_set :: proc(tok: ^Token, hs: ^Hide_Set) -> ^Token {
+	head: Token;
+	curr := &head;
+
+	tok := tok;
+	for ; tok != nil; tok = tok.next {
+		t := copy_token(tok);
+		t.hide_set = hide_set_union(t.hide_set, hs);
+		curr.next = t;
+		curr = curr.next;
+	}
+	return head.next;
+}

+ 169 - 0
core/c/frontend/tokenizer/token.odin

@@ -0,0 +1,169 @@
+package c_frontend_tokenizer
+
+
+Pos :: struct {
+	file:   string,
+	line:   int,
+	column: int,
+	offset: int,
+}
+
+Token_Kind :: enum {
+	Invalid,
+	Ident,
+	Punct,
+	Keyword,
+	Char,
+	String,
+	Number,
+	PP_Number,
+	Comment,
+	EOF,
+}
+
+File :: struct {
+	name: string,
+	id:   int,
+	src:  []byte,
+
+	display_name: string,
+	line_delta:   int,
+}
+
+
+Token_Type_Hint :: enum u8 {
+	None,
+
+	Int,
+	Long,
+	Long_Long,
+
+	Unsigned_Int,
+	Unsigned_Long,
+	Unsigned_Long_Long,
+
+	Float,
+	Double,
+	Long_Double,
+
+	UTF_8,
+	UTF_16,
+	UTF_32,
+	UTF_Wide,
+}
+
+Token_Value :: union {
+	i64,
+	f64,
+	string,
+	[]u16,
+	[]u32,
+}
+
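+// at_bol marks the first token of a source line and has_space marks a
+// token preceded by whitespace; the preprocessor needs both to recognise
+// directives ('#' at beginning of line) and to stringize correctly.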
+Token :: struct {
+	kind: Token_Kind,
+	next: ^Token,
+	lit: string,
+
+	pos:   Pos,
+	file:  ^File,
+	line_delta: int,
+	at_bol:     bool,
+	has_space:  bool,
+
+	type_hint: Token_Type_Hint,
+	val: Token_Value,
+	prefix: string,
+
+	// Preprocessor values
+	hide_set: ^Hide_Set,
+	origin:   ^Token,
+}
+
+Is_Keyword_Proc :: #type proc(tok: ^Token) -> bool;
+
+copy_token :: proc(tok: ^Token) -> ^Token {
+	t := new_clone(tok^);
+	t.next = nil;
+	return t;
+}
+
+new_eof :: proc(tok: ^Token) -> ^Token {
+	t := new_clone(tok^);
+	t.kind = .EOF;
+	t.lit = "";
+	return t;
+}
+
+default_is_keyword :: proc(tok: ^Token) -> bool {
+	if tok.kind == .Keyword {
+		return true;
+	}
+	if len(tok.lit) > 0 {
+		return default_keyword_set[tok.lit];
+	}
+	return false;
+}
+
+
+token_name := [Token_Kind]string {
+	.Invalid   = "invalid",
+	.Ident     = "ident",
+	.Punct     = "punct",
+	.Keyword   = "keyword",
+	.Char      = "char",
+	.String    = "string",
+	.Number    = "number",
+	.PP_Number = "preprocessor number",
+	.Comment   = "comment",
+	.EOF       = "eof",
+};
+
+default_keyword_set := map[string]bool{
+	"auto"          = true,
+	"break"         = true,
+	"case"          = true,
+	"char"          = true,
+	"const"         = true,
+	"continue"      = true,
+	"default"       = true,
+	"do"            = true,
+	"double"        = true,
+	"else"          = true,
+	"enum"          = true,
+	"extern"        = true,
+	"float"         = true,
+	"for"           = true,
+	"goto"          = true,
+	"if"            = true,
+	"int"           = true,
+	"long"          = true,
+	"register"      = true,
+	"restrict"      = true,
+	"return"        = true,
+	"short"         = true,
+	"signed"        = true,
+	"sizeof"        = true,
+	"static"        = true,
+	"struct"        = true,
+	"switch"        = true,
+	"typedef"       = true,
+	"union"         = true,
+	"unsigned"      = true,
+	"void"          = true,
+	"volatile"      = true,
+	"while"         = true,
+	"_Alignas"      = true,
+	"_Alignof"      = true,
+	"_Atomic"       = true,
+	"_Bool"         = true,
+	"_Generic"      = true,
+	"_Noreturn"     = true,
+	"_Thread_local" = true,
+	"__restrict"    = true,
+	"typeof"        = true,
+	"asm"           = true,
+	"__restrict__"  = true,
+	"__thread"      = true,
+	"__attribute__" = true,
+};

+ 667 - 0
core/c/frontend/tokenizer/tokenizer.odin

@@ -0,0 +1,667 @@
+package c_frontend_tokenizer
+
+import "core:fmt"
+import "core:os"
+import "core:strings"
+import "core:unicode/utf8"
+
+
+Error_Handler :: #type proc(pos: Pos, fmt: string, args: ..any);
+
+
+Tokenizer :: struct {
+	// Immutable data
+	path: string,
+	src:  []byte,
+
+
+	// Tokenizing state
+	ch:          rune,
+	offset:      int,
+	read_offset: int,
+	line_offset: int,
+	line_count:  int,
+
+	// Extra information for tokens
+	at_bol:    bool,
+	has_space: bool,
+
+	// Mutable data
+	err:  Error_Handler,
+	warn: Error_Handler,
+	error_count:   int,
+	warning_count: int,
+}
+
+init_defaults :: proc(t: ^Tokenizer, err: Error_Handler = default_error_handler, warn: Error_Handler = default_warn_handler) {
+	t.err = err;
+	t.warn = warn;
+}
+
+
+@(private)
+offset_to_pos :: proc(t: ^Tokenizer, offset: int) -> (pos: Pos) {
+	pos.file = t.path;
+	pos.offset = offset;
+	pos.line = t.line_count;
+	pos.column = offset - t.line_offset + 1;
+	return;
+}
+
+default_error_handler :: proc(pos: Pos, msg: string, args: ..any) {
+	fmt.eprintf("%s(%d:%d) ", pos.file, pos.line, pos.column);
+	fmt.eprintf(msg, ..args);
+	fmt.eprintf("\n");
+}
+
+default_warn_handler :: proc(pos: Pos, msg: string, args: ..any) {
+	fmt.eprintf("%s(%d:%d) warning: ", pos.file, pos.line, pos.column);
+	fmt.eprintf(msg, ..args);
+	fmt.eprintf("\n");
+}
+
+error_offset :: proc(t: ^Tokenizer, offset: int, msg: string, args: ..any) {
+	pos := offset_to_pos(t, offset);
+	if t.err != nil {
+		t.err(pos, msg, ..args);
+	}
+	t.error_count += 1;
+}
+
+warn_offset :: proc(t: ^Tokenizer, offset: int, msg: string, args: ..any) {
+	pos := offset_to_pos(t, offset);
+	if t.warn != nil {
+		t.warn(pos, msg, ..args);
+	}
+	t.warning_count += 1;
+}
+
+error :: proc(t: ^Tokenizer, tok: ^Token, msg: string, args: ..any) {
+	pos := tok.pos;
+	if t.err != nil {
+		t.err(pos, msg, ..args);
+	}
+	t.error_count += 1;
+}
+
+warn :: proc(t: ^Tokenizer, tok: ^Token, msg: string, args: ..any) {
+	pos := tok.pos;
+	if t.warn != nil {
+		t.warn(pos, msg, ..args);
+	}
+	t.warning_count += 1;
+}
+
+
+advance_rune :: proc(t: ^Tokenizer) {
+	if t.read_offset < len(t.src) {
+		t.offset = t.read_offset;
+		if t.ch == '\n' {
+			t.at_bol = true;
+			t.line_offset = t.offset;
+			t.line_count += 1;
+		}
+		r, w := rune(t.src[t.read_offset]), 1;
+		switch {
+		case r == 0:
+			error_offset(t, t.offset, "illegal character NUL");
+		case r >= utf8.RUNE_SELF:
+			r, w = utf8.decode_rune(t.src[t.read_offset:]);
+			if r == utf8.RUNE_ERROR && w == 1 {
+				error_offset(t, t.offset, "illegal UTF-8 encoding");
+			} else if r == utf8.RUNE_BOM && t.offset > 0 {
+				error_offset(t, t.offset, "illegal byte order mark");
+			}
+		}
+		t.read_offset += w;
+		t.ch = r;
+	} else {
+		t.offset = len(t.src);
+		if t.ch == '\n' {
+			t.at_bol = true;
+			t.line_offset = t.offset;
+			t.line_count += 1;
+		}
+		t.ch = -1;
+	}
+}
+
+advance_rune_n :: proc(t: ^Tokenizer, n: int) {
+	for in 0..<n {
+		advance_rune(t);
+	}
+}
+
+is_digit :: proc(r: rune) -> bool {
+	return '0' <= r && r <= '9';
+}
+
+skip_whitespace :: proc(t: ^Tokenizer) {
+	for {
+		switch t.ch {
+		case ' ', '\t', '\r', '\v', '\f', '\n':
+			t.has_space = true;
+			advance_rune(t);
+		case:
+			return;
+		}
+	}
+}
+
+scan_comment :: proc(t: ^Tokenizer) -> string {
+	offset := t.offset-1;
+	next := -1;
+	general: {
+		if t.ch == '/' { // line comments
+			advance_rune(t);
+			for t.ch != '\n' && t.ch >= 0 {
+				advance_rune(t);
+			}
+
+			next = t.offset;
+			if t.ch == '\n' {
+				next += 1;
+			}
+			break general;
+		}
+
+		/* style comment */
+		advance_rune(t);
+		for t.ch >= 0 {
+			ch := t.ch;
+			advance_rune(t);
+			if ch == '*' && t.ch == '/' {
+				advance_rune(t);
+				next = t.offset;
+				break general;
+			}
+		}
+
+		error_offset(t, offset, "comment not terminated");
+	}
+
+	lit := t.src[offset : t.offset];
+
+	// NOTE(bill): Strip CR for line comments
+	for len(lit) > 2 && lit[1] == '/' && lit[len(lit)-1] == '\r' {
+		lit = lit[:len(lit)-1];
+	}
+
+
+	return string(lit);
+}
+
+scan_identifier :: proc(t: ^Tokenizer) -> string {
+	offset := t.offset;
+
+	for is_ident1(t.ch) {
+		advance_rune(t);
+	}
+
+	return string(t.src[offset : t.offset]);
+}
+
+scan_string :: proc(t: ^Tokenizer) -> string {
+	offset := t.offset-1;
+
+	for {
+		ch := t.ch;
+		if ch == '\n' || ch < 0 {
+			error_offset(t, offset, "string literal was not terminated");
+			break;
+		}
+		advance_rune(t);
+		if ch == '"' {
+			break;
+		}
+		if ch == '\\' {
+			scan_escape(t);
+		}
+	}
+
+	return string(t.src[offset : t.offset]);
+}
+
+digit_val :: proc(r: rune) -> int {
+	switch r {
+	case '0'..'9':
+		return int(r-'0');
+	case 'A'..'F':
+		return int(r-'A' + 10);
+	case 'a'..'f':
+		return int(r-'a' + 10);
+	}
+	return 16;
+}
+
+scan_escape :: proc(t: ^Tokenizer) -> bool {
+	offset := t.offset;
+
+	esc := t.ch;
+	n: int;
+	base, max: u32;
+	switch esc {
+	case 'a', 'b', 'e', 'f', 'n', 't', 'v', 'r', '\\', '\'', '\"':
+		advance_rune(t);
+		return true;
+
+	case '0'..'7':
+		for digit_val(t.ch) < 8 {
+			advance_rune(t);
+		}
+		return true;
+	case 'x':
+		advance_rune(t);
+		for digit_val(t.ch) < 16 {
+			advance_rune(t);
+		}
+		return true;
+	case 'u':
+		advance_rune(t);
+		n, base, max = 4, 16, utf8.MAX_RUNE;
+	case 'U':
+		advance_rune(t);
+		n, base, max = 8, 16, utf8.MAX_RUNE;
+	case:
+		if t.ch < 0 {
+			error_offset(t, offset, "escape sequence was not terminated");
+		} else {
+			break;
+		}
+		return false;
+	}
+
+	x: u32;
+	main_loop: for n > 0 {
+		d := u32(digit_val(t.ch));
+		if d >= base {
+			if t.ch == '"' || t.ch == '\'' {
+				break main_loop;
+			}
+			if t.ch < 0 {
+				error_offset(t, t.offset, "escape sequence was not terminated");
+			} else {
+				error_offset(t, t.offset, "illegal character '%r' : %d in escape sequence", t.ch, t.ch);
+			}
+			return false;
+		}
+
+		x = x*base + d;
+		advance_rune(t);
+		n -= 1;
+	}
+
+	if x > max || 0xd800 <= x && x <= 0xe000 {
+		error_offset(t, offset, "escape sequence is an invalid Unicode code point");
+		return false;
+	}
+	return true;
+}
+
+scan_rune :: proc(t: ^Tokenizer) -> string {
+	offset := t.offset-1;
+	valid := true;
+	n := 0;
+	for {
+		ch := t.ch;
+		if ch == '\n' || ch < 0 {
+			if valid {
+				error_offset(t, offset, "rune literal not terminated");
+				valid = false;
+			}
+			break;
+		}
+		advance_rune(t);
+		if ch == '\'' {
+			break;
+		}
+		n += 1;
+		if ch == '\\' {
+			if !scan_escape(t)  {
+				valid = false;
+			}
+		}
+	}
+
+	if valid && n != 1 {
+		error_offset(t, offset, "illegal rune literal");
+	}
+
+	return string(t.src[offset : t.offset]);
+}
+
+scan_number :: proc(t: ^Tokenizer, seen_decimal_point: bool) -> (Token_Kind, string) {
+	scan_mantissa :: proc(t: ^Tokenizer, base: int) {
+		for digit_val(t.ch) < base {
+			advance_rune(t);
+		}
+	}
+	scan_exponent :: proc(t: ^Tokenizer) {
+		if t.ch == 'e' || t.ch == 'E' || t.ch == 'p' || t.ch == 'P' {
+			advance_rune(t);
+			if t.ch == '-' || t.ch == '+' {
+				advance_rune(t);
+			}
+			if digit_val(t.ch) < 10 {
+				scan_mantissa(t, 10);
+			} else {
+				error_offset(t, t.offset, "illegal floating-point exponent");
+			}
+		}
+	}
+	scan_fraction :: proc(t: ^Tokenizer) -> (early_exit: bool) {
+		if t.ch == '.' && peek(t) == '.' {
+			return true;
+		}
+		if t.ch == '.' {
+			advance_rune(t);
+			scan_mantissa(t, 10);
+		}
+		return false;
+	}
+
+	check_end := true;
+
+
+	offset := t.offset;
+	seen_point := seen_decimal_point;
+
+	if seen_point {
+		offset -= 1;
+		scan_mantissa(t, 10);
+		scan_exponent(t);
+	} else {
+		if t.ch == '0' {
+			int_base :: inline proc(t: ^Tokenizer, base: int, msg: string) {
+				prev := t.offset;
+				advance_rune(t);
+				scan_mantissa(t, base);
+				if t.offset - prev <= 1 {
+					error_offset(t, t.offset, msg);
+				}
+			}
+
+			advance_rune(t);
+			switch t.ch {
+			case 'b', 'B':
+				int_base(t, 2, "illegal binary integer");
+			case 'x', 'X':
+				int_base(t, 16, "illegal hexadecimal integer");
+			case:
+				seen_point = false;
+				scan_mantissa(t, 10);
+				if t.ch == '.' {
+					seen_point = true;
+					if scan_fraction(t) {
+						check_end = false;
+					}
+				}
+				if check_end {
+					scan_exponent(t);
+					check_end = false;
+				}
+			}
+		}
+	}
+
+	if check_end {
+		scan_mantissa(t, 10);
+
+		if !scan_fraction(t) {
+			scan_exponent(t);
+		}
+	}
+
+	return .PP_Number, string(t.src[offset : t.offset]);
+}
+
+scan_punct :: proc(t: ^Tokenizer, ch: rune) -> (kind: Token_Kind) {
+	kind = .Punct;
+	switch ch {
+	case:
+		kind = .Invalid;
+
+	case '<', '>':
+		if t.ch == ch {
+			advance_rune(t);
+		}
+		if t.ch == '=' {
+			advance_rune(t);
+		}
+	case '!', '+', '-', '*', '/', '%', '^', '=':
+		if t.ch == '=' {
+			advance_rune(t);
+		}
+	case '#':
+		if t.ch == '#' {
+			advance_rune(t);
+		}
+	case '&':
+		if t.ch == '=' || t.ch == '&' {
+			advance_rune(t);
+		}
+	case '|':
+		if t.ch == '=' || t.ch == '|' {
+			advance_rune(t);
+		}
+	case '(', ')', '[', ']', '{', '}':
+		// okay
+	case '~', ',', ':', ';', '?':
+		// okay
+	case '`':
+		// okay
+	case '.':
+		if t.ch == '.' && peek(t) == '.' {
+			advance_rune(t);
+			advance_rune(t); // consume last '.'
+		}
+	}
+	return;
+}
+
+peek :: proc(t: ^Tokenizer) -> byte {
+	if t.read_offset < len(t.src) {
+		return t.src[t.read_offset];
+	}
+	return 0;
+}
+peek_str :: proc(t: ^Tokenizer, str: string) -> bool {
+	if t.read_offset < len(t.src) {
+		return strings.has_prefix(string(t.src[t.offset:]), str);
+	}
+	return false;
+}
+
+scan_literal_prefix :: proc(t: ^Tokenizer, str: string, prefix: ^string) -> bool {
+	if peek_str(t, str) {
+		offset := t.offset;
+		for _ in str {
+			advance_rune(t);
+		}
+		prefix^ = string(t.src[offset:][:len(str)-1]);
+		return true;
+	}
+	return false;
+}
+
+
+allow_next_to_be_newline :: proc(t: ^Tokenizer) -> bool {
+	if t.ch == '\n' {
+		advance_rune(t);
+		return true;
+	} else if t.ch == '\r' && peek(t) == '\n' { // allow for MS-DOS style line endings
+		advance_rune(t); // \r
+		advance_rune(t); // \n
+		return true;
+	}
+	return false;
+}
+
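+// scan returns the next token. Literal prefixes (u8", u", U", L", u',
+// L', U') are matched first so the prefix is recorded on the token,
+// a backslash-newline splices the line, and comments are skipped while
+// still setting has_space.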
+scan :: proc(t: ^Tokenizer, f: ^File) -> ^Token {
+	skip_whitespace(t);
+
+	offset := t.offset;
+
+	kind: Token_Kind;
+	lit: string;
+	prefix: string;
+
+	switch ch := t.ch; {
+	case scan_literal_prefix(t, `u8"`, &prefix):
+		kind = .String;
+		lit = scan_string(t);
+	case scan_literal_prefix(t, `u"`, &prefix):
+		kind = .String;
+		lit = scan_string(t);
+	case scan_literal_prefix(t, `L"`, &prefix):
+		kind = .String;
+		lit = scan_string(t);
+	case scan_literal_prefix(t, `U"`, &prefix):
+		kind = .String;
+		lit = scan_string(t);
+	case scan_literal_prefix(t, `u'`, &prefix):
+		kind = .Char;
+		lit = scan_rune(t);
+	case scan_literal_prefix(t, `L'`, &prefix):
+		kind = .Char;
+		lit = scan_rune(t);
+	case scan_literal_prefix(t, `U'`, &prefix):
+		kind = .Char;
+		lit = scan_rune(t);
+
+	case is_ident0(ch):
+		lit = scan_identifier(t);
+		kind = .Ident;
+	case '0' <= ch && ch <= '9':
+		kind, lit = scan_number(t, false);
+	case:
+		advance_rune(t);
+		switch ch {
+		case -1:
+			kind = .EOF;
+		case '\\':
+			kind = .Punct;
+			if allow_next_to_be_newline(t) {
+				t.at_bol = false;
+				t.has_space = false;
+				return scan(t, f);
+			}
+
+		case '.':
+			if is_digit(t.ch) {
+				kind, lit = scan_number(t, true);
+			} else {
+				kind = scan_punct(t, ch);
+			}
+		case '"':
+			kind = .String;
+			lit = scan_string(t);
+		case '\'':
+			kind = .Char;
+			lit = scan_rune(t);
+		case '/':
+			if t.ch == '/' || t.ch == '*' {
+				kind = .Comment;
+				lit = scan_comment(t);
+				t.has_space = true;
+				break;
+			}
+			fallthrough;
+		case:
+			kind = scan_punct(t, ch);
+			if kind == .Invalid && ch != utf8.RUNE_BOM {
+				error_offset(t, t.offset, "illegal character '%r': %d", ch, ch);
+			}
+		}
+	}
+
+	if lit == "" {
+		lit = string(t.src[offset : t.offset]);
+	}
+
+	if kind == .Comment {
+		return scan(t, f);
+	}
+
+	tok := new(Token);
+	tok.kind = kind;
+	tok.lit = lit;
+	tok.pos = offset_to_pos(t, offset);
+	tok.file = f;
+	tok.prefix = prefix;
+	tok.at_bol = t.at_bol;
+	tok.has_space = t.has_space;
+
+	t.at_bol, t.has_space = false, false;
+
+	return tok;
+}
+
+tokenize :: proc(t: ^Tokenizer, f: ^File) -> ^Token {
+	setup_tokenizer: {
+		t.src = f.src;
+		t.ch = ' ';
+		t.offset = 0;
+		t.read_offset = 0;
+		t.line_offset = 0;
+		t.line_count = len(t.src) > 0 ? 1 : 0;
+		t.error_count = 0;
+		t.path = f.name;
+
+
+		advance_rune(t);
+		if t.ch == utf8.RUNE_BOM {
+			advance_rune(t);
+		}
+	}
+
+
+	t.at_bol = true;
+	t.has_space = false;
+
+	head: Token;
+	curr := &head;
+	for {
+		tok := scan(t, f);
+		if tok == nil {
+			break;
+		}
+		curr.next = tok;
+		curr = curr.next;
+		if tok.kind == .EOF {
+			break;
+		}
+	}
+
+	return head.next;
+}
+
+add_new_file :: proc(t: ^Tokenizer, name: string, src: []byte, id: int) -> ^File {
+	file := new(File);
+	file.id = id;
+	file.src = src;
+	file.name = name;
+	file.display_name = name;
+	return file;
+}
+
+tokenize_file :: proc(t: ^Tokenizer, path: string, id: int, loc := #caller_location) -> ^Token {
+	src, ok := os.read_entire_file(path);
+	if !ok {
+		return nil;
+	}
+	return tokenize(t, add_new_file(t, path, src, id));
+}
+
+
+inline_tokenize :: proc(t: ^Tokenizer, tok: ^Token, src: []byte) -> ^Token {
+	file := new(File);
+	file.src = src;
+	if tok.file != nil {
+		file.id = tok.file.id;
+		file.name = tok.file.name;
+		file.display_name = tok.file.name;
+	}
+
+	return tokenize(t, file);
+}

+ 116 - 0
core/c/frontend/tokenizer/unicode.odin

@@ -0,0 +1,116 @@
+package c_frontend_tokenizer
+
+
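+// in_range scans a flat array of inclusive [lo, hi] rune pairs that is
+// terminated by -1, e.g. {'a', 'z', 'A', 'Z', -1} matches ASCII letters.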
+in_range :: proc(range: []rune, c: rune) -> bool #no_bounds_check {
+	for i := 0; range[i] != -1; i += 2 {
+		if range[i] <= c && c <= range[i+1] {
+			return true;
+		}
+	}
+	return false;
+}
+
+
+// [https://www.sigbus.info/n1570#D] C11 allows ASCII and some multibyte characters in certain Unicode ranges to be used in an identifier.
+//
+// is_ident0 returns true if a given character is acceptable as the first character of an identifier.
+is_ident0 :: proc(c: rune) -> bool {
+	return in_range(_range_ident0, c);
+}
+// is_ident1 returns true if a given character is acceptable as a non-first character of an identifier.
+is_ident1 :: proc(c: rune) -> bool {
+	return is_ident0(c) || in_range(_range_ident1, c);
+}
+
+// Returns the number of columns needed to display a given character in a fixed-width font.
+// Based on https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
+char_width :: proc(c: rune) -> int {
+	switch {
+	case in_range(_range_width0, c):
+		return 0;
+	case in_range(_range_width2, c):
+		return 2;
+	}
+	return 1;
+}
+
+display_width :: proc(str: string) -> (w: int) {
+	for c in str {
+		w += char_width(c);
+	}
+	return;
+}
+
+
+
+_range_ident0 := []rune{
+	'_', '_', 'a', 'z', 'A', 'Z', '$', '$',
+	0x00A8, 0x00A8, 0x00AA, 0x00AA, 0x00AD, 0x00AD, 0x00AF, 0x00AF,
+	0x00B2, 0x00B5, 0x00B7, 0x00BA, 0x00BC, 0x00BE, 0x00C0, 0x00D6,
+	0x00D8, 0x00F6, 0x00F8, 0x00FF, 0x0100, 0x02FF, 0x0370, 0x167F,
+	0x1681, 0x180D, 0x180F, 0x1DBF, 0x1E00, 0x1FFF, 0x200B, 0x200D,
+	0x202A, 0x202E, 0x203F, 0x2040, 0x2054, 0x2054, 0x2060, 0x206F,
+	0x2070, 0x20CF, 0x2100, 0x218F, 0x2460, 0x24FF, 0x2776, 0x2793,
+	0x2C00, 0x2DFF, 0x2E80, 0x2FFF, 0x3004, 0x3007, 0x3021, 0x302F,
+	0x3031, 0x303F, 0x3040, 0xD7FF, 0xF900, 0xFD3D, 0xFD40, 0xFDCF,
+	0xFDF0, 0xFE1F, 0xFE30, 0xFE44, 0xFE47, 0xFFFD,
+	0x10000, 0x1FFFD, 0x20000, 0x2FFFD, 0x30000, 0x3FFFD, 0x40000, 0x4FFFD,
+	0x50000, 0x5FFFD, 0x60000, 0x6FFFD, 0x70000, 0x7FFFD, 0x80000, 0x8FFFD,
+	0x90000, 0x9FFFD, 0xA0000, 0xAFFFD, 0xB0000, 0xBFFFD, 0xC0000, 0xCFFFD,
+	0xD0000, 0xDFFFD, 0xE0000, 0xEFFFD,
+	-1,
+};
+
+_range_ident1 := []rune{
+	'0', '9', '$', '$', 0x0300, 0x036F, 0x1DC0, 0x1DFF, 0x20D0, 0x20FF, 0xFE20, 0xFE2F,
+	-1,
+};
+
+
+_range_width0 := []rune{
+	0x0000, 0x001F, 0x007f, 0x00a0, 0x0300, 0x036F, 0x0483, 0x0486,
+	0x0488, 0x0489, 0x0591, 0x05BD, 0x05BF, 0x05BF, 0x05C1, 0x05C2,
+	0x05C4, 0x05C5, 0x05C7, 0x05C7, 0x0600, 0x0603, 0x0610, 0x0615,
+	0x064B, 0x065E, 0x0670, 0x0670, 0x06D6, 0x06E4, 0x06E7, 0x06E8,
+	0x06EA, 0x06ED, 0x070F, 0x070F, 0x0711, 0x0711, 0x0730, 0x074A,
+	0x07A6, 0x07B0, 0x07EB, 0x07F3, 0x0901, 0x0902, 0x093C, 0x093C,
+	0x0941, 0x0948, 0x094D, 0x094D, 0x0951, 0x0954, 0x0962, 0x0963,
+	0x0981, 0x0981, 0x09BC, 0x09BC, 0x09C1, 0x09C4, 0x09CD, 0x09CD,
+	0x09E2, 0x09E3, 0x0A01, 0x0A02, 0x0A3C, 0x0A3C, 0x0A41, 0x0A42,
+	0x0A47, 0x0A48, 0x0A4B, 0x0A4D, 0x0A70, 0x0A71, 0x0A81, 0x0A82,
+	0x0ABC, 0x0ABC, 0x0AC1, 0x0AC5, 0x0AC7, 0x0AC8, 0x0ACD, 0x0ACD,
+	0x0AE2, 0x0AE3, 0x0B01, 0x0B01, 0x0B3C, 0x0B3C, 0x0B3F, 0x0B3F,
+	0x0B41, 0x0B43, 0x0B4D, 0x0B4D, 0x0B56, 0x0B56, 0x0B82, 0x0B82,
+	0x0BC0, 0x0BC0, 0x0BCD, 0x0BCD, 0x0C3E, 0x0C40, 0x0C46, 0x0C48,
+	0x0C4A, 0x0C4D, 0x0C55, 0x0C56, 0x0CBC, 0x0CBC, 0x0CBF, 0x0CBF,
+	0x0CC6, 0x0CC6, 0x0CCC, 0x0CCD, 0x0CE2, 0x0CE3, 0x0D41, 0x0D43,
+	0x0D4D, 0x0D4D, 0x0DCA, 0x0DCA, 0x0DD2, 0x0DD4, 0x0DD6, 0x0DD6,
+	0x0E31, 0x0E31, 0x0E34, 0x0E3A, 0x0E47, 0x0E4E, 0x0EB1, 0x0EB1,
+	0x0EB4, 0x0EB9, 0x0EBB, 0x0EBC, 0x0EC8, 0x0ECD, 0x0F18, 0x0F19,
+	0x0F35, 0x0F35, 0x0F37, 0x0F37, 0x0F39, 0x0F39, 0x0F71, 0x0F7E,
+	0x0F80, 0x0F84, 0x0F86, 0x0F87, 0x0F90, 0x0F97, 0x0F99, 0x0FBC,
+	0x0FC6, 0x0FC6, 0x102D, 0x1030, 0x1032, 0x1032, 0x1036, 0x1037,
+	0x1039, 0x1039, 0x1058, 0x1059, 0x1160, 0x11FF, 0x135F, 0x135F,
+	0x1712, 0x1714, 0x1732, 0x1734, 0x1752, 0x1753, 0x1772, 0x1773,
+	0x17B4, 0x17B5, 0x17B7, 0x17BD, 0x17C6, 0x17C6, 0x17C9, 0x17D3,
+	0x17DD, 0x17DD, 0x180B, 0x180D, 0x18A9, 0x18A9, 0x1920, 0x1922,
+	0x1927, 0x1928, 0x1932, 0x1932, 0x1939, 0x193B, 0x1A17, 0x1A18,
+	0x1B00, 0x1B03, 0x1B34, 0x1B34, 0x1B36, 0x1B3A, 0x1B3C, 0x1B3C,
+	0x1B42, 0x1B42, 0x1B6B, 0x1B73, 0x1DC0, 0x1DCA, 0x1DFE, 0x1DFF,
+	0x200B, 0x200F, 0x202A, 0x202E, 0x2060, 0x2063, 0x206A, 0x206F,
+	0x20D0, 0x20EF, 0x302A, 0x302F, 0x3099, 0x309A, 0xA806, 0xA806,
+	0xA80B, 0xA80B, 0xA825, 0xA826, 0xFB1E, 0xFB1E, 0xFE00, 0xFE0F,
+	0xFE20, 0xFE23, 0xFEFF, 0xFEFF, 0xFFF9, 0xFFFB, 0x10A01, 0x10A03,
+	0x10A05, 0x10A06, 0x10A0C, 0x10A0F, 0x10A38, 0x10A3A, 0x10A3F, 0x10A3F,
+	0x1D167, 0x1D169, 0x1D173, 0x1D182, 0x1D185, 0x1D18B, 0x1D1AA, 0x1D1AD,
+	0x1D242, 0x1D244, 0xE0001, 0xE0001, 0xE0020, 0xE007F, 0xE0100, 0xE01EF,
+	-1,
+};
+
+_range_width2 := []rune{
+	0x1100, 0x115F, 0x2329, 0x2329, 0x232A, 0x232A, 0x2E80, 0x303E,
+	0x3040, 0xA4CF, 0xAC00, 0xD7A3, 0xF900, 0xFAFF, 0xFE10, 0xFE19,
+	0xFE30, 0xFE6F, 0xFF00, 0xFF60, 0xFFE0, 0xFFE6, 0x1F000, 0x1F644,
+	0x20000, 0x2FFFD, 0x30000, 0x3FFFD,
+	-1,
+};