Improve tokenizer slightly

gingerBill · 6 years ago · commit 9cadd58465
2 changed files with 25 additions and 11 deletions
  1. +12 -3  core/odin/tokenizer/tokenizer.odin
  2. +13 -8  src/tokenizer.cpp

+ 12 - 3
core/odin/tokenizer/tokenizer.odin

@@ -98,9 +98,9 @@ advance_rune :: proc(using t: ^Tokenizer) {
 	}
 }
 
-peek_byte :: proc(using t: ^Tokenizer) -> byte {
-	if read_offset < len(src) {
-		return src[read_offset];
+peek_byte :: proc(using t: ^Tokenizer, offset := 0) -> byte {
+	if read_offset+offset < len(src) {
+		return src[read_offset+offset];
 	}
 	return 0;
 }
@@ -577,6 +577,8 @@ scan :: proc(t: ^Tokenizer) -> token.Token {
 		case '≠': kind = token.Not_Eq;
 		case '≤': kind = token.Lt_Eq;
 		case '≥': kind = token.Gt_Eq;
+		case '∈': kind = token.In;
+		case '∉': kind = token.Notin;
 
 		case '.':
 			if '0' <= t.ch && t.ch <= '9' {
@@ -586,6 +588,10 @@ scan :: proc(t: ^Tokenizer) -> token.Token {
 				if t.ch == '.' {
 					advance_rune(t);
 					kind = token.Ellipsis;
+					if t.ch == '<' {
+						advance_rune(t);
+						kind = token.Range_Half;
+					}
 				}
 			}
 		case ':': kind = token.Colon;
@@ -597,6 +603,9 @@ scan :: proc(t: ^Tokenizer) -> token.Token {
 		case ']': kind = token.Close_Bracket;
 		case '{': kind = token.Open_Brace;
 		case '}': kind = token.Close_Brace;
+
+		case '\\': kind = token.Back_Slash;
+
 		case:
 			if ch != utf8.RUNE_BOM {
 				error(t, t.offset, "illegal character '%r': %d", ch, ch);

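Note on the Odin-side change above: peek_byte now takes an optional byte offset, which is what lets the scanner settle "." vs ".." vs "..<" by looking ahead instead of re-reading input. A minimal standalone sketch of that idea, written in C++ for brevity (MiniKind and scan_dots are made-up names for illustration, not code from this commit):

#include <cstdio>
#include <cstring>

enum MiniKind { MINI_PERIOD, MINI_ELLIPSIS, MINI_RANGE_HALF };

// Assumes the leading '.' has already been consumed by the caller,
// mirroring how the real scanner dispatches on the current rune.
static MiniKind scan_dots(const char **pp, const char *end) {
	const char *p = *pp;
	MiniKind kind = MINI_PERIOD;
	if (p < end && *p == '.') {        // ".." -> ellipsis
		p += 1;
		kind = MINI_ELLIPSIS;
		if (p < end && *p == '<') {    // "..<" -> half-open range
			p += 1;
			kind = MINI_RANGE_HALF;
		}
	}
	*pp = p;
	return kind;
}

int main(void) {
	const char *src = "..<";
	const char *p = src + 1;  // caller already consumed the first '.'
	printf("%d\n", scan_dots(&p, src + strlen(src)));  // prints 2 (MINI_RANGE_HALF)
	return 0;
}
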
+ 13 - 8
src/tokenizer.cpp

@@ -625,6 +625,13 @@ gb_inline void scan_mantissa(Tokenizer *t, i32 base) {
 	}
 }
 
+u8 peek_byte(Tokenizer *t, isize offset=0) {
+	if (t->read_curr+offset < t->end) {
+		return t->read_curr[offset];
+	}
+	return 0;
+}
+
 Token scan_number_to_token(Tokenizer *t, bool seen_decimal_point) {
 	Token token = {};
 	token.kind = Token_Integer;
@@ -718,14 +725,12 @@ Token scan_number_to_token(Tokenizer *t, bool seen_decimal_point) {
 
 fraction:
 	if (t->curr_rune == '.') {
-		// HACK(bill): This may be inefficient
-		TokenizerState state = save_tokenizer_state(t);
-		advance_to_next_rune(t);
-		if (t->curr_rune == '.') {
-			// TODO(bill): Clean up this shit
-			restore_tokenizer_state(t, &state);
+		if (peek_byte(t) == '.') {
+			// NOTE(bill): this is kind of ellipsis
 			goto end;
 		}
+		advance_to_next_rune(t);
+
 		token.kind = Token_Float;
 		scan_mantissa(t, 10);
 	}
@@ -983,7 +988,7 @@ Token tokenizer_get_token(Tokenizer *t) {
 		} break;
 
 		case '.':
-			if (t->curr_rune == '.') { // Could be an ellipsis
+			if (t->curr_rune == '.') {
 				advance_to_next_rune(t);
 				token.kind = Token_Ellipsis;
 				if (t->curr_rune == '<') {
@@ -1039,7 +1044,7 @@ Token tokenizer_get_token(Tokenizer *t) {
 			if (t->curr_rune == '=') {
 				advance_to_next_rune(t);
 				token.kind = Token_SubEq;
-			} else if (t->curr_rune == '-' && t->read_curr[0] == '-') {
+			} else if (t->curr_rune == '-' && peek_byte(t) == '-') {
 				advance_to_next_rune(t);
 				advance_to_next_rune(t);
 				token.kind = Token_Undef;
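
The C++ side replaces the save/restore-state workaround in scan_number_to_token with the same kind of lookahead: when the rune after the integer part is '.', a second '.' right behind it means the number ends there and ".."/"..<" is left for tokenizer_get_token. A rough self-contained sketch of that behaviour (scan_number here is a toy stand-in, not the compiler's function):

#include <cctype>
#include <cstdio>
#include <cstring>

// Byte lookahead that never moves the cursor; returns 0 past the end.
static unsigned char peek_byte(const char *curr, const char *end, long offset) {
	if (curr + offset < end) {
		return (unsigned char)curr[offset];
	}
	return 0;
}

// Toy number scanner: "1.5" is one float, but in "1..<5" only "1" belongs
// to the number, because its '.' is immediately followed by another '.'.
static size_t scan_number(const char *src, const char *end) {
	const char *p = src;
	while (p < end && isdigit((unsigned char)*p)) p += 1;
	if (p < end && *p == '.' && peek_byte(p, end, 1) != '.') {
		p += 1;  // a real fraction: consume '.' and its digits
		while (p < end && isdigit((unsigned char)*p)) p += 1;
	}
	return (size_t)(p - src);
}

int main(void) {
	const char *a = "1.5";
	const char *b = "1..<5";
	printf("%zu %zu\n", scan_number(a, a + strlen(a)),
	                    scan_number(b, b + strlen(b)));  // prints "3 1"
	return 0;
}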