Browse Source

Optimize `rune_is_*` procedures for tokenizer

gingerBill 5 years ago
parent
commit
4e21a4d46a
2 changed files with 6 additions and 4 deletions
  1. 4 2
      src/tokenizer.cpp
  2. 2 2
      src/unicode.cpp

+ 4 - 2
src/tokenizer.cpp

@@ -937,8 +937,10 @@ Token tokenizer_get_token(Tokenizer *t) {
 	}
 
 	Token token = {};
-	token.string = {t->curr, 1};
-	token.pos.file = t->fullpath;
+	token.string.text = t->curr;
+	token.string.len  = 1;
+	token.pos.file.text = t->fullpath.text;
+	token.pos.file.len  = t->fullpath.len;
 	token.pos.line = t->line_count;
 	token.pos.offset = t->curr - t->start;
 	token.pos.column = t->curr - t->line + 1;

+ 2 - 2
src/unicode.cpp

@@ -12,7 +12,7 @@ bool rune_is_letter(Rune r) {
 		if (r == '_') {
 			return true;
 		}
-		return gb_char_is_alpha(cast(char)r) != 0;
+		return ((cast(u32)r | 0x20) - 0x61) < 26;
 	}
 	switch (utf8proc_category(r)) {
 	case UTF8PROC_CATEGORY_LU:
@@ -27,7 +27,7 @@ bool rune_is_letter(Rune r) {
 
 bool rune_is_digit(Rune r) {
 	if (r < 0x80) {
-		return gb_is_between(r, '0', '9');
+		return (cast(u32)r - '0') < 10;
 	}
 	return utf8proc_category(r) == UTF8PROC_CATEGORY_ND;
 }