Browse Source

Add `rune_is_letter_or_digit` for tokenizer

gingerBill 5 years ago
parent
commit
876820789e
2 changed files with 24 additions and 1 deletion
  1. 1 1
      src/tokenizer.cpp
  2. 23 0
      src/unicode.cpp

+ 1 - 1
src/tokenizer.cpp

@@ -948,7 +948,7 @@ Token tokenizer_get_token(Tokenizer *t) {
 	Rune curr_rune = t->curr_rune;
 	if (rune_is_letter(curr_rune)) {
 		token.kind = Token_Ident;
-		while (rune_is_letter(t->curr_rune) || rune_is_digit(t->curr_rune)) {
+		while (rune_is_letter_or_digit(t->curr_rune)) {
 			advance_to_next_rune(t);
 		}
 

+ 23 - 0
src/unicode.cpp

@@ -32,6 +32,29 @@ bool rune_is_digit(Rune r) {
 	return utf8proc_category(r) == UTF8PROC_CATEGORY_ND;
 }
 
+bool rune_is_letter_or_digit(Rune r) { // Returns true when r is '_', a letter, or a decimal digit; false otherwise.
+	if (r < 0x80) { // Fast path: values below 0x80 are decided here without calling utf8proc_category.
+		if (r == '_') { // Underscore is accepted alongside letters and digits.
+			return true;
+		}
+		if (((cast(u32)r | 0x20) - 0x61) < 26) { // OR-ing 0x20 folds 'A'..'Z' onto 'a'..'z'; the unsigned subtract of 0x61 ('a') wraps anything below 'a' to a huge value, so one compare tests the whole range.
+			return true;
+		}
+		return (cast(u32)r - '0') < 10; // Same unsigned-wraparound trick for '0'..'9'.
+	}
+	switch (utf8proc_category(r)) { // Remaining values are classified by their utf8proc general category.
+	case UTF8PROC_CATEGORY_LU: // Letter categories (LU, LL, LT, LM, LO) all fall through to the same result.
+	case UTF8PROC_CATEGORY_LL:
+	case UTF8PROC_CATEGORY_LT:
+	case UTF8PROC_CATEGORY_LM:
+	case UTF8PROC_CATEGORY_LO:
+		return true;
+	case UTF8PROC_CATEGORY_ND: // ND is the same category rune_is_digit tests for.
+		return true;
+	}
+	return false; // Any other category is neither letter nor digit.
+}
+
 bool rune_is_whitespace(Rune r) {
 	switch (r) {
 	case ' ':