|
@@ -8,6 +8,7 @@
|
|
// A Scanner may be customized to recognize only a subset of those literals and to recognize different identifiers and white space characters.
|
|
// A Scanner may be customized to recognize only a subset of those literals and to recognize different identifiers and white space characters.
|
|
package text_scanner
|
|
package text_scanner
|
|
|
|
|
|
|
|
+import "base:runtime"
|
|
import "core:fmt"
|
|
import "core:fmt"
|
|
import "core:strings"
|
|
import "core:strings"
|
|
import "core:unicode"
|
|
import "core:unicode"
|
|
@@ -23,10 +24,12 @@ Position :: struct {
|
|
}
|
|
}
|
|
|
|
|
|
// position_is_valid reports whether the position is valid
|
|
// position_is_valid reports whether the position is valid
|
|
|
|
+@(require_results)
|
|
position_is_valid :: proc(pos: Position) -> bool {
|
|
position_is_valid :: proc(pos: Position) -> bool {
|
|
return pos.line > 0
|
|
return pos.line > 0
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+@(require_results)
|
|
position_to_string :: proc(pos: Position, allocator := context.temp_allocator) -> string {
|
|
position_to_string :: proc(pos: Position, allocator := context.temp_allocator) -> string {
|
|
s := pos.filename
|
|
s := pos.filename
|
|
if s == "" {
|
|
if s == "" {
|
|
@@ -140,7 +143,7 @@ init :: proc(s: ^Scanner, src: string, filename := "") -> ^Scanner {
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
-@(private)
|
|
|
|
|
|
+@(private, require_results)
|
|
advance :: proc(s: ^Scanner) -> rune {
|
|
advance :: proc(s: ^Scanner) -> rune {
|
|
if s.src_pos >= len(s.src) {
|
|
if s.src_pos >= len(s.src) {
|
|
s.prev_char_len = 0
|
|
s.prev_char_len = 0
|
|
@@ -190,6 +193,7 @@ next :: proc(s: ^Scanner) -> rune {
|
|
// peek returns the next Unicode character in the source without advancing the scanner
|
|
// peek returns the next Unicode character in the source without advancing the scanner
|
|
// It returns EOF if the scanner's position is at least the last character of the source
|
|
// It returns EOF if the scanner's position is at least the last character of the source
|
|
// If n > 0, it calls next n times, returns the nth Unicode character, and then restores the Scanner's state
|
|
// If n > 0, it calls next n times, returns the nth Unicode character, and then restores the Scanner's state
|
|
|
|
+@(require_results)
|
|
peek :: proc(s: ^Scanner, n := 0) -> (ch: rune) {
|
|
peek :: proc(s: ^Scanner, n := 0) -> (ch: rune) {
|
|
if s.ch == -2 {
|
|
if s.ch == -2 {
|
|
s.ch = advance(s)
|
|
s.ch = advance(s)
|
|
@@ -211,6 +215,7 @@ peek :: proc(s: ^Scanner, n := 0) -> (ch: rune) {
|
|
// peek_token returns the next token in the source
|
|
// peek_token returns the next token in the source
|
|
// It returns EOF if the scanner's position is at least the last character of the source
|
|
// It returns EOF if the scanner's position is at least the last character of the source
|
|
// If n > 0, it calls next n times, returns the nth token, and then restores the Scanner's state
|
|
// If n > 0, it calls next n times, returns the nth token, and then restores the Scanner's state
|
|
|
|
+@(require_results)
|
|
peek_token :: proc(s: ^Scanner, n := 0) -> (tok: rune) {
|
|
peek_token :: proc(s: ^Scanner, n := 0) -> (tok: rune) {
|
|
assert(n >= 0)
|
|
assert(n >= 0)
|
|
prev_s := s^
|
|
prev_s := s^
|
|
@@ -249,7 +254,7 @@ errorf :: proc(s: ^Scanner, format: string, args: ..any) {
|
|
error(s, fmt.tprintf(format, ..args))
|
|
error(s, fmt.tprintf(format, ..args))
|
|
}
|
|
}
|
|
|
|
|
|
-@(private)
|
|
|
|
|
|
+@(private, require_results)
|
|
is_ident_rune :: proc(s: ^Scanner, ch: rune, i: int) -> bool {
|
|
is_ident_rune :: proc(s: ^Scanner, ch: rune, i: int) -> bool {
|
|
if s.is_ident_rune != nil {
|
|
if s.is_ident_rune != nil {
|
|
return s.is_ident_rune(ch, i)
|
|
return s.is_ident_rune(ch, i)
|
|
@@ -257,7 +262,7 @@ is_ident_rune :: proc(s: ^Scanner, ch: rune, i: int) -> bool {
|
|
return ch == '_' || unicode.is_letter(ch) || unicode.is_digit(ch) && i > 0
|
|
return ch == '_' || unicode.is_letter(ch) || unicode.is_digit(ch) && i > 0
|
|
}
|
|
}
|
|
|
|
|
|
-@(private)
|
|
|
|
|
|
+@(private, require_results)
|
|
scan_identifier :: proc(s: ^Scanner) -> rune {
|
|
scan_identifier :: proc(s: ^Scanner) -> rune {
|
|
ch := advance(s)
|
|
ch := advance(s)
|
|
for i := 1; is_ident_rune(s, ch, i); i += 1 {
|
|
for i := 1; is_ident_rune(s, ch, i); i += 1 {
|
|
@@ -266,13 +271,13 @@ scan_identifier :: proc(s: ^Scanner) -> rune {
|
|
return ch
|
|
return ch
|
|
}
|
|
}
|
|
|
|
|
|
-@(private) lower :: proc(ch: rune) -> rune { return ('a' - 'A') | ch }
|
|
|
|
-@(private) is_decimal :: proc(ch: rune) -> bool { return '0' <= ch && ch <= '9' }
|
|
|
|
-@(private) is_hex :: proc(ch: rune) -> bool { return '0' <= ch && ch <= '9' || 'a' <= lower(ch) && lower(ch) <= 'f' }
|
|
|
|
|
|
+@(private, require_results) lower :: proc(ch: rune) -> rune { return ('a' - 'A') | ch }
|
|
|
|
+@(private, require_results) is_decimal :: proc(ch: rune) -> bool { return '0' <= ch && ch <= '9' }
|
|
|
|
+@(private, require_results) is_hex :: proc(ch: rune) -> bool { return '0' <= ch && ch <= '9' || 'a' <= lower(ch) && lower(ch) <= 'f' }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-@(private)
|
|
|
|
|
|
+@(private, require_results)
|
|
scan_number :: proc(s: ^Scanner, ch: rune, seen_dot: bool) -> (rune, rune) {
|
|
scan_number :: proc(s: ^Scanner, ch: rune, seen_dot: bool) -> (rune, rune) {
|
|
lit_name :: proc(prefix: rune) -> string {
|
|
lit_name :: proc(prefix: rune) -> string {
|
|
switch prefix {
|
|
switch prefix {
|
|
@@ -417,7 +422,7 @@ scan_number :: proc(s: ^Scanner, ch: rune, seen_dot: bool) -> (rune, rune) {
|
|
return tok, ch
|
|
return tok, ch
|
|
}
|
|
}
|
|
|
|
|
|
-@(private)
|
|
|
|
|
|
+@(private, require_results)
|
|
scan_string :: proc(s: ^Scanner, quote: rune) -> (n: int) {
|
|
scan_string :: proc(s: ^Scanner, quote: rune) -> (n: int) {
|
|
digit_val :: proc(ch: rune) -> int {
|
|
digit_val :: proc(ch: rune) -> int {
|
|
switch v := lower(ch); v {
|
|
switch v := lower(ch); v {
|
|
@@ -484,7 +489,7 @@ scan_char :: proc(s: ^Scanner) {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
-@(private)
|
|
|
|
|
|
+@(private, require_results)
|
|
scan_comment :: proc(s: ^Scanner, ch: rune) -> rune {
|
|
scan_comment :: proc(s: ^Scanner, ch: rune) -> rune {
|
|
ch := ch
|
|
ch := ch
|
|
if ch == '/' { // line comment
|
|
if ch == '/' { // line comment
|
|
@@ -611,6 +616,7 @@ scan :: proc(s: ^Scanner) -> (tok: rune) {
|
|
|
|
|
|
// position returns the position of the character immediately after the character or token returned by the previous call to next or scan
|
|
// position returns the position of the character immediately after the character or token returned by the previous call to next or scan
|
|
// Use the Scanner's position field for the most recently scanned token position
|
|
// Use the Scanner's position field for the most recently scanned token position
|
|
|
|
+@(require_results)
|
|
position :: proc(s: ^Scanner) -> Position {
|
|
position :: proc(s: ^Scanner) -> Position {
|
|
pos: Position
|
|
pos: Position
|
|
pos.filename = s.pos.filename
|
|
pos.filename = s.pos.filename
|
|
@@ -630,6 +636,7 @@ position :: proc(s: ^Scanner) -> Position {
|
|
}
|
|
}
|
|
|
|
|
|
// token_text returns the string of the most recently scanned token
|
|
// token_text returns the string of the most recently scanned token
|
|
|
|
+@(require_results)
|
|
token_text :: proc(s: ^Scanner) -> string {
|
|
token_text :: proc(s: ^Scanner) -> string {
|
|
if s.tok_pos < 0 {
|
|
if s.tok_pos < 0 {
|
|
return ""
|
|
return ""
|
|
@@ -639,7 +646,8 @@ token_text :: proc(s: ^Scanner) -> string {
|
|
|
|
|
|
// token_string returns a printable string for a token or Unicode character
|
|
// token_string returns a printable string for a token or Unicode character
|
|
// The string is allocated with the allocator passed in; pass context.temp_allocator for a temporary string
|
|
// The string is allocated with the allocator passed in; pass context.temp_allocator for a temporary string
|
|
-token_string :: proc(tok: rune, allocator := context.temp_allocator) -> string {
|
|
|
|
|
|
+@(require_results)
|
|
|
|
+token_string :: proc(tok: rune, allocator: runtime.Allocator) -> string {
|
|
context.allocator = allocator
|
|
context.allocator = allocator
|
|
switch tok {
|
|
switch tok {
|
|
case EOF: return strings.clone("EOF")
|
|
case EOF: return strings.clone("EOF")
|