Przeglądaj źródła

Add docs to text/scanner

gingerBill 4 lat temu
rodzic
commit
510d1f2518
1 zmienionych plików z 51 dodań i 3 usunięć
  1. 51 3
      core/text/scanner/scanner.odin

+ 51 - 3
core/text/scanner/scanner.odin

@@ -1,3 +1,11 @@
+// package text/scanner provides a scanner and tokenizer for UTF-8-encoded text.
+// It takes a string providing the source, which then can be tokenized through
+// repeated calls to the scan procedure.
+// For compatibility with existing tooling and languages, the NUL character is not allowed.
+// If a UTF-8 encoded byte order mark (BOM) is the first character in the source, it will be discarded.
+//
+// By default, a Scanner skips white space and Odin comments and recognizes all literals defined by the Odin programming language specification.
+// A Scanner may be customized to recognize only a subset of those literals and to recognize different identifiers and white space characters.
 package text_scanner
 
 import "core:fmt"
@@ -5,6 +13,8 @@ import "core:strings"
 import "core:unicode"
 import "core:unicode/utf8"
 
+// Position represents a source position
+// A position is valid if line > 0
 Position :: struct {
 	filename: string, // filename, if present
 	offset:   int,    // byte offset, starting @ 0
@@ -12,6 +22,7 @@ Position :: struct {
 	column:   int,    // column number, starting @ 1 (character count per line)
 }
 
+// position_is_valid reports whether the position is valid
 position_is_valid :: proc(pos: Position) -> bool {
 	return pos.line > 0;
 }
@@ -43,22 +54,24 @@ Scan_Flag :: enum u32 {
 	Scan_Idents,
 	Scan_Ints,
 	Scan_C_Int_Prefixes,
-	Scan_Floats,
+	Scan_Floats, // Includes integers and hexadecimal floats
 	Scan_Chars,
 	Scan_Strings,
 	Scan_Raw_Strings,
 	Scan_Comments,
-	Skip_Comments,
+	Skip_Comments, // if set with .Scan_Comments, comments become white space
 }
 Scan_Flags :: bit_set[Scan_Flag; u32];
 
 Odin_Like_Tokens :: Scan_Flags{.Scan_Idents, .Scan_Ints, .Scan_Floats, .Scan_Chars, .Scan_Strings, .Scan_Raw_Strings, .Scan_Comments, .Skip_Comments};
 C_Like_Tokens    :: Scan_Flags{.Scan_Idents, .Scan_Ints, .Scan_C_Int_Prefixes, .Scan_Floats, .Scan_Chars, .Scan_Strings, .Scan_Raw_Strings, .Scan_Comments, .Skip_Comments};
 
+// Odin_Whitespace is the default value for the Scanner's whitespace field
 Odin_Whitespace :: 1<<'\t' | 1<<'\n' | 1<<'\r' | 1<<' ';
 C_Whitespace    :: 1<<'\t' | 1<<'\n' | 1<<'\r' | 1<<'\v' | 1<<'\f' | 1<<' ';
 
 
+// Scanner allows for the reading of Unicode characters and tokens from a string
 Scanner :: struct {
 	src: string,
 
@@ -75,20 +88,39 @@ Scanner :: struct {
 	prev_line_len: int,
 	prev_char_len: int,
 
+	// error is called for each error encountered
+	// If no error procedure is set, the error is reported to os.stderr
 	error: proc(s: ^Scanner, msg: string),
+
+	// error_count is incremented by one for each error encountered
 	error_count: int,
 
+	// flags controls which tokens are recognized
+	// e.g. to recognize integers, set the .Scan_Ints flag
+	// This field may be changed by the user at any time during scanning
 	flags: Scan_Flags,
+
+	// The whitespace field controls which characters are recognized as white space
+	// This field may be changed by the user at any time during scanning
 	whitespace: u64,
 
+	// is_ident_rune is a predicate controlling the characters accepted as the ith rune in an identifier
+	// The valid characters must not conflict with the set of white space characters
+	// If is_ident_rune is not set, regular Odin-like identifiers are accepted
+	// This field may be changed by the user at any time during scanning
 	is_ident_rune: proc(ch: rune, i: int) -> bool,
 
+	// Start position of most recently scanned token (set by scan(s))
+	// Calling init or next invalidates the position
 	pos: Position,
 }
 
+// init initializes a scanner with a new source and returns itself.
+// error_count is set to 0, flags is set to Odin_Like_Tokens, whitespace is set to Odin_Whitespace
 init :: proc(s: ^Scanner, src: string, filename := "") -> ^Scanner {
 	s^ = {};
 
+	s.error_count = 0;
 	s.src = src;
 	s.pos.filename = filename;
 
@@ -140,6 +172,8 @@ advance :: proc(s: ^Scanner) -> rune {
 	return ch;
 }
 
+// next reads and returns the next Unicode character. It returns EOF at the end of the source.
+// next does not update the Scanner's pos field. Use 'position(s)' to get the current position
 next :: proc(s: ^Scanner) -> rune {
 	s.tok_pos = -1;
 	s.pos.line = 0;
@@ -150,6 +184,9 @@ next :: proc(s: ^Scanner) -> rune {
 	return ch;
 }
 
+// peek returns the next Unicode character in the source without advancing the scanner
+// It returns EOF if the scanner's position is at least the last character of the source
+// If n > 0, it calls next n times, returns the nth Unicode character, and then restores the Scanner's state
 peek :: proc(s: ^Scanner, n := 0) -> (ch: rune) {
 	if s.ch == -2 {
 		s.ch = advance(s);
@@ -168,7 +205,9 @@ peek :: proc(s: ^Scanner, n := 0) -> (ch: rune) {
 	}
 	return ch;
 }
-
+// peek_token returns the next token in the source
+// It returns EOF if the scanner's position is at least the last character of the source
+// If n > 0, it calls next n times, returns the nth token, and then restores the Scanner's state
 peek_token :: proc(s: ^Scanner, n := 0) -> (tok: rune) {
 	assert(n >= 0);
 	prev_s := s^;
@@ -469,6 +508,10 @@ scan_comment :: proc(s: ^Scanner, ch: rune) -> rune {
 	return ch;
 }
 
+// scan reads the next token or Unicode character from source and returns it
+// It only recognizes tokens for which the respective flag is set
+// It returns EOF at the end of the source
+// It reports Scanner errors by calling s.error, if not nil; otherwise it will print the error message to os.stderr
 scan :: proc(s: ^Scanner) -> (tok: rune) {
 	ch := peek(s);
 	if ch == EOF {
@@ -563,6 +606,8 @@ scan :: proc(s: ^Scanner) -> (tok: rune) {
 	return tok;
 }
 
+// position returns the position of the character immediately after the character or token returned by the previous call to next or scan
+// Use the Scanner's pos field for the most recently scanned token position
 position :: proc(s: ^Scanner) -> Position {
 	pos: Position;
 	pos.filename = s.pos.filename;
@@ -581,6 +626,7 @@ position :: proc(s: ^Scanner) -> Position {
 	return pos;
 }
 
+// token_text returns the string of the most recently scanned token
 token_text :: proc(s: ^Scanner) -> string {
 	if s.tok_pos < 0 {
 		return "";
@@ -588,6 +634,8 @@ token_text :: proc(s: ^Scanner) -> string {
 	return string(s.src[s.tok_pos:s.tok_end]);
 }
 
+// token_string returns a printable string for a token or Unicode character
+// By default, it uses the context.temp_allocator to produce the string
 token_string :: proc(tok: rune, allocator := context.temp_allocator) -> string {
 	context.allocator = allocator;
 	switch tok {