|
@@ -9,11 +9,12 @@ MAX_CAPTURES :: 32
|
|
|
|
|
|
// Capture is one capture-group record; Match_State keeps a fixed array of
// MAX_CAPTURES of these.
Capture :: struct {
|
|
|
// NOTE(review): presumably the position in the source where this capture
// begins - confirm against start_capture.
init: int,
|
|
|
// length of the capture as read back by match_capture.
// NOTE(review): may also hold a sentinel such as CAP_UNFINISHED while the
// capture is still open - confirm.
- len: int,
|
|
|
+ len: int,
|
|
|
}
|
|
|
|
|
|
// Match is the byte span of one capture as reported to callers; index 0 of a
// captures array is the zeroth (whole) match used by gfind.
Match :: struct {
|
|
|
// NOTE(review): presumably the byte offset where the span begins - confirm
// against push_onecapture.
- byte_start, byte_end: int,
|
|
|
+ byte_start: int,
|
|
|
// byte offset gfind slices the haystack from (`haystack[captures[0].byte_end:]`)
// to continue after the previous match.
+ byte_end: int,
|
|
|
}
|
|
|
|
|
|
Error :: enum {
|
|
@@ -33,12 +34,13 @@ CAP_UNFINISHED :: -1
|
|
|
INVALID :: -1
|
|
|
|
|
|
// Match_State is the working state passed by pointer to every matching
// procedure in this file.
Match_State :: struct {
|
|
|
// text being searched; single_match compares its position against len(src)
- src: string,
|
|
|
+ src: string,
|
|
|
// pattern text; class_end decodes runes from it via utf8_advance
pattern: string,
|
|
|
// capture level counter; capture_to_close starts from `level - 1` and
// push_onecapture compares a capture index against it
- level: int,
|
|
|
+ level: int,
|
|
|
// fixed-size storage for capture records, indexed by capture number
capture: [MAX_CAPTURES]Capture,
|
|
|
}
|
|
|
|
|
|
+@(require_results)
|
|
|
match_class :: proc(c: rune, cl: rune) -> (res: bool) {
|
|
|
switch unicode.to_lower(cl) {
|
|
|
case 'a': res = is_alpha(c)
|
|
@@ -65,19 +67,23 @@ is_punct :: unicode.is_punct
|
|
|
is_space :: unicode.is_space
|
|
|
is_cntrl :: unicode.is_control
|
|
|
|
|
|
+@(require_results)
|
|
|
// is_alnum reports whether `c` is alphanumeric, i.e. accepted by either
// unicode.is_alpha or unicode.is_digit.
is_alnum :: proc(c: rune) -> bool {
	if unicode.is_alpha(c) {
		return true
	}
	return unicode.is_digit(c)
}
|
|
|
|
|
|
+@(require_results)
|
|
|
// is_graph reports whether `c` is a graphic rune: one that is neither a
// control character nor whitespace.
//
// The previous body was a copy of is_xdigit, so the graphic class only
// accepted hexadecimal digits and rejected runes such as 'g' or '!'.
is_graph :: proc(c: rune) -> bool {
	// control characters and whitespace are the only runes excluded
	return !unicode.is_control(c) && !unicode.is_space(c)
}
|
|
|
|
|
|
+@(require_results)
|
|
|
// is_xdigit reports whether `c` is a hexadecimal digit: a letter in a-f or
// A-F, or any rune accepted by unicode.is_digit.
is_xdigit :: proc(c: rune) -> bool {
	switch c {
	case 'a'..='f', 'A'..='F':
		return true
	}
	// everything else is decided by the unicode digit test
	return unicode.is_digit(c)
}
|
|
|
|
|
|
// find the first utf8 character and its size, return an error if the character is an error
|
|
|
+@(require_results)
|
|
|
utf8_peek :: proc(bytes: string) -> (c: rune, size: int, err: Error) {
|
|
|
c, size = utf8.decode_rune_in_string(bytes)
|
|
|
|
|
@@ -90,6 +96,7 @@ utf8_peek :: proc(bytes: string) -> (c: rune, size: int, err: Error) {
|
|
|
|
|
|
// find the first utf8 character and its size and advance the index
|
|
|
// return an error if the character is an error
|
|
|
+@(require_results)
|
|
|
utf8_advance :: proc(bytes: string, index: ^int) -> (c: rune, err: Error) {
|
|
|
size: int
|
|
|
c, size = utf8.decode_rune_in_string(bytes[index^:])
|
|
@@ -103,10 +110,12 @@ utf8_advance :: proc(bytes: string, index: ^int) -> (c: rune, err: Error) {
|
|
|
}
|
|
|
|
|
|
// continuation byte?
|
|
|
+@(require_results)
|
|
|
// is_cont reports whether `b` is a UTF-8 continuation byte, i.e. its two
// most significant bits are `10`.
is_cont :: proc(b: byte) -> bool {
	top_bits := b >> 6
	return top_bits == 0b10
}
|
|
|
|
|
|
+@(require_results)
|
|
|
utf8_prev :: proc(bytes: string, a, b: int) -> int {
|
|
|
b := b
|
|
|
|
|
@@ -117,6 +126,7 @@ utf8_prev :: proc(bytes: string, a, b: int) -> int {
|
|
|
return a < b ? b - 1 : a
|
|
|
}
|
|
|
|
|
|
+@(require_results)
|
|
|
utf8_next :: proc(bytes: string, a: int) -> int {
|
|
|
a := a
|
|
|
b := len(bytes)
|
|
@@ -128,6 +138,7 @@ utf8_next :: proc(bytes: string, a: int) -> int {
|
|
|
return a < b ? a + 1 : b
|
|
|
}
|
|
|
|
|
|
+@(require_results)
|
|
|
check_capture :: proc(ms: ^Match_State, l: rune) -> (int, Error) {
|
|
|
l := int(l - '1')
|
|
|
|
|
@@ -138,6 +149,7 @@ check_capture :: proc(ms: ^Match_State, l: rune) -> (int, Error) {
|
|
|
return l, .OK
|
|
|
}
|
|
|
|
|
|
+@(require_results)
|
|
|
capture_to_close :: proc(ms: ^Match_State) -> (int, Error) {
|
|
|
level := ms.level - 1
|
|
|
|
|
@@ -152,6 +164,7 @@ capture_to_close :: proc(ms: ^Match_State) -> (int, Error) {
|
|
|
return 0, .Invalid_Pattern_Capture
|
|
|
}
|
|
|
|
|
|
+@(require_results)
|
|
|
class_end :: proc(ms: ^Match_State, p: int) -> (step: int, err: Error) {
|
|
|
step = p
|
|
|
ch := utf8_advance(ms.pattern, &step) or_return
|
|
@@ -163,7 +176,7 @@ class_end :: proc(ms: ^Match_State, p: int) -> (step: int, err: Error) {
|
|
|
return
|
|
|
}
|
|
|
|
|
|
- utf8_advance(ms.pattern, &step) or_return
|
|
|
+ _ = utf8_advance(ms.pattern, &step) or_return
|
|
|
|
|
|
case '[':
|
|
|
// fine with step by 1
|
|
@@ -198,6 +211,7 @@ class_end :: proc(ms: ^Match_State, p: int) -> (step: int, err: Error) {
|
|
|
return
|
|
|
}
|
|
|
|
|
|
+@(require_results)
|
|
|
match_bracket_class :: proc(ms: ^Match_State, c: rune, p, ec: int) -> (sig: bool, err: Error) {
|
|
|
sig = true
|
|
|
p := p
|
|
@@ -240,6 +254,7 @@ match_bracket_class :: proc(ms: ^Match_State, c: rune, p, ec: int) -> (sig: bool
|
|
|
return
|
|
|
}
|
|
|
|
|
|
+@(require_results)
|
|
|
single_match :: proc(ms: ^Match_State, s, p, ep: int) -> (matched: bool, schar_size: int, err: Error) {
|
|
|
if s >= len(ms.src) {
|
|
|
return
|
|
@@ -261,6 +276,7 @@ single_match :: proc(ms: ^Match_State, s, p, ep: int) -> (matched: bool, schar_s
|
|
|
return
|
|
|
}
|
|
|
|
|
|
+@(require_results)
|
|
|
match_balance :: proc(ms: ^Match_State, s, p: int) -> (unused: int, err: Error) {
|
|
|
if p >= len(ms.pattern) - 1 {
|
|
|
return INVALID, .Invalid_Pattern_Capture
|
|
@@ -300,6 +316,7 @@ match_balance :: proc(ms: ^Match_State, s, p: int) -> (unused: int, err: Error)
|
|
|
return INVALID, .OK
|
|
|
}
|
|
|
|
|
|
+@(require_results)
|
|
|
max_expand :: proc(ms: ^Match_State, s, p, ep: int) -> (res: int, err: Error) {
|
|
|
m := s
|
|
|
|
|
@@ -331,6 +348,7 @@ max_expand :: proc(ms: ^Match_State, s, p, ep: int) -> (res: int, err: Error) {
|
|
|
return INVALID, .OK
|
|
|
}
|
|
|
|
|
|
+@(require_results)
|
|
|
min_expand :: proc(ms: ^Match_State, s, p, ep: int) -> (res: int, err: Error) {
|
|
|
s := s
|
|
|
|
|
@@ -352,6 +370,7 @@ min_expand :: proc(ms: ^Match_State, s, p, ep: int) -> (res: int, err: Error) {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+@(require_results)
|
|
|
start_capture :: proc(ms: ^Match_State, s, p, what: int) -> (res: int, err: Error) {
|
|
|
level := ms.level
|
|
|
|
|
@@ -366,6 +385,7 @@ start_capture :: proc(ms: ^Match_State, s, p, what: int) -> (res: int, err: Erro
|
|
|
return
|
|
|
}
|
|
|
|
|
|
+@(require_results)
|
|
|
end_capture :: proc(ms: ^Match_State, s, p: int) -> (res: int, err: Error) {
|
|
|
l := capture_to_close(ms) or_return
|
|
|
|
|
@@ -379,6 +399,7 @@ end_capture :: proc(ms: ^Match_State, s, p: int) -> (res: int, err: Error) {
|
|
|
return
|
|
|
}
|
|
|
|
|
|
+@(require_results)
|
|
|
match_capture :: proc(ms: ^Match_State, s: int, char: rune) -> (res: int, err: Error) {
|
|
|
index := check_capture(ms, char) or_return
|
|
|
length := ms.capture[index].len
|
|
@@ -390,6 +411,7 @@ match_capture :: proc(ms: ^Match_State, s: int, char: rune) -> (res: int, err: E
|
|
|
return INVALID, .OK
|
|
|
}
|
|
|
|
|
|
+@(require_results)
|
|
|
match :: proc(ms: ^Match_State, s, p: int) -> (unused: int, err: Error) {
|
|
|
s := s
|
|
|
p := p
|
|
@@ -486,6 +508,7 @@ match :: proc(ms: ^Match_State, s, p: int) -> (unused: int, err: Error) {
|
|
|
return s, .OK
|
|
|
}
|
|
|
|
|
|
+@(require_results)
|
|
|
match_default :: proc(ms: ^Match_State, s, p: int) -> (unused: int, err: Error) {
|
|
|
s := s
|
|
|
ep := class_end(ms, p) or_return
|
|
@@ -521,6 +544,7 @@ match_default :: proc(ms: ^Match_State, s, p: int) -> (unused: int, err: Error)
|
|
|
return s, .OK
|
|
|
}
|
|
|
|
|
|
+@(require_results)
|
|
|
push_onecapture :: proc(ms: ^Match_State, i: int, s: int, e: int, matches: []Match) -> (err: Error) {
|
|
|
if i >= ms.level {
|
|
|
if i == 0 {
|
|
@@ -542,6 +566,7 @@ push_onecapture :: proc(ms: ^Match_State, i: int, s: int, e: int, matches: []M
|
|
|
return
|
|
|
}
|
|
|
|
|
|
+@(require_results)
|
|
|
push_captures :: proc(
|
|
|
ms: ^Match_State,
|
|
|
s: int,
|
|
@@ -559,6 +584,7 @@ push_captures :: proc(
|
|
|
|
|
|
// SPECIALS := "^$*+?.([%-"
|
|
|
// all special characters inside a small ascii array
|
|
|
+@(rodata)
|
|
|
SPECIALS_TABLE := [256]bool {
|
|
|
'^' = true,
|
|
|
'$' = true,
|
|
@@ -573,6 +599,7 @@ SPECIALS_TABLE := [256]bool {
|
|
|
}
|
|
|
|
|
|
// helper call to quick search for special characters
|
|
|
+@(require_results)
|
|
|
index_special :: proc(text: string) -> int {
|
|
|
for i in 0..<len(text) {
|
|
|
if SPECIALS_TABLE[text[i]] {
|
|
@@ -583,6 +610,7 @@ index_special :: proc(text: string) -> int {
|
|
|
return -1
|
|
|
}
|
|
|
|
|
|
+@(require_results)
|
|
|
lmem_find :: proc(s1, s2: string) -> int {
|
|
|
l1 := len(s1)
|
|
|
l2 := len(s2)
|
|
@@ -618,6 +646,7 @@ lmem_find :: proc(s1, s2: string) -> int {
|
|
|
|
|
|
// find a pattern within a haystack with an offset
|
|
|
// allow_memfind will speed up simple searches
|
|
|
+@(require_results)
|
|
|
find_aux :: proc(
|
|
|
haystack: string,
|
|
|
pattern: string,
|
|
@@ -684,6 +713,7 @@ find_aux :: proc(
|
|
|
// rest has to be used from captures
|
|
|
// assumes captures is zeroed on first iteration
|
|
|
// resets captures to zero on last iteration
|
|
|
+@(require_results)
|
|
|
gmatch :: proc(
|
|
|
haystack: ^string,
|
|
|
pattern: string,
|
|
@@ -707,6 +737,7 @@ gmatch :: proc(
|
|
|
}
|
|
|
|
|
|
// gsub with builder, replace patterns found with the replace content
|
|
|
+@(require_results)
|
|
|
gsub_builder :: proc(
|
|
|
builder: ^strings.Builder,
|
|
|
haystack: string,
|
|
@@ -746,6 +777,7 @@ gsub_builder :: proc(
|
|
|
}
|
|
|
|
|
|
// uses temp builder to build initial string - then allocates the result
|
|
|
+@(require_results)
|
|
|
gsub_allocator :: proc(
|
|
|
haystack: string,
|
|
|
pattern: string,
|
|
@@ -768,12 +800,7 @@ Gsub_Proc :: proc(
|
|
|
)
|
|
|
|
|
|
// call a procedure on every match in the haystack
|
|
|
-gsub_with :: proc(
|
|
|
- haystack: string,
|
|
|
- pattern: string,
|
|
|
- data: rawptr,
|
|
|
- call: Gsub_Proc,
|
|
|
-) {
|
|
|
+gsub_with :: proc(haystack, pattern: string, data: rawptr, call: Gsub_Proc) {
|
|
|
// find matches
|
|
|
captures: [MAX_CAPTURES]Match
|
|
|
haystack := haystack
|
|
@@ -800,11 +827,8 @@ gsub :: proc { gsub_builder, gsub_allocator }
|
|
|
// iterative find with zeroth capture only
|
|
|
// assumes captures is zeroed on first iteration
|
|
|
// resets captures to zero on last iteration
|
|
|
-gfind :: proc(
|
|
|
- haystack: ^string,
|
|
|
- pattern: string,
|
|
|
- captures: ^[MAX_CAPTURES]Match,
|
|
|
-) -> (res: string, ok: bool) {
|
|
|
+@(require_results)
|
|
|
+gfind :: proc(haystack: ^string, pattern: string, captures: ^[MAX_CAPTURES]Match) -> (res: string, ok: bool) {
|
|
|
haystack^ = haystack[captures[0].byte_end:]
|
|
|
if len(haystack) > 0 {
|
|
|
length, err := find_aux(haystack^, pattern, 0, true, captures)
|
|
@@ -822,10 +846,8 @@ gfind :: proc(
|
|
|
}
|
|
|
|
|
|
// rebuilds a pattern into a case insensitive pattern
|
|
|
-pattern_case_insensitive_builder :: proc(
|
|
|
- builder: ^strings.Builder,
|
|
|
- pattern: string,
|
|
|
-) -> (res: string) {
|
|
|
+@(require_results)
|
|
|
+pattern_case_insensitive_builder :: proc(builder: ^strings.Builder, pattern: string) -> string {
|
|
|
p := pattern
|
|
|
last_percent: bool
|
|
|
|
|
@@ -849,11 +871,8 @@ pattern_case_insensitive_builder :: proc(
|
|
|
return strings.to_string(builder^)
|
|
|
}
|
|
|
|
|
|
-pattern_case_insensitive_allocator :: proc(
|
|
|
- pattern: string,
|
|
|
- cap: int = 256,
|
|
|
- allocator := context.allocator,
|
|
|
-) -> (res: string) {
|
|
|
+@(require_results)
|
|
|
// pattern_case_insensitive_allocator rebuilds `pattern` into its case
// insensitive form and returns a copy allocated with `allocator`.
//
// Inputs:
// - pattern:   the pattern to rewrite
// - cap:       initial capacity of the scratch builder
// - allocator: allocator that owns the returned string
//
// The rewrite itself is done in a builder backed by context.temp_allocator.
// Previously that temporary string was returned directly, so `allocator` was
// ignored and the result aliased temp memory. The result is now cloned into
// `allocator`; an allocation failure yields an empty string.
pattern_case_insensitive_allocator :: proc(pattern: string, cap: int = 256, allocator := context.allocator) -> string {
	// scratch storage only; never handed to the caller
	builder := strings.builder_make(0, cap, context.temp_allocator)
	insensitive := pattern_case_insensitive_builder(&builder, pattern)

	// copy out of temp memory so the caller owns the result
	res, _ := strings.clone(insensitive, allocator)
	return res
}
|
|
@@ -877,6 +896,7 @@ Matcher :: struct {
|
|
|
}
|
|
|
|
|
|
// init using haystack & pattern and an optional byte offset
|
|
|
+@(require_results)
|
|
|
matcher_init :: proc(haystack, pattern: string, offset: int = 0) -> (res: Matcher) {
|
|
|
res.haystack = haystack
|
|
|
res.pattern = pattern
|
|
@@ -886,6 +906,7 @@ matcher_init :: proc(haystack, pattern: string, offset: int = 0) -> (res: Matche
|
|
|
}
|
|
|
|
|
|
// find the first match and return the byte start / end position in the string, true on success
|
|
|
+@(require_results)
|
|
|
matcher_find :: proc(matcher: ^Matcher) -> (start, end: int, ok: bool) #no_bounds_check {
|
|
|
matcher.captures_length, matcher.err = find_aux(
|
|
|
matcher.haystack,
|
|
@@ -902,6 +923,7 @@ matcher_find :: proc(matcher: ^Matcher) -> (start, end: int, ok: bool) #no_bound
|
|
|
}
|
|
|
|
|
|
// find the first match and return the matched word, true on success
|
|
|
+@(require_results)
|
|
|
matcher_match :: proc(matcher: ^Matcher) -> (word: string, ok: bool) #no_bounds_check {
|
|
|
matcher.captures_length, matcher.err = find_aux(
|
|
|
matcher.haystack,
|
|
@@ -917,6 +939,7 @@ matcher_match :: proc(matcher: ^Matcher) -> (word: string, ok: bool) #no_bounds_
|
|
|
}
|
|
|
|
|
|
// get the capture at the "correct" spot, as spot 0 is reserved for the first match
|
|
|
+@(require_results)
|
|
|
matcher_capture :: proc(matcher: ^Matcher, index: int, loc := #caller_location) -> string #no_bounds_check {
|
|
|
runtime.bounds_check_error_loc(loc, index + 1, MAX_CAPTURES - 1)
|
|
|
cap := matcher.captures[index + 1]
|
|
@@ -924,6 +947,7 @@ matcher_capture :: proc(matcher: ^Matcher, index: int, loc := #caller_location)
|
|
|
}
|
|
|
|
|
|
// get the raw match out of the captures, skipping spot 0
|
|
|
+@(require_results)
|
|
|
matcher_capture_raw :: proc(matcher: ^Matcher, index: int, loc := #caller_location) -> Match #no_bounds_check {
|
|
|
runtime.bounds_check_error_loc(loc, index + 1, MAX_CAPTURES - 1)
|
|
|
return matcher.captures[index + 1]
|
|
@@ -933,6 +957,7 @@ matcher_capture_raw :: proc(matcher: ^Matcher, index: int, loc := #caller_locati
|
|
|
matcher_gmatch :: matcher_match_iter
|
|
|
|
|
|
// iteratively match the haystack till it can't find any matches
|
|
|
+@(require_results)
|
|
|
matcher_match_iter :: proc(matcher: ^Matcher) -> (res: string, index: int, ok: bool) {
|
|
|
if len(matcher.iter) > 0 {
|
|
|
matcher.captures_length, matcher.err = find_aux(
|
|
@@ -962,6 +987,7 @@ matcher_match_iter :: proc(matcher: ^Matcher) -> (res: string, index: int, ok: b
|
|
|
}
|
|
|
|
|
|
// get a slice of all valid captures above the first match
|
|
|
+@(require_results)
|
|
|
// matcher_captures_slice returns the captures above the zeroth (whole) match,
// i.e. slots 1 up to `matcher.captures_length`.
//
// When no slot beyond the zeroth is recorded (captures_length < 1, e.g. its
// zero value) an empty slice is returned. Slicing `[1:0]` would otherwise
// fail the slice bounds check.
matcher_captures_slice :: proc(matcher: ^Matcher) -> []Match {
	count := matcher.captures_length
	if count < 1 {
		return nil
	}
	return matcher.captures[1:count]
}
|