Просмотр исходного кода

balanced string, frontier pattern, gsub_with and their tests added

skytrias 2 года назад
Родитель
Коммит
70bd220f34
2 изменённых файла: 139 добавлений и 76 удалений
  1. 92 64
      core/text/lua/strlib.odin
  2. 47 12
      tests/core/text/lua/test_core_text_lua.odin

+ 92 - 64
core/text/lua/strlib.odin

@@ -19,6 +19,7 @@ Error :: enum {
 	Invalid_Capture_Index,
 	Invalid_Pattern_Capture,
 	Unfinished_Capture,
+	Malformed_Pattern,
 }
 
 L_ESC :: '%'
@@ -143,20 +144,22 @@ classend :: proc(ms: ^MatchState, p: int) -> (int, Error) {
 				p += 1
 			}
 
-			// TODO double check
-			for {
+			for ms.pattern[p] != ']' {
+				// if p == len(ms.pattern) {
+				// 	return 0, .Malformed_Pattern
+				// }
+
 				ch := ms.pattern[p]
+				p += 1
 
-				if ch == L_ESC && p <= len(ms.pattern) {
+				if p < len(ms.pattern) && ch == L_ESC {
 					// skip escapes like '%'
 					p += 1
 				}
 
-				if ms.pattern[p] == ']' {
-					break
-				}
-
-				p += 1
+				// if ms.pattern[p] == ']' {
+				// 	break
+				// }
 			}
 
 			return p + 1, .OK
@@ -183,13 +186,14 @@ matchbracketclass :: proc(ms: ^MatchState, c: u8, p, ec: int) -> bool {
 	for p < ec {
 		ch := ms.pattern[p]
 
-		if ms.pattern[p] == L_ESC {
+		// e.g. %a
+		if ms.pattern[p] == L_ESC { 
 			p += 1
 
 			if match_class(c, ms.pattern[p]) {
 				return sig
 			}
-		} else if ms.pattern[p + 1] == '-' && p + 2 < len(ms.pattern) {
+		} else if p + 2 < len(ms.pattern) && ms.pattern[p + 1] == '-' {
 			// e.g. [a-z] check
 			if ms.pattern[p] <= c && c <= ms.pattern[p + 2] {
 				return sig
@@ -219,39 +223,40 @@ singlematch :: proc(ms: ^MatchState, s, p, ep: int) -> bool {
 	}
 }
 
-// matchbalance :: proc(ms: ^MatchState, s, p: int) -> (int, Error) {
-// 	s_begin := s
-// 	s := s + 1
-// 	cont := 0
+// Matches a balanced run for `%b<begin><end>`, where `p` indexes the `begin`
+// delimiter inside the pattern and `s` is the current position in the source.
+//
+// Returns the source index just past the closing delimiter on success.
+// Returns INVALID with .OK when the source at `s` does not start a balanced run
+// or the run is never closed. Returns .Invalid_Pattern_Capture when the pattern
+// ends before both delimiters are given.
+matchbalance :: proc(ms: ^MatchState, s, p: int) -> (int, Error) {
+	// both delimiters (pattern[p] and pattern[p + 1]) have to exist
+	if p >= len(ms.pattern) - 1 {
+		// NOTE(review): .Malformed_Pattern looks like the better fit here,
+		// kept as is so callers checking the error value are unaffected
+		return INVALID, .Invalid_Pattern_Capture
+	}
+
+	// no match when the source is exhausted or does not start with the
+	// opening delimiter; the bounds check guards the index into ms.src
+	if s >= len(ms.src) || ms.src[s] != ms.pattern[p] {
+		return INVALID, .OK
+	}
 
-// 	begin := ms.pattern[p]
-// 	end := ms.pattern[p + 1]
-// 	print("BALANCED between", rune(begin), "AND", rune(end))
+	// the opening delimiter at `s` is already counted, continue right after it
+	cont := 1
+	s := s + 1
+	begin := ms.pattern[p]
+	end := ms.pattern[p + 1]
 
-// 	for s < len(ms.src) {
-// 		ch := ms.src[s]
-// 		print("\t", rune(ch))
+	for s < len(ms.src) {
+		ch := ms.src[s]
 
-// 		if ch == end {
-// 			cont -= 1
-// 			print("END", cont)
+		// `end` is tested first, so identical delimiters (e.g. %b__) close
+		// on the next occurrence instead of nesting
+		if ch == end {
+			cont -= 1
 
-// 			if cont == 0 {
-// 				print("BALANCED RET", s + 1, len(ms.src), ms.src[s_begin:s + 1])
-// 				return s + 1
-// 			}
-// 		} else if ch == begin {
-// 			cont += 1
-// 			print("BEGIN", cont)
-// 		}
+			if cont == 0 {
+				return s + 1, .OK
+			}
+		} else if ch == begin {
+			cont += 1
+		}
 
-// 		s += 1
-// 	}
+		s += 1
+	}
 
-// 	print("OUT OF BALANCE", cont)
-// 	// out of balance
-// 	return 0, .
-// }
+	// ran out of source before the delimiters balanced
+	return INVALID, .OK
+}
 
 max_expand :: proc(ms: ^MatchState, s, p, ep: int) -> (res: int, err: Error) {
 	i := 0
@@ -263,7 +268,6 @@ max_expand :: proc(ms: ^MatchState, s, p, ep: int) -> (res: int, err: Error) {
 		result := match(ms, s + i, ep + 1) or_return
 
 		if result != INVALID {
-			// print("SET", result)
 			return result, .OK
 		}
 
@@ -368,35 +372,34 @@ match :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) {
 			switch ms.pattern[p + 1] {
 				// balanced string
 				case 'b': {
-					// res := matchbalance(ms, s, p + 2)
-
-					// if data, ok := res.?; ok {
-					// 	// s = data
-					// 	// eg after %b()
-					// 	// print("SUCCESS")
-					// 	return patt_match(ms, s, p + 4)
-					// }
+					s = matchbalance(ms, s, p + 2) or_return
 
+					if s != INVALID {
+						// eg after %b()
+						return match(ms, s, p + 4)
+					}
 				}
 
 				// frontier
 				case 'f': {
-					// p += 2
+					p += 2
 					
-					// if ms.pattern[p] != '[' {
-					// 	print("missing '[' after %f in pattern")
-					// 	return nil
-					// }
+					if ms.pattern[p] != '[' {
+						return INVALID, .Invalid_Pattern_Capture
+					}
 
-					// ep := classend(ms, p).?
-					// previous := 0 if s == 0 else s - 1
+					ep := classend(ms, p) or_return
+					previous := s == 0 ? '\x00' : ms.src[s - 1]
+					// allow last character to count too
+					current := s >= len(ms.src) ? '\x00' : ms.src[s]
 
-					// if !matchbracketclass(ms, ms.src[previous], p, ep - 1) && 
-					// 	matchbracketclass(ms, ms.src[s], p, ep) {
-					// 	return patt_match(ms, s, ep)
-					// }
+					// fmt.eprintln("TRY", rune(ms.src[s]), ep)
+					if !matchbracketclass(ms, previous, p, ep - 1) && 
+						matchbracketclass(ms, current, p, ep - 1) {
+						return match(ms, s, ep)
+					}
 
-					// return nil
+					s = INVALID
 				}
 
 				// capture group
@@ -416,7 +419,6 @@ match :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) {
 
 		case: {
 			return match_default(ms, s, p)
-			// print("PATT DEF", rune(ms.src[s]), rune(ms.pattern[p]))
 		}
 	}
 
@@ -426,11 +428,9 @@ match :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) {
 match_default :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) {
 	s := s
 	ep := classend(ms, p) or_return
-	// ch := s < len(ms.src) ? rune(ms.src[s]) : 0
 
 	if !singlematch(ms, s, p, ep) {
 		epc := ep < len(ms.pattern) ? ms.pattern[ep] : 0
-		// print("+++", rune(epc))
 
 		if epc == '*' || epc == '?' || epc == '-' {
 			return match(ms, s, ep + 1)
@@ -439,7 +439,6 @@ match_default :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) {
 		}
 	} else {
 		epc := ep < len(ms.pattern) ? ms.pattern[ep] : 0
-		// print("~~~", ch, rune(epc))
 
 		switch epc {
 			case '?': {
@@ -652,7 +651,7 @@ gmatch :: proc(
 	return
 }
 
-// gsub with builder
+// gsub with builder, replace patterns found with the replace content
 gsub_builder :: proc(
 	builder: ^strings.Builder,
 	haystack: string,
@@ -702,9 +701,38 @@ gsub_allocator :: proc(
 	return gsub_builder(&builder, haystack, pattern, replace)
 }
 
+// call a procedure on every match in the haystack
+//
+// `call` receives the caller supplied `data` pointer and the text of the
+// whole match (capture 0). Iteration stops when find_aux reports zero or
+// returns an error. Each search restarts on the remainder of the haystack
+// after the previous match.
+gsub_with :: proc(
+	haystack: string,
+	pattern: string,
+	data: rawptr,
+	call: proc(data: rawptr, word: string),
+) {
+	// find matches
+	captures: [MAXCAPTURES]Match
+	haystack := haystack
+
+	for {
+		length, err := find_aux(haystack, pattern, 0, false, &captures)
+
+		// done
+		if length == 0 || err != .OK {
+			break
+		}
+
+		cap := captures[0]
+
+		word := haystack[cap.start:cap.end]
+		call(data, word)
+
+		// advance string till end
+		next := cap.end
+
+		// a zero-width match would leave the haystack unchanged and loop
+		// forever, step one byte ahead or stop at the end of the haystack
+		if cap.end == cap.start {
+			if next >= len(haystack) {
+				break
+			}
+
+			next += 1
+		}
+
+		haystack = haystack[next:]
+	}
+}
+
 gsub :: proc { gsub_builder, gsub_allocator }
 
-// iterative find with first capture only
+// iterative find with zeroth capture only
 gfind :: proc(
 	haystack: ^string,
 	pattern: string,

+ 47 - 12
tests/core/text/lua/test_core_text_lua.odin

@@ -15,7 +15,7 @@ when ODIN_TEST {
 		TEST_count += 1
 		if !condition {
 			TEST_fail += 1
-			fmt.printf("[%v] %v\n", loc, message)
+			fmt.printf("%v %v\n", loc, message)
 			return
 		}
 	}
@@ -166,6 +166,12 @@ test_match :: proc(t: ^testing.T) {
 		{ " testing this", "^testing", "", false },
 		{ "testing this", "^%w+", "testing", true },
 		{ " testing this", "^%w+", "", false },
+
+		// balanced string %b
+		{ "testing (this) out", "%b()", "(this)", true },
+		{ "testing athisz out", "%baz", "athisz", true },
+		{ "testing _this_ out", "%b__", "_this_", true },
+		{ "testing _this_ out", "%b_", "", false },
 	}
 
 	captures: [lua.MAXCAPTURES]lua.Match
@@ -294,19 +300,47 @@ test_gsub :: proc(t: ^testing.T) {
 
 @test
 test_gfind :: proc(t: ^testing.T) {
-	{
-		haystack := "test1 123 test2 123 test3"
-		pattern := "%w+" 
-		captures: [lua.MAXCAPTURES]lua.Match
-		s := &haystack
-		output := [?]string { "test1", "123", "test2", "123", "test3" }
-		index: int
+	// iterate every alphanumeric word and compare it to the expected list
+	haystack := "test1 123 test2 123 test3"
+	pattern := "%w+" 
+	captures: [lua.MAXCAPTURES]lua.Match
+	s := &haystack
+	output := [?]string { "test1", "123", "test2", "123", "test3" }
+	index: int
 
-		for word in lua.gfind(s, pattern, &captures) {
-			expect(t, output[index] == word, fmt.tprintf("GFIND %d failed: %s != %s\n", index, output[index], word))
-			index += 1
-		}
+	for word in lua.gfind(s, pattern, &captures) {
+		// more matches than expected: report it instead of indexing out of bounds
+		if index >= len(output) {
+			expect(t, false, fmt.tprintf("GFIND unexpected extra match: %s\n", word))
+			break
+		}
+
+		expect(t, output[index] == word, fmt.tprintf("GFIND %d failed: %s != %s\n", index, output[index], word))
+		index += 1
+	}
+
+	// fewer matches than expected would otherwise pass silently
+	expect(t, index == len(output), fmt.tprintf("GFIND found %d words, expected %d\n", index, len(output)))
+}
+
+// checks the frontier pattern %f by collecting every fully upper case word
+@test
+test_frontier :: proc(t: ^testing.T) {
+	// state handed to the callback through the rawptr parameter
+	Temp :: struct {
+		t: ^testing.T,
+		index: int,
+		output: [3]string,
+	}
+	
+	call :: proc(data: rawptr, word: string) {
+		temp := cast(^Temp) data
+
+		// more matches than expected: report it instead of indexing out of bounds
+		if temp.index >= len(temp.output) {
+			expect(temp.t, false, fmt.tprintf("frontier unexpected extra match: %s\n", word))
+			temp.index += 1
+			return
+		}
+
+		expect(
+			temp.t, 
+			word == temp.output[temp.index], 
+			fmt.tprintf("frontier temp didnt match: %s != %s\n", word, temp.output[temp.index]),
+		)
+		temp.index += 1
 	}
+
+	temp := Temp {
+		t = t,
+		output = {
+			"THE",
+			"QUICK",
+			"JUMPS",
+		},
+	}
+
+	// https://lua-users.org/wiki/FrontierPattern example taken from here
+	lua.gsub_with("THE (QUICK) brOWN FOx JUMPS", "%f[%a]%u+%f[%A]", &temp, call)
+
+	// without this the test passes even when no match was found at all
+	expect(t, temp.index == len(temp.output), fmt.tprintf("frontier found %d words, expected %d\n", temp.index, len(temp.output)))
 }
 
 main :: proc() {
@@ -317,6 +351,7 @@ main :: proc() {
 	test_gmatch(&t)
 	test_gsub(&t)
 	test_gfind(&t)
+	test_frontier(&t)
 
 	fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
 	if TEST_fail > 0 {