Browse Source

case insensitive helper call

skytrias 2 years ago
parent
commit
eb5523d5d3
2 changed files with 81 additions and 27 deletions
  1. 53 3
      core/text/lua/strlib.odin
  2. 28 24
      tests/core/text/lua/test_core_text_lua.odin

+ 53 - 3
core/text/lua/strlib.odin

@@ -788,12 +788,23 @@ gsub_allocator :: proc(
 	return gsub_builder(&builder, haystack, pattern, replace)
 	return gsub_builder(&builder, haystack, pattern, replace)
 }
 }
 
 
+// callback signature invoked by gsub_with once for every match it finds
+Gsub_Proc :: proc(
+	// optional user data - passed through from the gsub_with caller as is
+	data: rawptr, 
+	// text of the whole match (capture 0 sliced out of the haystack)
+	word: string, 
+	// haystack at the time of the match - capture byte offsets index into it
+	haystack: string, 
+	// captures following the whole match - empty when there are none
+	captures: []Match,
+)
+
 // call a procedure on every match in the haystack
 // call a procedure on every match in the haystack
 gsub_with :: proc(
 gsub_with :: proc(
 	haystack: string,
 	haystack: string,
 	pattern: string,
 	pattern: string,
 	data: rawptr,
 	data: rawptr,
-	call: proc(data: rawptr, word: string),
+	call: Gsub_Proc,
 ) {
 ) {
 	// find matches
 	// find matches
 	captures: [MAXCAPTURES]Match
 	captures: [MAXCAPTURES]Match
@@ -810,7 +821,7 @@ gsub_with :: proc(
 		cap := captures[0]
 		cap := captures[0]
 
 
 		word := haystack[cap.byte_start:cap.byte_end]
 		word := haystack[cap.byte_start:cap.byte_end]
-		call(data, word)
+		call(data, word, haystack, captures[1:length])
 
 
 		// advance string till end
 		// advance string till end
 		haystack = haystack[cap.byte_end:]
 		haystack = haystack[cap.byte_end:]
@@ -837,4 +848,43 @@ gfind :: proc(
 	} 
 	} 
 
 
 	return
 	return
-}
+}
+
+// Rebuilds `pattern` into a case insensitive pattern by expanding every cased
+// letter into a two member set, e.g. "test" -> "[tT][eE][sS][tT]".
+//
+// The result is appended to `builder` and the builder's whole content is
+// returned, so pass an empty builder to receive only the converted pattern.
+//
+// The character directly following an unescaped `%` is copied untouched, so
+// classes such as `%a` or `%w` keep their meaning. An escaped percent (`%%`)
+// does not shield the letter that follows it.
+//
+// NOTE(review): letters inside an existing `[...]` set and the delimiters
+// after `%b` are expanded as well - confirm such patterns are not passed in.
+pattern_case_insensitive_builder :: proc(
+	builder: ^strings.Builder, 
+	pattern: string,
+) -> (res: string) {
+	p := pattern
+	// true only when the previous rune was an unescaped L_ESC
+	last_percent: bool
+
+	for len(p) > 0 {
+		char, size := utf8.decode_rune_in_string(p)
+		lower := unicode.to_lower(char)
+		upper := unicode.to_upper(char)
+
+		// letters without a distinct upper/lower form gain nothing from a set
+		if unicode.is_alpha(char) && lower != upper && !last_percent {
+			// write character class in manually
+			strings.write_byte(builder, '[')
+			strings.write_rune(builder, lower)
+			strings.write_rune(builder, upper)
+			strings.write_byte(builder, ']')
+		} else {
+			// copy the source bytes verbatim so invalid UTF-8 is not
+			// replaced by an encoded replacement rune
+			strings.write_string(builder, p[:size])
+		}
+
+		// the second `%` of `%%` is a literal percent, not the start of an escape
+		last_percent = char == L_ESC && !last_percent
+		p = p[size:]
+	}
+
+	return strings.to_string(builder^)
+}
+
+// Allocating variant of the case insensitive pattern rebuild.
+//
+// pattern:   pattern to convert
+// cap:       initial capacity handed to the internal string builder
+// allocator: backs the returned string - the caller owns the result and
+//            releases it through this allocator
+pattern_case_insensitive_allocator :: proc(
+	pattern: string, 
+	cap: int = 256,
+	allocator := context.allocator,
+) -> (res: string) {
+	// honour the caller supplied allocator instead of always using the temp allocator
+	builder := strings.builder_make(0, cap, allocator)
+	return pattern_case_insensitive_builder(&builder, pattern)
+}
+
+// overload group - picks the builder variant when a ^strings.Builder is passed first, the allocating variant otherwise
+pattern_case_insensitive :: proc { pattern_case_insensitive_builder, pattern_case_insensitive_allocator }

+ 28 - 24
tests/core/text/lua/test_core_text_lua.odin

@@ -261,14 +261,15 @@ test_captures :: proc(t: ^testing.T) {
 	}
 	}
 }
 }
 
 
+// compares a[index] against b and logs both values when the check fails
+gmatch_check :: proc(t: ^testing.T, index: int, a: []string, b: string) {
+	entry := a[index]
+
+	// nothing to report when the comparison holds
+	if !failed(t, entry == b) {
+		return
+	}
+
+	logf(t, "GMATCH %d failed!\n", index)
+	logf(t, "\t%s != %s\n", entry, b)
+}
+
 @test
 @test
 test_gmatch :: proc(t: ^testing.T) {
 test_gmatch :: proc(t: ^testing.T) {
-	gmatch_check :: proc(t: ^testing.T, index: int, a: []string, b: string) {
-		if failed(t, a[index] == b) {
-			logf(t, "GMATCH %d failed!\n", index)
-			logf(t, "\t%s != %s\n", a[index], b)
-		}
-	}
 
 
 	{
 	{
 		haystack := "testing this out 123"
 		haystack := "testing this out 123"
@@ -347,7 +348,7 @@ test_frontier :: proc(t: ^testing.T) {
 		output: [3]string,
 		output: [3]string,
 	}
 	}
 	
 	
-	call :: proc(data: rawptr, word: string) {
+	call :: proc(data: rawptr, word: string, haystack: string, captures: []lua.Match) {
 		temp := cast(^Temp) data
 		temp := cast(^Temp) data
 
 
 		if failed(temp.t, word == temp.output[temp.index]) {
 		if failed(temp.t, word == temp.output[temp.index]) {
@@ -373,28 +374,31 @@ test_frontier :: proc(t: ^testing.T) {
 
 
 @test
 @test
 test_utf8 :: proc(t: ^testing.T) {
 test_utf8 :: proc(t: ^testing.T) {
-	// {
-	// 	haystack := "恥ずべき恥フク恥ロ"
-	// 	s := &haystack
-	// 	captures: [lua.MAXCAPTURES]lua.Match
-
-	// 	for word in lua.gmatch(s, "恥", &captures) {
-	// 		fmt.eprintln(word)
-	// 	}
-	// }
-
 	{
 	{
-		haystack := "恥ずべき恥フク恥ロ"
+		haystack := "恥ず べき恥 フク恥ロ"
 		s := &haystack
 		s := &haystack
 		captures: [lua.MAXCAPTURES]lua.Match
 		captures: [lua.MAXCAPTURES]lua.Match
+		output := [?]string { "恥ず", "べき恥", "フク恥ロ" }
+		index: int
 
 
-		for word in lua.gmatch(s, "w+", &captures) {
-			fmt.eprintln(word)
+		for word in lua.gmatch(s, "%w+", &captures) {
+			gmatch_check(t, index, output[:], word)
+			index += 1
 		}
 		}
 	}
 	}
+}
 
 
-	// captures: [MAXCAPTURES]Match
-	// length, err := lua.find_aux("damn, pattern,)
+@test
+test_case_insensitive :: proc(t: ^testing.T) {
+	{
+		pattern := lua.pattern_case_insensitive("test", 256, context.temp_allocator)
+		goal := "[tT][eE][sS][tT]"
+		
+		if failed(t, pattern == goal) {
+			logf(t, "Case Insensitive Pattern doesn't match result\n")
+			logf(t, "\t%s != %s\n", pattern, goal)
+		}
+	}
 }
 }
 
 
 main :: proc() {
 main :: proc() {
@@ -410,8 +414,8 @@ main :: proc() {
 	test_gsub(&t)
 	test_gsub(&t)
 	test_gfind(&t)
 	test_gfind(&t)
 	test_frontier(&t)
 	test_frontier(&t)
-
-	// test_utf8(&t)
+	test_utf8(&t)
+	test_case_insensitive(&t)
 
 
 	fmt.wprintf(w, "%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
 	fmt.wprintf(w, "%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
 	if TEST_fail > 0 {
 	if TEST_fail > 0 {