浏览代码

add proper unicode walking

skytrias 2 年之前
父节点
当前提交
3f4bbbec29
共有 2 个文件被更改,包括 308 次插入161 次删除
  1. 206 119
      core/text/lua/strlib.odin
  2. 102 42
      tests/core/text/lua/test_core_text_lua.odin

+ 206 - 119
core/text/lua/strlib.odin

@@ -1,5 +1,7 @@
 package strlib
 package strlib
 
 
+import "core:unicode"
+import "core:unicode/utf8"
 import "core:strings"
 import "core:strings"
 
 
 MAXCAPTURES :: 32
 MAXCAPTURES :: 32
@@ -10,7 +12,7 @@ Capture :: struct {
 }
 }
 
 
 Match :: struct {
 Match :: struct {
-	start, end: int,
+	byte_start, byte_end: int,
 }
 }
 
 
 Error :: enum {
 Error :: enum {
@@ -20,6 +22,7 @@ Error :: enum {
 	Invalid_Pattern_Capture,
 	Invalid_Pattern_Capture,
 	Unfinished_Capture,
 	Unfinished_Capture,
 	Malformed_Pattern,
 	Malformed_Pattern,
+	Rune_Error,
 }
 }
 
 
 L_ESC :: '%'
 L_ESC :: '%'
@@ -34,8 +37,8 @@ MatchState :: struct {
 	capture: [MAXCAPTURES]Capture,
 	capture: [MAXCAPTURES]Capture,
 }
 }
 
 
-match_class :: proc(c: u8, cl: u8) -> (res: bool) {
-	switch tolower(cl) {
+match_class :: proc(c: rune, cl: rune) -> (res: bool) {
+	switch unicode.to_lower(cl) {
 		case 'a': res = isalpha(c)
 		case 'a': res = isalpha(c)
 		case 'c': res = iscntrl(c)
 		case 'c': res = iscntrl(c)
 		case 'd': res = isdigit(c)
 		case 'd': res = isdigit(c)
@@ -52,53 +55,92 @@ match_class :: proc(c: u8, cl: u8) -> (res: bool) {
 	return islower(cl) ? res : !res
 	return islower(cl) ? res : !res
 }
 }
 
 
-isalpha :: proc(c: u8) -> bool {
-	return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')
+isalpha :: proc(c: rune) -> bool {
+	return unicode.is_alpha(c)
 }
 }
 
 
-isdigit :: proc(c: u8) -> bool {
-	return '0' <= c && c <= '9'
+isdigit :: proc(c: rune) -> bool {
+	return unicode.is_digit(c)
 }
 }
 
 
-isalnum :: proc(c: u8) -> bool {
-	return isalpha(c) || isdigit(c)
+isalnum :: proc(c: rune) -> bool {
+	return unicode.is_alpha(c) || unicode.is_digit(c)
 }
 }
 
 
-iscntrl :: proc(c: u8) -> bool {
-	return c <= '\007' || (c >= '\010' && c <= '\017') || (c >= '\020' && c <= '\027') || (c >= '\030' && c <= '\037') || c == '\177'	
+iscntrl :: proc(c: rune) -> bool {
+	return unicode.is_control(c)
 }
 }
 
 
-islower :: proc(c: u8) -> bool {
-	return c >= 'a' && c <= 'z'
+islower :: proc(c: rune) -> bool {
+	return unicode.is_lower(c)
 }
 }
 
 
-isupper :: proc(c: u8) -> bool {
-	return c >= 'A' && c <= 'Z'
+isupper :: proc(c: rune) -> bool {
+	return unicode.is_upper(c)
 }
 }
 
 
-isgraph :: proc(c: u8) -> bool {
-	return isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')
+isgraph :: proc(c: rune) -> bool {
+	return unicode.is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')
 }
 }
 
 
-ispunct :: proc(c: u8) -> bool {
-	return (c >= '{' && c <= '~') || (c == '`') || (c >= '[' && c <= '_') || (c == '@') || (c >= ':' && c <= '?') || (c >= '(' && c <= '/') || (c >= '!' && c <= '\'')
+ispunct :: proc(c: rune) -> bool {
+	return unicode.is_punct(c)
 }
 }
 
 
-isxdigit :: proc(c: u8) -> bool {
-	return isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')
+isxdigit :: proc(c: rune) -> bool {
+	return unicode.is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')
 }
 }
 
 
-isspace :: proc(c: u8) -> bool {
-	return c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r' || c == ' '
+isspace :: proc(c: rune) -> bool {
+	return unicode.is_space(c)
 }
 }
 
 
-// ascii safe
-tolower :: proc(c: u8) -> u8 {
-	if c >= 65 && c <= 90 { // upper case
-		return c + 32
+// utf8_peek decodes the first rune of `bytes` without consuming it.
+//
+// Returns the decoded rune, its encoded size in bytes, and `.Rune_Error` when
+// `bytes` is empty or does not begin with a valid UTF-8 sequence.
+utf8_peek :: proc(bytes: string) -> (c: rune, size: int, err: Error) {
+	c, size = utf8.decode_rune_in_string(bytes)
+
+	// A failed decode yields RUNE_ERROR with a size of 0 (empty input) or 1
+	// (invalid byte). A well-formed U+FFFD present in the text decodes to the
+	// same rune with size 3 and must not be reported as an error.
+	if c == utf8.RUNE_ERROR && size <= 1 {
+		err = .Rune_Error
+	}
+
+	return
+}
+
+// utf8_advance decodes the rune starting at byte offset `index^` in `bytes`
+// and moves `index^` forward by the number of bytes consumed.
+//
+// Returns the decoded rune and `.Rune_Error` when the remaining input is empty
+// or does not begin with a valid UTF-8 sequence. `index^` is still advanced by
+// the reported size (0 or 1) in the error case.
+utf8_advance :: proc(bytes: string, index: ^int) -> (c: rune, err: Error) {
+	size: int
+	c, size = utf8.decode_rune_in_string(bytes[index^:])
+
+	// Only sizes 0 and 1 indicate a decode failure; a literal U+FFFD in the
+	// input decodes to RUNE_ERROR with size 3 and is a valid character.
+	if c == utf8.RUNE_ERROR && size <= 1 {
+		err = .Rune_Error
+	}
+
+	index^ += size
+	return
+}
+
+// is_cont reports whether `b` is a UTF-8 continuation byte, i.e. its top two
+// bits are `10` (values 0x80 through 0xBF).
+is_cont :: proc(b: byte) -> bool {
+	return b >= 0x80 && b <= 0xbf
+}
+
+// utf8_prev walks backwards from byte offset `b` over continuation bytes and
+// returns the offset of the first non-continuation byte found before `b`.
+// The result never goes below the lower bound `a`; `a` is returned when
+// `b <= a` or when only continuation bytes lie between `a` and `b`.
+utf8_prev :: proc(bytes: string, a, b: int) -> int {
+	pos := b
+
+	for pos > a {
+		// the byte just before `pos` begins a rune: that is the answer
+		if !is_cont(bytes[pos - 1]) {
+			return pos - 1
+		}
+
+		pos -= 1
+	}
+
+	return a
+}
+
+utf8_next :: proc(bytes: string, a: int) -> int {
+	a := a
+	b := len(bytes)
+
+	for a < b - 1 && is_cont(bytes[a + 1]) {
+		a += 1
 	}
 	}
 
 
-	return c
+	return a < b ? a + 1 : b
 }
 }
 
 
 check_capture :: proc(ms: ^MatchState, l: rune) -> (int, Error) {
 check_capture :: proc(ms: ^MatchState, l: rune) -> (int, Error) {
@@ -125,54 +167,52 @@ capture_to_close :: proc(ms: ^MatchState) -> (int, Error) {
 	return 0, .Invalid_Pattern_Capture
 	return 0, .Invalid_Pattern_Capture
 }
 }
 
 
-classend :: proc(ms: ^MatchState, p: int) -> (int, Error) {
-	ch := ms.pattern[p]
-	p := p + 1
+classend :: proc(ms: ^MatchState, p: int) -> (step: int, err: Error) {
+	step = p
+	ch := utf8_advance(ms.pattern, &step) or_return
 
 
 	switch ch {
 	switch ch {
 		case L_ESC: {
 		case L_ESC: {
-			// if  > 0 {
-			// 	fmt.eprintln("ERR classend: not enough pattern length")
-			// 	return nil
-			// }
+			if step == len(ms.pattern) {
+				err = .Malformed_Pattern
+				return
+			}
 
 
-			return p + 1, .OK
+			utf8_advance(ms.pattern, &step) or_return
 		}
 		}
 
 
 		case '[': {
 		case '[': {
-			if ms.pattern[p] == '^' {
-				p += 1
+			// fine with step by 1
+			if ms.pattern[step] == '^' {
+				step += 1
 			}
 			}
 
 
-			for ms.pattern[p] != ']' {
-				// if p == len(ms.pattern) {
-				// 	return 0, .Malformed_Pattern
-				// }
+			// run till end is reached
+			for ms.pattern[step] != ']' {
+				if step == len(ms.pattern) {
+					err = .Malformed_Pattern
+					return
+				}
 
 
-				ch := ms.pattern[p]
-				p += 1
+				// dont care about utf8 here
+				step += 1
 
 
-				if p < len(ms.pattern) && ch == L_ESC {
+				if step < len(ms.pattern) && ms.pattern[step] == L_ESC {
 					// skip escapes like '%'
 					// skip escapes like '%'
-					p += 1
+					step += 1
 				}
 				}
-
-				// if ms.pattern[p] == ']' {
-				// 	break
-				// }
 			}
 			}
 
 
-			return p + 1, .OK
-		}
-
-		case: {
-			return p, .OK
+			// advance last time
+			step += 1
 		}
 		}
 	}
 	}
+
+	return
 }
 }
 
 
-matchbracketclass :: proc(ms: ^MatchState, c: u8, p, ec: int) -> bool {
-	sig := true
+matchbracketclass :: proc(ms: ^MatchState, c: rune, p, ec: int) -> (sig: bool, err: Error) {
+	sig = true
 	p := p
 	p := p
 
 
 	if ms.pattern[p + 1] == '^' {
 	if ms.pattern[p + 1] == '^' {
@@ -180,98 +220,127 @@ matchbracketclass :: proc(ms: ^MatchState, c: u8, p, ec: int) -> bool {
 		sig = false
 		sig = false
 	}
 	}
 
 
-	p += 1
-
 	// while inside of class range
 	// while inside of class range
 	for p < ec {
 	for p < ec {
-		ch := ms.pattern[p]
+		char := utf8_advance(ms.pattern, &p) or_return
 
 
 		// e.g. %a
 		// e.g. %a
-		if ms.pattern[p] == L_ESC { 
-			p += 1
+		if char == L_ESC { 
+			next := utf8_advance(ms.pattern, &p) or_return
 
 
-			if match_class(c, ms.pattern[p]) {
-				return sig
-			}
-		} else if p + 2 < len(ms.pattern) && ms.pattern[p + 1] == '-' {
-			// e.g. [a-z] check
-			if ms.pattern[p] <= c && c <= ms.pattern[p + 2] {
-				return sig
+			if match_class(c, next) {
+				return
 			}
 			}
+		} else {
+			next, next_size := utf8_peek(ms.pattern[p:]) or_return
 
 
-			p += 2 
-		} else if ms.pattern[p] == c {
-			return sig
-		}
+			// TODO test case for [a-???] where ??? is missing
+			if next == '-' && p + next_size < len(ms.pattern) {
+				// advance 2 codepoints
+				p += next_size
+				last := utf8_advance(ms.pattern, &p) or_return
 
 
-		p += 1
+				if char <= c && c <= last {
+					return
+				}
+			} else if char == c {
+				return
+			}
+		}
 	}
 	}
 
 
-	return !sig
+	sig = !sig
+	return
 }
 }
 
 
-singlematch :: proc(ms: ^MatchState, s, p, ep: int) -> bool {
+singlematch :: proc(ms: ^MatchState, s, p, ep: int) -> (matched: bool, schar_size: int, err: Error) {
 	if s >= len(ms.src) {
 	if s >= len(ms.src) {
-		return false
+		return
 	}
 	}
 
 
-	switch ms.pattern[p] {
-		case '.': return true
-		case L_ESC: return match_class(ms.src[s], ms.pattern[p + 1])
-		case '[': return matchbracketclass(ms, ms.src[s], p, ep - 1)
-		case: return ms.src[s] == ms.pattern[p]
+	pchar, psize := utf8_peek(ms.pattern[p:]) or_return
+	schar, ssize := utf8_peek(ms.src[s:]) or_return
+	schar_size = ssize
+
+	switch pchar {
+		case '.': matched = true
+		case L_ESC: {
+			pchar_next, _ := utf8_peek(ms.pattern[p + psize:]) or_return
+			matched = match_class(schar, pchar_next)
+		}
+		case '[': {
+			matched = matchbracketclass(ms, schar, p, ep - 1) or_return
+		}
+		case: {
+			matched = schar == pchar
+		}
 	}
 	}
+
+	return
 }
 }
 
 
-matchbalance :: proc(ms: ^MatchState, s, p: int) -> (int, Error) {
+matchbalance :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) {
 	if p >= len(ms.pattern) - 1 {
 	if p >= len(ms.pattern) - 1 {
 		return INVALID, .Invalid_Pattern_Capture
 		return INVALID, .Invalid_Pattern_Capture
 	}
 	}
 
 
+	schar, ssize := utf8_peek(ms.src[s:]) or_return
+	pchar, psize := utf8_peek(ms.pattern[p:]) or_return
+
 	// skip until the src and pattern match
 	// skip until the src and pattern match
-	if ms.src[s] != ms.pattern[p] {
+	if schar != pchar {
 		return INVALID, .OK
 		return INVALID, .OK
 	}
 	}
 
 
 	s_begin := s
 	s_begin := s
 	cont := 1
 	cont := 1
-	s := s + 1
-	begin := ms.pattern[p]
-	end := ms.pattern[p + 1]
+	s := s + ssize
+	begin := pchar
+	end, _ := utf8_peek(ms.pattern[p + psize:]) or_return
 
 
 	for s < len(ms.src) {
 	for s < len(ms.src) {
-		ch := ms.src[s]
+		ch := utf8_advance(ms.src, &s) or_return
 
 
 		if ch == end {
 		if ch == end {
 			cont -= 1
 			cont -= 1
 
 
 			if cont == 0 {
 			if cont == 0 {
-				return s + 1, .OK
+				return s, .OK
 			}
 			}
 		} else if ch == begin {
 		} else if ch == begin {
 			cont += 1
 			cont += 1
 		}
 		}
-
-		s += 1
 	}
 	}
 
 
 	return INVALID, .OK
 	return INVALID, .OK
 }
 }
 
 
 max_expand :: proc(ms: ^MatchState, s, p, ep: int) -> (res: int, err: Error) {
 max_expand :: proc(ms: ^MatchState, s, p, ep: int) -> (res: int, err: Error) {
-	i := 0
-	for singlematch(ms, s + i, p, ep) {
-		i += 1
+	m := s
+
+	// count up matches
+	for {
+		matched, size := singlematch(ms, m, p, ep) or_return
+		
+		if !matched {
+			break
+		}
+
+		m += size
 	}
 	}
 
 
-	for i >= 0 {
-		result := match(ms, s + i, ep + 1) or_return
+	for s <= m {
+		result := match(ms, m, ep + 1) or_return
 
 
 		if result != INVALID {
 		if result != INVALID {
 			return result, .OK
 			return result, .OK
 		}
 		}
 
 
-		i -= 1
+		if s == m {
+			break
+		}
+
+		m = utf8_prev(ms.src, s, m)
 	}
 	}
 
 
 	return INVALID, .OK
 	return INVALID, .OK
@@ -285,10 +354,15 @@ min_expand :: proc(ms: ^MatchState, s, p, ep: int) -> (res: int, err: Error) {
 
 
 		if result != INVALID {
 		if result != INVALID {
 			return result, .OK
 			return result, .OK
-		} else if singlematch(ms, s, p, ep) {
-			s += 1
 		} else {
 		} else {
-			return INVALID, .OK
+			// TODO receive next step maybe?
+			matched, rune_size := singlematch(ms, s, p, ep) or_return
+
+			if matched {
+				s += rune_size
+			} else {
+				return INVALID, .OK
+			}
 		}
 		}
 	}
 	}
 }
 }
@@ -339,7 +413,9 @@ match :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) {
 		return s, .OK
 		return s, .OK
 	}
 	}
 
 
-	switch ms.pattern[p] {
+	// NOTE we can walk by ascii steps if we know the characters are ascii
+	char, _ := utf8_peek(ms.pattern[p:]) or_return
+	switch char {
 		case '(': {
 		case '(': {
 			if ms.pattern[p + 1] == ')' {
 			if ms.pattern[p + 1] == ')' {
 				s = start_capture(ms, s, p + 2, CAP_POSITION) or_return
 				s = start_capture(ms, s, p + 2, CAP_POSITION) or_return
@@ -389,13 +465,23 @@ match :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) {
 					}
 					}
 
 
 					ep := classend(ms, p) or_return
 					ep := classend(ms, p) or_return
-					previous := s == 0 ? '\x00' : ms.src[s - 1]
-					// allow last character to count too
-					current := s >= len(ms.src) ? '\x00' : ms.src[s]
+					previous, current: rune
+
+					// get previous
+					if s != 0 {
+						temp := utf8_prev(ms.src, 0, s)
+						previous, _ = utf8_peek(ms.src[temp:]) or_return
+					}
+
+					// get current
+					if s != len(ms.src) {
+						current, _ = utf8_peek(ms.src[s:]) or_return
+					}
+
+					m1 := matchbracketclass(ms, previous, p, ep - 1) or_return
+					m2 := matchbracketclass(ms, current, p, ep - 1) or_return
 
 
-					// fmt.eprintln("TRY", rune(ms.src[s]), ep)
-					if !matchbracketclass(ms, previous, p, ep - 1) && 
-						matchbracketclass(ms, current, p, ep - 1) {
+					if !m1 && m2 {
 						return match(ms, s, ep)
 						return match(ms, s, ep)
 					}
 					}
 
 
@@ -428,8 +514,9 @@ match :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) {
 match_default :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) {
 match_default :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) {
 	s := s
 	s := s
 	ep := classend(ms, p) or_return
 	ep := classend(ms, p) or_return
+	single_matched, ssize := singlematch(ms, s, p, ep) or_return
 
 
-	if !singlematch(ms, s, p, ep) {
+	if !single_matched {
 		epc := ep < len(ms.pattern) ? ms.pattern[ep] : 0
 		epc := ep < len(ms.pattern) ? ms.pattern[ep] : 0
 
 
 		if epc == '*' || epc == '?' || epc == '-' {
 		if epc == '*' || epc == '?' || epc == '-' {
@@ -442,7 +529,7 @@ match_default :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) {
 
 
 		switch epc {
 		switch epc {
 			case '?': {
 			case '?': {
-				result := match(ms, s + 1, ep + 1) or_return
+				result := match(ms, s + ssize, ep + 1) or_return
 				
 				
 				if result != INVALID {
 				if result != INVALID {
 					s = result
 					s = result
@@ -452,7 +539,7 @@ match_default :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) {
 			}
 			}
 
 
 			case '+': {
 			case '+': {
-				s = max_expand(ms, s + 1, p, ep) or_return
+				s = max_expand(ms, s + ssize, p, ep) or_return
 			}
 			}
 
 
 			case '*': {
 			case '*': {
@@ -464,7 +551,7 @@ match_default :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) {
 			}
 			}
 
 
 			case: {
 			case: {
-				return match(ms, s + 1, ep)
+				return match(ms, s + ssize, ep)
 			}
 			}
 		}
 		}
 	}
 	}
@@ -643,8 +730,8 @@ gmatch :: proc(
 			ok = true
 			ok = true
 			first := length > 1 ? 1 : 0
 			first := length > 1 ? 1 : 0
 			cap := captures[first]
 			cap := captures[first]
-			res = haystack[cap.start:cap.end]
-			haystack^ = haystack[cap.end:]
+			res = haystack[cap.byte_start:cap.byte_end]
+			haystack^ = haystack[cap.byte_end:]
 		}
 		}
 	} 
 	} 
 
 
@@ -677,13 +764,13 @@ gsub_builder :: proc(
 		cap := captures[0]
 		cap := captures[0]
 
 
 		// write front till capture
 		// write front till capture
-		strings.write_string(builder, haystack[:cap.start])
+		strings.write_string(builder, haystack[:cap.byte_start])
 
 
 		// write replacements
 		// write replacements
 		strings.write_string(builder, replace)
 		strings.write_string(builder, replace)
 
 
 		// advance string till end
 		// advance string till end
-		haystack = haystack[cap.end:]
+		haystack = haystack[cap.byte_end:]
 	}
 	}
 
 
 	strings.write_string(builder, haystack[:])
 	strings.write_string(builder, haystack[:])
@@ -722,11 +809,11 @@ gsub_with :: proc(
 
 
 		cap := captures[0]
 		cap := captures[0]
 
 
-		word := haystack[cap.start:cap.end]
+		word := haystack[cap.byte_start:cap.byte_end]
 		call(data, word)
 		call(data, word)
 
 
 		// advance string till end
 		// advance string till end
-		haystack = haystack[cap.end:]
+		haystack = haystack[cap.byte_end:]
 	}
 	}
 }
 }
 
 
@@ -744,8 +831,8 @@ gfind :: proc(
 		if length != 0 && err == .OK {
 		if length != 0 && err == .OK {
 			ok = true
 			ok = true
 			cap := captures[0]
 			cap := captures[0]
-			res = haystack[cap.start:cap.end]
-			haystack^ = haystack[cap.end:]
+			res = haystack[cap.byte_start:cap.byte_end]
+			haystack^ = haystack[cap.byte_end:]
 		}
 		}
 	} 
 	} 
 
 

+ 102 - 42
tests/core/text/lua/test_core_text_lua.odin

@@ -4,21 +4,28 @@ import lua "core:text/lua"
 import "core:testing"
 import "core:testing"
 import "core:fmt"
 import "core:fmt"
 import "core:os"
 import "core:os"
+import "core:io"
 
 
 TEST_count: int
 TEST_count: int
 TEST_fail: int
 TEST_fail: int
 
 
-when ODIN_TEST {
-	expect  :: testing.expect
-} else {
-	expect  :: proc(t: ^testing.T, condition: bool, message: string, loc := #caller_location) {
-		TEST_count += 1
-		if !condition {
-			TEST_fail += 1
-			fmt.printf("%v %v\n", loc, message)
-			return
-		}
+// inline expect with custom props
+failed :: proc(t: ^testing.T, ok: bool, loc := #caller_location) -> bool {
+	TEST_count += 1
+	
+	if !ok {
+		fmt.wprintf(t.w, "%v: ", loc)
+		t.error_count += 1	
+		TEST_fail += 1
 	}
 	}
+
+	return !ok
+}
+
+expect :: testing.expect
+
+logf :: proc(t: ^testing.T, format: string, args: ..any) {
+	fmt.wprintf(t.w, format, ..args)
 }
 }
 
 
 // find correct byte offsets 
 // find correct byte offsets 
@@ -55,20 +62,20 @@ test_find :: proc(t: ^testing.T) {
 	}
 	}
 
 
 	captures: [lua.MAXCAPTURES]lua.Match
 	captures: [lua.MAXCAPTURES]lua.Match
-	for entry in ENTRIES {
+	for entry, i in ENTRIES {
 		captures[0] = {}
 		captures[0] = {}
 		length, err := lua.find_aux(entry.s, entry.p, entry.offset, true, &captures)
 		length, err := lua.find_aux(entry.s, entry.p, entry.offset, true, &captures)
 		cap := captures[0]
 		cap := captures[0]
 		ok := length > 0 && err == .OK
 		ok := length > 0 && err == .OK
-		success := entry.match.ok == ok && entry.match.start == cap.start && entry.match.end == cap.end 
-
-		if !success {
-			fmt.eprintf("Find failed for: haystack = %s\tpattern = %s\n", entry.s, entry.p)
+		success := entry.match.ok == ok && entry.match.start == cap.byte_start && entry.match.end == cap.byte_end 
+
+		if failed(t, success) {
+			logf(t, "Find %d failed!\n", i)
+			logf(t, "\tHAYSTACK %s\tPATTERN %s\n", entry.s, entry.p)
+			logf(t, "\tSTART: %d == %d?\n", entry.match.start, cap.byte_start)
+			logf(t, "\tEND: %d == %d?\n", entry.match.end, cap.byte_end)
+			logf(t, "\tErr: %v\tLength %d\n", err, length)			
 		}
 		}
-
-		expect(t, entry.match.start == cap.start, "match start didnt match")
-		expect(t, entry.match.end == cap.end, "match end didnt match",)
-		expect(t, entry.match.ok == ok, "find result didnt match")
 	}
 	}
 }
 }
 
 
@@ -179,16 +186,15 @@ test_match :: proc(t: ^testing.T) {
 		captures[0] = {}
 		captures[0] = {}
 		length, err := lua.find_aux(entry.s, entry.p, 0, false, &captures)
 		length, err := lua.find_aux(entry.s, entry.p, 0, false, &captures)
 		ok := length > 0 && err == .OK
 		ok := length > 0 && err == .OK
-		result := entry.s[captures[0].start:captures[0].end]
+		result := entry.s[captures[0].byte_start:captures[0].byte_end]
 		success := entry.ok == ok && result == entry.result
 		success := entry.ok == ok && result == entry.result
 
 
-		if !success {
-			fmt.eprintf("Match failed for: haystack = %s\tpattern = %s\n", entry.s, entry.p)
-			fmt.eprintf("Match invalid result! |WANTED:| %s |GOT:| %s\n", entry.result, result)
+		if failed(t, success) {
+			logf(t, "Match %d failed!\n", i)
+			logf(t, "\tHAYSTACK %s\tPATTERN %s\n", entry.s, entry.p)
+			logf(t, "\tResults: WANTED %s\tGOT %s\n", entry.result, result)
+			logf(t, "\tErr: %v\tLength %d\n", err, length)
 		}
 		}
-
-		expect(t, entry.ok == ok, "find result didnt match")
-		expect(t, result == entry.result, "entry result didnt match")
 	}
 	}
 }
 }
 
 
@@ -202,12 +208,17 @@ test_captures :: proc(t: ^testing.T) {
 	// match all captures
 	// match all captures
 	compare_captures :: proc(t: ^testing.T, test: ^Temp, haystack: string, comp: []string, loc := #caller_location) {
 	compare_captures :: proc(t: ^testing.T, test: ^Temp, haystack: string, comp: []string, loc := #caller_location) {
 		length, err := lua.find_aux(haystack, test.pattern, 0, false, &test.captures)
 		length, err := lua.find_aux(haystack, test.pattern, 0, false, &test.captures)
-		expect(t, len(comp) == length, "didnt match input comparison strings", loc)
+		if failed(t, len(comp) == length) {
+			logf(t, "Captures Compare Failed -> Lengths %d != %d\n", len(comp), length)
+		}
 
 
 		for i in 0..<length {
 		for i in 0..<length {
 			cap := test.captures[i]
 			cap := test.captures[i]
-			text := haystack[cap.start:cap.end]
-			expect(t, comp[i] == text, "didnt match comparison string", loc)
+			text := haystack[cap.byte_start:cap.byte_end]
+
+			if failed(t, comp[i] == text) {
+				logf(t, "Capture don't equal -> %s != %s\n", comp[i], text)
+			}
 		}
 		}
 	}
 	}
 
 
@@ -215,7 +226,12 @@ test_captures :: proc(t: ^testing.T) {
 	matches :: proc(t: ^testing.T, test: ^Temp, haystack: string, ok: bool, loc := #caller_location) {
 	matches :: proc(t: ^testing.T, test: ^Temp, haystack: string, ok: bool, loc := #caller_location) {
 		length, err := lua.find_aux(haystack, test.pattern, 0, false, &test.captures)
 		length, err := lua.find_aux(haystack, test.pattern, 0, false, &test.captures)
 		result := length > 0 && err == .OK
 		result := length > 0 && err == .OK
-		expect(t, result == ok, "result didnt eq", loc)
+
+		if failed(t, result == ok) {
+			logf(t, "Capture match failed!\n")
+			logf(t, "\tErr: %v\n", err)
+			logf(t, "\tLength: %v\n", length)
+		}
 	}
 	}
 
 
 	temp := Temp { pattern = "(one).+" }
 	temp := Temp { pattern = "(one).+" }
@@ -238,8 +254,8 @@ test_captures :: proc(t: ^testing.T) {
 		lua.find_aux(haystack, pattern, 0, false, &captures)
 		lua.find_aux(haystack, pattern, 0, false, &captures)
 		cap1 := captures[1]
 		cap1 := captures[1]
 		cap2 := captures[2]
 		cap2 := captures[2]
-		text1 := haystack[cap1.start:cap1.end]
-		text2 := haystack[cap2.start:cap2.end]
+		text1 := haystack[cap1.byte_start:cap1.byte_end]
+		text2 := haystack[cap2.byte_start:cap2.byte_end]
 		expect(t, text1 == "233", "Multi-Capture failed at 1")
 		expect(t, text1 == "233", "Multi-Capture failed at 1")
 		expect(t, text2 == "hello", "Multi-Capture failed at 2")
 		expect(t, text2 == "hello", "Multi-Capture failed at 2")
 	}
 	}
@@ -247,6 +263,13 @@ test_captures :: proc(t: ^testing.T) {
 
 
 @test
 @test
 test_gmatch :: proc(t: ^testing.T) {
 test_gmatch :: proc(t: ^testing.T) {
+	gmatch_check :: proc(t: ^testing.T, index: int, a: []string, b: string) {
+		if failed(t, a[index] == b) {
+			logf(t, "GMATCH %d failed!\n", index)
+			logf(t, "\t%s != %s\n", a[index], b)
+		}
+	}
+
 	{
 	{
 		haystack := "testing this out 123"
 		haystack := "testing this out 123"
 		pattern := "%w+"
 		pattern := "%w+"
@@ -256,7 +279,7 @@ test_gmatch :: proc(t: ^testing.T) {
 		index: int
 		index: int
 
 
 		for match in lua.gmatch(s, pattern, &captures) {
 		for match in lua.gmatch(s, pattern, &captures) {
-			expect(t, output[index] == match, fmt.tprintf("GMATCH %d failed: %s != %s\n", index, output[index], match))
+			gmatch_check(t, index, output[:], match)
 			index += 1
 			index += 1
 		}
 		}
 	}
 	}
@@ -270,7 +293,7 @@ test_gmatch :: proc(t: ^testing.T) {
 		index: int
 		index: int
 
 
 		for match in lua.gmatch(s, pattern, &captures) {
 		for match in lua.gmatch(s, pattern, &captures) {
-			expect(t, output[index] == match, fmt.tprintf("GMATCH %d failed: %s != %s\n", index, output[index], match))
+			gmatch_check(t, index, output[:], match)
 			index += 1
 			index += 1
 		}
 		}
 	}
 	}
@@ -284,7 +307,7 @@ test_gmatch :: proc(t: ^testing.T) {
 		index: int
 		index: int
 
 
 		for match in lua.gmatch(s, pattern, &captures) {
 		for match in lua.gmatch(s, pattern, &captures) {
-			expect(t, output[index] == match, fmt.tprintf("GMATCH %d failed: %s != %s\n", index, output[index], match))
+			gmatch_check(t, index, output[:], match)
 			index += 1
 			index += 1
 		}
 		}
 	}		
 	}		
@@ -308,11 +331,15 @@ test_gfind :: proc(t: ^testing.T) {
 	index: int
 	index: int
 
 
 	for word in lua.gfind(s, pattern, &captures) {
 	for word in lua.gfind(s, pattern, &captures) {
-		expect(t, output[index] == word, fmt.tprintf("GFIND %d failed: %s != %s\n", index, output[index], word))
+		if failed(t, output[index] == word) {
+			logf(t, "GFIND %d failed!\n", index)
+			logf(t, "\t%s != %s\n", output[index], word)
+		}
 		index += 1
 		index += 1
 	}
 	}
 }
 }
 
 
+@test
 test_frontier :: proc(t: ^testing.T) {
 test_frontier :: proc(t: ^testing.T) {
 	Temp :: struct {
 	Temp :: struct {
 		t: ^testing.T,
 		t: ^testing.T,
@@ -322,11 +349,12 @@ test_frontier :: proc(t: ^testing.T) {
 	
 	
 	call :: proc(data: rawptr, word: string) {
 	call :: proc(data: rawptr, word: string) {
 		temp := cast(^Temp) data
 		temp := cast(^Temp) data
-		expect(
-			temp.t, 
-			word == temp.output[temp.index], 
-			fmt.tprintf("frontier temp didnt match: %s != %s\n", word, temp.output[temp.index]),
-		)
+
+		if failed(temp.t, word == temp.output[temp.index]) {
+			logf(temp.t, "GSUB_WITH %d failed!\n", temp.index)
+			logf(temp.t, "\t%s != %s\n", temp.output[temp.index], word)			
+		}
+
 		temp.index += 1
 		temp.index += 1
 	}
 	}
 
 
@@ -343,8 +371,38 @@ test_frontier :: proc(t: ^testing.T) {
 	lua.gsub_with("THE (QUICK) brOWN FOx JUMPS", "%f[%a]%u+%f[%A]", &temp, call)
 	lua.gsub_with("THE (QUICK) brOWN FOx JUMPS", "%f[%a]%u+%f[%A]", &temp, call)
 }
 }
 
 
+// Exploratory UTF-8 test: runs lua.gmatch over a Japanese haystack and prints
+// every match with fmt.eprintln. It makes no assertions; the commented-out
+// sections are earlier experiments kept for reference.
+@test
+test_utf8 :: proc(t: ^testing.T) {
+	// {
+	// 	haystack := "恥ずべき恥フク恥ロ"
+	// 	s := &haystack
+	// 	captures: [lua.MAXCAPTURES]lua.Match
+
+	// 	for word in lua.gmatch(s, "恥", &captures) {
+	// 		fmt.eprintln(word)
+	// 	}
+	// }
+
+	{
+		haystack := "恥ずべき恥フク恥ロ"
+		s := &haystack
+		captures: [lua.MAXCAPTURES]lua.Match
+
+		// NOTE(review): "w+" has no '%' escape, so it looks like it matches the
+		// literal letter 'w' rather than the %w class — confirm whether "%w+"
+		// was intended.
+		for word in lua.gmatch(s, "w+", &captures) {
+			fmt.eprintln(word)
+		}
+	}
+
+	// captures: [MAXCAPTURES]Match
+	// length, err := lua.find_aux("damn, pattern,)
+}
+
 main :: proc() {
 main :: proc() {
 	t: testing.T
 	t: testing.T
+	stream := os.stream_from_handle(os.stdout)
+	w := io.to_writer(stream)
+	t.w = w
+	
 	test_find(&t)
 	test_find(&t)
 	test_match(&t)
 	test_match(&t)
 	test_captures(&t)
 	test_captures(&t)
@@ -353,7 +411,9 @@ main :: proc() {
 	test_gfind(&t)
 	test_gfind(&t)
 	test_frontier(&t)
 	test_frontier(&t)
 
 
-	fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
+	// test_utf8(&t)
+
+	fmt.wprintf(w, "%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
 	if TEST_fail > 0 {
 	if TEST_fail > 0 {
 		os.exit(1)
 		os.exit(1)
 	}
 	}