Browse Source

Merge pull request #1666 from Kelimion/improve_string_cut

[strings] Improve `cut`, add tests for it.
Jeroen van Rijn 3 years ago
parent
commit
6e701ef36d
2 changed files with 57 additions and 9 deletions
  1. 29 8
      core/strings/strings.odin
  2. 28 1
      tests/core/strings/test_core_strings.odin

+ 29 - 8
core/strings/strings.odin

@@ -286,9 +286,8 @@ concatenate :: proc(a: []string, allocator := context.allocator) -> string {
 }
 }
 
 
 /*
 /*
-
 	`rune_offset` and `rune_length` are in runes, not bytes.
 	`rune_offset` and `rune_length` are in runes, not bytes.
-	If `rune_length` <= 0, then it'll return the remainder of the string starting with `rune_offset`.
+	If `rune_length` <= 0, then it'll return the remainder of the string starting at `rune_offset`.
 
 
 	strings.cut("some example text", 0, 4) -> "some"
 	strings.cut("some example text", 0, 4) -> "some"
 	strings.cut("some example text", 2, 2) -> "me"
 	strings.cut("some example text", 2, 2) -> "me"
@@ -296,26 +295,48 @@ concatenate :: proc(a: []string, allocator := context.allocator) -> string {
 */
 */
 cut :: proc(s: string, rune_offset := int(0), rune_length := int(0), allocator := context.allocator) -> (res: string) {
 cut :: proc(s: string, rune_offset := int(0), rune_length := int(0), allocator := context.allocator) -> (res: string) {
 	s := s; rune_length := rune_length
 	s := s; rune_length := rune_length
-	l := utf8.rune_count_in_string(s)
+	context.allocator = allocator
 
 
-	if rune_offset >= l { return "" }
+	// If we signal that we want the entire remainder (length <= 0) *and*
+	// the offset is zero, then we can exit early by cloning the input.
 	if rune_offset == 0 && rune_length <= 0 {
 	if rune_offset == 0 && rune_length <= 0 {
-		return clone(s, allocator)
+		return clone(s)
+	}
+
+	// We need to know if we have enough runes to cover offset + length.
+	rune_count := utf8.rune_count_in_string(s)
+
+	// We're asking for a substring starting at or past the end of the input string.
+	// That's just an empty string.
+	if rune_offset >= rune_count {
+		return ""
 	}
 	}
-	if rune_length == 0 { rune_length = l }
 
 
+	// If we don't specify the length of the substring, use the remainder.
+	if rune_length <= 0 {
+		rune_length = rune_count - rune_offset
+	}
+
+	// We don't yet know how many bytes we need exactly.
+	// But we do know it's bounded by the number of runes * 4 bytes,
+	// and can be no more than the size of the input string.
 	bytes_needed := min(rune_length * 4, len(s))
 	bytes_needed := min(rune_length * 4, len(s))
-	buf := make([]u8, bytes_needed, allocator)
+	buf := make([]u8, bytes_needed)
 
 
 	byte_offset := 0
 	byte_offset := 0
-	for i := 0; i < l; i += 1 {
+	for i := 0; i < rune_count; i += 1 {
 		_, w := utf8.decode_rune_in_string(s)
 		_, w := utf8.decode_rune_in_string(s)
+
+		// If the rune is part of the substring, copy it to the output buffer.
 		if i >= rune_offset {
 		if i >= rune_offset {
 			for j := 0; j < w; j += 1 {
 			for j := 0; j < w; j += 1 {
 				buf[byte_offset+j] = s[j]
 				buf[byte_offset+j] = s[j]
 			}
 			}
 			byte_offset += w
 			byte_offset += w
 		}
 		}
+
+		// We're done if we reach the end of the input string, *or*
+		// if we've reached a specified length in runes.
 		if rune_length > 0 {
 		if rune_length > 0 {
 			if i == rune_offset + rune_length - 1 { break }
 			if i == rune_offset + rune_length - 1 { break }
 		}
 		}

+ 28 - 1
tests/core/strings/test_core_strings.odin

@@ -32,6 +32,7 @@ main :: proc() {
 	test_index_any_larger_string_not_found(&t)
 	test_index_any_larger_string_not_found(&t)
 	test_index_any_small_string_found(&t)
 	test_index_any_small_string_found(&t)
 	test_index_any_larger_string_found(&t)
 	test_index_any_larger_string_found(&t)
+	test_cut(&t)
 
 
 	fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
 	fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
 	if TEST_fail > 0 {
 	if TEST_fail > 0 {
@@ -42,7 +43,6 @@ main :: proc() {
 @test
 @test
 test_index_any_small_string_not_found :: proc(t: ^testing.T) {
 test_index_any_small_string_not_found :: proc(t: ^testing.T) {
 	index := strings.index_any(".", "/:\"")
 	index := strings.index_any(".", "/:\"")
-	log(t, index)
 	expect(t, index == -1, "index_any should be negative")
 	expect(t, index == -1, "index_any should be negative")
 }
 }
 
 
@@ -63,3 +63,30 @@ test_index_any_larger_string_found :: proc(t: ^testing.T) {
 	index := strings.index_any("aaaaaaaa:aaaaaaaa", "/:\"")
 	index := strings.index_any("aaaaaaaa:aaaaaaaa", "/:\"")
 	expect(t, index == 8, "index_any should be 8")
 	expect(t, index == 8, "index_any should be 8")
 }
 }
+
+Cut_Test :: struct {
+	input:  string,
+	offset: int,
+	length: int,
+	output: string,
+}
+
+cut_tests :: []Cut_Test{
+	{"some example text", 0, 4, "some"        },
+	{"some example text", 2, 2, "me"          },
+	{"some example text", 5, 7, "example"     },
+	{"some example text", 5, 0, "example text"},
+	{"恥ずべきフクロウ",        4, 0, "フクロウ"       },
+}
+
+@test
+test_cut :: proc(t: ^testing.T) {
+	for test in cut_tests {
+		res := strings.cut(test.input, test.offset, test.length)
+		defer delete(res)
+
+		msg := fmt.tprintf("cut(\"%v\", %v, %v) expected to return \"%v\", got \"%v\"",
+			test.input, test.offset, test.length, test.output, res)
+		expect(t, res == test.output, msg)
+	}
+}