Browse Source

Merge pull request #4192 from laytan/strings-substring-rune-wise

strings: add `substring`, `substring_to` and `substring_from`
Jeroen van Rijn 11 months ago
parent
commit
645207b8b0
2 changed files with 135 additions and 1 deletions
  1. 103 0
      core/strings/strings.odin
  2. 32 1
      tests/core/strings/test_core_strings.odin

+ 103 - 0
core/strings/strings.odin

@@ -3327,3 +3327,106 @@ levenshtein_distance :: proc(a, b: string, allocator := context.allocator, loc :
 
 	return costs[n], nil
 }
+
+/*
+Rune-wise slicing helper shared by `substring`, `substring_from` and `substring_to`.
+
+Inputs:
+- s: the string to slice
+- rune_start: the start (inclusive) rune index; values `<= 0` leave the start of `s` untouched
+- rune_end: the end (exclusive) rune index; only applied when `rune_end >= rune_start`
+  (callers pass `-1` to mean "up to the end of the string")
+
+Returns:
+- sub: the substring; when `rune_start` is out of bounds this is `s` unchanged, and when only
+  `rune_end` is out of bounds this is `s` with just the start applied
+- ok: whether the applied rune indexes were in bounds of `s`
+
+Like byte slicing, an index equal to the number of runes in `s` is in bounds, so a start
+(and end) of exactly the rune count yields the empty string with `ok = true`.
+*/
+@(private)
+internal_substring :: proc(s: string, rune_start: int, rune_end: int) -> (sub: string, ok: bool) {
+	sub = s
+	ok  = true
+
+	// Running rune index into the original `s`. It is shared by both passes, so the
+	// end pass keeps counting from wherever the start pass stopped.
+	rune_i: int
+
+	if rune_start > 0 {
+		ok = false
+		for _, i in sub {
+			if rune_start == rune_i {
+				ok = true
+				sub = sub[i:]
+				break
+			}
+			rune_i += 1
+		}
+
+		if !ok {
+			// The loop ran off the end of the string, so `rune_i` is now the rune count.
+			if rune_start != rune_i {
+				// Start lies beyond one-past-the-last rune: genuinely out of bounds.
+				return
+			}
+
+			// Start is exactly one past the last rune, which is a valid slice bound
+			// (compare `s[len(s):]`); the remaining substring is empty.
+			ok  = true
+			sub = sub[len(sub):]
+		}
+	}
+
+	if rune_end >= rune_start {
+		ok = false
+		for _, i in sub {
+			if rune_end == rune_i {
+				ok = true
+				sub = sub[:i]
+				break
+			}
+			rune_i += 1
+		}
+
+		// An end of exactly one past the last rune never matches inside the loop,
+		// but it is still in bounds and keeps `sub` as is.
+		if rune_end == rune_i {
+			ok = true
+		}
+	}
+
+	return
+}
+
+/*
+Extracts the runes in the half-open range `[rune_start, rune_end)` from `s`.
+
+This is the rune-wise counterpart of the byte slice `s[rune_start:rune_end]`.
+
+Inputs:
+- s: the string to take the substring from
+- rune_start: index of the first rune to include
+- rune_end: index of the rune to stop before (not included)
+
+Returns:
+- sub: the substring; empty when an index is negative or `rune_end < rune_start`
+- ok: whether the rune indexes were in bounds of the original string
+*/
+substring :: proc(s: string, rune_start: int, rune_end: int) -> (sub: string, ok: bool) {
+	// `0 <= rune_start <= rune_end` also implies `rune_end >= 0`; anything else
+	// falls through and returns the zero values `"", false`.
+	if 0 <= rune_start && rune_start <= rune_end {
+		sub, ok = internal_substring(s, rune_start, rune_end)
+	}
+	return
+}
+
+/*
+Extracts everything from rune index `rune_start` through the end of `s`.
+
+This is the rune-wise counterpart of the byte slice `s[rune_start:]`.
+
+Inputs:
+- s: the string to take the substring from
+- rune_start: index of the first rune to include
+
+Returns:
+- sub: the substring; empty when `rune_start` is negative
+- ok: whether the rune index was in bounds of the original string
+*/
+substring_from :: proc(s: string, rune_start: int) -> (sub: string, ok: bool) {
+	if rune_start >= 0 {
+		// An end of -1 is always below a non-negative start, so the helper
+		// leaves the end of the string untouched.
+		sub, ok = internal_substring(s, rune_start, -1)
+	}
+	return
+}
+
+/*
+Extracts everything from the beginning of `s` up to, but not including, rune index `rune_end`.
+
+This is the rune-wise counterpart of the byte slice `s[:rune_end]`.
+
+Inputs:
+- s: the string to take the substring from
+- rune_end: index of the rune to stop before (not included)
+
+Returns:
+- sub: the substring; empty when `rune_end` is negative
+- ok: whether the rune index was in bounds of the original string
+*/
+substring_to :: proc(s: string, rune_end: int) -> (sub: string, ok: bool) {
+	if rune_end >= 0 {
+		// A start of -1 makes the helper skip its start pass and only trim the end.
+		sub, ok = internal_substring(s, -1, rune_end)
+	}
+	return
+}

+ 32 - 1
tests/core/strings/test_core_strings.odin

@@ -124,4 +124,35 @@ test_case_conversion :: proc(t: ^testing.T) {
 			testing.expectf(t, result == entry.s, "ERROR: Input `{}` to converter {} does not match `{}`, got `{}`.\n", test_case.s, case_kind, entry.s, result)
 		}
 	}
-}
+}
+
+// Table-driven check of `strings.substring`: each row gives an input string, a
+// rune range, and the substring / ok flag the call is expected to return.
+@(test)
+test_substring :: proc(t: ^testing.T) {
+	Substring_Case :: struct {
+		input:    string,
+		from:     int,
+		to:       int,
+		want_sub: string,
+		want_ok:  bool,
+	}
+
+	HELLO :: "Hello"
+
+	table := []Substring_Case {
+		// Empty string and negative / out-of-range indexes.
+		{input = "", from =  0, to =  0, want_sub = "", want_ok = true},
+		{input = "", from = -1, to =  0, want_sub = "", want_ok = false},
+		{input = "", from =  0, to = -1, want_sub = "", want_ok = false},
+		{input = "", from =  0, to = +1, want_sub = "", want_ok = false},
+
+		// Single-byte runes.
+		{input = HELLO, from = 0, to = len(HELLO),     want_sub = "Hello", want_ok = true},
+		{input = HELLO, from = 1, to = len(HELLO),     want_sub = "ello",  want_ok = true},
+		{input = HELLO, from = 1, to = len(HELLO) - 1, want_sub = "ell",   want_ok = true},
+		{input = HELLO, from = 0, to = len(HELLO) + 1, want_sub = "Hello", want_ok = false},
+
+		// Multi-byte runes: indexes count runes, not bytes.
+		{input = "小猫咪", from = 0, to = 3, want_sub = "小猫咪", want_ok = true},
+		{input = "小猫咪", from = 1, to = 3, want_sub = "猫咪",   want_ok = true},
+		{input = "小猫咪", from = 1, to = 5, want_sub = "猫咪",   want_ok = false},
+		{input = "小猫咪", from = 1, to = 1, want_sub = "",       want_ok = true},
+	}
+
+	for entry in table {
+		got_sub, got_ok := strings.substring(entry.input, entry.from, entry.to)
+		testing.expectf(t, got_ok == entry.want_ok, "expected %v[%v:%v] to return ok: %v", entry.input, entry.from, entry.to, entry.want_ok)
+		testing.expectf(t, got_sub == entry.want_sub, "expected %v[%v:%v] to return sub: %v, got: %v", entry.input, entry.from, entry.to, entry.want_sub, got_sub)
+	}
+}