123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155 |
- package utf8string
- import "core:unicode/utf8"
- import "base:runtime"
- import "base:builtin"
// String wraps a `string` together with the bookkeeping that `at` and `slice`
// use to address it by rune index instead of by byte index.
String :: struct {
	contents:   string, // the wrapped text, exactly as passed to `init`
	rune_count: int,    // number of runes in `contents`

	// Cached information
	non_ascii: int, // byte index of the first non-ASCII byte; equals the byte length when there is none
	width:     int, // byte width of the most recently decoded rune; stays 0 for an all-ASCII string
	byte_pos, rune_pos: int, // byte offset and rune index of the cached cursor
}
// Package-private alias for the builtin `len`. This package declares its own
// `len` procedure (rune count of a String), so the builtin is reached via this name.
@(private)
_len :: builtin.len
/*
Initialises `s` to wrap `contents` and resets its cached cursor to the start.

Inputs:
- s: the String to (re)initialise
- contents: the text to wrap

Returns:
- The same pointer `s`, so calls can be chained
*/
init :: proc(s: ^String, contents: string) -> ^String {
	byte_len := _len(contents)

	s.contents = contents
	s.byte_pos = 0
	s.rune_pos = 0

	// Length of the leading run of single-byte (ASCII) code points.
	ascii_prefix := byte_len
	for idx := 0; idx < byte_len; idx += 1 {
		if contents[idx] >= utf8.RUNE_SELF {
			ascii_prefix = idx
			break
		}
	}
	s.non_ascii = ascii_prefix

	if ascii_prefix == byte_len {
		// Pure ASCII (this includes the empty string): one byte per rune.
		s.rune_count = byte_len
		s.width = 0
	} else {
		// At least one multi-byte sequence: count runes properly and cache the
		// width of the rune at the cursor (byte 0).
		s.rune_count = utf8.rune_count_in_string(contents)
		_, s.width = utf8.decode_rune_in_string(contents)
	}
	return s
}
// Returns the wrapped text unchanged, as a plain `string`.
to_string :: proc(s: ^String) -> string {
	text := s.contents
	return text
}
// Returns the length of the String in runes (not bytes), as stored in `rune_count`.
len :: proc(s: ^String) -> int {
	count := s.rune_count
	return count
}
// Reports whether the String is pure ASCII. `init` leaves `width` at 0 only
// when it found no byte >= utf8.RUNE_SELF, so a zero width marks an ASCII string.
is_ascii :: proc(s: ^String) -> bool {
	if s.width != 0 {
		return false
	}
	return true
}
/*
Returns the rune at rune index `i` of the wrapped string.

Indices inside the leading ASCII prefix are answered directly. Any other index
moves the cached cursor (`byte_pos`, `rune_pos`, `width`) onto rune `i`, so that
afterwards `s.byte_pos` is the byte offset of that rune and `s.width` its size.

Inputs:
- s: the String to index
- i: zero-based rune index, checked against `s.rune_count` via `runtime.bounds_check_error_loc`
- loc: caller location forwarded to the bounds check

Returns:
- r: the decoded rune
*/
at :: proc(s: ^String, i: int, loc := #caller_location) -> (r: rune) {
	runtime.bounds_check_error_loc(loc, i, s.rune_count)

	// Every rune before `non_ascii` is a single byte, so the rune index is the byte index.
	if i < s.non_ascii {
		return rune(s.contents[i])
	}

	// Constant-time cases: the two ends of the string and the cursor's neighbours.
	switch i {
	case 0: // first rune
		r, s.width = utf8.decode_rune_in_string(s.contents)
		s.rune_pos = 0
		s.byte_pos = 0
		return

	case s.rune_count-1: // last rune: must be decoded from the END of the string
		r, s.width = utf8.decode_last_rune_in_string(s.contents)
		s.rune_pos = i
		s.byte_pos = _len(s.contents) - s.width
		return

	case s.rune_pos-1: // one rune back: it is the LAST rune of the text before the cursor
		r, s.width = utf8.decode_last_rune_in_string(s.contents[0:s.byte_pos])
		s.rune_pos = i
		s.byte_pos -= s.width
		return

	case s.rune_pos+1: // one rune ahead: skip over the rune currently under the cursor
		s.rune_pos = i
		s.byte_pos += s.width
		fallthrough

	case s.rune_pos: // the rune under the cursor
		r, s.width = utf8.decode_rune_in_string(s.contents[s.byte_pos:])
		return
	}

	// Linear scan. Possible starting points are the end of the ASCII prefix,
	// the cursor, and the end of the string; pick a start and a direction.
	scan_forward := true
	if i < s.rune_pos {
		if i < (s.rune_pos-s.non_ascii)/2 {
			// Restart at the end of the ASCII prefix, where byte and rune offsets coincide.
			s.byte_pos, s.rune_pos = s.non_ascii, s.non_ascii
		} else {
			// Walk backwards from the cursor.
			scan_forward = false
		}
	} else if i-s.rune_pos < (s.rune_count-s.rune_pos)/2 {
		// Walk forwards from the cursor; `scan_forward` is already true.
	} else {
		// Walk backwards from one past the last rune.
		s.byte_pos, s.rune_pos = _len(s.contents), s.rune_count
		scan_forward = false
	}

	if scan_forward {
		for {
			// Decode first, then advance: the loop exits with the cursor ON rune `i`.
			r, s.width = utf8.decode_rune_in_string(s.contents[s.byte_pos:])
			if s.rune_pos == i {
				return
			}
			s.rune_pos += 1
			s.byte_pos += s.width
		}
	} else {
		for {
			// Decode the rune just before the cursor and step back onto it.
			r, s.width = utf8.decode_last_rune_in_string(s.contents[:s.byte_pos])
			s.rune_pos -= 1
			s.byte_pos -= s.width
			if s.rune_pos == i {
				return
			}
		}
	}
}
/*
Returns the substring covering runes `i` (inclusive) to `j` (exclusive).

Inputs:
- s: the String to slice; its cached cursor may be moved by the internal `at` calls
- i: rune index of the first rune to include
- j: rune index one past the last rune to include
- loc: caller location forwarded to the range check and to `at`

Returns:
- The selected part of `s.contents`; `""` when `i == j` outside the ASCII prefix
*/
slice :: proc(s: ^String, i, j: int, loc := #caller_location) -> string {
	runtime.slice_expr_error_lo_hi_loc(loc, i, j, s.rune_count)

	// The whole range lies before the first non-ASCII byte, where rune
	// indices and byte indices are the same thing.
	if j < s.non_ascii {
		return s.contents[i:j]
	}
	if i == j {
		return ""
	}

	byte_len := _len(s.contents)

	// Translate the lower rune index into a byte offset.
	start: int
	switch {
	case i < s.non_ascii:
		start = i
	case i == s.rune_count:
		start = byte_len
	case:
		// `at` leaves `byte_pos` on the rune it was asked for.
		at(s, i, loc)
		start = s.byte_pos
	}

	// Translate the upper rune index; one-past-the-end maps to the byte length.
	end := byte_len
	if j != s.rune_count {
		at(s, j, loc)
		end = s.byte_pos
	}

	return s.contents[start:end]
}
|