string.odin 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. package utf8string
  2. import "core:unicode/utf8"
  3. import "base:runtime"
  4. import "base:builtin"
  // String pairs a UTF-8 encoded `string` with its rune count and a cached
  // cursor, so that rune-indexed access near the previous access does not have
  // to rescan the text from the start.
  String :: struct {
      contents:   string, // the wrapped text, as handed to `init`
      rune_count: int,    // number of runes in `contents`

      // Cached information, written by `init` and updated by `at`.
      non_ascii: int, // byte index of the first byte >= utf8.RUNE_SELF; the byte length of `contents` if there is none
      width:     int, // byte width of the most recently decoded rune; stays 0 when `contents` is pure ASCII
      byte_pos:  int, // byte offset of the cached cursor into `contents`
      rune_pos:  int, // rune index of the cached cursor
  }
  // Package-private alias for `builtin.len`. This package declares its own
  // `len` (the rune count of a `String`), so byte lengths are taken via `_len`.
  @(private)
  _len :: builtin.len
  // init points `s` at `contents` and resets every cached field.
  //
  // The bytes are scanned for the first value >= utf8.RUNE_SELF:
  //   - if one is found, `non_ascii` is its byte index, `rune_count` is the
  //     full rune count, and `width` is the byte width of the first rune;
  //   - otherwise the text is pure ASCII (this includes the empty string),
  //     so `rune_count` and `non_ascii` equal the byte length and `width` is 0.
  //
  // Returns `s` so that calls can be chained.
  init :: proc(s: ^String, contents: string) -> ^String {
      byte_len := _len(contents)

      s.contents = contents
      s.byte_pos, s.rune_pos = 0, 0

      for idx := 0; idx < byte_len; idx += 1 {
          if contents[idx] < utf8.RUNE_SELF {
              continue
          }
          // First non-ASCII byte: from here on rune index != byte index.
          s.non_ascii = idx
          s.rune_count = utf8.rune_count_in_string(contents)
          _, s.width = utf8.decode_rune_in_string(contents)
          return s
      }

      // No non-ASCII byte anywhere: one rune per byte.
      s.non_ascii = byte_len
      s.rune_count = byte_len
      s.width = 0
      return s
  }
  // to_string returns the text wrapped by `s`, exactly as stored in `contents`.
  to_string :: proc(s: ^String) -> string {
      text := s.contents
      return text
  }
  // len returns the number of runes in `s` (the cached `rune_count`), not the
  // number of bytes.
  len :: proc(s: ^String) -> int {
      count := s.rune_count
      return count
  }
  // is_ascii reports whether the cached `width` is 0, which is the value `init`
  // stores when it finds no byte >= utf8.RUNE_SELF in the text.
  is_ascii :: proc(s: ^String) -> bool {
      ascii_only := s.width == 0
      return ascii_only
  }
  // at returns the rune at rune index `i` of `s`.
  //
  // Inputs:
  //   - s:   the String to index; its cached cursor (`byte_pos`, `rune_pos`,
  //          `width`) is updated whenever `i` is at or beyond `non_ascii`.
  //   - i:   rune index, checked against `rune_count` through
  //          `runtime.bounds_check_error_loc`.
  //   - loc: caller location forwarded to the bounds check.
  //
  // Returns: the decoded rune.
  //
  // Invariant kept by every non-ASCII path: on return, `byte_pos` is the byte
  // offset of rune `i`, `rune_pos == i`, and `width` is the byte width of that
  // rune. The `rune_pos+1` case and `slice` rely on this.
  at :: proc(s: ^String, i: int, loc := #caller_location) -> (r: rune) {
      runtime.bounds_check_error_loc(loc, i, s.rune_count)

      // Every byte before `non_ascii` is a one-byte rune, so rune index and
      // byte index coincide. The cursor is deliberately left untouched.
      if i < s.non_ascii {
          return rune(s.contents[i])
      }

      // Cheap cases: the start, the end, or within one rune of the cursor.
      switch i {
      case 0:
          r, s.width = utf8.decode_rune_in_string(s.contents)
          s.rune_pos = 0
          s.byte_pos = 0
          return

      case s.rune_count-1:
          // The last rune must be decoded from the END of the text; decoding
          // from the front would yield rune 0 and a wrong `byte_pos`.
          r, s.width = utf8.decode_last_rune_in_string(s.contents)
          s.rune_pos = i
          s.byte_pos = _len(s.contents) - s.width
          return

      case s.rune_pos-1:
          // Step back one rune: it is the last rune of the text that precedes
          // the cursor, so decode from the end of that prefix.
          r, s.width = utf8.decode_last_rune_in_string(s.contents[0:s.byte_pos])
          s.rune_pos = i
          s.byte_pos -= s.width
          return

      case s.rune_pos+1:
          // Step forward one rune: skip over the rune currently under the
          // cursor (whose width is cached), then decode at the new position.
          s.rune_pos = i
          s.byte_pos += s.width
          fallthrough
      case s.rune_pos:
          r, s.width = utf8.decode_rune_in_string(s.contents[s.byte_pos:])
          return
      }

      // Linear scan. Possible starting points are the first non-ASCII byte,
      // the current cursor, and the end of the text; scan backwards when the
      // chosen starting point lies after `i`.
      scan_forward := true
      if i < s.rune_pos {
          if i < (s.rune_pos-s.non_ascii)/2 {
              // Restart at the first non-ASCII byte, where byte index still
              // equals rune index, and scan forward.
              s.byte_pos, s.rune_pos = s.non_ascii, s.non_ascii
          } else {
              // Scan backwards from the cursor.
              scan_forward = false
          }
      } else if i-s.rune_pos < (s.rune_count-s.rune_pos)/2 {
          // Scan forward from the cursor.
      } else {
          // Scan backwards from one past the last rune.
          s.byte_pos, s.rune_pos = _len(s.contents), s.rune_count
          scan_forward = false
      }

      if scan_forward {
          for {
              r, s.width = utf8.decode_rune_in_string(s.contents[s.byte_pos:])
              if s.rune_pos == i {
                  return
              }
              s.rune_pos += 1
              s.byte_pos += s.width
          }
      } else {
          for {
              r, s.width = utf8.decode_last_rune_in_string(s.contents[:s.byte_pos])
              s.rune_pos -= 1
              s.byte_pos -= s.width
              if s.rune_pos == i {
                  return
              }
          }
      }
  }
  // slice returns the substring of `s` covering rune indices [i, j).
  //
  // Inputs:
  //   - s:    the String to slice; `at` may move its cached cursor.
  //   - i, j: rune indices, validated against `rune_count` through
  //           `runtime.slice_expr_error_lo_hi_loc`.
  //   - loc:  caller location forwarded to the range check and to `at`.
  //
  // Returns: the matching byte range of `contents`.
  slice :: proc(s: ^String, i, j: int, loc := #caller_location) -> string {
      runtime.slice_expr_error_lo_hi_loc(loc, i, j, s.rune_count)

      // Entirely inside the ASCII prefix: rune indices are byte indices.
      if j < s.non_ascii {
          return s.contents[i:j]
      }
      if i == j {
          return ""
      }

      // Translate the lower rune index to a byte offset. After `at(s, k)` on a
      // non-ASCII index, `byte_pos` is read as the byte offset of rune `k`.
      start: int
      switch {
      case i < s.non_ascii:
          start = i
      case i == s.rune_count:
          start = _len(s.contents)
      case:
          at(s, i, loc)
          start = s.byte_pos
      }

      // Translate the upper rune index; `rune_count` maps to the end of the text.
      stop := _len(s.contents)
      if j != s.rune_count {
          at(s, j, loc)
          stop = s.byte_pos
      }

      return s.contents[start:stop]
  }