utf16.odin 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. package utf16
  2. import "core:unicode/utf8"
  // Rune emitted in place of input that cannot be encoded or decoded
  // (see the fallback paths of the procedures in this file).
  REPLACEMENT_CHAR :: '\ufffd'
  // Largest rune value accepted by encode_surrogate_pair and the encoders.
  MAX_RUNE :: '\U0010ffff'
  // Start of the surrogate range; also the start of the first-half
  // surrogates [_surr1, _surr2).
  _surr1 :: 0xd800
  // Start of the second-half surrogates [_surr2, _surr3).
  _surr2 :: 0xdc00
  // Exclusive upper bound of the surrogate range.
  _surr3 :: 0xe000
  // First rune that has to be encoded as a surrogate pair.
  _surr_self :: 0x10000
  // is_surrogate reports whether `r` falls inside the half-open surrogate
  // range [_surr1, _surr3).
  is_surrogate :: proc(r: rune) -> bool {
      if r < _surr1 {
          return false
      }
      return r < _surr3
  }
  // decode_surrogate_pair combines a surrogate pair into a single rune.
  //
  // `r1` must lie in [_surr1, _surr2) and `r2` in [_surr2, _surr3); when either
  // is outside its range, REPLACEMENT_CHAR is returned instead.
  decode_surrogate_pair :: proc(r1, r2: rune) -> rune {
      first_ok  := _surr1 <= r1 && r1 < _surr2
      second_ok := _surr2 <= r2 && r2 < _surr3
      if !(first_ok && second_ok) {
          return REPLACEMENT_CHAR
      }

      // The first half contributes the upper 10 bits, the second half the
      // lower 10 bits; the result is then offset by _surr_self.
      upper := (r1 - _surr1) << 10
      lower := r2 - _surr2
      return (upper | lower) + _surr_self
  }
  // encode_surrogate_pair splits rune `c` into its two surrogate halves.
  //
  // Returns (REPLACEMENT_CHAR, REPLACEMENT_CHAR) when `c` is below _surr_self
  // or above MAX_RUNE, i.e. when it is not representable as a surrogate pair.
  encode_surrogate_pair :: proc(c: rune) -> (r1, r2: rune) {
      if c < _surr_self || c > MAX_RUNE {
          return REPLACEMENT_CHAR, REPLACEMENT_CHAR
      }

      // Work on the offset from _surr_self and split it into two 10-bit
      // halves.
      offset := c - _surr_self
      r1 = _surr1 + ((offset >> 10) & 0x3ff)
      r2 = _surr2 + (offset & 0x3ff)
      return
  }
  // encode writes the UTF-16 encoding of the runes in `s` into `d` and returns
  // the number of u16 code units written.
  //
  // - Runes in [0, _surr1) and [_surr3, _surr_self) are written as one unit.
  // - Runes in [_surr_self, MAX_RUNE] are written as a surrogate pair.
  // - Any other rune (surrogate values, negative values, values above
  //   MAX_RUNE) is written as REPLACEMENT_CHAR.
  //
  // Encoding stops at the first rune whose encoding does not fit in the
  // remaining space of `d`; `d` is never written past its end.
  encode :: proc(d: []u16, s: []rune) -> int {
      n, m := 0, len(d)
      loop: for r in s {
          switch r {
          case 0..<_surr1, _surr3 ..< _surr_self:
              // Need room for one code unit. The previous guard (`m+1 < n`)
              // had its operands reversed and could never fire.
              if n+1 > m { break loop }
              d[n] = u16(r)
              n += 1

          case _surr_self ..= MAX_RUNE:
              // Need room for both halves of the surrogate pair.
              if n+2 > m { break loop }
              r1, r2 := encode_surrogate_pair(r)
              d[n]   = u16(r1)
              d[n+1] = u16(r2)
              n += 2

          case:
              // Unencodable rune: substitute the replacement character.
              if n+1 > m { break loop }
              d[n] = u16(REPLACEMENT_CHAR)
              n += 1
          }
      }
      return n
  }
  // encode_string writes the UTF-16 encoding of the runes iterated from the
  // string `s` into `d` and returns the number of u16 code units written.
  //
  // - Runes in [0, _surr1) and [_surr3, _surr_self) are written as one unit.
  // - Runes in [_surr_self, MAX_RUNE] are written as a surrogate pair.
  // - Any other rune is written as REPLACEMENT_CHAR.
  //
  // Encoding stops at the first rune whose encoding does not fit in the
  // remaining space of `d`; `d` is never written past its end.
  encode_string :: proc(d: []u16, s: string) -> int {
      n, m := 0, len(d)
      loop: for r in s {
          switch r {
          case 0..<_surr1, _surr3 ..< _surr_self:
              // Need room for one code unit. The previous guard (`m+1 < n`)
              // had its operands reversed and could never fire.
              if n+1 > m { break loop }
              d[n] = u16(r)
              n += 1

          case _surr_self ..= MAX_RUNE:
              // Need room for both halves of the surrogate pair.
              if n+2 > m { break loop }
              r1, r2 := encode_surrogate_pair(r)
              d[n]   = u16(r1)
              d[n+1] = u16(r2)
              n += 2

          case:
              // Unencodable rune: substitute the replacement character.
              if n+1 > m { break loop }
              d[n] = u16(REPLACEMENT_CHAR)
              n += 1
          }
      }
      return n
  }
  // decode converts the UTF-16 code units in `s` to runes stored in `d` and
  // returns the number of runes written.
  //
  // A first-half surrogate directly followed by a second-half surrogate is
  // combined into one rune; any other surrogate unit yields REPLACEMENT_CHAR.
  // Decoding stops once `d` is full or `s` is exhausted.
  decode :: proc(d: []rune, s: []u16) -> (n: int) {
      i := 0
      for i < len(s) && n < len(d) {
          unit := s[i]
          out  := rune(REPLACEMENT_CHAR)

          if unit < _surr1 || unit >= _surr3 {
              // Not a surrogate: the code unit is the rune itself.
              out = rune(unit)
          } else if unit < _surr2 && i+1 < len(s) &&
                    _surr2 <= s[i+1] && s[i+1] < _surr3 {
              // Valid pair: consume the second half as well.
              out = decode_surrogate_pair(rune(unit), rune(s[i+1]))
              i += 1
          }

          d[n] = out
          n += 1
          i += 1
      }
      return
  }
  // decode_to_utf8 converts the UTF-16 code units in `s` to UTF-8 bytes stored
  // in `d` and returns the number of bytes written.
  //
  // A first-half surrogate directly followed by a second-half surrogate is
  // combined into one rune; any other surrogate unit yields REPLACEMENT_CHAR.
  // Decoding stops once `d` is full or `s` is exhausted. The last rune is
  // copied with `copy`, so only as many of its bytes as still fit in `d` are
  // written.
  decode_to_utf8 :: proc(d: []byte, s: []u16) -> (n: int) {
      i := 0
      for i < len(s) && n < len(d) {
          unit := s[i]
          out  := rune(REPLACEMENT_CHAR)

          if unit < _surr1 || unit >= _surr3 {
              // Not a surrogate: the code unit is the rune itself.
              out = rune(unit)
          } else if unit < _surr2 && i+1 < len(s) &&
                    _surr2 <= s[i+1] && s[i+1] < _surr3 {
              // Valid pair: consume the second half as well.
              out = decode_surrogate_pair(rune(unit), rune(s[i+1]))
              i += 1
          }

          encoded, width := utf8.encode_rune(out)
          written := copy(d[n:], encoded[:width])
          n += written
          i += 1
      }
      return
  }