utf16.odin 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
  1. package utf16 // Helpers for converting between runes and UTF-16 code units.
  2. REPLACEMENT_CHAR :: '\ufffd'; // U+FFFD; returned or written by the procedures below when a value cannot be encoded or decoded.
  3. MAX_RUNE :: '\U0010ffff'; // Largest rune the encoders below accept; anything greater is replaced.
  4. _surr1 :: 0xd800; // Inclusive start of the surrogate range; first-half surrogates are [_surr1, _surr2).
  5. _surr2 :: 0xdc00; // Inclusive start of the second-half surrogates, [_surr2, _surr3).
  6. _surr3 :: 0xe000; // Exclusive end of the surrogate range.
  7. _surr_self :: 0x10000; // First rune that needs a surrogate pair; smaller non-surrogate runes are stored as a single u16.
  // is_surrogate reports whether `r` lies in the half-open surrogate range
  // [_surr1, _surr3).
  is_surrogate :: proc(r: rune) -> bool {
      if r < _surr1 {
          return false;
      }
      return r < _surr3;
  }
  // decode_surrogate_pair combines a first-half surrogate `r1` and a
  // second-half surrogate `r2` into the rune they encode.
  //
  // Returns REPLACEMENT_CHAR unless `r1` is in [_surr1, _surr2) and `r2` is in
  // [_surr2, _surr3).
  decode_surrogate_pair :: proc(r1, r2: rune) -> rune {
      first_ok  := _surr1 <= r1 && r1 < _surr2;
      second_ok := _surr2 <= r2 && r2 < _surr3;
      if !(first_ok && second_ok) {
          return REPLACEMENT_CHAR;
      }

      // The first half supplies the upper 10 bits, the second half the lower
      // 10; the result is then offset by _surr_self.
      upper := (r1 - _surr1) << 10;
      lower := r2 - _surr2;
      return (upper | lower) + _surr_self;
  }
  // encode_surrogate_pair splits the rune `c` into its UTF-16 surrogate pair.
  //
  // Returns (REPLACEMENT_CHAR, REPLACEMENT_CHAR) when `c` is below _surr_self
  // or above MAX_RUNE, i.e. when it is not representable as a pair.
  encode_surrogate_pair :: proc(c: rune) -> (r1, r2: rune) {
      if c < _surr_self || c > MAX_RUNE {
          return REPLACEMENT_CHAR, REPLACEMENT_CHAR;
      }

      // Work on the offset from _surr_self: its upper 10 bits go into the
      // first surrogate and its lower 10 bits into the second.
      offset := c - _surr_self;
      first  := _surr1 + ((offset >> 10) & 0x3ff);
      second := _surr2 + (offset & 0x3ff);
      return first, second;
  }
  // encode writes the UTF-16 encoding of the runes in `s` into `d` and returns
  // the number of u16 code units written.
  //
  // - Runes in [0, _surr1) or [_surr3, _surr_self) are stored as one u16.
  // - Runes in [_surr_self, MAX_RUNE] are stored as a surrogate pair.
  // - Any other rune (surrogate range, negative, or above MAX_RUNE) is stored
  //   as REPLACEMENT_CHAR.
  //
  // Encoding stops as soon as the next rune would not fit in the remaining
  // space of `d`; the count written so far is returned.
  encode :: proc(d: []u16, s: []rune) -> int {
      n, m := 0, len(d);
      loop: for r in s {
          switch r {
          case 0..<_surr1, _surr3 ..< _surr_self:
              // Need room for one code unit. The previous guard `m+1 < n`
              // could never fire before d[n] was out of bounds.
              if n+1 > m do break loop;
              d[n] = u16(r);
              n += 1;
          case _surr_self .. MAX_RUNE:
              // Need room for both halves of the pair.
              if n+2 > m do break loop;
              r1, r2 := encode_surrogate_pair(r);
              d[n] = u16(r1);
              d[n+1] = u16(r2);
              n += 2;
          case:
              // Not encodable: substitute the replacement character.
              if n+1 > m do break loop;
              d[n] = u16(REPLACEMENT_CHAR);
              n += 1;
          }
      }
      return n;
  }
  // encode_string writes the UTF-16 encoding of the runes iterated from the
  // string `s` into `d` and returns the number of u16 code units written.
  //
  // - Runes in [0, _surr1) or [_surr3, _surr_self) are stored as one u16.
  // - Runes in [_surr_self, MAX_RUNE] are stored as a surrogate pair.
  // - Any other rune (surrogate range, negative, or above MAX_RUNE) is stored
  //   as REPLACEMENT_CHAR.
  //
  // Encoding stops as soon as the next rune would not fit in the remaining
  // space of `d`; the count written so far is returned.
  encode_string :: proc(d: []u16, s: string) -> int {
      n, m := 0, len(d);
      loop: for r in s {
          switch r {
          case 0..<_surr1, _surr3 ..< _surr_self:
              // Need room for one code unit. The previous guard `m+1 < n`
              // could never fire before d[n] was out of bounds.
              if n+1 > m do break loop;
              d[n] = u16(r);
              n += 1;
          case _surr_self .. MAX_RUNE:
              // Need room for both halves of the pair.
              if n+2 > m do break loop;
              r1, r2 := encode_surrogate_pair(r);
              d[n] = u16(r1);
              d[n+1] = u16(r2);
              n += 2;
          case:
              // Not encodable: substitute the replacement character.
              if n+1 > m do break loop;
              d[n] = u16(REPLACEMENT_CHAR);
              n += 1;
          }
      }
      return n;
  }