utf16.odin 1.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  // Rune returned or written in place of a value that cannot be
  // encoded or decoded by the procedures in this file.
  REPLACEMENT_CHAR :: '\uFFFD';

  // Largest rune value that encode_surrogate_pair accepts.
  MAX_RUNE :: '\U0010FFFF';

  // Boundaries of the surrogate ranges used by the checks in this file:
  //   [_surr1, _surr2) is accepted as the first unit of a pair,
  //   [_surr2, _surr3) is accepted as the second unit of a pair.
  _surr1 :: 0xD800;
  _surr2 :: 0xDC00;
  _surr3 :: 0xE000;

  // Runes at or above this value (up to MAX_RUNE) are split into a
  // surrogate pair; decoded pairs are offset by it.
  _surr_self :: 0x10000;
  // is_surrogate reports whether r lies in the half-open range
  // [_surr1, _surr3), i.e. whether it is a surrogate code point.
  is_surrogate :: proc(r: rune) -> bool {
      // Anything below the start of the surrogate block is not a surrogate.
      if r < _surr1 {
          return false;
      }
      return r < _surr3;
  }
  // decode_surrogate_pair combines a surrogate pair into a single rune.
  //
  // r1 must lie in [_surr1, _surr2) and r2 in [_surr2, _surr3);
  // for any other input REPLACEMENT_CHAR is returned.
  decode_surrogate_pair :: proc(r1, r2: rune) -> rune {
      // Reject a first unit outside the leading-surrogate range.
      if r1 < _surr1 || r1 >= _surr2 {
          return REPLACEMENT_CHAR;
      }
      // Reject a second unit outside the trailing-surrogate range.
      if r2 < _surr2 || r2 >= _surr3 {
          return REPLACEMENT_CHAR;
      }

      // The first unit supplies the upper 10 bits, the second the lower 10;
      // the combined value is then offset by _surr_self.
      hi := (r1 - _surr1) << 10;
      lo := r2 - _surr2;
      return (hi | lo) + _surr_self;
  }
  // encode_surrogate_pair splits r into the surrogate pair (r1, r2).
  //
  // Only runes in [_surr_self, MAX_RUNE] can be split; for any other
  // input both results are REPLACEMENT_CHAR.
  encode_surrogate_pair :: proc(r: rune) -> (r1, r2: rune) {
      if r >= _surr_self && r <= MAX_RUNE {
          // Work on the offset from _surr_self: its upper 10 bits go into
          // the first unit and its lower 10 bits into the second.
          offset := r - _surr_self;
          hi := _surr1 + ((offset >> 10) & 0x3ff);
          lo := _surr2 + (offset & 0x3ff);
          return hi, lo;
      }
      return REPLACEMENT_CHAR, REPLACEMENT_CHAR;
  }
  // encode writes the UTF-16 encoding of the runes in s into d.
  //
  // Per rune:
  //   - values in [0, _surr1) or [_surr3, _surr_self) are written as one u16;
  //   - values in [_surr_self, MAX_RUNE] are written as a surrogate pair
  //     (two u16 values) produced by encode_surrogate_pair;
  //   - anything else (surrogate code points, out-of-range values) is
  //     written as a single REPLACEMENT_CHAR.
  //
  // d is never written past its end: encoding stops at the first rune whose
  // unit(s) no longer fit in the remaining space, leaving the rest of d
  // untouched. (Previously the capacity was computed but never consulted,
  // so a short destination was indexed out of bounds.)
  encode :: proc(d: []u16, s: []rune) {
      limit := len(d);
      n := 0;
      for r in s {
          match r {
          case 0..<_surr1, _surr3..<_surr_self:
              // One unit needed.
              if n+1 > limit {
                  return;
              }
              d[n] = u16(r);
              n++;
          case _surr_self..MAX_RUNE:
              // Two units needed; never emit half of a pair.
              if n+2 > limit {
                  return;
              }
              r1, r2 := encode_surrogate_pair(r);
              d[n] = u16(r1);
              d[n+1] = u16(r2);
              n += 2;
          case:
              // Unencodable rune: substitute the replacement character.
              if n+1 > limit {
                  return;
              }
              d[n] = u16(REPLACEMENT_CHAR);
              n++;
          }
      }
  }