utf16.odin 1.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
  // Constants shared by the UTF-16 helpers in this file.
  const (
      // Rune substituted whenever a value cannot be encoded or decoded (U+FFFD).
      REPLACEMENT_CHAR = '\uFFFD';
      // Largest rune the encoder accepts (U+10FFFF).
      MAX_RUNE         = '\U0010FFFF';

      // Surrogate range boundaries:
      //   [_surr1, _surr2) - first (high) half of a surrogate pair
      //   [_surr2, _surr3) - second (low) half of a surrogate pair
      _surr1 = 0xD800;
      _surr2 = 0xDC00;
      _surr3 = 0xE000;

      // First rune value that is encoded as a surrogate pair instead of a single unit.
      _surr_self = 0x10000;
  )
  // is_surrogate reports whether `r` falls inside the half-open surrogate
  // range [_surr1, _surr3).
  proc is_surrogate(r: rune) -> bool {
      // Anything below the start of the range cannot be a surrogate.
      if r < _surr1 {
          return false;
      }
      return r < _surr3;
  }
  // decode_surrogate_pair combines a surrogate pair into the rune it encodes.
  //
  // `r1` must lie in [_surr1, _surr2) and `r2` in [_surr2, _surr3); if either
  // is outside its range, REPLACEMENT_CHAR is returned instead.
  proc decode_surrogate_pair(r1, r2: rune) -> rune {
      var first_ok  = _surr1 <= r1 && r1 < _surr2;
      var second_ok = _surr2 <= r2 && r2 < _surr3;
      if !first_ok || !second_ok {
          return REPLACEMENT_CHAR;
      }

      // The first half supplies the upper 10 bits, the second half the lower
      // 10 bits; the result is then rebased by _surr_self.
      var upper = (r1 - _surr1) << 10;
      var lower = r2 - _surr2;
      return (upper | lower) + _surr_self;
  }
  // encode_surrogate_pair splits `r` into the two halves of its surrogate pair.
  //
  // Only runes in [_surr_self, MAX_RUNE] have a pair; for any other value both
  // results are REPLACEMENT_CHAR.
  proc encode_surrogate_pair(r: rune) -> (r1, r2: rune) {
      if _surr_self <= r && r <= MAX_RUNE {
          // Rebase so the value fits in 20 bits, then split it 10/10.
          var offset = r - _surr_self;
          var first  = _surr1 + ((offset >> 10) & 0x3ff);
          var second = _surr2 + (offset & 0x3ff);
          return first, second;
      }
      return REPLACEMENT_CHAR, REPLACEMENT_CHAR;
  }
  // encode writes the UTF-16 encoding of the runes in `s` into `d`.
  //
  // Per input rune:
  //   - values in [0, _surr1) or [_surr3, _surr_self) are stored as one u16;
  //   - values in [_surr_self, MAX_RUNE] are stored as a surrogate pair
  //     (two u16s) produced by encode_surrogate_pair;
  //   - every other value (including surrogate values themselves) is stored
  //     as REPLACEMENT_CHAR.
  //
  // Output never exceeds len(d): encoding stops at the first rune whose
  // encoding does not fit in the space remaining in `d`, so a surrogate pair
  // is never split. An empty `d` or an empty `s` writes nothing.
  //
  // NOTE(review): the procedure returns nothing, so callers cannot observe how
  // many u16s were written; the signature is kept as-is for compatibility.
  proc encode(d: []u16, s: []rune) {
      var capacity = len(d);
      var n = 0; // number of u16s written so far

      for r in s {
          match r {
          case 0..<_surr1, _surr3..<_surr_self:
              // Single code unit.
              if n+1 > capacity {
                  return;
              }
              d[n] = u16(r);
              n++;
          case _surr_self..MAX_RUNE:
              // Needs two code units; refuse to write only half of the pair.
              if n+2 > capacity {
                  return;
              }
              var r1, r2 = encode_surrogate_pair(r);
              d[n] = u16(r1);
              d[n+1] = u16(r2);
              n += 2;
          case:
              // Not encodable: emit the replacement character instead.
              if n+1 > capacity {
                  return;
              }
              d[n] = u16(REPLACEMENT_CHAR);
              n++;
          }
      }
  }