1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859 |
// Constants used when converting between runes and UTF-16 code units.
const (
	// Rune substituted for input that cannot be encoded or decoded.
	REPLACEMENT_CHAR = '\uFFFD';
	// Largest rune accepted by the encoder.
	MAX_RUNE         = '\U0010FFFF';

	// Lower bound (inclusive) of the first half of a surrogate pair.
	_surr1     = 0xD800;
	// Lower bound (inclusive) of the second half of a surrogate pair;
	// also the exclusive upper bound of the first half.
	_surr2     = 0xDC00;
	// Exclusive upper bound of the surrogate range.
	_surr3     = 0xE000;
	// First rune value that needs a surrogate pair in UTF-16.
	_surr_self = 0x10000;
)
// is_surrogate reports whether `r` lies in the half-open range
// [_surr1, _surr3), i.e. whether it is either half of a surrogate pair.
proc is_surrogate(r: rune) -> bool {
	// Anything below the start of the surrogate range is not a surrogate.
	if r < _surr1 {
		return false;
	}
	return r < _surr3;
}
// decode_surrogate_pair combines the two halves of a surrogate pair into the
// rune they encode.
//
// `r1` must lie in [_surr1, _surr2) and `r2` in [_surr2, _surr3); if either
// is outside its range, REPLACEMENT_CHAR is returned instead.
proc decode_surrogate_pair(r1, r2: rune) -> rune {
	if r1 < _surr1 || r1 >= _surr2 {
		return REPLACEMENT_CHAR;
	}
	if r2 < _surr2 || r2 >= _surr3 {
		return REPLACEMENT_CHAR;
	}

	// The first half contributes the upper 10 bits, the second half the
	// lower 10 bits; the result is then offset by _surr_self.
	var hi = (r1 - _surr1) << 10;
	var lo = r2 - _surr2;
	return (hi | lo) + _surr_self;
}
// encode_surrogate_pair splits `r` into the two halves of its surrogate pair.
//
// Only runes in [_surr_self, MAX_RUNE] can be represented as a pair; for any
// other input both results are REPLACEMENT_CHAR.
proc encode_surrogate_pair(r: rune) -> (r1, r2: rune) {
	if r < _surr_self || r > MAX_RUNE {
		return REPLACEMENT_CHAR, REPLACEMENT_CHAR;
	}

	// Offset from the first rune that needs a pair; its upper 10 bits go
	// into the first half and its lower 10 bits into the second half.
	var offset = r - _surr_self;
	var high = _surr1 + ((offset >> 10) & 0x3ff);
	var low  = _surr2 + (offset & 0x3ff);
	return high, low;
}
// encode writes the UTF-16 encoding of the runes in `s` into `d`.
//
// Runes in [0, _surr1) and [_surr3, _surr_self) are stored as a single code
// unit, runes in [_surr_self, MAX_RUNE] as a surrogate pair, and anything
// else (surrogate halves, out-of-range values) as REPLACEMENT_CHAR.
//
// Encoding stops as soon as the next rune does not fit in the remaining
// space of `d`, so `d` is never written out of bounds and a surrogate pair
// is never split. Elements of `d` past the last written unit are untouched.
//
// NOTE(review): the procedure returns nothing, so a caller cannot tell how
// many code units were written or whether the output was truncated; callers
// should size `d` for the full encoding (len(s) plus one extra unit for each
// rune >= _surr_self).
proc encode(d: []u16, s: []rune) {
	var limit = len(d);
	var n = 0;
	for r in s {
		match r {
		case 0..<_surr1, _surr3..<_surr_self:
			if n+1 > limit {
				return;
			}
			d[n] = u16(r);
			n++;
		case _surr_self..MAX_RUNE:
			// Both halves must fit; never emit half of a pair.
			if n+2 > limit {
				return;
			}
			var r1, r2 = encode_surrogate_pair(r);
			d[n] = u16(r1);
			d[n+1] = u16(r2);
			n += 2;
		case:
			if n+1 > limit {
				return;
			}
			d[n] = u16(REPLACEMENT_CHAR);
			n++;
		}
	}
}
|