const (
	// Code point substituted for anything that cannot be encoded or decoded.
	REPLACEMENT_CHAR = '\uFFFD';
	// Largest code point accepted by encode_surrogate_pair and encode.
	MAX_RUNE         = '\U0010FFFF';

	// Surrogate layout used by the procedures below:
	//   [_surr1, _surr2) - first (high) half of a pair
	//   [_surr2, _surr3) - second (low) half of a pair
	//   _surr_self       - first code point that needs a pair
	_surr1     = 0xd800;
	_surr2     = 0xdc00;
	_surr3     = 0xe000;
	_surr_self = 0x10000;
)

// Reports whether `r` lies in the surrogate range [_surr1, _surr3).
proc is_surrogate(r: rune) -> bool {
	return _surr1 <= r && r < _surr3;
}

// Combines a surrogate pair into the code point it represents.
// `r1` must be in [_surr1, _surr2) and `r2` in [_surr2, _surr3);
// any other combination yields REPLACEMENT_CHAR.
proc decode_surrogate_pair(r1, r2: rune) -> rune {
	if _surr1 <= r1 && r1 < _surr2 && _surr2 <= r2 && r2 < _surr3 {
		// High 10 bits come from r1, low 10 bits from r2, then the result is
		// offset by _surr_self. Parenthesised so the grouping is explicit.
		return ((r1-_surr1)<<10 | (r2-_surr2)) + _surr_self;
	}
	return REPLACEMENT_CHAR;
}

// Splits `r` into its surrogate pair (r1, r2).
// Returns (REPLACEMENT_CHAR, REPLACEMENT_CHAR) when `r` is below _surr_self
// or above MAX_RUNE, i.e. when it is not encoded as a pair.
proc encode_surrogate_pair(r: rune) -> (r1, r2: rune) {
	if r < _surr_self || r > MAX_RUNE {
		return REPLACEMENT_CHAR, REPLACEMENT_CHAR;
	}
	r -= _surr_self;
	// Upper 10 bits select the first half, lower 10 bits the second half.
	return _surr1 + ((r>>10)&0x3ff), _surr2 + (r&0x3ff);
}

// Encodes the code points in `s` as UTF-16 code units written to `d`,
// starting at d[0].
//
// - Code points in [0, _surr1) and [_surr3, _surr_self) are written as one unit.
// - Code points in [_surr_self, MAX_RUNE] are written as a surrogate pair.
// - Everything else (surrogates, negative values, values above MAX_RUNE)
//   is written as REPLACEMENT_CHAR.
//
// `d` is never indexed past its length: encoding stops as soon as the next
// code point does not fit. A surrogate pair is never split, so the final
// element of `d` may be left untouched when only one slot remains.
proc encode(d: []u16, s: []rune) {
	var n = 0;
	for r in s {
		// Every branch below writes at least one unit.
		if n >= len(d) {
			return;
		}
		match r {
		case 0..<_surr1, _surr3..<_surr_self:
			d[n] = u16(r);
			n++;

		case _surr_self..MAX_RUNE:
			// Both halves are needed; stop instead of emitting half a pair.
			if n+2 > len(d) {
				return;
			}
			var r1, r2 = encode_surrogate_pair(r);
			d[n]   = u16(r1);
			d[n+1] = u16(r2);
			n += 2;

		case:
			d[n] = u16(REPLACEMENT_CHAR);
			n++;
		}
	}
}