| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152 |
- #ifndef BASIC_UTF8_H
- #define BASIC_UTF8_H
- typedef unsigned int UCS4;
- namespace utf8 {
- /** Decodes UTF-8 from a string input to a UCS4 character.
- @param src buffer in UTF-8 that should be decoded. If the buffer represent
- a valid character, the pointer will be incremented to the next character.
- @param src_end The end of the string.
- @return a UCS4 character, or 0xFFFF if the buffer didn't represent a valid character.
- */
- UCS4 decode(const char *&src, const char *src_end);
- /** Encodes a UCS4 character to UTF-8.
- @param ch UCS-4 value.
- @param dst buffer to receive UTF-8 encoding (must be at least 8 bytes!)
- @return number of bytes needed to represent character
- */
- int encode(UCS4 ch, char *dst);
- /** Decode the next UCS4 character from a UTF-8 string, and update the index variable.
- @param str The UTF-8 string.
- @param i The index of the current position. This will be increased to the next position.
- @param i_max The last position (size of str).
- */
- UCS4 decode_next(const char *str, int *i, int i_max);
- /** Move to the next character in a UTF-8 string.
- @param str The UTF-8 string.
- @param i The index of the current position. This will be increased to the next position.
- @param i_max The last position (size of str).
- */
- void move_inc(const char *str, int *i, int i_max);
- /** Move to the previous character in a UTF-8 string.
- @param str The UTF-8 string.
- @param i The index of the current position. This will be decreased to the previous position.
- */
- void move_dec(const char *str, int *i);
- /** Count characters before null termination in a UTF-8 string.
- Node: Does not include the null termination!
- @param str The UTF-8 string.
- @param i_max The last position (size of str).
- */
- int count_characters(const char *str, int i_max);
- }; // namespace utf8
- #endif // BASIC_UTF8_H
|