/* utf8.h (1.8 KB) */
  1. #ifndef BASIC_UTF8_H
  2. #define BASIC_UTF8_H
  3. typedef unsigned int UCS4;
  4. namespace utf8 {
  5. /** Decodes UTF-8 from a string input to a UCS4 character.
  6. @param src buffer in UTF-8 that should be decoded. If the buffer represent
  7. a valid character, the pointer will be incremented to the next character.
  8. @param src_end The end of the string.
  9. @return a UCS4 character, or 0xFFFF if the buffer didn't represent a valid character.
  10. */
  11. UCS4 decode(const char *&src, const char *src_end);
  12. /** Encodes a UCS4 character to UTF-8.
  13. @param ch UCS-4 value.
  14. @param dst buffer to receive UTF-8 encoding (must be at least 8 bytes!)
  15. @return number of bytes needed to represent character
  16. */
  17. int encode(UCS4 ch, char *dst);
  18. /** Decode the next UCS4 character from a UTF-8 string, and update the index variable.
  19. @param str The UTF-8 string.
  20. @param i The index of the current position. This will be increased to the next position.
  21. @param i_max The last position (size of str).
  22. */
  23. UCS4 decode_next(const char *str, int *i, int i_max);
  24. /** Move to the next character in a UTF-8 string.
  25. @param str The UTF-8 string.
  26. @param i The index of the current position. This will be increased to the next position.
  27. @param i_max The last position (size of str).
  28. */
  29. void move_inc(const char *str, int *i, int i_max);
  30. /** Move to the previous character in a UTF-8 string.
  31. @param str The UTF-8 string.
  32. @param i The index of the current position. This will be decreased to the previous position.
  33. */
  34. void move_dec(const char *str, int *i);
  35. /** Count characters before null termination in a UTF-8 string.
  36. Node: Does not include the null termination!
  37. @param str The UTF-8 string.
  38. @param i_max The last position (size of str).
  39. */
  40. int count_characters(const char *str, int i_max);
  41. }; // namespace utf8
  42. #endif // BASIC_UTF8_H