UTFString.h 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307
  1. //
  2. // UTFString.h
  3. //
  4. // $Id: //poco/1.4/Foundation/include/Poco/UTFString.h#2 $
  5. //
  6. // Library: Foundation
  7. // Package: Text
  8. // Module: UTFString
  9. //
  10. // Definitions of strings for UTF encodings.
  11. //
  12. // Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH.
  13. // and Contributors.
  14. //
  15. // SPDX-License-Identifier: BSL-1.0
  16. //
  17. #ifndef Foundation_UTFString_INCLUDED
  18. #define Foundation_UTFString_INCLUDED
  19. #include "Poco/Foundation.h"
  20. #include "Poco/Types.h"
  21. #include <string>
  22. namespace Poco {
  23. struct UTF16CharTraits
  24. {
  25. typedef std::fpos<mbstate_t> u16streampos;
  26. typedef UInt16 char_type;
  27. typedef int int_type;
  28. typedef std::streamoff off_type;
  29. typedef u16streampos pos_type;
  30. typedef mbstate_t state_type;
  31. static void assign(char_type& c1, const char_type& c2)
  32. {
  33. c1 = c2;
  34. }
  35. static bool eq(char_type c1, char_type c2)
  36. {
  37. return c1 == c2;
  38. }
  39. static bool lt(char_type c1, char_type c2)
  40. {
  41. return c1 < c2;
  42. }
  43. static int compare(const char_type* s1, const char_type* s2, size_t n)
  44. {
  45. for (; n; --n, ++s1, ++s2)
  46. {
  47. if (lt(*s1, *s2))
  48. return -1;
  49. if (lt(*s2, *s1))
  50. return 1;
  51. }
  52. return 0;
  53. }
  54. static size_t length(const char_type* s)
  55. {
  56. size_t len = 0;
  57. for (; !eq(*s, char_type(0)); ++s)
  58. ++len;
  59. return len;
  60. }
  61. static const char_type* find(const char_type* s, size_t n, const char_type& a)
  62. {
  63. for (; n; --n)
  64. {
  65. if (eq(*s, a))
  66. return s;
  67. ++s;
  68. }
  69. return 0;
  70. }
  71. static char_type* move(char_type* s1, const char_type* s2, size_t n)
  72. {
  73. char_type* r = s1;
  74. if (s1 < s2)
  75. {
  76. for (; n; --n, ++s1, ++s2)
  77. assign(*s1, *s2);
  78. }
  79. else if (s2 < s1)
  80. {
  81. s1 += n;
  82. s2 += n;
  83. for (; n; --n)
  84. assign(*--s1, *--s2);
  85. }
  86. return r;
  87. }
  88. static char_type* copy(char_type* s1, const char_type* s2, size_t n)
  89. {
  90. poco_assert(s2 < s1 || s2 >= s1 + n);
  91. char_type* r = s1;
  92. for (; n; --n, ++s1, ++s2)
  93. assign(*s1, *s2);
  94. return r;
  95. }
  96. static char_type* assign(char_type* s, size_t n, char_type a)
  97. {
  98. char_type* r = s;
  99. for (; n; --n, ++s)
  100. assign(*s, a);
  101. return r;
  102. }
  103. static int_type not_eof(int_type c)
  104. {
  105. return eq_int_type(c, eof()) ? ~eof() : c;
  106. }
  107. static char_type to_char_type(int_type c)
  108. {
  109. return char_type(c);
  110. }
  111. static int_type to_int_type(char_type c)
  112. {
  113. return int_type(c);
  114. }
  115. static bool eq_int_type(int_type c1, int_type c2)
  116. {
  117. return c1 == c2;
  118. }
  119. static int_type eof()
  120. {
  121. return int_type(0xDFFF);
  122. }
  123. };
  124. struct UTF32CharTraits
  125. {
  126. typedef std::fpos<mbstate_t> u32streampos;
  127. typedef UInt32 char_type;
  128. typedef int int_type;
  129. typedef std::streamoff off_type;
  130. typedef u32streampos pos_type;
  131. typedef mbstate_t state_type;
  132. static void assign(char_type& c1, const char_type& c2)
  133. {
  134. c1 = c2;
  135. }
  136. static bool eq(char_type c1, char_type c2)
  137. {
  138. return c1 == c2;
  139. }
  140. static bool lt(char_type c1, char_type c2)
  141. {
  142. return c1 < c2;
  143. }
  144. static int compare(const char_type* s1, const char_type* s2, size_t n)
  145. {
  146. for (; n; --n, ++s1, ++s2)
  147. {
  148. if (lt(*s1, *s2))
  149. return -1;
  150. if (lt(*s2, *s1))
  151. return 1;
  152. }
  153. return 0;
  154. }
  155. static size_t length(const char_type* s)
  156. {
  157. size_t len = 0;
  158. for (; !eq(*s, char_type(0)); ++s)
  159. ++len;
  160. return len;
  161. }
  162. static const char_type* find(const char_type* s, size_t n, const char_type& a)
  163. {
  164. for (; n; --n)
  165. {
  166. if (eq(*s, a))
  167. return s;
  168. ++s;
  169. }
  170. return 0;
  171. }
  172. static char_type* move(char_type* s1, const char_type* s2, size_t n)
  173. {
  174. char_type* r = s1;
  175. if (s1 < s2)
  176. {
  177. for (; n; --n, ++s1, ++s2)
  178. assign(*s1, *s2);
  179. }
  180. else if (s2 < s1)
  181. {
  182. s1 += n;
  183. s2 += n;
  184. for (; n; --n)
  185. assign(*--s1, *--s2);
  186. }
  187. return r;
  188. }
  189. static char_type* copy(char_type* s1, const char_type* s2, size_t n)
  190. {
  191. poco_assert(s2 < s1 || s2 >= s1 + n);
  192. char_type* r = s1;
  193. for (; n; --n, ++s1, ++s2)
  194. assign(*s1, *s2);
  195. return r;
  196. }
  197. static char_type* assign(char_type* s, size_t n, char_type a)
  198. {
  199. char_type* r = s;
  200. for (; n; --n, ++s)
  201. assign(*s, a);
  202. return r;
  203. }
  204. static int_type not_eof(int_type c)
  205. {
  206. return eq_int_type(c, eof()) ? ~eof() : c;
  207. }
  208. static char_type to_char_type(int_type c)
  209. {
  210. return char_type(c);
  211. }
  212. static int_type to_int_type(char_type c)
  213. {
  214. return int_type(c);
  215. }
  216. static bool eq_int_type(int_type c1, int_type c2)
  217. {
  218. return c1 == c2;
  219. }
  220. static int_type eof()
  221. {
  222. return int_type(0xDFFF);
  223. }
  224. };
  225. //#if defined(POCO_ENABLE_CPP11) //TODO
  226. // typedef char16_t UTF16Char;
  227. // typedef std::u16string UTF16String;
  228. // typedef char32_t UTF32Char;
  229. // typedef std::u32string UTF32String;
  230. //#else
  231. #ifdef POCO_NO_WSTRING
  232. typedef Poco::UInt16 UTF16Char;
  233. typedef std::basic_string<UTF16Char, UTF16CharTraits> UTF16String;
  234. typedef UInt32 UTF32Char;
  235. typedef std::basic_string<UTF32Char, UTF32CharTraits> UTF32String;
  236. #else // POCO_NO_WSTRING
  237. #if defined(POCO_OS_FAMILY_WINDOWS)
  238. typedef wchar_t UTF16Char;
  239. typedef std::wstring UTF16String;
  240. typedef UInt32 UTF32Char;
  241. typedef std::basic_string<UTF32Char, UTF32CharTraits> UTF32String;
  242. #elif defined(__SIZEOF_WCHAR_T__) //gcc
  243. #if (__SIZEOF_WCHAR_T__ == 2)
  244. typedef wchar_t UTF16Char;
  245. typedef std::wstring UTF16String;
  246. typedef UInt32 UTF32Char;
  247. typedef std::basic_string<UTF32Char, UTF32CharTraits> UTF32String;
  248. #elif (__SIZEOF_WCHAR_T__ == 4)
  249. typedef Poco::UInt16 UTF16Char;
  250. typedef std::basic_string<UTF16Char, UTF16CharTraits> UTF16String;
  251. typedef wchar_t UTF32Char;
  252. typedef std::wstring UTF32String;
  253. #endif
  254. #else // default to 32-bit wchar_t
  255. typedef Poco::UInt16 UTF16Char;
  256. typedef std::basic_string<UTF16Char, UTF16CharTraits> UTF16String;
  257. typedef wchar_t UTF32Char;
  258. typedef std::wstring UTF32String;
  259. #endif //POCO_OS_FAMILY_WINDOWS
  260. #endif //POCO_NO_WSTRING
  261. //#endif // POCO_ENABLE_CPP11
  262. } // namespace Poco
  263. #endif // Foundation_UTFString_INCLUDED