Unicode.cpp 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // //
  3. // Unicode.cpp //
  4. // Copyright (C) Microsoft Corporation. All rights reserved. //
  5. // This file is distributed under the University of Illinois Open Source //
  6. // License. See LICENSE.TXT for details. //
  7. // //
  8. // Provides utility functions to work with Unicode and other encodings. //
  9. // //
  10. ///////////////////////////////////////////////////////////////////////////////
  11. #ifdef _WIN32
  12. #include <specstrings.h>
  13. #endif
  14. #include <string>
  15. #include "dxc/Support/Global.h"
  16. #include "dxc/Support/Unicode.h"
  17. #include "dxc/Support/WinIncludes.h"
  18. namespace Unicode {
  19. _Success_(return != false)
  20. bool UTF16ToEncodedString(_In_z_ const wchar_t* text, DWORD cp, DWORD flags, _Inout_ std::string* pValue, _Out_opt_ bool* lossy) {
  21. BOOL usedDefaultChar;
  22. LPBOOL pUsedDefaultChar = (lossy == nullptr) ? nullptr : &usedDefaultChar;
  23. size_t cUTF16 = wcslen(text);
  24. if (lossy != nullptr) *lossy = false;
  25. // Handle zero-length as a special case; it's a special value to indicate errors in WideCharToMultiByte.
  26. if (cUTF16 == 0) {
  27. pValue->resize(0);
  28. DXASSERT(lossy == nullptr || *lossy == false, "otherwise earlier initialization in this function was updated");
  29. return true;
  30. }
  31. int cbUTF8 = ::WideCharToMultiByte(cp, flags, text, cUTF16, nullptr, 0, nullptr, pUsedDefaultChar);
  32. if (cbUTF8 == 0)
  33. return false;
  34. pValue->resize(cbUTF8);
  35. cbUTF8 = ::WideCharToMultiByte(cp, flags, text, cUTF16, &(*pValue)[0], pValue->size(), nullptr, pUsedDefaultChar);
  36. DXASSERT(cbUTF8 > 0, "otherwise contents have changed");
  37. DXASSERT((*pValue)[pValue->size()] == '\0', "otherwise string didn't null-terminate after resize() call");
  38. if (lossy != nullptr) *lossy = usedDefaultChar;
  39. return true;
  40. }
  41. _Use_decl_annotations_
  42. bool UTF8ToUTF16String(const char *pUTF8, std::wstring *pUTF16) {
  43. size_t cbUTF8 = (pUTF8 == nullptr) ? 0 : strlen(pUTF8);
  44. return UTF8ToUTF16String(pUTF8, cbUTF8, pUTF16);
  45. }
  46. _Use_decl_annotations_
  47. bool UTF8ToUTF16String(const char *pUTF8, size_t cbUTF8, std::wstring *pUTF16) {
  48. DXASSERT_NOMSG(pUTF16 != nullptr);
  49. // Handle zero-length as a special case; it's a special value to indicate
  50. // errors in MultiByteToWideChar.
  51. if (cbUTF8 == 0) {
  52. pUTF16->resize(0);
  53. return true;
  54. }
  55. int cUTF16 = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, pUTF8,
  56. cbUTF8, nullptr, 0);
  57. if (cUTF16 == 0)
  58. return false;
  59. pUTF16->resize(cUTF16);
  60. cUTF16 = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, pUTF8, cbUTF8,
  61. &(*pUTF16)[0], pUTF16->size());
  62. DXASSERT(cUTF16 > 0, "otherwise contents changed");
  63. DXASSERT((*pUTF16)[pUTF16->size()] == L'\0',
  64. "otherwise wstring didn't null-terminate after resize() call");
  65. return true;
  66. }
  67. std::wstring UTF8ToUTF16StringOrThrow(_In_z_ const char *pUTF8) {
  68. std::wstring result;
  69. if (!UTF8ToUTF16String(pUTF8, &result)) {
  70. throw hlsl::Exception(DXC_E_STRING_ENCODING_FAILED);
  71. }
  72. return result;
  73. }
  74. _Use_decl_annotations_
  75. bool UTF8ToConsoleString(_In_z_ const char* text, _Inout_ std::string* pValue, _Out_opt_ bool* lossy) {
  76. DXASSERT_NOMSG(text != nullptr);
  77. DXASSERT_NOMSG(pValue != nullptr);
  78. std::wstring text16;
  79. if (lossy != nullptr) *lossy = false;
  80. if (!UTF8ToUTF16String(text, &text16)) {
  81. return false;
  82. }
  83. return UTF16ToConsoleString(text16.c_str(), pValue, lossy);
  84. }
  85. _Use_decl_annotations_
  86. bool UTF16ToConsoleString(const wchar_t* text, std::string* pValue, bool* lossy) {
  87. DXASSERT_NOMSG(text != nullptr);
  88. DXASSERT_NOMSG(pValue != nullptr);
  89. UINT cp = GetConsoleOutputCP();
  90. return UTF16ToEncodedString(text, cp, 0, pValue, lossy);
  91. }
  92. _Use_decl_annotations_
  93. bool UTF16ToUTF8String(const wchar_t *pUTF16, std::string *pUTF8) {
  94. DXASSERT_NOMSG(pUTF16 != nullptr);
  95. DXASSERT_NOMSG(pUTF8 != nullptr);
  96. return UTF16ToEncodedString(pUTF16, CP_UTF8, 0, pUTF8, nullptr);
  97. }
  98. std::string UTF16ToUTF8StringOrThrow(_In_z_ const wchar_t *pUTF16) {
  99. std::string result;
  100. if (!UTF16ToUTF8String(pUTF16, &result)) {
  101. throw hlsl::Exception(DXC_E_STRING_ENCODING_FAILED);
  102. }
  103. return result;
  104. }
  105. _Use_decl_annotations_
  106. bool UTF8BufferToUTF16ComHeap(const char *pUTF8, wchar_t **ppUTF16) throw() {
  107. *ppUTF16 = nullptr;
  108. int c = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, pUTF8, -1,
  109. nullptr, 0);
  110. if (c == 0)
  111. return false;
  112. CComHeapPtr<wchar_t> p;
  113. if (!p.Allocate(c))
  114. return false;
  115. DXVERIFY_NOMSG(0 < ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, pUTF8,
  116. -1, p.m_pData, c));
  117. *ppUTF16 = p.Detach();
  118. return true;
  119. }
  120. _Use_decl_annotations_
  121. bool UTF8BufferToUTF16Buffer(const char *pUTF8, int cbUTF8, wchar_t **ppUTF16, size_t *pcUTF16) throw() {
  122. *ppUTF16 = nullptr;
  123. *pcUTF16 = 0;
  124. if (cbUTF8 == 0 || (cbUTF8 == -1 && *pUTF8 == '\0')) {
  125. *ppUTF16 = new (std::nothrow) wchar_t[1];
  126. if (*ppUTF16 == nullptr)
  127. return false;
  128. (*ppUTF16)[0] = L'\0';
  129. *pcUTF16 = 1;
  130. return true;
  131. }
  132. int c = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, pUTF8, cbUTF8, nullptr, 0);
  133. if (c == 0)
  134. return false;
  135. // add space for null-terminator if we're not accounting for it
  136. if (cbUTF8 != -1)
  137. c += 1;
  138. wchar_t *p = new (std::nothrow) wchar_t[c];
  139. if (p == nullptr)
  140. return false;
  141. int converted = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
  142. pUTF8, cbUTF8,
  143. p, c);
  144. (void)converted;
  145. DXASSERT(converted > 0, "otherwise contents have changed");
  146. p[c - 1] = L'\0';
  147. *ppUTF16 = p;
  148. *pcUTF16 = c;
  149. return true;
  150. }
  151. _Use_decl_annotations_
  152. bool UTF16BufferToUTF8Buffer(const wchar_t *pUTF16, int cUTF16, char **ppUTF8, size_t *pcUTF8) throw() {
  153. *ppUTF8 = nullptr;
  154. *pcUTF8 = 0;
  155. if (cUTF16 == 0 || (cUTF16 == -1 && *pUTF16 == '\0')) {
  156. *ppUTF8 = new (std::nothrow) char[1];
  157. if (*ppUTF8 == nullptr)
  158. return false;
  159. (*ppUTF8)[0] = '\0';
  160. *pcUTF8 = 1;
  161. return true;
  162. }
  163. int c1 = ::WideCharToMultiByte(CP_UTF8, // code page
  164. 0, // flags
  165. pUTF16, // string to convert
  166. cUTF16, // size, in chars, of string to convert
  167. nullptr, // output buffer
  168. 0, // size of output buffer
  169. nullptr, nullptr);
  170. if (c1 == 0)
  171. return false;
  172. // add space for null-terminator if we're not accounting for it
  173. if (cUTF16 != -1)
  174. c1 += 1;
  175. char *p = new (std::nothrow) char[c1];
  176. if (p == nullptr)
  177. return false;
  178. int converted = ::WideCharToMultiByte(CP_UTF8, 0,
  179. pUTF16, cUTF16,
  180. p, c1,
  181. nullptr, nullptr);
  182. (void)converted;
  183. DXASSERT(converted > 0, "otherwise contents have changed");
  184. p[c1 - 1] = '\0';
  185. *ppUTF8 = p;
  186. *pcUTF8 = c1;
  187. return true;
  188. }
  // Matches a name against a mask that may end in a single wildcard character.
  //
  // pMask/maskLen - mask text and its length in TChar elements.
  // pName/nameLen - name text and its length in TChar elements.
  // star          - the wildcard character; only honored as the last mask
  //                 character.
  //
  // Rules, in order:
  //   * two empty strings match;
  //   * if exactly one side is empty there is no match (so a lone wildcard
  //     does not match an empty name);
  //   * a mask ending in the wildcard matches any name that starts with the
  //     mask text preceding the wildcard;
  //   * any other mask must equal the name exactly.
  template<typename TChar>
  static
  bool IsStarMatchT(const TChar *pMask, size_t maskLen, const TChar *pName, size_t nameLen, TChar star) {
    // Empty on either side: only "both empty" counts as a match.
    if (maskLen == 0 || nameLen == 0) {
      return maskLen == nameLen;
    }

    const bool endsWithStar = (pMask[maskLen - 1] == star);
    if (!endsWithStar) {
      // Exact match.
      return nameLen == maskLen &&
             memcmp(pMask, pName, sizeof(TChar) * nameLen) == 0;
    }

    // Prefix match against everything before the trailing wildcard.
    const size_t prefixLen = maskLen - 1;
    if (prefixLen == 0) { // For just '*', everything is a match.
      return true;
    }
    if (prefixLen > nameLen) { // Mask is longer than name, can't be a match.
      return false;
    }
    return memcmp(pMask, pName, sizeof(TChar) * prefixLen) == 0;
  }
  217. _Use_decl_annotations_
  218. bool IsStarMatchUTF8(const char *pMask, size_t maskLen, const char *pName, size_t nameLen) {
  219. return IsStarMatchT<char>(pMask, maskLen, pName, nameLen, '*');
  220. }
  221. _Use_decl_annotations_
  222. bool IsStarMatchUTF16(const wchar_t *pMask, size_t maskLen, const wchar_t *pName, size_t nameLen) {
  223. return IsStarMatchT<wchar_t>(pMask, maskLen, pName, nameLen, L'*');
  224. }
  225. } // namespace Unicode