Unicode.h 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // //
  3. // Unicode.h //
  4. // Copyright (C) Microsoft Corporation. All rights reserved. //
  5. // This file is distributed under the University of Illinois Open Source //
  6. // License. See LICENSE.TXT for details. //
  7. // //
  8. // Provides utility functions to work with Unicode and other encodings. //
  9. // //
  10. ///////////////////////////////////////////////////////////////////////////////
  11. #pragma once
  12. #include <string>
  13. #ifdef _WIN32
  14. #include <specstrings.h>
  15. #else
  // The fixed-width integer types used in the signatures below are not
  // guaranteed to be provided by <string>, so include them explicitly.
  #include <cstdint>

  // MultiByteToWideChar is a Windows-specific method; this declaration provides
  // a stand-in with the same name on non-Windows platforms.
  // This is a very simplistic implementation: CodePage and dwFlags exist only
  // to keep the signature call-compatible and are completely ignored.
  //
  //   CodePage       - ignored.
  //   dwFlags        - ignored.
  //   lpMultiByteStr - source narrow-character string.
  //   cbMultiByte    - length associated with lpMultiByteStr.
  //   lpWideCharStr  - destination wide-character buffer.
  //   cchWideChar    - length associated with lpWideCharStr.
  //
  // NOTE(review): the exact meaning of the length arguments and of the return
  // value is presumably modelled on the Win32 API of the same name -- confirm
  // against the implementation, which is not part of this header.
  int MultiByteToWideChar(uint32_t CodePage, uint32_t dwFlags,
                          const char *lpMultiByteStr, int cbMultiByte,
                          wchar_t *lpWideCharStr, int cchWideChar);

  // WideCharToMultiByte is a Windows-specific method; this declaration provides
  // a stand-in with the same name on non-Windows platforms.
  // This is a very simplistic implementation: CodePage and dwFlags exist only
  // to keep the signature call-compatible and are completely ignored.
  //
  //   CodePage          - ignored.
  //   dwFlags           - ignored.
  //   lpWideCharStr     - source wide-character string.
  //   cchWideChar       - length associated with lpWideCharStr.
  //   lpMultiByteStr    - destination narrow-character buffer.
  //   cbMultiByte       - length associated with lpMultiByteStr.
  //   lpDefaultChar     - optional, defaults to nullptr.
  //   lpUsedDefaultChar - optional, defaults to nullptr.
  //
  // NOTE(review): whether the two optional arguments are honoured at all is
  // not visible from this header -- confirm against the implementation.
  int WideCharToMultiByte(uint32_t CodePage, uint32_t dwFlags,
                          const wchar_t *lpWideCharStr, int cchWideChar,
                          char *lpMultiByteStr, int cbMultiByte,
                          const char *lpDefaultChar = nullptr,
                          bool *lpUsedDefaultChar = nullptr);
  30. #endif // _WIN32
  31. namespace Unicode
  32. {
  33. // Based on http://msdn.microsoft.com/en-us/library/windows/desktop/dd374101(v=vs.85).aspx.
  34. enum class Encoding { ASCII = 0, UTF8, UTF8_BOM, UTF16_LE, UTF16_BE, UTF32_LE, UTF32_BE };
  35. // An acp_char is a character encoded in the current Windows ANSI code page.
  36. typedef char acp_char;
  37. // A ccp_char is a character encoded in the console code page.
  38. typedef char ccp_char;
  39. _Success_(return != false)
  40. bool UTF8ToConsoleString(_In_z_ const char* text, _Inout_ std::string* pValue, _Out_opt_ bool* lossy);
  41. _Success_(return != false)
  42. bool UTF16ToConsoleString(_In_z_ const wchar_t* text, _Inout_ std::string* pValue, _Out_opt_ bool* lossy);
  43. _Success_(return != false)
  44. bool UTF8ToUTF16String(_In_opt_z_ const char *pUTF8, _Inout_ std::wstring *pUTF16);
  45. _Success_(return != false)
  46. bool UTF8ToUTF16String(_In_opt_count_(cbUTF8) const char *pUTF8, size_t cbUTF8, _Inout_ std::wstring *pUTF16);
  47. std::wstring UTF8ToUTF16StringOrThrow(_In_z_ const char *pUTF8);
  48. _Success_(return != false)
  49. bool UTF16ToUTF8String(_In_z_ const wchar_t *pUTF16, _Inout_ std::string *pUTF8);
  50. std::string UTF16ToUTF8StringOrThrow(_In_z_ const wchar_t *pUTF16);
  51. bool IsStarMatchUTF8(_In_reads_opt_(maskLen) const char *pMask, size_t maskLen,
  52. _In_reads_opt_(nameLen) const char *pName, size_t nameLen);
  53. bool IsStarMatchUTF16(_In_reads_opt_(maskLen) const wchar_t *pMask, size_t maskLen,
  54. _In_reads_opt_(nameLen) const wchar_t *pName, size_t nameLen);
  55. _Success_(return != false)
  56. bool UTF8BufferToUTF16ComHeap(_In_z_ const char *pUTF8,
  57. _Outptr_result_z_ wchar_t **ppUTF16) throw();
  58. _Success_(return != false)
  59. bool UTF8BufferToUTF16Buffer(
  60. _In_NLS_string_(cbUTF8) const char *pUTF8,
  61. int cbUTF8,
  62. _Outptr_result_buffer_(*pcchUTF16) wchar_t **ppUTF16,
  63. size_t *pcchUTF16) throw();
  64. _Success_(return != false)
  65. bool UTF16BufferToUTF8Buffer(
  66. _In_NLS_string_(cchUTF16) const wchar_t *pUTF16,
  67. int cchUTF16,
  68. _Outptr_result_buffer_(*pcbUTF8) char **ppUTF8,
  69. size_t *pcbUTF8) throw();
  70. } // namespace Unicode