Browse Source

Unicode: UTF32 support improvements (#2541, #2538, #2815)

- Make ImWchar32 unsigned.
 - Fix Win32 version of ImFileOpen by including windows.h sooner.
 - Make ImGuiIO::AddInputCharacterUTF16() more robust by disallowing illegal
surrogate pairs.
 - Allow pushing higher plane codepoints through ImGuiIO::AddInputCharacter().
 - Minor cleaning up in the high-plane Unicode support.
 - Fix Clang -Wunreachable-code warning
Sam Hocevar 6 years ago
parent
commit
c8ea0a017d
3 changed files with 46 additions and 35 deletions
  1. 36 21
      imgui.cpp
  2. 8 12
      imgui.h
  3. 2 2
      imgui_draw.cpp

+ 36 - 21
imgui.cpp

@@ -1094,30 +1094,33 @@ ImGuiIO::ImGuiIO()
 // - on Windows you can get those using ToAscii+keyboard state, or via the WM_CHAR message
 // - on Windows you can get those using ToAscii+keyboard state, or via the WM_CHAR message
 void ImGuiIO::AddInputCharacter(unsigned int c)
 void ImGuiIO::AddInputCharacter(unsigned int c)
 {
 {
-    if (c > 0 && c <= IM_UNICODE_CODEPOINT_MAX)
-        InputQueueCharacters.push_back((ImWchar)c);
+    InputQueueCharacters.push_back(c > 0 && c <= IM_UNICODE_CODEPOINT_MAX ? (ImWchar)c : IM_UNICODE_CODEPOINT_INVALID);
 }
 }
 
 
-// UTF16 string use Surrogate to encode unicode > 0x10000, so we should save the Surrogate.
+// UTF16 strings use surrogate pairs to encode codepoints >= 0x10000, so
+// we should save the high surrogate.
 void ImGuiIO::AddInputCharacterUTF16(ImWchar16 c)
 void ImGuiIO::AddInputCharacterUTF16(ImWchar16 c)
 {
 {
-    if (c >= 0xD800 && c <= 0xDBFF)
+    if ((c & 0xFC00) == 0xD800) // High surrogate, must save
     {
     {
-        Surrogate = c;
+        if (InputQueueSurrogate != 0)
+            InputQueueCharacters.push_back(0xFFFD);
+        InputQueueSurrogate = c;
+        return;
     }
     }
-    else
+
+    ImWchar cp = c;
+    if (InputQueueSurrogate != 0)
     {
     {
-        ImWchar cp = c;
-        if (c >= 0xDC00 && c <= 0xDFFF)
-        {
-            if (sizeof(ImWchar) == 2)
-                cp = IM_UNICODE_CODEPOINT_INVALID;
-            else
-                cp = ((ImWchar)(Surrogate - 0xD800) << 10) + (c - 0xDC00) + 0x10000;
-            Surrogate = 0;
-        }
-        InputQueueCharacters.push_back(cp);
+        if ((c & 0xFC00) != 0xDC00) // Invalid low surrogate
+            InputQueueCharacters.push_back(IM_UNICODE_CODEPOINT_INVALID);
+        else if (IM_UNICODE_CODEPOINT_MAX == (0xFFFF)) // Codepoint will not fit in ImWchar (extra parenthesis around 0xFFFF somehow fixes -Wunreachable-code with Clang)
+            cp = IM_UNICODE_CODEPOINT_INVALID;
+        else
+            cp = (ImWchar)(((InputQueueSurrogate - 0xD800) << 10) + (c - 0xDC00) + 0x10000);
+        InputQueueSurrogate = 0;
     }
     }
+    InputQueueCharacters.push_back(cp);
 }
 }
 
 
 void ImGuiIO::AddInputCharactersUTF8(const char* utf8_chars)
 void ImGuiIO::AddInputCharactersUTF8(const char* utf8_chars)
@@ -1506,6 +1509,18 @@ ImU32 ImHashStr(const char* data_p, size_t data_size, ImU32 seed)
 
 
 // Default file functions
 // Default file functions
 #ifndef IMGUI_DISABLE_DEFAULT_FILE_FUNCTIONS
 #ifndef IMGUI_DISABLE_DEFAULT_FILE_FUNCTIONS
+
+#if defined(_WIN32) && !defined(IMGUI_DISABLE_WIN32_FUNCTIONS) && !defined(__CYGWIN__) && !defined(__GNUC__)
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
+#ifndef __MINGW32__
+#include <Windows.h>
+#else
+#include <windows.h>
+#endif
+#endif
+
 ImFileHandle ImFileOpen(const char* filename, const char* mode)
 ImFileHandle ImFileOpen(const char* filename, const char* mode)
 {
 {
 #if defined(_WIN32) && !defined(IMGUI_DISABLE_WIN32_FUNCTIONS) && !defined(__CYGWIN__) && !defined(__GNUC__)
 #if defined(_WIN32) && !defined(IMGUI_DISABLE_WIN32_FUNCTIONS) && !defined(__CYGWIN__) && !defined(__GNUC__)
@@ -1514,9 +1529,9 @@ ImFileHandle ImFileOpen(const char* filename, const char* mode)
     const int mode_wsize = ::MultiByteToWideChar(CP_UTF8, 0, mode, -1, NULL, 0);
     const int mode_wsize = ::MultiByteToWideChar(CP_UTF8, 0, mode, -1, NULL, 0);
     ImVector<ImWchar> buf;
     ImVector<ImWchar> buf;
     buf.resize(filename_wsize + mode_wsize);
     buf.resize(filename_wsize + mode_wsize);
-    ::MultiByteToWideChar(CP_UTF8, 0, filename, -1, &buf[0], filename_wsize);
-    ::MultiByteToWideChar(CP_UTF8, 0, mode, -1,&buf[filename_wsize], mode_wsize);
-    return _wfopen(&buf[0], &buf[filename_wsize]);
+    ::MultiByteToWideChar(CP_UTF8, 0, filename, -1, (wchar_t*)&buf[0], filename_wsize);
+    ::MultiByteToWideChar(CP_UTF8, 0, mode, -1, (wchar_t*)&buf[filename_wsize], mode_wsize);
+    return _wfopen((const wchar_t*)&buf[0], (const wchar_t*)&buf[filename_wsize]);
 #else
 #else
     return fopen(filename, mode);
     return fopen(filename, mode);
 #endif
 #endif
@@ -1628,8 +1643,8 @@ int ImTextCharFromUtf8(unsigned int* out_char, const char* in_text, const char*
         c += (*str++ & 0x3f);
         c += (*str++ & 0x3f);
         // utf-8 encodings of values used in surrogate pairs are invalid
         // utf-8 encodings of values used in surrogate pairs are invalid
         if ((c & 0xFFFFF800) == 0xD800) return 4;
         if ((c & 0xFFFFF800) == 0xD800) return 4;
-        // If ImWchar is 16bit, use replacement character U+FFFD instead
-        if (sizeof(ImWchar) == 2 && c >= 0x10000) c = IM_UNICODE_CODEPOINT_INVALID;
+        // If codepoint does not fit in ImWchar, use replacement character U+FFFD instead
+        if (c > IM_UNICODE_CODEPOINT_MAX) c = IM_UNICODE_CODEPOINT_INVALID;
         *out_char = c;
         *out_char = c;
         return 4;
         return 4;
     }
     }

+ 8 - 12
imgui.h

@@ -92,7 +92,7 @@ Index of this file:
 #else
 #else
 #define IM_OFFSETOF(_TYPE,_MEMBER)  ((size_t)&(((_TYPE*)0)->_MEMBER))           // Offset of _MEMBER within _TYPE. Old style macro.
 #define IM_OFFSETOF(_TYPE,_MEMBER)  ((size_t)&(((_TYPE*)0)->_MEMBER))           // Offset of _MEMBER within _TYPE. Old style macro.
 #endif
 #endif
-#define IM_UNICODE_CODEPOINT_MAX     0xFFFF                                     // Last Unicode code point supported by this build.
+#define IM_UNICODE_CODEPOINT_MAX    (sizeof(ImWchar) == 2 ? 0xFFFF : 0x10FFFF)  // Last Unicode code point supported by this build.
 #define IM_UNICODE_CODEPOINT_INVALID 0xFFFD                                     // Standard invalid Unicode code point.
 #define IM_UNICODE_CODEPOINT_INVALID 0xFFFD                                     // Standard invalid Unicode code point.
 
 
 // Warnings
 // Warnings
@@ -147,7 +147,7 @@ typedef unsigned int ImGuiID;       // Unique ID used by widgets (typically hash
 #define ImWchar ImWchar16
 #define ImWchar ImWchar16
 #endif
 #endif
 typedef unsigned short ImWchar16;   // A single U16 character for keyboard input/display. We encode them as multi bytes UTF-8 when used in strings.
 typedef unsigned short ImWchar16;   // A single U16 character for keyboard input/display. We encode them as multi bytes UTF-8 when used in strings.
-typedef int ImWchar32;              // A single 32bit character for keyboard input/display, define ImWchar to ImWchar32 to use it. See imconfig.h .
+typedef unsigned int ImWchar32;     // A single U32 character for keyboard input/display. Define ImWchar to ImWchar32 to use it. See imconfig.h .
 typedef int ImGuiCol;               // -> enum ImGuiCol_             // Enum: A color identifier for styling
 typedef int ImGuiCol;               // -> enum ImGuiCol_             // Enum: A color identifier for styling
 typedef int ImGuiCond;              // -> enum ImGuiCond_            // Enum: A condition for many Set*() functions
 typedef int ImGuiCond;              // -> enum ImGuiCond_            // Enum: A condition for many Set*() functions
 typedef int ImGuiDataType;          // -> enum ImGuiDataType_        // Enum: A primary data type
 typedef int ImGuiDataType;          // -> enum ImGuiDataType_        // Enum: A primary data type
@@ -1512,7 +1512,7 @@ struct ImGuiIO
     float       KeysDownDurationPrev[512];      // Previous duration the key has been down
     float       KeysDownDurationPrev[512];      // Previous duration the key has been down
     float       NavInputsDownDuration[ImGuiNavInput_COUNT];
     float       NavInputsDownDuration[ImGuiNavInput_COUNT];
     float       NavInputsDownDurationPrev[ImGuiNavInput_COUNT];
     float       NavInputsDownDurationPrev[ImGuiNavInput_COUNT];
-    ImWchar16   Surrogate;                      // For AddInputCharacterUTF16
+    ImWchar16   InputQueueSurrogate;            // For AddInputCharacterUTF16
     ImVector<ImWchar> InputQueueCharacters;     // Queue of _characters_ input (obtained by platform back-end). Fill using AddInputCharacter() helper.
     ImVector<ImWchar> InputQueueCharacters;     // Queue of _characters_ input (obtained by platform back-end). Fill using AddInputCharacter() helper.
 
 
     IMGUI_API   ImGuiIO();
     IMGUI_API   ImGuiIO();
@@ -2097,15 +2097,11 @@ struct ImFontGlyphRangesBuilder
 {
 {
     ImVector<ImU32> UsedChars;            // Store 1-bit per Unicode code point (0=unused, 1=used)
     ImVector<ImU32> UsedChars;            // Store 1-bit per Unicode code point (0=unused, 1=used)
 
 
-    ImFontGlyphRangesBuilder()          { Clear(); }
-    inline void     Clear()             
-    {
-        int MaxUnicode = sizeof(ImWchar) == 2 ? 0x10000 : 0x110000;
-        UsedChars.resize(MaxUnicode / sizeof(int)); memset(UsedChars.Data, 0, MaxUnicode / sizeof(int));
-    }
-    inline bool     GetBit(int n) const { int off = (n >> 5); ImU32 mask = 1u << (n & 31); return (UsedChars[off] & mask) != 0; }  // Get bit n in the array
-    inline void     SetBit(int n)       { int off = (n >> 5); ImU32 mask = 1u << (n & 31); UsedChars[off] |= mask; }               // Set bit n in the array
-    inline void     AddChar(ImWchar c)  { SetBit(c); }                          // Add character
+    ImFontGlyphRangesBuilder()              { Clear(); }
+    inline void     Clear()                 { int size_in_bytes = (IM_UNICODE_CODEPOINT_MAX + 1) / 8; UsedChars.resize(size_in_bytes / (int)sizeof(ImU32)); memset(UsedChars.Data, 0, (size_t)size_in_bytes); }
+    inline bool     GetBit(size_t n) const  { int off = (int)(n >> 5); ImU32 mask = 1u << (n & 31); return (UsedChars[off] & mask) != 0; }  // Get bit n in the array
+    inline void     SetBit(size_t n)        { int off = (int)(n >> 5); ImU32 mask = 1u << (n & 31); UsedChars[off] |= mask; }               // Set bit n in the array
+    inline void     AddChar(ImWchar c)      { SetBit(c); }                          // Add character
     IMGUI_API void  AddText(const char* text, const char* text_end = NULL);     // Add string (each character of the UTF-8 string are added)
     IMGUI_API void  AddText(const char* text, const char* text_end = NULL);     // Add string (each character of the UTF-8 string are added)
     IMGUI_API void  AddRanges(const ImWchar* ranges);                           // Add ranges, e.g. builder.AddRanges(ImFontAtlas::GetGlyphRangesDefault()) to force add all of ASCII/Latin+Ext
     IMGUI_API void  AddRanges(const ImWchar* ranges);                           // Add ranges, e.g. builder.AddRanges(ImFontAtlas::GetGlyphRangesDefault()) to force add all of ASCII/Latin+Ext
     IMGUI_API void  BuildRanges(ImVector<ImWchar>* out_ranges);                 // Output new ranges
     IMGUI_API void  BuildRanges(ImVector<ImWchar>* out_ranges);                 // Output new ranges

+ 2 - 2
imgui_draw.cpp

@@ -2724,7 +2724,7 @@ void ImFont::AddRemapChar(ImWchar dst, ImWchar src, bool overwrite_dst)
 
 
 const ImFontGlyph* ImFont::FindGlyph(ImWchar c) const
 const ImFontGlyph* ImFont::FindGlyph(ImWchar c) const
 {
 {
-    if (c >= IndexLookup.Size)
+    if (c >= (size_t)IndexLookup.Size)
         return FallbackGlyph;
         return FallbackGlyph;
     const ImWchar i = IndexLookup.Data[c];
     const ImWchar i = IndexLookup.Data[c];
     if (i == (ImWchar)-1)
     if (i == (ImWchar)-1)
@@ -2734,7 +2734,7 @@ const ImFontGlyph* ImFont::FindGlyph(ImWchar c) const
 
 
 const ImFontGlyph* ImFont::FindGlyphNoFallback(ImWchar c) const
 const ImFontGlyph* ImFont::FindGlyphNoFallback(ImWchar c) const
 {
 {
-    if (c >= IndexLookup.Size)
+    if (c >= (size_t)IndexLookup.Size)
         return NULL;
         return NULL;
     const ImWchar i = IndexLookup.Data[c];
     const ImWchar i = IndexLookup.Data[c];
     if (i == (ImWchar)-1)
     if (i == (ImWchar)-1)