Browse Source

text: Properly handle surrogate pairs in text on Windows

Fixes #1629
rdb 1 year ago
parent
commit
5da013e2e9
3 changed files with 90 additions and 25 deletions
  1. 14 2
      panda/src/text/textAssembler.I
  2. 71 20
      panda/src/text/textAssembler.cxx
  3. 5 3
      panda/src/text/textAssembler.h

+ 14 - 2
panda/src/text/textAssembler.I

@@ -168,7 +168,7 @@ get_num_characters() const {
  * string.  If the object at this position is a graphic object instead of a
  * character, returns 0.
  */
-INLINE wchar_t TextAssembler::
+INLINE char32_t TextAssembler::
 get_character(int n) const {
   nassertr(n >= 0 && n < (int)_text_string.size(), 0);
   return _text_string[n]._character;
@@ -232,7 +232,7 @@ get_num_cols(int r) const {
  * the object at this position is a graphic object instead of a character,
  * returns 0.
  */
-INLINE wchar_t TextAssembler::
+INLINE char32_t TextAssembler::
 get_character(int r, int c) const {
   nassertr(r >= 0 && r < (int)_text_block.size(), 0);
   nassertr(c >= 0 && c < (int)_text_block[r]._string.size(), 0);
@@ -315,6 +315,18 @@ TextCharacter(wchar_t character,
 {
 }
 
+/**
+ * Constructs a character (non-graphic) entry from a UTF-32 code point.
+ */
+INLINE TextAssembler::TextCharacter::
+TextCharacter(char32_t character,
+              TextAssembler::ComputedProperties *cprops) :
+  _character(character),
+  _graphic(nullptr),
+  _cprops(cprops)
+{
+}
+
 /**
  *
  */

+ 71 - 20
panda/src/text/textAssembler.cxx

@@ -235,11 +235,16 @@ wstring TextAssembler::
 get_plain_wtext() const {
   wstring wtext;
 
-  TextString::const_iterator si;
-  for (si = _text_string.begin(); si != _text_string.end(); ++si) {
-    const TextCharacter &tch = (*si);
+  for (const TextCharacter &tch : _text_string) {
     if (tch._graphic == nullptr) {
-      wtext += tch._character;
+      if (sizeof(wchar_t) >= 4 || (tch._character & ~0xffff) == 0) {
+        wtext += (wchar_t)tch._character;
+      } else {
+        // Use a surrogate pair.
+        char32_t v = (char32_t)tch._character - 0x10000u;
+        wtext += (wchar_t)((v >> 10u) | 0xd800u);
+        wtext += (wchar_t)((v & 0x3ffu) | 0xdc00u);
+      }
     } else {
       wtext.push_back(0);
     }
@@ -269,11 +274,16 @@ get_wordwrapped_plain_wtext() const {
       wtext += '\n';
     }
 
-    TextString::const_iterator si;
-    for (si = row._string.begin(); si != row._string.end(); ++si) {
-      const TextCharacter &tch = (*si);
+    for (const TextCharacter &tch : row._string) {
       if (tch._graphic == nullptr) {
-        wtext += tch._character;
+        if (sizeof(wchar_t) >= 4 || (tch._character & ~0xffff) == 0) {
+          wtext += (wchar_t)tch._character;
+        } else {
+          // Use a surrogate pair.
+          char32_t v = (char32_t)tch._character - 0x10000u;
+          wtext += (wchar_t)((v >> 10u) | 0xd800u);
+          wtext += (wchar_t)((v & 0x3ffu) | 0xdc00u);
+        }
       } else {
         wtext.push_back(0);
       }
@@ -295,12 +305,17 @@ get_wtext() const {
   wstring wtext;
   PT(ComputedProperties) current_cprops = _initial_cprops;
 
-  TextString::const_iterator si;
-  for (si = _text_string.begin(); si != _text_string.end(); ++si) {
-    const TextCharacter &tch = (*si);
+  for (const TextCharacter &tch : _text_string) {
     current_cprops->append_delta(wtext, tch._cprops);
     if (tch._graphic == nullptr) {
-      wtext += tch._character;
+      if (sizeof(wchar_t) >= 4 || (tch._character & ~0xffff) == 0) {
+        wtext += (wchar_t)tch._character;
+      } else {
+        // Use a surrogate pair.
+        char32_t v = (char32_t)tch._character - 0x10000u;
+        wtext += (wchar_t)((v >> 10u) | 0xd800u);
+        wtext += (wchar_t)((v & 0x3ffu) | 0xdc00u);
+      }
     } else {
       wtext.push_back(text_embed_graphic_key);
       wtext += tch._graphic_wname;
@@ -341,12 +356,17 @@ get_wordwrapped_wtext() const {
       wtext += '\n';
     }
 
-    TextString::const_iterator si;
-    for (si = row._string.begin(); si != row._string.end(); ++si) {
-      const TextCharacter &tch = (*si);
+    for (const TextCharacter &tch : row._string) {
       current_cprops->append_delta(wtext, tch._cprops);
       if (tch._graphic == nullptr) {
-        wtext += tch._character;
+        if (sizeof(wchar_t) >= 4 || (tch._character & ~0xffff) == 0) {
+          wtext += (wchar_t)tch._character;
+        } else {
+          // Use a surrogate pair.
+          char32_t v = (char32_t)tch._character - 0x10000u;
+          wtext += (wchar_t)((v >> 10u) | 0xd800u);
+          wtext += (wchar_t)((v & 0x3ffu) | 0xdc00u);
+        }
       } else {
         wtext.push_back(text_embed_graphic_key);
         wtext += tch._graphic_wname;
@@ -623,6 +643,18 @@ assemble_text() {
  */
 PN_stdfloat TextAssembler::
 calc_width(wchar_t character, const TextProperties &properties) {
+  return calc_width((char32_t)character, properties);
+}
+
+/**
+ * Returns the width of a single character, according to its associated font.
+ * This also correctly calculates the width of cheesy ligatures and accented
+ * characters, which may not exist in the font as such.
+ *
+ * This does not take kerning into account, however.
+ */
+PN_stdfloat TextAssembler::
+calc_width(char32_t character, const TextProperties &properties) {
   if (character == ' ') {
     // A space is a special case.
     TextFont *font = properties.get_font();
@@ -846,6 +878,27 @@ scan_wtext(TextAssembler::TextString &output_string,
           << "Unknown TextGraphic: " << graphic_name << "\n";
       }
 
+#if WCHAR_MAX < 0x10FFFF
+    } else if (*si >= 0xd800 && *si < 0xdc00) {
+      // This is a high surrogate.  Look for a subsequent low surrogate.
+      wchar_t ch = *si;
+      ++si;
+      if (si == send) {
+        text_cat.warning()
+          << "High surrogate at end of text.\n";
+        return;
+      }
+      wchar_t ch2 = *si;
+      if (ch2 >= 0xdc00 && ch2 < 0xe000) {
+        char32_t code_point = 0x10000 + ((ch - 0xd800) << 10) + (ch2 - 0xdc00);
+        output_string.push_back(TextCharacter(code_point, current_cprops));
+        ++si;
+      } else {
+        text_cat.warning()
+          << "High surrogate was not followed by low surrogate in text.\n";
+      }
+#endif
+
     } else {
       // A normal character.  Apply it.
       output_string.push_back(TextCharacter(*si, current_cprops));
@@ -1422,10 +1475,8 @@ assemble_row(TextAssembler::TextRow &row,
   hb_buffer_t *harfbuff = nullptr;
 #endif
 
-  TextString::const_iterator si;
-  for (si = row._string.begin(); si != row._string.end(); ++si) {
-    const TextCharacter &tch = (*si);
-    wchar_t character = tch._character;
+  for (const TextCharacter &tch : row._string) {
+    char32_t character = tch._character;
     const TextGraphic *graphic = tch._graphic;
     const TextProperties *properties = &(tch._cprops->_properties);
 

+ 5 - 3
panda/src/text/textAssembler.h

@@ -78,14 +78,14 @@ PUBLISHED:
   int calc_index(int r, int c) const;
 
   INLINE int get_num_characters() const;
-  INLINE wchar_t get_character(int n) const;
+  INLINE char32_t get_character(int n) const;
   INLINE const TextGraphic *get_graphic(int n) const;
   INLINE const TextProperties &get_properties(int n) const;
   INLINE PN_stdfloat get_width(int n) const;
 
   INLINE int get_num_rows() const;
   INLINE int get_num_cols(int r) const;
-  INLINE wchar_t get_character(int r, int c) const;
+  INLINE char32_t get_character(int r, int c) const;
   INLINE const TextGraphic *get_graphic(int r, int c) const;
   INLINE const TextProperties &get_properties(int r, int c) const;
   INLINE PN_stdfloat get_width(int r, int c) const;
@@ -98,6 +98,7 @@ PUBLISHED:
   INLINE const LVector2 &get_lr() const;
 
   static PN_stdfloat calc_width(wchar_t character, const TextProperties &properties);
+  static PN_stdfloat calc_width(char32_t character, const TextProperties &properties);
   static PN_stdfloat calc_width(const TextGraphic *graphic, const TextProperties &properties);
 
   static bool has_exact_character(wchar_t character, const TextProperties &properties);
@@ -132,13 +133,14 @@ private:
   class TextCharacter {
   public:
     INLINE TextCharacter(wchar_t character, ComputedProperties *cprops);
+    INLINE TextCharacter(char32_t character, ComputedProperties *cprops);
     INLINE TextCharacter(const TextGraphic *graphic,
                          const std::wstring &graphic_wname,
                          ComputedProperties *cprops);
     INLINE TextCharacter(const TextCharacter &copy);
     INLINE void operator = (const TextCharacter &copy);
 
-    wchar_t _character;
+    char32_t _character;
     const TextGraphic *_graphic;
     std::wstring _graphic_wname;
     PT(ComputedProperties) _cprops;