Browse Source

better, more robust unicode support

David Rose 24 years ago
parent
commit
d853aef780

+ 17 - 1
panda/src/text/stringDecoder.I

@@ -25,6 +25,7 @@
 INLINE StringDecoder::
 StringDecoder(const string &input) : _input(input) {
   _p = 0;
+  _eof = false;
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -36,7 +37,22 @@ StringDecoder(const string &input) : _input(input) {
 ////////////////////////////////////////////////////////////////////
 INLINE bool StringDecoder::
 is_eof() {
-  return (_p >= _input.size());
+  return _eof;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: StringDecoder::test_eof
+//       Access: Protected
+//  Description: Checks whether the read position has reached or
+//               passed the end of the input string.  If it has,
+//               latches the eof flag and returns true; otherwise
+//               returns false and leaves the flag untouched.
+////////////////////////////////////////////////////////////////////
+INLINE bool StringDecoder::
+test_eof() {
+  const bool exhausted = (_p >= _input.size());
+  if (exhausted) {
+    _eof = true;
+  }
+  return exhausted;
 }
 
 ////////////////////////////////////////////////////////////////////

+ 7 - 7
panda/src/text/stringDecoder.cxx

@@ -34,7 +34,7 @@ StringDecoder::
 ////////////////////////////////////////////////////////////////////
 int StringDecoder::
 get_next_character() {
-  if (is_eof()) {
+  if (test_eof()) {
     return -1;
   }
   return (unsigned char)_input[_p++];
@@ -71,7 +71,7 @@ The value of each individual byte indicates its UTF-8 function, as follows:
 ////////////////////////////////////////////////////////////////////
 int StringUtf8Decoder::
 get_next_character() {
-  if (is_eof()) {
+  if (test_eof()) {
     return -1;
   }
 
@@ -79,7 +79,7 @@ get_next_character() {
   if ((result & 0xe0) == 0xc0) {
     // First byte of two.
     unsigned int two = 0;
-    if (!is_eof()) {
+    if (!test_eof()) {
       two = (unsigned char)_input[_p++];
     }
     result = ((result & 0x1f) << 6) | (two & 0x3f);
@@ -88,10 +88,10 @@ get_next_character() {
     // First byte of three.
     unsigned int two = 0;
     unsigned int three = 0;
-    if (!is_eof()) {
+    if (!test_eof()) {
       two = (unsigned char)_input[_p++];
     }
-    if (!is_eof()) {
+    if (!test_eof()) {
       three = (unsigned char)_input[_p++];
     }
     result = ((result & 0x0f) << 12) | ((two & 0x3f) << 6) | (three & 0x3f);
@@ -107,13 +107,13 @@ get_next_character() {
 ////////////////////////////////////////////////////////////////////
 int StringUnicodeDecoder::
 get_next_character() {
-  if (is_eof()) {
+  if (test_eof()) {
     return -1;
   }
 
   unsigned int high = (unsigned char)_input[_p++];
   unsigned int low = 0;
-  if (!is_eof()) {
+  if (!test_eof()) {
     low = (unsigned char)_input[_p++];
   }
   return ((high << 8) | low);

+ 3 - 0
panda/src/text/stringDecoder.h

@@ -39,8 +39,11 @@ public:
   INLINE bool is_eof();
 
 protected:
+  INLINE bool test_eof();
+
   string _input;
   size_t _p;
+  bool _eof;
 };
 
 ////////////////////////////////////////////////////////////////////

+ 143 - 2
panda/src/text/textFont.cxx

@@ -29,7 +29,7 @@ TypeHandle TextFont::_type_handle;
 //               does not consider newlines to be whitespace.
 ////////////////////////////////////////////////////////////////////
 INLINE bool
-isblank(char ch) {
+isblank(int ch) {
   return (ch == ' ' || ch == '\t');
 }
 
@@ -126,7 +126,7 @@ wordwrap_to(const string &text, float wordwrap_width,
   bool needs_newline = false;
 
   while (p < text.length()) {
-    nassertr(!isspace(text[p]), "");
+    nassertr(!isspace(text[p]), string());
 
     // Scan the next n characters, until the end of the string or an
     // embedded newline character, or we exceed wordwrap_width.
@@ -228,3 +228,144 @@ write(ostream &out, int indent_level) const {
   indent(out, indent_level)
     << "TextFont " << get_name() << "\n";
 }
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextFont::calc_width (wide char)
+//       Access: Public
+//  Description: Sums the individual widths of every character in
+//               the given wide-character line and returns the
+//               total.  The line is expected not to contain a
+//               newline character.
+////////////////////////////////////////////////////////////////////
+float TextFont::
+calc_width(const wstring &line) {
+  float total = 0.0f;
+
+  for (size_t i = 0; i < line.length(); ++i) {
+    total += calc_width(line[i]);
+  }
+
+  return total;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: is_wide_space
+//  Description: Whitespace test that is safe to call on a wide
+//               character.  isspace() is only defined for values
+//               representable as an unsigned char (or EOF), so any
+//               character outside the 7-bit range is reported as
+//               non-whitespace instead of being passed through.
+////////////////////////////////////////////////////////////////////
+static INLINE bool
+is_wide_space(wchar_t ch) {
+  // The unsigned cast turns a negative value (where wchar_t is a
+  // signed type) into a very large number, so this one comparison
+  // rejects both out-of-range ends.
+  return ((unsigned long)ch < 128 && isspace((int)ch));
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextFont::wordwrap_to (wide char)
+//       Access: Public
+//  Description: Inserts newlines into the given text at the
+//               appropriate places in order to make each line be the
+//               longest possible line that is not longer than
+//               wordwrap_width (and does not break any words, if
+//               possible).  Returns the new string.
+//
+//               Leading whitespace and embedded newlines are copied
+//               through unchanged.  If preserve_trailing_whitespace
+//               is true, the blanks that fall at a wrap point are
+//               kept at the end of the wrapped line rather than
+//               being dropped.
+////////////////////////////////////////////////////////////////////
+wstring TextFont::
+wordwrap_to(const wstring &text, float wordwrap_width,
+            bool preserve_trailing_whitespace) {
+  wstring output_text;
+
+  size_t p = 0;
+
+  // Preserve any initial whitespace and newlines.
+  float initial_width = 0.0f;
+  while (p < text.length() && is_wide_space(text[p])) {
+    if (text[p] == '\n') {
+      // A newline starts a fresh line, so the indent begins again.
+      initial_width = 0.0f;
+    } else {
+      initial_width += calc_width(text[p]);
+    }
+    output_text += text[p];
+    p++;
+  }
+  bool needs_newline = false;
+
+  while (p < text.length()) {
+    // Every pass must begin on a non-whitespace character; the
+    // whitespace-skipping loops above and below guarantee this.
+    nassertr(!is_wide_space(text[p]), wstring());
+
+    // Scan the next n characters, until the end of the string or an
+    // embedded newline character, or we exceed wordwrap_width.
+
+    size_t q = p;
+    bool any_spaces = false;
+    bool overflow = false;
+
+    float width = initial_width;
+    while (q < text.length() && text[q] != '\n') {
+      if (is_wide_space(text[q])) {
+        any_spaces = true;
+      }
+
+      width += calc_width(text[q]);
+      q++;
+
+      if (width > wordwrap_width) {
+        // Oops, too many.
+        q--;
+        overflow = true;
+        break;
+      }
+    }
+
+    if (overflow && any_spaces) {
+      // If we stopped because we exceeded the wordwrap width, then
+      // back up to the end of the last complete word.
+      while (q > p && !is_wide_space(text[q])) {
+        q--;
+      }
+    }
+
+    // Skip additional whitespace between the lines.
+    size_t next_start = q;
+    while (next_start < text.length() && isblank(text[next_start])) {
+      next_start++;
+    }
+
+    // Trim off any more blanks on the end.
+    while (q > p && is_wide_space(text[q - 1])) {
+      q--;
+    }
+
+    if (next_start == p) {
+      // No characters got in at all.  This could only happen if the
+      // wordwrap width is narrower than a single character, or if we
+      // have a substantial number of leading spaces in a line.
+      // Force one character through so the loop always advances.
+      q++;
+      next_start++;
+      while (next_start < text.length() && isblank(text[next_start])) {
+        next_start++;
+      }
+    }
+
+    if (needs_newline) {
+      output_text += '\n';
+    }
+    needs_newline = true;
+
+    if (preserve_trailing_whitespace) {
+      // Keep the blanks we skipped over as part of this line.
+      q = next_start;
+    }
+    output_text += text.substr(p, q - p);
+
+    // Now prepare to wrap the next line.
+
+    if (next_start < text.length() && text[next_start] == '\n') {
+      // Preserve a single embedded newline.
+      output_text += '\n';
+      next_start++;
+      needs_newline = false;
+    }
+    p = next_start;
+
+    // Preserve any initial whitespace and newlines.
+    initial_width = 0.0f;
+    while (p < text.length() && is_wide_space(text[p])) {
+      if (text[p] == '\n') {
+        initial_width = 0.0f;
+      } else {
+        initial_width += calc_width(text[p]);
+      }
+      output_text += text[p];
+      p++;
+    }
+  }
+
+  return output_text;
+}

+ 7 - 0
panda/src/text/textFont.h

@@ -29,6 +29,9 @@
 class Node;
 class TextGlyph;
 
+// For some reason, gcc's <string> doesn't define this.
+typedef basic_string<wchar_t> wstring;
+
 ////////////////////////////////////////////////////////////////////
 //       Class : TextFont
 // Description : An encapsulation of a font; i.e. a set of glyphs that
@@ -57,6 +60,10 @@ PUBLISHED:
   virtual void write(ostream &out, int indent_level) const;
 
 public:
+  float calc_width(const wstring &line);
+  wstring wordwrap_to(const wstring &text, float wordwrap_width,
+                     bool preserve_trailing_whitespace);
+
   INLINE float get_space_advance() const;
   virtual bool get_glyph(int character, const TextGlyph *&glyph,
                          float &glyph_scale)=0;

+ 8 - 20
panda/src/text/textNode.I

@@ -118,13 +118,13 @@ get_font() const {
 //               (i.e. ASCII).  Other encodings are possible to take
 //               advantage of character sets with more than 256
 //               characters.
+//
+//               This affects only future calls to set_text(); it does
+//               not change text that was set previously.
 ////////////////////////////////////////////////////////////////////
 INLINE void TextNode::
 set_encoding(TextNode::Encoding encoding) {
-  if (_encoding != encoding) {
-    _encoding = encoding;
-    rebuild(true);
-  }
+  _encoding = encoding;
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -146,16 +146,16 @@ get_encoding() const {
 //               expanded to special characters according to a subset
 //               of the HTML conventions.  When this is false,
 //               ampersands are treated as ordinary characters.
+//
+//               This affects only future calls to set_text(); it does
+//               not change text that was set previously.
 ////////////////////////////////////////////////////////////////////
 INLINE void TextNode::
 set_expand_amp(bool expand_amp) {
-  bool current_expand_amp = get_expand_amp();
-  if (expand_amp && !current_expand_amp) {
+  if (expand_amp) {
     _flags |= F_expand_amp;
-    rebuild(true);
   } else {
     _flags &= ~F_expand_amp;
-    rebuild(true);
   }
 }
 
@@ -1022,18 +1022,6 @@ get_coordinate_system() const {
   return _coordinate_system;
 }
 
-////////////////////////////////////////////////////////////////////
-//     Function: TextNode::set_text
-//       Access: Published
-//  Description: Changes the text that is displayed under the
-//               TextNode.
-////////////////////////////////////////////////////////////////////
-INLINE void TextNode::
-set_text(const string &text) {
-  _text = text;
-  rebuild(true);
-}
-
 ////////////////////////////////////////////////////////////////////
 //     Function: TextNode::clear_text
 //       Access: Published

+ 183 - 167
panda/src/text/textNode.cxx

@@ -78,7 +78,7 @@ TextNode(const string &name) : NamedNode(name) {
   _num_rows = 0;
 
   _freeze_level = 0;
-  _needs_rebuild = false;
+ _needs_rebuild = false;
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -90,6 +90,41 @@ TextNode::
 ~TextNode() {
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: TextNode::set_text
+//       Access: Published
+//  Description: Stores the new text for the TextNode, decodes it
+//               into wide characters according to the encoding
+//               currently in effect, and then rebuilds the node.
+////////////////////////////////////////////////////////////////////
+void TextNode::
+set_text(const string &text) {
+  _text = text;
+
+  if (_encoding == E_utf8) {
+    StringUtf8Decoder utf8_decoder(_text);
+    decode_wtext(utf8_decoder);
+
+  } else if (_encoding == E_unicode) {
+    StringUnicodeDecoder unicode_decoder(_text);
+    decode_wtext(unicode_decoder);
+
+  } else {
+    // E_iso8859, as well as any encoding value not listed above,
+    // falls back to the plain one-byte-per-character decoder.
+    StringDecoder plain_decoder(_text);
+    decode_wtext(plain_decoder);
+  }
+
+  rebuild(true);
+}
+
 
 ////////////////////////////////////////////////////////////////////
 //     Function: TextNode::write
@@ -242,21 +277,18 @@ generate() {
 
   root_arc->set_transition(new TransformTransition(mat));
 
-  string text = _text;
+  wstring wtext = _wtext;
   if (has_wordwrap()) {
-    text = wordwrap_to(text, _wordwrap_width, false);
+    wtext = _font->wordwrap_to(wtext, _wordwrap_width, false);
   }
 
-  StringDecoder *decoder = make_decoder(text);
-
   // Assemble the text.
   LVector2f ul, lr;
   int num_rows = 0;
-  PT_Node text_root = assemble_text(decoder, ul, lr, num_rows);
+  PT_Node text_root = assemble_text(wtext.begin(), wtext.end(), ul, lr, num_rows);
   RenderRelation *text_arc =
     new RenderRelation(sub_root, text_root, _draw_order + 2);
 
-  delete decoder;
 
   if (has_text_color()) {
     text_arc->set_transition(new ColorTransition(_text_color));
@@ -361,6 +393,120 @@ generate() {
   return root;
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: TextNode::decode_wtext
+//       Access: Private
+//  Description: Empties _wtext and refills it with the characters
+//               produced by the indicated decoder, one at a time,
+//               until the decoder reports eof.  When expand_amp is
+//               in effect, each ampersand is replaced by the
+//               character its escape sequence expands to.
+////////////////////////////////////////////////////////////////////
+void TextNode::
+decode_wtext(StringDecoder &decoder) {
+  _wtext.erase(_wtext.begin(), _wtext.end());
+  const bool expand_amp = get_expand_amp();
+
+  for (;;) {
+    wchar_t next = decoder.get_next_character();
+    if (decoder.is_eof()) {
+      // The read that tripped eof did not yield a usable character.
+      break;
+    }
+
+    if (expand_amp && next == '&') {
+      // An ampersand in expand_amp mode is treated as an escape
+      // character.
+      next = expand_amp_sequence(decoder);
+    }
+    _wtext += next;
+  }
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextNode::expand_amp_sequence
+//       Access: Private
+//  Description: Given that we have just read an ampersand from the
+//               StringDecoder, and that we have expand_amp in effect
+//               and are therefore expected to expand the sequence
+//               that this ampersand begins into a single unicode
+//               character, do the expansion and return the character.
+//
+//               Two forms are understood: a decimal number,
+//               "&#nnn;", and a name from the table below,
+//               "&name;".  Returns 0 if the sequence is not
+//               terminated by a semicolon, if the name is not in the
+//               table, or if the number is larger than 0x10ffff.
+////////////////////////////////////////////////////////////////////
+int TextNode::
+expand_amp_sequence(StringDecoder &decoder) {
+  // Largest value accepted from a numeric sequence.  Capping the
+  // accumulator here also guarantees that (result * 10) can never
+  // overflow an int, however many digits the input supplies.
+  static const int max_code_point = 0x10ffff;
+
+  int character = decoder.get_next_character();
+  if (!decoder.is_eof() && character == '#') {
+    // An explicit numeric sequence: &#nnn;
+    int result = 0;
+    bool out_of_range = false;
+    character = decoder.get_next_character();
+    while (!decoder.is_eof() && character < 128 && isdigit(character)) {
+      if (!out_of_range) {
+        result = (result * 10) + (character - '0');
+        out_of_range = (result > max_code_point);
+      }
+      // Keep consuming digits even once out of range, so the decoder
+      // is left just past the sequence as it would be otherwise.
+      character = decoder.get_next_character();
+    }
+    if (character != ';' || out_of_range) {
+      // Invalid sequence.
+      return 0;
+    }
+
+    return result;
+  }
+
+  string sequence;
+
+  // Some non-numeric sequence.
+  while (!decoder.is_eof() && character < 128 && isalpha(character)) {
+    sequence += character;
+    character = decoder.get_next_character();
+  }
+  if (character != ';') {
+    // Invalid sequence.
+    return 0;
+  }
+
+  // Recognized names and the character codes they expand to.  The
+  // list is terminated by an entry with a NULL name.
+  static const struct {
+    const char *name;
+    int code;
+  } tokens[] = {
+    { "amp", '&' }, { "lt", '<' }, { "gt", '>' }, { "quot", '"' },
+    { "nbsp", ' ' /* 160 */ },
+
+    { "iexcl", 161 }, { "cent", 162 }, { "pound", 163 }, { "curren", 164 },
+    { "yen", 165 }, { "brvbar", 166 }, { "brkbar", 166 }, { "sect", 167 },
+    { "uml", 168 }, { "die", 168 }, { "copy", 169 }, { "ordf", 170 },
+    { "laquo", 171 }, { "not", 172 }, { "shy", 173 }, { "reg", 174 },
+    { "macr", 175 }, { "hibar", 175 }, { "deg", 176 }, { "plusmn", 177 },
+    { "sup2", 178 }, { "sup3", 179 }, { "acute", 180 }, { "micro", 181 },
+    { "para", 182 }, { "middot", 183 }, { "cedil", 184 }, { "sup1", 185 },
+    { "ordm", 186 }, { "raquo", 187 }, { "frac14", 188 }, { "frac12", 189 },
+    { "frac34", 190 }, { "iquest", 191 }, { "Agrave", 192 }, { "Aacute", 193 },
+    { "Acirc", 194 }, { "Atilde", 195 }, { "Auml", 196 }, { "Aring", 197 },
+    { "AElig", 198 }, { "Ccedil", 199 }, { "Egrave", 200 }, { "Eacute", 201 },
+    { "Ecirc", 202 }, { "Euml", 203 }, { "Igrave", 204 }, { "Iacute", 205 },
+    { "Icirc", 206 }, { "Iuml", 207 }, { "ETH", 208 }, { "Dstrok", 208 },
+    { "Ntilde", 209 }, { "Ograve", 210 }, { "Oacute", 211 }, { "Ocirc", 212 },
+    { "Otilde", 213 }, { "Ouml", 214 }, { "times", 215 }, { "Oslash", 216 },
+    { "Ugrave", 217 }, { "Uacute", 218 }, { "Ucirc", 219 }, { "Uuml", 220 },
+    { "Yacute", 221 }, { "THORN", 222 }, { "szlig", 223 }, { "agrave", 224 },
+    { "aacute", 225 }, { "acirc", 226 }, { "atilde", 227 }, { "auml", 228 },
+    { "aring", 229 }, { "aelig", 230 }, { "ccedil", 231 }, { "egrave", 232 },
+    { "eacute", 233 }, { "ecirc", 234 }, { "euml", 235 }, { "igrave", 236 },
+    { "iacute", 237 }, { "icirc", 238 }, { "iuml", 239 }, { "eth", 240 },
+    { "ntilde", 241 }, { "ograve", 242 }, { "oacute", 243 }, { "ocirc", 244 },
+    { "otilde", 245 }, { "ouml", 246 }, { "divide", 247 }, { "oslash", 248 },
+    { "ugrave", 249 }, { "uacute", 250 }, { "ucirc", 251 }, { "uuml", 252 },
+    { "yacute", 253 }, { "thorn", 254 }, { "yuml", 255 },
+
+    { NULL, 0 },
+  };
+
+  for (int i = 0; tokens[i].name != NULL; i++) {
+    if (sequence == tokens[i].name) {
+      // Here's a match.
+      return tokens[i].code;
+    }
+  }
+
+  // Some unrecognized sequence.
+  return 0;
+}
+
+
 ////////////////////////////////////////////////////////////////////
 //     Function: TextNode::do_rebuild
 //       Access: Private
@@ -411,18 +557,14 @@ do_measure() {
     return;
   }
 
-  string text = _text;
+  wstring wtext = _wtext;
   if (has_wordwrap()) {
-    text = wordwrap_to(text, _wordwrap_width, false);
+    wtext = _font->wordwrap_to(wtext, _wordwrap_width, false);
   }
 
-  StringDecoder *decoder = make_decoder(text);
-
   LVector2f ul, lr;
   int num_rows = 0;
-  measure_text(decoder, ul, lr, num_rows);
-
-  delete decoder;
+  measure_text(wtext.begin(), wtext.end(), ul, lr, num_rows);
 
   _num_rows = num_rows;
   _ul2d = ul;
@@ -434,28 +576,7 @@ do_measure() {
   _lr3d = _lr3d * _transform;
 }
 
-////////////////////////////////////////////////////////////////////
-//     Function: TextNode::make_decoder
-//       Access: Private
-//  Description: Creates and returns a new StringDecoder suitable for
-//               decoding the given input text, and corresponding to
-//               our input encoding type.  The decoder must be freed
-//               via delete later.
-////////////////////////////////////////////////////////////////////
-StringDecoder *TextNode::
-make_decoder(const string &text) {
-  switch (_encoding) {
-  case E_utf8:
-    return new StringUtf8Decoder(text);
-
-  case E_unicode:
-    return new StringUnicodeDecoder(text);
-
-  case E_iso8859:
-  default:
-    return new StringDecoder(text);
-  };
-}
+#ifndef CPPPARSER  // interrogate has a bit of trouble with wstring.
 
 ////////////////////////////////////////////////////////////////////
 //     Function: TextNode::assemble_row
@@ -467,21 +588,13 @@ make_decoder(const string &text) {
 //               to the terminating character.
 ////////////////////////////////////////////////////////////////////
 float TextNode::
-assemble_row(StringDecoder *decoder, Node *dest) {
+assemble_row(wstring::iterator &si, const wstring::iterator &send, 
+             Node *dest) {
   nassertr(_font != (TextFont *)NULL, 0.0f);
 
   float xpos = 0.0f;
-  bool expand_amp = get_expand_amp();
-  if (decoder->is_eof()) {
-    return xpos;
-  }
-  int character = decoder->get_next_character();
-  while (character != '\n') {
-    if (character == '&' && expand_amp) {
-      // An ampersand in expand_amp mode is treated as an escape
-      // character.
-      character = expand_amp_sequence(decoder);
-    }
+  while (si != send && (*si) != '\n') {
+    wchar_t character = *si;
 
     if (character == ' ') {
       // A space is a special case.
@@ -523,10 +636,7 @@ assemble_row(StringDecoder *decoder, Node *dest) {
         xpos += glyph->get_advance() * glyph_scale;
       }
     }
-    if (decoder->is_eof()) {
-      return xpos;
-    }
-    character = decoder->get_next_character();
+    ++si;
   }
 
   return xpos;
@@ -540,8 +650,8 @@ assemble_row(StringDecoder *decoder, Node *dest) {
 //               returns it.  Also sets the ul, lr corners.
 ////////////////////////////////////////////////////////////////////
 Node *TextNode::
-assemble_text(StringDecoder *decoder, LVector2f &ul, LVector2f &lr,
-              int &num_rows) {
+assemble_text(wstring::iterator si, const wstring::iterator &send,
+              LVector2f &ul, LVector2f &lr, int &num_rows) {
   nassertr(_font != (TextFont *)NULL, (Node *)NULL);
   float line_height = get_line_height();
 
@@ -553,13 +663,17 @@ assemble_text(StringDecoder *decoder, LVector2f &ul, LVector2f &lr,
 
   float posy = 0.0f;
   int row_index = 0;
-  while (!decoder->is_eof()) {
+  while (si != send) {
     char numstr[20];
     sprintf(numstr, "row%d", row_index);
     nassertr(strlen(numstr) < 20, root_node);
 
     Node *row = new NamedNode(numstr);
-    float row_width = assemble_row(decoder, row);
+    float row_width = assemble_row(si, send, row);
+    if (si != send) {
+      // Skip past the newline.
+      ++si;
+    }
 
     LMatrix4f mat = LMatrix4f::ident_mat();
     if (_align == A_left) {
@@ -607,21 +721,10 @@ assemble_text(StringDecoder *decoder, LVector2f &ul, LVector2f &lr,
 //               it.
 ////////////////////////////////////////////////////////////////////
 float TextNode::
-measure_row(StringDecoder *decoder) {
-  nassertr(_font != (TextFont *)NULL, 0.0f);
-
+measure_row(wstring::iterator &si, const wstring::iterator &send) {
   float xpos = 0.0f;
-  bool expand_amp = get_expand_amp();
-  if (decoder->is_eof()) {
-    return xpos;
-  }
-  int character = decoder->get_next_character();
-  while (character != '\n') {
-    if (character == '&' && expand_amp) {
-      // An ampersand in expand_amp mode is treated as an escape
-      // character.
-      character = expand_amp_sequence(decoder);
-    }
+  while (si != send && *si != '\n') {
+    wchar_t character = *si;
 
     if (character == ' ') {
       // A space is a special case.
@@ -636,10 +739,7 @@ measure_row(StringDecoder *decoder) {
         xpos += glyph->get_advance() * glyph_scale;
       }
     }
-    if (decoder->is_eof()) {
-      return xpos;
-    }
-    character = decoder->get_next_character();
+    ++si;
   }
 
   return xpos;
@@ -652,8 +752,8 @@ measure_row(StringDecoder *decoder) {
 //               actually assembling it.
 ////////////////////////////////////////////////////////////////////
 void TextNode::
-measure_text(StringDecoder *decoder, LVector2f &ul, LVector2f &lr,
-             int &num_rows) {
+measure_text(wstring::iterator si, const wstring::iterator &send,
+             LVector2f &ul, LVector2f &lr, int &num_rows) {
   nassertv(_font != (TextFont *)NULL);
   float line_height = get_line_height();
 
@@ -661,8 +761,12 @@ measure_text(StringDecoder *decoder, LVector2f &ul, LVector2f &lr,
   lr.set(0.0f, 0.0f);
 
   float posy = 0.0f;
-  while (!decoder->is_eof()) {
-    float row_width = measure_row(decoder);
+  while (si != send) {
+    float row_width = measure_row(si, send);
+    if (si != send) {
+      // Skip past the newline.
+      ++si;
+    }
 
     if (_align == A_left) {
       lr[0] = max(lr[0], row_width);
@@ -683,95 +787,7 @@ measure_text(StringDecoder *decoder, LVector2f &ul, LVector2f &lr,
 
   lr[1] = posy + 0.8f * line_height;
 }
-
-////////////////////////////////////////////////////////////////////
-//     Function: TextNode::expand_amp_sequence
-//       Access: Private
-//  Description: Given that we have just read an ampersand from the
-//               StringDecoder, and that we have expand_amp in effect
-//               and are therefore expected to expand the sequence
-//               that this ampersand begins into a single unicode
-//               character, do the expansion and return the character.
-////////////////////////////////////////////////////////////////////
-int TextNode::
-expand_amp_sequence(StringDecoder *decoder) {
-  int result = 0;
-
-  int character = decoder->get_next_character();
-  if (character == '#') {
-    // An explicit numeric sequence: &#nnn;
-    result = 0;
-    character = decoder->get_next_character();
-    while (!decoder->is_eof() && character < 128 && isdigit(character)) {
-      result = (result * 10) + (character - '0');
-      character = decoder->get_next_character();
-    }
-    if (character != ';') {
-      // Invalid sequence.
-      return 0;
-    }
-
-    return result;
-  }
-
-  string sequence;
-  
-  // Some non-numeric sequence.
-  while (!decoder->is_eof() && character < 128 && isalpha(character)) {
-    sequence += character;
-    character = decoder->get_next_character();
-  }
-  if (character != ';') {
-    // Invalid sequence.
-    return 0;
-  }
-
-  static const struct {
-    const char *name;
-    int code;
-  } tokens[] = {
-    { "amp", '&' }, { "lt", '<' }, { "gt", '>' }, { "quot", '"' },
-    { "nbsp", ' ' /* 160 */ },
-
-    { "iexcl", 161 }, { "cent", 162 }, { "pound", 163 }, { "curren", 164 },
-    { "yen", 165 }, { "brvbar", 166 }, { "brkbar", 166 }, { "sect", 167 },
-    { "uml", 168 }, { "die", 168 }, { "copy", 169 }, { "ordf", 170 },
-    { "laquo", 171 }, { "not", 172 }, { "shy", 173 }, { "reg", 174 },
-    { "macr", 175 }, { "hibar", 175 }, { "deg", 176 }, { "plusmn", 177 },
-    { "sup2", 178 }, { "sup3", 179 }, { "acute", 180 }, { "micro", 181 },
-    { "para", 182 }, { "middot", 183 }, { "cedil", 184 }, { "sup1", 185 },
-    { "ordm", 186 }, { "raquo", 187 }, { "frac14", 188 }, { "frac12", 189 },
-    { "frac34", 190 }, { "iquest", 191 }, { "Agrave", 192 }, { "Aacute", 193 },
-    { "Acirc", 194 }, { "Atilde", 195 }, { "Auml", 196 }, { "Aring", 197 },
-    { "AElig", 198 }, { "Ccedil", 199 }, { "Egrave", 200 }, { "Eacute", 201 },
-    { "Ecirc", 202 }, { "Euml", 203 }, { "Igrave", 204 }, { "Iacute", 205 },
-    { "Icirc", 206 }, { "Iuml", 207 }, { "ETH", 208 }, { "Dstrok", 208 },
-    { "Ntilde", 209 }, { "Ograve", 210 }, { "Oacute", 211 }, { "Ocirc", 212 },
-    { "Otilde", 213 }, { "Ouml", 214 }, { "times", 215 }, { "Oslash", 216 },
-    { "Ugrave", 217 }, { "Uacute", 218 }, { "Ucirc", 219 }, { "Uuml", 220 },
-    { "Yacute", 221 }, { "THORN", 222 }, { "szlig", 223 }, { "agrave", 224 },
-    { "aacute", 225 }, { "acirc", 226 }, { "atilde", 227 }, { "auml", 228 },
-    { "aring", 229 }, { "aelig", 230 }, { "ccedil", 231 }, { "egrave", 232 },
-    { "eacute", 233 }, { "ecirc", 234 }, { "euml", 235 }, { "igrave", 236 },
-    { "iacute", 237 }, { "icirc", 238 }, { "iuml", 239 }, { "eth", 240 },
-    { "ntilde", 241 }, { "ograve", 242 }, { "oacute", 243 }, { "ocirc", 244 },
-    { "otilde", 245 }, { "ouml", 246 }, { "divide", 247 }, { "oslash", 248 },
-    { "ugrave", 249 }, { "uacute", 250 }, { "ucirc", 251 }, { "uuml", 252 },
-    { "yacute", 253 }, { "thorn", 254 }, { "yuml", 255 },
-
-    { NULL, 0 },
-  };
-
-  for (int i = 0; tokens[i].name != NULL; i++) {
-    if (sequence == tokens[i].name) {
-      // Here's a match.
-      return tokens[i].code;
-    }
-  }
-
-  // Some unrecognized sequence.
-  return 0;
-}
+#endif  // CPPPARSER
 
 ////////////////////////////////////////////////////////////////////
 //     Function: TextNode::make_frame

+ 14 - 11
panda/src/text/textNode.h

@@ -186,7 +186,7 @@ PUBLISHED:
   INLINE void set_coordinate_system(CoordinateSystem cs);
   INLINE CoordinateSystem get_coordinate_system() const;
 
-  INLINE void set_text(const string &str);
+  void set_text(const string &str);
   INLINE void clear_text();
   INLINE bool has_text() const;
   INLINE string get_text() const;
@@ -218,19 +218,21 @@ PUBLISHED:
   PT_Node generate();
 
 private:
+  void decode_wtext(StringDecoder &decoder);
+  int expand_amp_sequence(StringDecoder &decoder);
+
   void do_rebuild();
   void do_measure();
 
-  StringDecoder *make_decoder(const string &text);
-
-  float assemble_row(StringDecoder *decoder, Node *dest);
-  Node *assemble_text(StringDecoder *decoder, LVector2f &ul, LVector2f &lr,
-                      int &num_rows);
-  float measure_row(StringDecoder *decoder);
-  void measure_text(StringDecoder *decoder, LVector2f &ul, LVector2f &lr,
-                    int &num_rows);
-
-  int expand_amp_sequence(StringDecoder *decoder);
+#ifndef CPPPARSER  // interrogate has a bit of trouble with wstring.
+  float assemble_row(wstring::iterator &si, const wstring::iterator &send, 
+                     Node *dest);
+  Node *assemble_text(wstring::iterator si, const wstring::iterator &send,
+                      LVector2f &ul, LVector2f &lr, int &num_rows);
+  float measure_row(wstring::iterator &si, const wstring::iterator &send);
+  void measure_text(wstring::iterator si, const wstring::iterator &send,
+                    LVector2f &ul, LVector2f &lr, int &num_rows);
+#endif  // CPPPARSER
 
   Node *make_frame();
   Node *make_card();
@@ -280,6 +282,7 @@ private:
   CoordinateSystem _coordinate_system;
 
   string _text;
+  wstring _wtext;
 
   LPoint2f _ul2d, _lr2d;
   LPoint3f _ul3d, _lr3d;