2
0
Эх сурвалжийг харах

split out TextEncoder from TextNode

David Rose 23 жил өмнө
parent
commit
38db0b7a33

+ 11 - 1
panda/src/express/Sources.pp

@@ -47,7 +47,9 @@
     reversedNumericData.I reversedNumericData.h \
     selectThreadImpl.h \
     streamReader.I streamReader.h streamWriter.I streamWriter.h \
+    stringDecoder.h stringDecoder.I \
     subStream.I subStream.h subStreamBuf.h \
+    textEncoder.h textEncoder.I \
     threadDummyImpl.h threadDummyImpl.I thread.h thread.I threadImpl.h \
     threadNsprImpl.h threadNsprImpl.I threadPriority.h \
     tokenBoard.I \
@@ -56,6 +58,7 @@
     typedReferenceCount.I typedReferenceCount.h typedef.h \
     typeRegistry.I typeRegistry.h \
     typeRegistryNode.I typeRegistryNode.h \
+    unicodeLatinMap.h \
     vector_uchar.h \
     virtualFileComposite.h virtualFileComposite.I virtualFile.h \
     virtualFile.I virtualFileList.I virtualFileList.h virtualFileMount.h \
@@ -87,11 +90,15 @@
     pta_uchar.cxx referenceCount.cxx register_type.cxx \
     reversedNumericData.cxx \
     streamReader.cxx streamWriter.cxx \
+    stringDecoder.cxx \
     subStream.cxx subStreamBuf.cxx \
+    textEncoder.cxx \
     thread.cxx threadDummyImpl.cxx threadNsprImpl.cxx \
     trueClock.cxx typeHandle.cxx \
     typedObject.cxx typedReferenceCount.cxx \
-    typeRegistry.cxx typeRegistryNode.cxx vector_uchar.cxx \
+    typeRegistry.cxx typeRegistryNode.cxx \
+    unicodeLatinMap.cxx \
+    vector_uchar.cxx \
     virtualFileComposite.cxx virtualFile.cxx virtualFileList.cxx \
     virtualFileMount.cxx \
     virtualFileMountMultifile.cxx virtualFileMountSystem.cxx \
@@ -132,7 +139,9 @@
     reversedNumericData.I reversedNumericData.h \
     selectThreadImpl.h \
     streamReader.I streamReader.h streamWriter.I streamWriter.h \
+    stringDecoder.h stringDecoder.I \
     subStream.I subStream.h subStreamBuf.h \
+    textEncoder.h textEncoder.I \
     threadDummyImpl.h threadDummyImpl.I thread.h thread.I threadImpl.h \
     threadNsprImpl.h threadNsprImpl.I threadPriority.h \
     tokenBoard.I \
@@ -141,6 +150,7 @@
     typedReferenceCount.h typedef.h \
     typeRegistry.I typeRegistry.h \
     typeRegistryNode.I typeRegistryNode.h \
+    unicodeLatinMap.h \
     vector_uchar.h \
     virtualFileComposite.h virtualFileComposite.I virtualFile.h \
     virtualFile.I virtualFileList.I virtualFileList.h virtualFileMount.h \

+ 33 - 0
panda/src/express/config_express.cxx

@@ -20,6 +20,7 @@
 #include "config_express.h"
 #include "datagram.h"
 #include "referenceCount.h"
+#include "textEncoder.h"
 #include "thread.h"
 #include "typedObject.h"
 #include "typedReferenceCount.h"
@@ -39,8 +40,28 @@ NotifyCategoryDef(thread, "");
 extern void init_system_type_handles();
 
 ConfigureFn(config_express) {
+  init_libexpress();
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: init_libexpress
+//  Description: Initializes the library.  This must be called at
+//               least once before any of the functions or classes in
+//               this library can be used.  Normally it will be
+//               called by the static initializers and need not be
+//               called explicitly, but special cases exist.
+////////////////////////////////////////////////////////////////////
+void
+init_libexpress() {
+  static bool initialized = false;
+  if (initialized) {
+    return;
+  }
+  initialized = true;
+
   Datagram::init_type();
   ReferenceCount::init_type();
+  TextEncoder::init_type();
   Thread::init_type();
   TypedObject::init_type();
   TypedReferenceCount::init_type();
@@ -52,6 +73,18 @@ ConfigureFn(config_express) {
   VirtualFileSimple::init_type();
 
   init_system_type_handles();
+
+  string text_encoding = config_express.GetString("text-encoding", "iso8859");
+  if (text_encoding == "iso8859") {
+    TextEncoder::set_default_encoding(TextEncoder::E_iso8859);
+  } else if (text_encoding == "utf8") {
+    TextEncoder::set_default_encoding(TextEncoder::E_utf8);
+  } else if (text_encoding == "unicode") {
+    TextEncoder::set_default_encoding(TextEncoder::E_unicode);
+  } else {
+    express_cat.error()
+      << "Invalid text-encoding: " << text_encoding << "\n";
+  }
 }
 
 

+ 2 - 0
panda/src/express/config_express.h

@@ -56,4 +56,6 @@ typedef Config::Config<ConfigureGetConfig_config_express> ConfigExpress;
 EXPCL_PANDAEXPRESS ConfigExpress &get_config_express();
 END_PUBLISH
 
+extern EXPCL_PANDAEXPRESS void init_libexpress();
+
 #endif /* __CONFIG_UTIL_H__ */

+ 3 - 0
panda/src/express/express_composite2.cxx

@@ -2,8 +2,10 @@
 #include "reversedNumericData.cxx"
 #include "streamReader.cxx"
 #include "streamWriter.cxx"
+#include "stringDecoder.cxx"
 #include "subStream.cxx"
 #include "subStreamBuf.cxx"
+#include "textEncoder.cxx"
 #include "thread.cxx"
 #include "threadDummyImpl.cxx"
 #include "threadNsprImpl.cxx"
@@ -13,6 +15,7 @@
 #include "typedReferenceCount.cxx"
 #include "typeRegistry.cxx"
 #include "typeRegistryNode.cxx"
+#include "unicodeLatinMap.cxx"
 #include "vector_uchar.cxx"
 #include "virtualFile.cxx"
 #include "virtualFileComposite.cxx"

+ 0 - 0
panda/src/text/stringDecoder.I → panda/src/express/stringDecoder.I


+ 6 - 6
panda/src/text/stringDecoder.cxx → panda/src/express/stringDecoder.cxx

@@ -17,7 +17,7 @@
 ////////////////////////////////////////////////////////////////////
 
 #include "stringDecoder.h"
-#include "config_text.h"
+#include "config_express.h"
 
 ////////////////////////////////////////////////////////////////////
 //     Function: StringDecoder::Destructor
@@ -83,7 +83,7 @@ get_next_character() {
       // First byte of two.
       unsigned int two = 0;
       if (test_eof()) {
-        text_cat.warning()
+        express_cat.warning()
           << "utf-8 encoded string ends abruptly.\n";
         return -1;
       }
@@ -94,13 +94,13 @@ get_next_character() {
     } else if ((result & 0xf0) == 0xe0) {
       // First byte of three.
       if (test_eof()) {
-        text_cat.warning()
+        express_cat.warning()
           << "utf-8 encoded string ends abruptly.\n";
         return -1;
       }
       unsigned int two = (unsigned char)_input[_p++];
       if (test_eof()) {
-        text_cat.warning()
+        express_cat.warning()
           << "utf-8 encoded string ends abruptly.\n";
         return -1;
       }
@@ -111,7 +111,7 @@ get_next_character() {
 
     // Otherwise--the high bit is set but it is not one of the
     // introductory utf-8 bytes--we have an error.
-    text_cat.warning()
+    express_cat.warning()
       << "Non utf-8 byte in string: 0x" << hex << result << dec << "\n";
   }
 
@@ -132,7 +132,7 @@ get_next_character() {
 
   unsigned int high = (unsigned char)_input[_p++];
   if (test_eof()) {
-    text_cat.warning()
+    express_cat.warning()
       << "Unicode-encoded string has odd number of bytes.\n";
     return -1;
   }

+ 0 - 0
panda/src/text/stringDecoder.h → panda/src/express/stringDecoder.h


+ 353 - 0
panda/src/express/textEncoder.I

@@ -0,0 +1,353 @@
+// Filename: textEncoder.I
+// Created by:  drose (26Mar03)
+//
+////////////////////////////////////////////////////////////////////
+//
+// PANDA 3D SOFTWARE
+// Copyright (c) 2001, Disney Enterprises, Inc.  All rights reserved
+//
+// All use of this software is subject to the terms of the Panda 3d
+// Software license.  You should have received a copy of this license
+// along with this source code; you will also find a current copy of
+// the license at http://www.panda3d.org/license.txt .
+//
+// To contact the maintainers of this program write to
+// [email protected] .
+//
+////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextEncoder::Constructor
+//       Access: Published
+//  Description: Creates an encoder that starts out with the value of
+//               _default_encoding as its encoding, and with both the
+//               encoded and the wide-character strings marked valid.
+////////////////////////////////////////////////////////////////////
+INLINE TextEncoder::
+TextEncoder() {
+  // Nothing has been stored yet, so _text and _wtext are both empty
+  // and therefore both already describe the current (empty) text.
+  // Flag the two of them as up to date.
+  _flags = (F_got_text | F_got_wtext);
+
+  _encoding = _default_encoding;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextEncoder::set_encoding
+//       Access: Published
+//  Description: Changes the encoding this object uses to convert
+//               between the encoded string (set_text() / get_text())
+//               and the wide-character string (set_wtext() /
+//               get_wtext()).
+//
+//               Whatever text is already stored is first converted
+//               in both directions using the previous encoding, so
+//               the stored text itself is not reinterpreted by this
+//               call; only later conversions use the new encoding.
+////////////////////////////////////////////////////////////////////
+INLINE void TextEncoder::
+set_encoding(TextEncoder::Encoding encoding) {
+  // Bring both cached representations up to date while the old
+  // encoding is still in effect; the returned values are not needed.
+  (void)get_text();
+  (void)get_wtext();
+
+  _encoding = encoding;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextEncoder::get_encoding
+//       Access: Published
+//  Description: Returns the encoding currently stored on this
+//               object, as last assigned by the constructor or by
+//               set_encoding().
+////////////////////////////////////////////////////////////////////
+INLINE TextEncoder::Encoding TextEncoder::
+get_encoding() const {
+  return this->_encoding;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextEncoder::set_default_encoding
+//       Access: Published, Static
+//  Description: Stores the encoding that the TextEncoder constructor
+//               copies into each newly created object.  Objects
+//               that already exist are not touched.  See
+//               set_encoding().
+////////////////////////////////////////////////////////////////////
+INLINE void TextEncoder::
+set_default_encoding(TextEncoder::Encoding encoding) {
+  TextEncoder::_default_encoding = encoding;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextEncoder::get_default_encoding
+//       Access: Published, Static
+//  Description: Returns the default encoding that is given to all
+//               subsequently created TextEncoder objects.  See
+//               set_default_encoding().
+////////////////////////////////////////////////////////////////////
+INLINE TextEncoder::Encoding TextEncoder::
+get_default_encoding() {
+  return TextEncoder::_default_encoding;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextEncoder::set_text
+//       Access: Published
+//  Description: Changes the text that is stored in the encoder.  The
+//               text should be encoded according to the method
+//               indicated by set_encoding().  Subsequent calls to
+//               get_text() will return this same string, while
+//               get_wtext() will return the decoded version of the
+//               string.
+//
+//               The assignment is skipped only when the encoded
+//               string currently held is valid and already equal to
+//               the new text.
+////////////////////////////////////////////////////////////////////
+INLINE void TextEncoder::
+set_text(const string &text) {
+  // _text is meaningful only while F_got_text is set.  After
+  // set_wtext(), append_wtext() or append_unicode_char() it can still
+  // hold an older string, and comparing against that leftover would
+  // wrongly conclude that nothing changed and drop this assignment.
+  const bool encoded_is_current = ((_flags & F_got_text) != 0);
+
+  if (!has_text() || !encoded_is_current || _text != text) {
+    _text = text;
+    // The wide-character copy no longer matches; it is decoded again
+    // on demand by get_wtext().
+    _flags = (_flags | F_got_text) & ~F_got_wtext;
+  }
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextEncoder::set_text
+//       Access: Published
+//  Description: This overload of set_text() takes the encoding of
+//               the supplied string explicitly.  The string is
+//               decoded with that encoding right away and the result
+//               is stored through set_wtext(); a later get_text()
+//               re-encodes it with the encoding stored on this
+//               object (see set_encoding()).
+////////////////////////////////////////////////////////////////////
+INLINE void TextEncoder::
+set_text(const string &text, TextEncoder::Encoding encoding) {
+  const wstring decoded = decode_text(text, encoding);
+  set_wtext(decoded);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextEncoder::clear_text
+//       Access: Published
+//  Description: Empties both the encoded and the wide-character
+//               strings held by the TextEncoder and marks both of
+//               them as valid.
+////////////////////////////////////////////////////////////////////
+INLINE void TextEncoder::
+clear_text() {
+  // Two empty strings trivially agree with each other, so both
+  // representations can be flagged as up to date.
+  _flags = _flags | F_got_text | F_got_wtext;
+  _wtext = wstring();
+  _text = string();
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextEncoder::has_text
+//       Access: Published
+//  Description: Returns true if the stored text is nonempty, false
+//               if it is empty.  The wide-character string is
+//               consulted when it is valid; otherwise the encoded
+//               string is consulted.
+////////////////////////////////////////////////////////////////////
+INLINE bool TextEncoder::
+has_text() const {
+  const bool wide_is_current = ((_flags & F_got_wtext) != 0);
+  return wide_is_current ? !_wtext.empty() : !_text.empty();
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextEncoder::get_text
+//       Access: Published
+//  Description: Returns the stored text as an encoded string, using
+//               the encoding stored on this object.  When the
+//               encoded copy is not valid it is first regenerated
+//               from the wide-character string and cached.
+////////////////////////////////////////////////////////////////////
+INLINE string TextEncoder::
+get_text() const {
+  if ((_flags & F_got_text) != 0) {
+    // The cached encoded string is already valid.
+    return _text;
+  }
+
+  // Filling in the cache does not change the logical value of the
+  // object, so it is done through a non-const view of this.
+  TextEncoder *self = (TextEncoder *)this;
+  self->_text = encode_wtext(_wtext);
+  self->_flags |= F_got_text;
+  return _text;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextEncoder::get_text
+//       Access: Published
+//  Description: Returns the stored text encoded with the encoding
+//               passed in, rather than with the encoding stored on
+//               this object.
+////////////////////////////////////////////////////////////////////
+INLINE string TextEncoder::
+get_text(TextEncoder::Encoding encoding) const {
+  const wstring &wide = get_wtext();
+  return encode_wtext(wide, encoding);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextEncoder::append_text
+//       Access: Published
+//  Description: Appends the indicated string to the end of the
+//               stored encoded text.
+////////////////////////////////////////////////////////////////////
+INLINE void TextEncoder::
+append_text(const string &text) {
+  // get_text() makes sure the existing contents are expressed in
+  // encoded form before the new piece is added.
+  string combined = get_text();
+  combined += text;
+  _text = combined;
+
+  // The wide-character copy is now out of date.
+  _flags = (_flags | F_got_text) & ~F_got_wtext;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextEncoder::append_unicode_char
+//       Access: Published
+//  Description: Appends one character, given as an integer that is
+//               converted to wchar_t, to the end of the stored
+//               wide-character text.
+////////////////////////////////////////////////////////////////////
+INLINE void TextEncoder::
+append_unicode_char(int character) {
+  // get_wtext() makes sure the existing contents are expressed in
+  // wide-character form before the new character is added.
+  wstring combined = get_wtext();
+  combined += (wchar_t)character;
+  _wtext = combined;
+
+  // The encoded copy is now out of date.
+  _flags = (_flags | F_got_wtext) & ~F_got_text;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextEncoder::get_num_chars
+//       Access: Published
+//  Description: Returns the length of the stored text measured in
+//               wide characters, i.e. the length of the string that
+//               get_wtext() returns.
+////////////////////////////////////////////////////////////////////
+INLINE int TextEncoder::
+get_num_chars() const {
+  const wstring &wide = get_wtext();
+  return wide.length();
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextEncoder::get_unicode_char
+//       Access: Published
+//  Description: Returns the value of the wide character at the given
+//               position of the string that get_wtext() returns.
+//               The index is checked with nassertr(), with 0 as the
+//               value supplied for the failure case.
+////////////////////////////////////////////////////////////////////
+INLINE int TextEncoder::
+get_unicode_char(int index) const {
+  const wstring &wide = get_wtext();
+  nassertr(index >= 0 && index < (int)wide.length(), 0);
+  return wide[index];
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextEncoder::get_encoded_char
+//       Access: Published
+//  Description: Returns the nth character of the stored text as an
+//               encoded string, using the encoding stored on this
+//               object.
+////////////////////////////////////////////////////////////////////
+INLINE string TextEncoder::
+get_encoded_char(int index) const {
+  return get_encoded_char(index, _encoding);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextEncoder::get_encoded_char
+//       Access: Published
+//  Description: Returns the nth character of the stored text as an
+//               encoded string, using the encoding passed in.
+////////////////////////////////////////////////////////////////////
+INLINE string TextEncoder::
+get_encoded_char(int index, TextEncoder::Encoding encoding) const {
+  const wchar_t wide_char = (wchar_t)get_unicode_char(index);
+  return encode_wtext(wstring(1, wide_char), encoding);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextEncoder::get_text_as_ascii
+//       Access: Published
+//  Description: Returns the stored text with as many characters as
+//               possible replaced by ASCII equivalents (see
+//               get_wtext_as_ascii()), and the result encoded with
+//               the encoding stored on this object.
+//
+//               Characters for which get_wtext_as_ascii() finds no
+//               ASCII equivalent are left as they are, and so come
+//               out encoded according to set_encoding().
+////////////////////////////////////////////////////////////////////
+INLINE string TextEncoder::
+get_text_as_ascii() const {
+  const wstring ascii_wide = get_wtext_as_ascii();
+  return encode_wtext(ascii_wide, _encoding);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextEncoder::reencode_text
+//       Access: Published, Static
+//  Description: Converts a string from one encoding to another: the
+//               text is decoded using the encoding "from", and the
+//               resulting wide-character string is encoded again
+//               using the encoding "to".  Being static, this neither
+//               reads nor modifies any TextEncoder object.
+////////////////////////////////////////////////////////////////////
+INLINE string TextEncoder::
+reencode_text(const string &text, TextEncoder::Encoding from,
+              TextEncoder::Encoding to) {
+  const wstring decoded = decode_text(text, from);
+  return encode_wtext(decoded, to);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextEncoder::set_wtext
+//       Access: Public
+//  Description: Changes the text that is stored in the encoder.
+//               Subsequent calls to get_wtext() will return this same
+//               string, while get_text() will return the encoded
+//               version of the string.
+//
+//               The assignment is skipped only when the
+//               wide-character string currently held is valid and
+//               already equal to the new text.
+////////////////////////////////////////////////////////////////////
+INLINE void TextEncoder::
+set_wtext(const wstring &wtext) {
+  // _wtext is meaningful only while F_got_wtext is set.  After
+  // set_text() or append_text() it can still hold an older string,
+  // and comparing against that leftover would wrongly conclude that
+  // nothing changed and drop this assignment.
+  const bool wide_is_current = ((_flags & F_got_wtext) != 0);
+
+  if (!has_text() || !wide_is_current || _wtext != wtext) {
+    _wtext = wtext;
+    // The encoded copy no longer matches; it is encoded again on
+    // demand by get_text().
+    _flags = (_flags | F_got_wtext) & ~F_got_text;
+  }
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextEncoder::get_wtext
+//       Access: Public
+//  Description: Returns the stored text as a wide-character string.
+//               When the wide-character copy is not valid it is
+//               first regenerated by decoding the encoded string
+//               with the encoding stored on this object, and cached.
+////////////////////////////////////////////////////////////////////
+INLINE const wstring &TextEncoder::
+get_wtext() const {
+  if ((_flags & F_got_wtext) != 0) {
+    // The cached wide-character string is already valid.
+    return _wtext;
+  }
+
+  // Filling in the cache does not change the logical value of the
+  // object, so it is done through a non-const view of this.
+  TextEncoder *self = (TextEncoder *)this;
+  self->_wtext = decode_text(_text);
+  self->_flags |= F_got_wtext;
+  return _wtext;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextEncoder::append_wtext
+//       Access: Public
+//  Description: Appends the indicated string to the end of the
+//               stored wide-character text.
+////////////////////////////////////////////////////////////////////
+INLINE void TextEncoder::
+append_wtext(const wstring &wtext) {
+  // get_wtext() makes sure the existing contents are expressed in
+  // wide-character form before the new piece is added.
+  wstring combined = get_wtext();
+  combined += wtext;
+  _wtext = combined;
+
+  // The encoded copy is now out of date.
+  _flags = (_flags | F_got_wtext) & ~F_got_text;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextEncoder::encode_wtext
+//       Access: Public
+//  Description: Encodes the given wide-character string into a
+//               string of single chars, using the encoding stored on
+//               this object.
+////////////////////////////////////////////////////////////////////
+INLINE string TextEncoder::
+encode_wtext(const wstring &wtext) const {
+  return encode_wtext(wtext, get_encoding());
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextEncoder::decode_text
+//       Access: Public
+//  Description: Decodes the given encoded string into a
+//               wide-character string, using the encoding stored on
+//               this object.
+////////////////////////////////////////////////////////////////////
+INLINE wstring TextEncoder::
+decode_text(const string &text) const {
+  return decode_text(text, get_encoding());
+}

+ 289 - 0
panda/src/express/textEncoder.cxx

@@ -0,0 +1,289 @@
+// Filename: textEncoder.cxx
+// Created by:  drose (26Mar03)
+//
+////////////////////////////////////////////////////////////////////
+//
+// PANDA 3D SOFTWARE
+// Copyright (c) 2001, Disney Enterprises, Inc.  All rights reserved
+//
+// All use of this software is subject to the terms of the Panda 3d
+// Software license.  You should have received a copy of this license
+// along with this source code; you will also find a current copy of
+// the license at http://www.panda3d.org/license.txt .
+//
+// To contact the maintainers of this program write to
+// [email protected] .
+//
+////////////////////////////////////////////////////////////////////
+
+#include "textEncoder.h"
+#include "stringDecoder.h"
+#include "unicodeLatinMap.h"
+
+TypeHandle TextEncoder::_type_handle;
+TextEncoder::Encoding TextEncoder::_default_encoding;
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextEncoder::get_wtext_as_ascii
+//       Access: Published
+//  Description: Returns the stored wide-character text with every
+//               character that UnicodeLatinMap knows an ASCII
+//               equivalent for replaced by that equivalent, which
+//               may be one or two characters long.
+//
+//               Characters with no entry in the map, or whose entry
+//               has no ASCII equivalent, are copied to the result
+//               unchanged, so the returned string is not guaranteed
+//               to be pure ASCII.
+////////////////////////////////////////////////////////////////////
+wstring TextEncoder::
+get_wtext_as_ascii() const {
+  const wstring &source = get_wtext();
+  wstring ascii;
+
+  for (wstring::size_type i = 0; i < source.length(); ++i) {
+    const wchar_t ch = source[i];
+    const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(ch);
+
+    if (entry == NULL || entry->_ascii_equiv == 0) {
+      // No ASCII equivalent is known; keep the character as it is.
+      ascii += ch;
+      continue;
+    }
+
+    ascii += (wchar_t)entry->_ascii_equiv;
+    if (entry->_ascii_additional != 0) {
+      // Some characters expand to a pair of ASCII characters.
+      ascii += (wchar_t)entry->_ascii_additional;
+    }
+  }
+
+  return ascii;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextEncoder::encode_wchar
+//       Access: Public, Static
+//  Description: Encodes one wide character into a short string of
+//               bytes according to the given encoding:
+//
+//               E_iso8859 - one byte for values below 0x100;
+//               otherwise the one- or two-character ASCII equivalent
+//               from UnicodeLatinMap, or "." if there is none.
+//
+//               E_utf8 - one byte below 0x80, two bytes below 0x800,
+//               three bytes for everything else.
+//
+//               E_unicode - two bytes, high-order byte first.
+//
+//               Any other encoding value yields an empty string.
+////////////////////////////////////////////////////////////////////
+string TextEncoder::
+encode_wchar(wchar_t ch, TextEncoder::Encoding encoding) {
+  string encoded;
+
+  if (encoding == E_iso8859) {
+    if (ch < 0x100) {
+      encoded += (char)ch;
+      return encoded;
+    }
+
+    // Too large for a single byte.  Try to fall back on the ASCII
+    // equivalent of the character, which in effect strips off an
+    // unusual accent mark.
+    const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(ch);
+    if (entry == NULL || entry->_ascii_equiv == 0) {
+      // Nothing suitable is known; use "." as a placeholder.
+      return ".";
+    }
+
+    encoded += string(1, entry->_ascii_equiv);
+    if (entry->_ascii_additional != 0) {
+      // The equivalent is a pair of ASCII characters.
+      encoded += string(1, entry->_ascii_additional);
+    }
+    return encoded;
+  }
+
+  if (encoding == E_utf8) {
+    if (ch < 0x80) {
+      encoded += (char)ch;
+    } else if (ch < 0x800) {
+      encoded += (char)((ch >> 6) | 0xc0);
+      encoded += (char)((ch & 0x3f) | 0x80);
+    } else {
+      encoded += (char)((ch >> 12) | 0xe0);
+      encoded += (char)(((ch >> 6) & 0x3f) | 0x80);
+      encoded += (char)((ch & 0x3f) | 0x80);
+    }
+    return encoded;
+  }
+
+  if (encoding == E_unicode) {
+    encoded += (char)(ch >> 8);
+    encoded += (char)(ch & 0xff);
+    return encoded;
+  }
+
+  // Not one of the known encodings.
+  return string();
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextEncoder::encode_wtext
+//       Access: Public, Static
+//  Description: Encodes the given wide-character string into a
+//               string of single chars by encoding each character in
+//               turn with encode_wchar() and the given encoding, and
+//               concatenating the results.
+////////////////////////////////////////////////////////////////////
+string TextEncoder::
+encode_wtext(const wstring &wtext, TextEncoder::Encoding encoding) {
+  string encoded;
+
+  const wstring::size_type num_chars = wtext.length();
+  for (wstring::size_type i = 0; i < num_chars; ++i) {
+    encoded += encode_wchar(wtext[i], encoding);
+  }
+
+  return encoded;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextEncoder::decode_text
+//       Access: Public, Static
+//  Description: Decodes the given encoded string into a
+//               wide-character string, using the given encoding.
+//               E_utf8 and E_unicode each select their own decoder
+//               class; E_iso8859 and any other value use the plain
+//               StringDecoder.
+////////////////////////////////////////////////////////////////////
+wstring TextEncoder::
+decode_text(const string &text, TextEncoder::Encoding encoding) {
+  if (encoding == E_utf8) {
+    StringUtf8Decoder utf8_decoder(text);
+    return decode_text_impl(utf8_decoder);
+  }
+
+  if (encoding == E_unicode) {
+    StringUnicodeDecoder unicode_decoder(text);
+    return decode_text_impl(unicode_decoder);
+  }
+
+  // E_iso8859, and the fallback for anything unrecognized.
+  StringDecoder plain_decoder(text);
+  return decode_text_impl(plain_decoder);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: TextEncoder::decode_text_impl
+//       Access: Private, Static
+//  Description: Pulls characters from the indicated decoder until it
+//               reports end-of-file, and returns them collected into
+//               a wide-character string.
+////////////////////////////////////////////////////////////////////
+wstring TextEncoder::
+decode_text_impl(StringDecoder &decoder) {
+  wstring decoded;
+
+  // Expansion of ampersand escape sequences (expand_amp) is not
+  // performed here at present; the routine for it,
+  // expand_amp_sequence(), is commented out further down this file.
+  for (;;) {
+    const wchar_t next = decoder.get_next_character();
+    if (decoder.is_eof()) {
+      // The value returned along with end-of-file is not part of the
+      // text and is discarded.
+      break;
+    }
+    decoded += next;
+  }
+
+  return decoded;
+}
+
+/*
+////////////////////////////////////////////////////////////////////
+//     Function: TextEncoder::expand_amp_sequence
+//       Access: Private
+//  Description: Given that we have just read an ampersand from the
+//               StringDecoder, and that we have expand_amp in effect
+//               and are therefore expected to expand the sequence
+//               that this ampersand begins into a single unicode
+//               character, do the expansion and return the character.
+////////////////////////////////////////////////////////////////////
+int TextEncoder::
+expand_amp_sequence(StringDecoder &decoder) const {
+  int result = 0;
+
+  int character = decoder.get_next_character();
+  if (!decoder.is_eof() && character == '#') {
+    // An explicit numeric sequence: &#nnn;
+    result = 0;
+    character = decoder.get_next_character();
+    while (!decoder.is_eof() && character < 128 && isdigit((unsigned int)character)) {
+      result = (result * 10) + (character - '0');
+      character = decoder.get_next_character();
+    }
+    if (character != ';') {
+      // Invalid sequence.
+      return 0;
+    }
+
+    return result;
+  }
+
+  string sequence;
+  
+  // Some non-numeric sequence.
+  while (!decoder.is_eof() && character < 128 && isalpha((unsigned int)character)) {
+    sequence += character;
+    character = decoder.get_next_character();
+  }
+  if (character != ';') {
+    // Invalid sequence.
+    return 0;
+  }
+
+  static const struct {
+    const char *name;
+    int code;
+  } tokens[] = {
+    { "amp", '&' }, { "lt", '<' }, { "gt", '>' }, { "quot", '"' },
+    { "nbsp", ' ' },
+
+    { "iexcl", 161 }, { "cent", 162 }, { "pound", 163 }, { "curren", 164 },
+    { "yen", 165 }, { "brvbar", 166 }, { "brkbar", 166 }, { "sect", 167 },
+    { "uml", 168 }, { "die", 168 }, { "copy", 169 }, { "ordf", 170 },
+    { "laquo", 171 }, { "not", 172 }, { "shy", 173 }, { "reg", 174 },
+    { "macr", 175 }, { "hibar", 175 }, { "deg", 176 }, { "plusmn", 177 },
+    { "sup2", 178 }, { "sup3", 179 }, { "acute", 180 }, { "micro", 181 },
+    { "para", 182 }, { "middot", 183 }, { "cedil", 184 }, { "sup1", 185 },
+    { "ordm", 186 }, { "raquo", 187 }, { "frac14", 188 }, { "frac12", 189 },
+    { "frac34", 190 }, { "iquest", 191 }, { "Agrave", 192 }, { "Aacute", 193 },
+    { "Acirc", 194 }, { "Atilde", 195 }, { "Auml", 196 }, { "Aring", 197 },
+    { "AElig", 198 }, { "Ccedil", 199 }, { "Egrave", 200 }, { "Eacute", 201 },
+    { "Ecirc", 202 }, { "Euml", 203 }, { "Igrave", 204 }, { "Iacute", 205 },
+    { "Icirc", 206 }, { "Iuml", 207 }, { "ETH", 208 }, { "Dstrok", 208 },
+    { "Ntilde", 209 }, { "Ograve", 210 }, { "Oacute", 211 }, { "Ocirc", 212 },
+    { "Otilde", 213 }, { "Ouml", 214 }, { "times", 215 }, { "Oslash", 216 },
+    { "Ugrave", 217 }, { "Uacute", 218 }, { "Ucirc", 219 }, { "Uuml", 220 },
+    { "Yacute", 221 }, { "THORN", 222 }, { "szlig", 223 }, { "agrave", 224 },
+    { "aacute", 225 }, { "acirc", 226 }, { "atilde", 227 }, { "auml", 228 },
+    { "aring", 229 }, { "aelig", 230 }, { "ccedil", 231 }, { "egrave", 232 },
+    { "eacute", 233 }, { "ecirc", 234 }, { "euml", 235 }, { "igrave", 236 },
+    { "iacute", 237 }, { "icirc", 238 }, { "iuml", 239 }, { "eth", 240 },
+    { "ntilde", 241 }, { "ograve", 242 }, { "oacute", 243 }, { "ocirc", 244 },
+    { "otilde", 245 }, { "ouml", 246 }, { "divide", 247 }, { "oslash", 248 },
+    { "ugrave", 249 }, { "uacute", 250 }, { "ucirc", 251 }, { "uuml", 252 },
+    { "yacute", 253 }, { "thorn", 254 }, { "yuml", 255 },
+
+    { NULL, 0 },
+  };
+
+  for (int i = 0; tokens[i].name != NULL; i++) {
+    if (sequence == tokens[i].name) {
+      // Here's a match.
+      return tokens[i].code;
+    }
+  }
+
+  // Some unrecognized sequence.
+  return 0;
+}
+*/
+

+ 112 - 0
panda/src/express/textEncoder.h

@@ -0,0 +1,112 @@
+// Filename: textEncoder.h
+// Created by:  drose (26Mar03)
+//
+////////////////////////////////////////////////////////////////////
+//
+// PANDA 3D SOFTWARE
+// Copyright (c) 2001, Disney Enterprises, Inc.  All rights reserved
+//
+// All use of this software is subject to the terms of the Panda 3d
+// Software license.  You should have received a copy of this license
+// along with this source code; you will also find a current copy of
+// the license at http://www.panda3d.org/license.txt .
+//
+// To contact the maintainers of this program write to
+// [email protected] .
+//
+////////////////////////////////////////////////////////////////////
+
+#ifndef TEXTENCODER_H
+#define TEXTENCODER_H
+
+#include "pandabase.h"
+
+class StringDecoder;
+
+////////////////////////////////////////////////////////////////////
+//       Class : TextEncoder
+// Description : Converts text between multiple representations,
+//               e.g. utf-8 to Unicode.  Use the static methods,
+//               passing the encoding each time, or create an
+//               instance, which records an encoding and a string.
+//
+//               This class is also a base class of TextNode, which
+//               redeclares the text-setting methods.  They are not
+//               virtual here, so a call made through a TextEncoder
+//               reference will not reach TextNode's versions.
+////////////////////////////////////////////////////////////////////
+class EXPCL_PANDAEXPRESS TextEncoder {
+PUBLISHED:
+  enum Encoding {
+    E_iso8859,
+    E_utf8,
+    E_unicode
+  };
+
+  INLINE TextEncoder();
+
+  INLINE void set_encoding(Encoding encoding);  // per-instance encoding
+  INLINE Encoding get_encoding() const;
+
+  INLINE static void set_default_encoding(Encoding encoding);  // class-wide (static) setting
+  INLINE static Encoding get_default_encoding();
+
+  INLINE void set_text(const string &text);  // redeclared by TextNode
+  INLINE void set_text(const string &text, Encoding encoding);  // redeclared by TextNode
+  INLINE void clear_text();  // redeclared by TextNode
+  INLINE bool has_text() const;
+  INLINE string get_text() const;
+  INLINE string get_text(Encoding encoding) const;
+  INLINE void append_text(const string &text);  // redeclared by TextNode
+  INLINE void append_unicode_char(int character);  // redeclared by TextNode
+  INLINE int get_num_chars() const;
+  INLINE int get_unicode_char(int index) const;
+  INLINE string get_encoded_char(int index) const;
+  INLINE string get_encoded_char(int index, Encoding encoding) const;
+  INLINE string get_text_as_ascii() const;
+
+  INLINE static string reencode_text(const string &text, Encoding from, Encoding to);  // static: needs no instance
+
+public:
+  // Direct support for wide-character strings.  Not publishable for
+  // now (until we add support for wstring to interrogate).
+  INLINE void set_wtext(const wstring &wtext);  // redeclared by TextNode
+  INLINE const wstring &get_wtext() const;
+  INLINE void append_wtext(const wstring &text);  // redeclared by TextNode
+  wstring get_wtext_as_ascii() const;
+
+  static string encode_wchar(wchar_t ch, Encoding encoding);
+  INLINE string encode_wtext(const wstring &wtext) const;  // member overload; presumably uses _encoding -- TODO confirm in textEncoder.I
+  static string encode_wtext(const wstring &wtext, Encoding encoding);  // static overload; encoding passed explicitly
+  INLINE wstring decode_text(const string &text) const;  // member overload; presumably uses _encoding -- TODO confirm in textEncoder.I
+  static wstring decode_text(const string &text, Encoding encoding);  // static overload; encoding passed explicitly
+
+private:
+  enum Flags {
+    F_got_text         =  0x0001,  // presumably: _text is up to date -- TODO confirm in textEncoder.I
+    F_got_wtext        =  0x0002,  // presumably: _wtext is up to date -- TODO confirm in textEncoder.I
+  };
+  static wstring decode_text_impl(StringDecoder &decoder);  // StringDecoder is only forward-declared in this header
+
+  int _flags;  // presumably a bitmask of Flags values -- TODO confirm
+  Encoding _encoding;
+  string _text;
+  wstring _wtext;
+
+  static Encoding _default_encoding;
+
+public:  // NOTE(review): this header includes only pandabase.h; confirm it declares TypeHandle and register_type()
+  static TypeHandle get_class_type() {
+    return _type_handle;
+  }
+  static void init_type() {
+    register_type(_type_handle, "TextEncoder");  // registered by name only; no parent type is passed
+  }
+
+private:
+  static TypeHandle _type_handle;
+};
+
+#include "textEncoder.I"
+
+#endif

+ 0 - 0
panda/src/text/unicodeLatinMap.cxx → panda/src/express/unicodeLatinMap.cxx


+ 0 - 0
panda/src/text/unicodeLatinMap.h → panda/src/express/unicodeLatinMap.h


+ 3 - 9
panda/src/text/Sources.pp

@@ -20,11 +20,9 @@
     fontPool.I fontPool.h \
     geomTextGlyph.I geomTextGlyph.h \
     staticTextFont.I staticTextFont.h \
-    stringDecoder.I stringDecoder.h \
     textFont.I textFont.h \
     textGlyph.I textGlyph.h \
-    textNode.I textNode.h textNode.cxx \
-    unicodeLatinMap.h
+    textNode.I textNode.h textNode.cxx
 
   #define INCLUDED_SOURCES \
     config_text.cxx \
@@ -34,10 +32,8 @@
     dynamicTextPage.cxx \
     fontPool.cxx \
     geomTextGlyph.cxx \
-    stringDecoder.cxx \
     staticTextFont.cxx \
-    textFont.cxx textGlyph.cxx \
-    unicodeLatinMap.cxx
+    textFont.cxx textGlyph.cxx
 
   #define INSTALL_HEADERS \
     config_text.h \
@@ -47,11 +43,9 @@
     fontPool.I fontPool.h \
     geomTextGlyph.I geomTextGlyph.h \
     staticTextFont.I staticTextFont.h \
-    stringDecoder.I stringDecoder.h \
     textFont.I textFont.h \
     textGlyph.I textGlyph.h \
-    textNode.I textNode.h \
-    unicodeLatinMap.h
+    textNode.I textNode.h
 
 
   #define IGATESCAN all

+ 0 - 12
panda/src/text/config_text.cxx

@@ -79,18 +79,6 @@ init_libtext() {
   GeomTextGlyph::register_with_read_factory();
 #endif
 
-  string text_encoding = config_text.GetString("text-encoding", "iso8859");
-  if (text_encoding == "iso8859") {
-    TextNode::set_default_encoding(TextNode::E_iso8859);
-  } else if (text_encoding == "utf8") {
-    TextNode::set_default_encoding(TextNode::E_utf8);
-  } else if (text_encoding == "unicode") {
-    TextNode::set_default_encoding(TextNode::E_unicode);
-  } else {
-    text_cat.error()
-      << "Invalid text-encoding: " << text_encoding << "\n";
-  }
-
   // FT_linear_mipmap_nearest (that is, choose the nearest mipmap
   // level and bilinear filter the pixels from there) gives us some
   // mipmapping to avoid dropping pixels, but avoids the hideous

+ 10 - 276
panda/src/text/textNode.I

@@ -98,96 +98,6 @@ get_default_font() {
   return _default_font;
 }
 
-////////////////////////////////////////////////////////////////////
-//     Function: TextNode::set_encoding
-//       Access: Published
-//  Description: Specifies how the string set via set_text() is to be
-//               interpreted.  The default, E_iso8859, means a
-//               standard string with one-byte characters
-//               (i.e. ASCII).  Other encodings are possible to take
-//               advantage of character sets with more than 256
-//               characters.
-//
-//               This affects only future calls to set_text(); it does
-//               not change text that was set previously.
-////////////////////////////////////////////////////////////////////
-INLINE void TextNode::
-set_encoding(TextNode::Encoding encoding) {
-  // Force the previously-set strings to be encoded or decoded now.
-  get_text();
-  get_wtext();
-  _encoding = encoding;
-}
-
-////////////////////////////////////////////////////////////////////
-//     Function: TextNode::get_encoding
-//       Access: Published
-//  Description: Returns the encoding by which the string set via
-//               set_text() is to be interpreted.  See set_encoding().
-////////////////////////////////////////////////////////////////////
-INLINE TextNode::Encoding TextNode::
-get_encoding() const {
-  return _encoding;
-}
-
-////////////////////////////////////////////////////////////////////
-//     Function: TextNode::set_default_encoding
-//       Access: Published, Static
-//  Description: Specifies the default encoding to be used for all
-//               subsequently created TextNode objects.  See
-//               set_encoding().
-////////////////////////////////////////////////////////////////////
-INLINE void TextNode::
-set_default_encoding(TextNode::Encoding encoding) {
-  _default_encoding = encoding;
-}
-
-////////////////////////////////////////////////////////////////////
-//     Function: TextNode::get_default_encoding
-//       Access: Published, Static
-//  Description: Specifies the default encoding to be used for all
-//               subsequently created TextNode objects.  See
-//               set_encoding().
-////////////////////////////////////////////////////////////////////
-INLINE TextNode::Encoding TextNode::
-get_default_encoding() {
-  return _default_encoding;
-}
-
-/*
-////////////////////////////////////////////////////////////////////
-//     Function: TextNode::set_expand_amp
-//       Access: Published
-//  Description: Sets the state of the expand_amp flag.  When this is
-//               true, embedded ampersands in the text string are
-//               expanded to special characters according to a subset
-//               of the HTML conventions.  When this is false,
-//               ampersands are treated as ordinary characters.
-//
-//               This affects only future calls to set_text(); it does
-//               not change text that was set previously.
-////////////////////////////////////////////////////////////////////
-INLINE void TextNode::
-set_expand_amp(bool expand_amp) {
-  if (expand_amp) {
-    _flags |= F_expand_amp;
-  } else {
-    _flags &= ~F_expand_amp;
-  }
-}
-
-////////////////////////////////////////////////////////////////////
-//     Function: TextNode::get_expand_amp
-//       Access: Published
-//  Description: Returns the state of the expand_amp flag.  See
-//               set_expand_amp().
-////////////////////////////////////////////////////////////////////
-INLINE bool TextNode::
-get_expand_amp() const {
-  return (_flags & F_expand_amp) != 0;
-}
-*/
-
 ////////////////////////////////////////////////////////////////////
 //     Function: TextNode::get_line_height
 //       Access: Published
@@ -1124,11 +1034,8 @@ get_coordinate_system() const {
 ////////////////////////////////////////////////////////////////////
 INLINE void TextNode::
 set_text(const string &text) {
-  if (!has_text() || _text != text) {
-    _text = text;
-    _flags = (_flags | F_got_text) & ~F_got_wtext;
-    invalidate_with_measure();
-  }
+  TextEncoder::set_text(text);
+  invalidate_with_measure();
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -1143,7 +1050,8 @@ set_text(const string &text) {
 ////////////////////////////////////////////////////////////////////
 INLINE void TextNode::
 set_text(const string &text, TextNode::Encoding encoding) {
-  set_wtext(decode_text(text, encoding));
+  TextEncoder::set_text(text, encoding);
+  invalidate_with_measure();
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -1153,52 +1061,10 @@ set_text(const string &text, TextNode::Encoding encoding) {
 ////////////////////////////////////////////////////////////////////
 INLINE void TextNode::
 clear_text() {
-  _text = string();
-  _wtext = wstring();
-  _flags |= (F_got_text | F_got_wtext);
+  TextEncoder::clear_text();
   invalidate_with_measure();
 }
 
-////////////////////////////////////////////////////////////////////
-//     Function: TextNode::has_text
-//       Access: Published
-//  Description:
-////////////////////////////////////////////////////////////////////
-INLINE bool TextNode::
-has_text() const {
-  if (_flags & F_got_wtext) {
-    return !_wtext.empty();
-  } else {
-    return !_text.empty();
-  }
-}
-
-////////////////////////////////////////////////////////////////////
-//     Function: TextNode::get_text
-//       Access: Published
-//  Description: Returns the current text, as encoded via the current
-//               encoding system.
-////////////////////////////////////////////////////////////////////
-INLINE string TextNode::
-get_text() const {
-  if ((_flags & F_got_text) == 0) {
-    ((TextNode *)this)->_text = encode_wtext(_wtext);
-    ((TextNode *)this)->_flags |= F_got_text;
-  }
-  return _text;
-}
-
-////////////////////////////////////////////////////////////////////
-//     Function: TextNode::get_text
-//       Access: Published
-//  Description: Returns the current text, as encoded via the indicated
-//               encoding system.
-////////////////////////////////////////////////////////////////////
-INLINE string TextNode::
-get_text(TextNode::Encoding encoding) const {
-  return encode_wtext(get_wtext(), encoding);
-}
-
 ////////////////////////////////////////////////////////////////////
 //     Function: TextNode::append_text
 //       Access: Published
@@ -1207,8 +1073,7 @@ get_text(TextNode::Encoding encoding) const {
 ////////////////////////////////////////////////////////////////////
 INLINE void TextNode::
 append_text(const string &text) {
-  _text = get_text() + text;
-  _flags = (_flags | F_got_text) & ~F_got_wtext;
+  TextEncoder::append_text(text);
   invalidate_with_measure();
 }
 
@@ -1221,100 +1086,10 @@ append_text(const string &text) {
 ////////////////////////////////////////////////////////////////////
 INLINE void TextNode::
 append_unicode_char(int character) {
-  _wtext = get_wtext() + wstring(1, (wchar_t)character);
-  _flags = (_flags | F_got_wtext) & ~F_got_text;
+  TextEncoder::append_unicode_char(character);
   invalidate_with_measure();
 }
 
-////////////////////////////////////////////////////////////////////
-//     Function: TextNode::get_num_chars
-//       Access: Published
-//  Description: Returns the number of characters in the stored text.
-//               This is a count of wide characters, after the string
-//               has been decoded according to set_encoding().
-////////////////////////////////////////////////////////////////////
-INLINE int TextNode::
-get_num_chars() const {
-  return get_wtext().length();
-}
-
-////////////////////////////////////////////////////////////////////
-//     Function: TextNode::get_unicode_char
-//       Access: Published
-//  Description: Returns the Unicode value of the nth character in the
-//               stored text.  This may be a wide character (greater
-//               than 255), after the string has been decoded
-//               according to set_encoding().
-////////////////////////////////////////////////////////////////////
-INLINE int TextNode::
-get_unicode_char(int index) const {
-  get_wtext();
-  nassertr(index >= 0 && index < (int)_wtext.length(), 0);
-  return _wtext[index];
-}
-
-////////////////////////////////////////////////////////////////////
-//     Function: TextNode::get_encoded_char
-//       Access: Published
-//  Description: Returns the nth char of the stored text, as a one-,
-//               two-, or three-byte encoded string.
-////////////////////////////////////////////////////////////////////
-INLINE string TextNode::
-get_encoded_char(int index) const {
-  return get_encoded_char(index, get_encoding());
-}
-
-////////////////////////////////////////////////////////////////////
-//     Function: TextNode::get_encoded_char
-//       Access: Published
-//  Description: Returns the nth char of the stored text, as a one-,
-//               two-, or three-byte encoded string.
-////////////////////////////////////////////////////////////////////
-INLINE string TextNode::
-get_encoded_char(int index, TextNode::Encoding encoding) const {
-  wstring wch(1, (wchar_t)get_unicode_char(index));
-  return encode_wtext(wch, encoding);
-}
-
-////////////////////////////////////////////////////////////////////
-//     Function: TextNode::get_text_as_ascii
-//       Access: Published
-//  Description: Returns the text associated with the node, converted
-//               as nearly as possible to a fully-ASCII
-//               representation.  This means replacing accented
-//               letters with their unaccented ASCII equivalents.
-//
-//               It is possible that some characters in the string
-//               cannot be converted to ASCII.  (The string may
-//               involve symbols like the copyright symbol, for
-//               instance, or it might involve letters in some other
-//               alphabet such as Greek or Cyrillic, or even Latin
-//               letters like thorn or eth that are not part of the
-//               ASCII character set.)  In this case, as much of the
-//               string as possible will be converted to ASCII, and
-//               the nonconvertible characters will remain encoded in
-//               the encoding specified by set_encoding().
-////////////////////////////////////////////////////////////////////
-INLINE string TextNode::
-get_text_as_ascii() const {
-  return encode_wtext(get_wtext_as_ascii());
-}
-
-////////////////////////////////////////////////////////////////////
-//     Function: TextNode::reencode_text
-//       Access: Published, Static
-//  Description: Given the indicated text string, which is assumed to
-//               be encoded via the encoding "from", decodes it and
-//               then reencodes it into the encoding "to", and returns
-//               the newly encoded string.  This does not change or
-//               affect any properties on the TextNode itself.
-////////////////////////////////////////////////////////////////////
-INLINE string TextNode::
-reencode_text(const string &text, TextNode::Encoding from, 
-              TextNode::Encoding to) {
-  return encode_wtext(decode_text(text, from), to);
-}
-
 ////////////////////////////////////////////////////////////////////
 //     Function: TextNode::calc_width
 //       Access: Published
@@ -1494,26 +1269,8 @@ force_update() {
 ////////////////////////////////////////////////////////////////////
 INLINE void TextNode::
 set_wtext(const wstring &wtext) {
-  if (!has_text() || _wtext != wtext) {
-    _wtext = wtext;
-    _flags = (_flags | F_got_wtext) & ~F_got_text;
-    invalidate_with_measure();
-  }
-}
-
-////////////////////////////////////////////////////////////////////
-//     Function: TextNode::get_wtext
-//       Access: Public
-//  Description: Returns the text associated with the TextNode, as a
-//               wide-character string.
-////////////////////////////////////////////////////////////////////
-INLINE const wstring &TextNode::
-get_wtext() const {
-  if ((_flags & F_got_wtext) == 0) {
-    ((TextNode *)this)->_wtext = decode_text(_text);
-    ((TextNode *)this)->_flags |= F_got_wtext;
-  }
-  return _wtext;
+  TextEncoder::set_wtext(wtext);
+  invalidate_with_measure();
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -1524,8 +1281,7 @@ get_wtext() const {
 ////////////////////////////////////////////////////////////////////
 INLINE void TextNode::
 append_wtext(const wstring &wtext) {
-  _wtext = get_wtext() + wtext;
-  _flags = (_flags | F_got_wtext) & ~F_got_text;
+  TextEncoder::append_wtext(wtext);
   invalidate_with_measure();
 }
 
@@ -1558,28 +1314,6 @@ wordwrap_to(const wstring &wtext, float wordwrap_width,
   return _font->wordwrap_to(wtext, wordwrap_width, preserve_trailing_whitespace);
 }
 
-////////////////////////////////////////////////////////////////////
-//     Function: TextNode::encode_wtext
-//       Access: Public
-//  Description: Encodes a wide-text string into a single-char string,
-//               according to the current encoding.
-////////////////////////////////////////////////////////////////////
-INLINE string TextNode::
-encode_wtext(const wstring &wtext) const {
-  return encode_wtext(wtext, _encoding);
-}
-
-////////////////////////////////////////////////////////////////////
-//     Function: TextNode::decode_text
-//       Access: Public
-//  Description: Returns the given wstring decoded to a single-byte
-//               string, via the current encoding system.
-////////////////////////////////////////////////////////////////////
-INLINE wstring TextNode::
-decode_text(const string &text) const {
-  return decode_text(text, _encoding);
-}
-
 ////////////////////////////////////////////////////////////////////
 //     Function: TextNode::invalidate_no_measure
 //       Access: Private

+ 1 - 271
panda/src/text/textNode.cxx

@@ -55,7 +55,6 @@ TypeHandle TextNode::_type_handle;
 
 PT(TextFont) TextNode::_default_font;
 bool TextNode::_loaded_default_font = false;
-TextNode::Encoding TextNode::_default_encoding;
 
 // This is the factor by which CT_small scales the character down.
 static const float small_accent_scale = 0.6f;
@@ -82,13 +81,9 @@ static const float ligature_advance_scale = 0.6f;
 ////////////////////////////////////////////////////////////////////
 TextNode::
 TextNode(const string &name) : PandaNode(name) {
-  _encoding = _default_encoding;
   _slant = 0.0f;
   
-  // Initially, since the text string is empty, we know that both
-  // _text and _wtext accurately reflect the empty state; so we "got"
-  // both of them.
-  _flags = (F_got_text | F_got_wtext);
+  _flags = 0;
   _align = A_left;
   _wordwrap_width = 1.0f;
 
@@ -423,152 +418,6 @@ generate() {
   return root;
 }
 
-////////////////////////////////////////////////////////////////////
-//     Function: TextNode::get_wtext_as_ascii
-//       Access: Published
-//  Description: Returns the text associated with the node, converted
-//               as nearly as possible to a fully-ASCII
-//               representation.  This means replacing accented
-//               letters with their unaccented ASCII equivalents.
-//
-//               It is possible that some characters in the string
-//               cannot be converted to ASCII.  (The string may
-//               involve symbols like the copyright symbol, for
-//               instance, or it might involve letters in some other
-//               alphabet such as Greek or Cyrillic, or even Latin
-//               letters like thorn or eth that are not part of the
-//               ASCII character set.)  In this case, as much of the
-//               string as possible will be converted to ASCII, and
-//               the nonconvertible characters will remain in their
-//               original form.
-////////////////////////////////////////////////////////////////////
-wstring TextNode::
-get_wtext_as_ascii() const {
-  get_wtext();
-  wstring result;
-  wstring::const_iterator si;
-  for (si = _wtext.begin(); si != _wtext.end(); ++si) {
-    wchar_t character = (*si);
-
-    const UnicodeLatinMap::Entry *map_entry = 
-      UnicodeLatinMap::look_up(character);
-    if (map_entry != NULL && map_entry->_ascii_equiv != 0) {
-      result += (wchar_t)map_entry->_ascii_equiv;
-      if (map_entry->_ascii_additional != 0) {
-        result += (wchar_t)map_entry->_ascii_additional;
-      }
-
-    } else {
-      result += character;
-    }
-  }
-
-  return result;
-}
-
-////////////////////////////////////////////////////////////////////
-//     Function: TextNode::encode_wchar
-//       Access: Public, Static
-//  Description: Encodes a single wide char into a one-, two-, or
-//               three-byte string, according to the given encoding
-//               system.
-////////////////////////////////////////////////////////////////////
-string TextNode::
-encode_wchar(wchar_t ch, TextNode::Encoding encoding) {
-  switch (encoding) {
-  case E_iso8859:
-    if (ch < 0x100) {
-      return string(1, (char)ch);
-    } else {
-      // The character won't fit in the 8-bit ISO 8859.  See if we can
-      // make it fit by reducing it to its ascii equivalent
-      // (essentially stripping off an unusual accent mark).
-      const UnicodeLatinMap::Entry *map_entry = 
-        UnicodeLatinMap::look_up(ch);
-      if (map_entry != NULL && map_entry->_ascii_equiv != 0) {
-        // Yes, it has an ascii equivalent.
-        if (map_entry->_ascii_additional != 0) {
-          // In fact, it has two of them.
-          return
-            string(1, map_entry->_ascii_equiv) +
-            string(1, map_entry->_ascii_additional);
-        }
-        return string(1, map_entry->_ascii_equiv);
-      }
-      // Nope; return "." for lack of anything better.
-      return ".";
-    }
-
-  case E_utf8:
-    if (ch < 0x80) {
-      return string(1, (char)ch);
-    } else if (ch < 0x800) {
-      return 
-        string(1, (char)((ch >> 6) | 0xc0)) +
-        string(1, (char)((ch & 0x3f) | 0x80));
-    } else {
-      return 
-        string(1, (char)((ch >> 12) | 0xe0)) +
-        string(1, (char)(((ch >> 6) & 0x3f) | 0x80)) +
-        string(1, (char)((ch & 0x3f) | 0x80));
-    }
-
-  case E_unicode:
-    return
-      string(1, (char)(ch >> 8)) + 
-      string(1, (char)(ch & 0xff));
-  }
-
-  return "";
-}
-
-////////////////////////////////////////////////////////////////////
-//     Function: TextNode::encode_wtext
-//       Access: Public, Static
-//  Description: Encodes a wide-text string into a single-char string,
-//               according to the given encoding.
-////////////////////////////////////////////////////////////////////
-string TextNode::
-encode_wtext(const wstring &wtext, TextNode::Encoding encoding) {
-  string result;
-
-  for (wstring::const_iterator pi = wtext.begin(); pi != wtext.end(); ++pi) {
-    result += encode_wchar(*pi, encoding);
-  }
-
-  return result;
-}
-
-////////////////////////////////////////////////////////////////////
-//     Function: TextNode::decode_text
-//       Access: Public, Static
-//  Description: Returns the given wstring decoded to a single-byte
-//               string, via the given encoding system.
-////////////////////////////////////////////////////////////////////
-wstring TextNode::
-decode_text(const string &text, TextNode::Encoding encoding) {
-  switch (encoding) {
-  case E_utf8:
-    {
-      StringUtf8Decoder decoder(text);
-      return decode_text_impl(decoder);
-    }
-
-  case E_unicode:
-    {
-      StringUnicodeDecoder decoder(text);
-      return decode_text_impl(decoder);
-    }
-
-  case E_iso8859:
-  default:
-    {
-      StringDecoder decoder(text);
-      return decode_text_impl(decoder);
-    }
-  };
-}
-
 ////////////////////////////////////////////////////////////////////
 //     Function: TextNode::get_unsafe_to_apply_attribs
 //       Access: Public, Virtual
@@ -778,125 +627,6 @@ recompute_internal_bound() {
   return bound;
 }
 
-////////////////////////////////////////////////////////////////////
-//     Function: TextNode::decode_text_impl
-//       Access: Private, Static
-//  Description: Decodes the eight-bit stream from the indicated
-//               decoder, returning the decoded wide-char string.
-////////////////////////////////////////////////////////////////////
-wstring TextNode::
-decode_text_impl(StringDecoder &decoder) {
-  wstring result;
-  //  bool expand_amp = get_expand_amp();
-
-  wchar_t character = decoder.get_next_character();
-  while (!decoder.is_eof()) {
-    /*
-    if (character == '&' && expand_amp) {
-      // An ampersand in expand_amp mode is treated as an escape
-      // character.
-      character = expand_amp_sequence(decoder);
-    }
-    */
-    result += character;
-    character = decoder.get_next_character();
-  }
-
-  return result;
-}
-
-/*
-////////////////////////////////////////////////////////////////////
-//     Function: TextNode::expand_amp_sequence
-//       Access: Private
-//  Description: Given that we have just read an ampersand from the
-//               StringDecoder, and that we have expand_amp in effect
-//               and are therefore expected to expand the sequence
-//               that this ampersand begins into a single unicode
-//               character, do the expansion and return the character.
-////////////////////////////////////////////////////////////////////
-int TextNode::
-expand_amp_sequence(StringDecoder &decoder) const {
-  int result = 0;
-
-  int character = decoder.get_next_character();
-  if (!decoder.is_eof() && character == '#') {
-    // An explicit numeric sequence: &#nnn;
-    result = 0;
-    character = decoder.get_next_character();
-    while (!decoder.is_eof() && character < 128 && isdigit((unsigned int)character)) {
-      result = (result * 10) + (character - '0');
-      character = decoder.get_next_character();
-    }
-    if (character != ';') {
-      // Invalid sequence.
-      return 0;
-    }
-
-    return result;
-  }
-
-  string sequence;
-  
-  // Some non-numeric sequence.
-  while (!decoder.is_eof() && character < 128 && isalpha((unsigned int)character)) {
-    sequence += character;
-    character = decoder.get_next_character();
-  }
-  if (character != ';') {
-    // Invalid sequence.
-    return 0;
-  }
-
-  static const struct {
-    const char *name;
-    int code;
-  } tokens[] = {
-    { "amp", '&' }, { "lt", '<' }, { "gt", '>' }, { "quot", '"' },
-    { "nbsp", ' ' },
-
-    { "iexcl", 161 }, { "cent", 162 }, { "pound", 163 }, { "curren", 164 },
-    { "yen", 165 }, { "brvbar", 166 }, { "brkbar", 166 }, { "sect", 167 },
-    { "uml", 168 }, { "die", 168 }, { "copy", 169 }, { "ordf", 170 },
-    { "laquo", 171 }, { "not", 172 }, { "shy", 173 }, { "reg", 174 },
-    { "macr", 175 }, { "hibar", 175 }, { "deg", 176 }, { "plusmn", 177 },
-    { "sup2", 178 }, { "sup3", 179 }, { "acute", 180 }, { "micro", 181 },
-    { "para", 182 }, { "middot", 183 }, { "cedil", 184 }, { "sup1", 185 },
-    { "ordm", 186 }, { "raquo", 187 }, { "frac14", 188 }, { "frac12", 189 },
-    { "frac34", 190 }, { "iquest", 191 }, { "Agrave", 192 }, { "Aacute", 193 },
-    { "Acirc", 194 }, { "Atilde", 195 }, { "Auml", 196 }, { "Aring", 197 },
-    { "AElig", 198 }, { "Ccedil", 199 }, { "Egrave", 200 }, { "Eacute", 201 },
-    { "Ecirc", 202 }, { "Euml", 203 }, { "Igrave", 204 }, { "Iacute", 205 },
-    { "Icirc", 206 }, { "Iuml", 207 }, { "ETH", 208 }, { "Dstrok", 208 },
-    { "Ntilde", 209 }, { "Ograve", 210 }, { "Oacute", 211 }, { "Ocirc", 212 },
-    { "Otilde", 213 }, { "Ouml", 214 }, { "times", 215 }, { "Oslash", 216 },
-    { "Ugrave", 217 }, { "Uacute", 218 }, { "Ucirc", 219 }, { "Uuml", 220 },
-    { "Yacute", 221 }, { "THORN", 222 }, { "szlig", 223 }, { "agrave", 224 },
-    { "aacute", 225 }, { "acirc", 226 }, { "atilde", 227 }, { "auml", 228 },
-    { "aring", 229 }, { "aelig", 230 }, { "ccedil", 231 }, { "egrave", 232 },
-    { "eacute", 233 }, { "ecirc", 234 }, { "euml", 235 }, { "igrave", 236 },
-    { "iacute", 237 }, { "icirc", 238 }, { "iuml", 239 }, { "eth", 240 },
-    { "ntilde", 241 }, { "ograve", 242 }, { "oacute", 243 }, { "ocirc", 244 },
-    { "otilde", 245 }, { "ouml", 246 }, { "divide", 247 }, { "oslash", 248 },
-    { "ugrave", 249 }, { "uacute", 250 }, { "ucirc", 251 }, { "uuml", 252 },
-    { "yacute", 253 }, { "thorn", 254 }, { "yuml", 255 },
-
-    { NULL, 0 },
-  };
-
-  for (int i = 0; tokens[i].name != NULL; i++) {
-    if (sequence == tokens[i].name) {
-      // Here's a match.
-      return tokens[i].code;
-    }
-  }
-
-  // Some unrecognized sequence.
-  return 0;
-}
-*/
-
-
 ////////////////////////////////////////////////////////////////////
 //     Function: TextNode::do_rebuild
 //       Access: Private

+ 7 - 48
panda/src/text/textNode.h

@@ -22,6 +22,7 @@
 #include "pandabase.h"
 
 #include "config_text.h"
+#include "textEncoder.h"
 #include "textFont.h"
 #include "unicodeLatinMap.h"
 #include "pandaNode.h"
@@ -52,7 +53,7 @@ class StringDecoder;
 //               you may use however you like.  Each time you call
 //               generate() a new node is returned.
 ////////////////////////////////////////////////////////////////////
-class EXPCL_PANDA TextNode : public PandaNode {
+class EXPCL_PANDA TextNode : public PandaNode, public TextEncoder {
 PUBLISHED:
   TextNode(const string &name);
   ~TextNode();
@@ -62,12 +63,6 @@ PUBLISHED:
     A_right,
     A_center,
   };
-
-  enum Encoding {
-    E_iso8859,
-    E_utf8,
-    E_unicode
-  };
  
   INLINE int freeze();
   INLINE int thaw();
@@ -78,17 +73,6 @@ PUBLISHED:
   INLINE static void set_default_font(TextFont *);
   INLINE static TextFont *get_default_font();
 
-  INLINE void set_encoding(Encoding encoding);
-  INLINE Encoding get_encoding() const;
-
-  INLINE static void set_default_encoding(Encoding encoding);
-  INLINE static Encoding get_default_encoding();
-
-  /*
-  INLINE void set_expand_amp(bool expand_amp);
-  INLINE bool get_expand_amp() const;
-  */
-
   INLINE float get_line_height() const;
 
   INLINE void set_small_caps(bool small_caps);
@@ -181,21 +165,14 @@ PUBLISHED:
   INLINE void set_coordinate_system(CoordinateSystem cs);
   INLINE CoordinateSystem get_coordinate_system() const;
 
+  // These methods are inherited from TextEncoder, but we override
+  // them here so we can flag the TextNode as dirty when the text has
+  // been changed.
   INLINE void set_text(const string &text);
   INLINE void set_text(const string &text, Encoding encoding);
   INLINE void clear_text();
-  INLINE bool has_text() const;
-  INLINE string get_text() const;
-  INLINE string get_text(Encoding encoding) const;
   INLINE void append_text(const string &text);
   INLINE void append_unicode_char(int character);
-  INLINE int get_num_chars() const;
-  INLINE int get_unicode_char(int index) const;
-  INLINE string get_encoded_char(int index) const;
-  INLINE string get_encoded_char(int index, Encoding encoding) const;
-  INLINE string get_text_as_ascii() const;
-
-  INLINE static string reencode_text(const string &text, Encoding from, Encoding to);
 
   INLINE float calc_width(int character) const;
   INLINE float calc_width(const string &line) const;
@@ -225,20 +202,12 @@ PUBLISHED:
 public:
   // Direct support for wide-character strings.
   INLINE void set_wtext(const wstring &wtext);
-  INLINE const wstring &get_wtext() const;
   INLINE void append_wtext(const wstring &text);
-  wstring get_wtext_as_ascii() const;
 
   INLINE float calc_width(const wstring &line) const;
   INLINE wstring wordwrap_to(const wstring &wtext, float wordwrap_width,
                              bool preserve_trailing_whitespace) const;
 
-  static string encode_wchar(wchar_t ch, Encoding encoding);
-  INLINE string encode_wtext(const wstring &wtext) const;
-  static string encode_wtext(const wstring &wtext, Encoding encoding);
-  INLINE wstring decode_text(const string &text) const;
-  static wstring decode_text(const string &text, Encoding encoding);
-
   // From parent class PandaNode
   virtual int get_unsafe_to_apply_attribs() const;
   virtual void apply_attribs_to_vertices(const AccumulatedAttribs &attribs,
@@ -255,9 +224,6 @@ public:
   virtual BoundingVolume *recompute_internal_bound();
 
 private:
-  static wstring decode_text_impl(StringDecoder &decoder);
-  //  int expand_amp_sequence(StringDecoder &decoder) const;
-
   INLINE void invalidate_no_measure();
   INLINE void invalidate_with_measure();
   INLINE void check_rebuild() const;
@@ -333,7 +299,6 @@ private:
   PT(TextFont) _font;
   PT(PandaNode) _internal_geom;
 
-  Encoding _encoding;
   float _slant;
 
   PT(Texture) _card_texture;
@@ -354,9 +319,6 @@ private:
     F_frame_corners    =  0x00000100,
     F_card_transp      =  0x00000200,
     F_has_card_border  =  0x00000400,
-    //    F_expand_amp       =  0x00000800,
-    F_got_text         =  0x00001000,
-    F_got_wtext        =  0x00002000,
     F_needs_rebuild    =  0x00004000,
     F_needs_measure    =  0x00008000,
     F_small_caps       =  0x00010000,
@@ -380,16 +342,12 @@ private:
   LMatrix4f _transform;
   CoordinateSystem _coordinate_system;
 
-  string _text;
-  wstring _wtext;
-
   LPoint2f _ul2d, _lr2d;
   LPoint3f _ul3d, _lr3d;
   int _num_rows;
 
   static PT(TextFont) _default_font;
   static bool _loaded_default_font;
-  static Encoding _default_encoding;
 
 public:
   static TypeHandle get_class_type() {
@@ -398,7 +356,8 @@ public:
   static void init_type() {
     PandaNode::init_type();
     register_type(_type_handle, "TextNode",
-                  PandaNode::get_class_type());
+                  PandaNode::get_class_type(),
+                  TextEncoder::get_class_type());
   }
   virtual TypeHandle get_type() const {
     return get_class_type();

+ 0 - 2
panda/src/text/text_composite1.cxx

@@ -6,7 +6,5 @@
 #include "fontPool.cxx"
 #include "geomTextGlyph.cxx"
 #include "staticTextFont.cxx"
-#include "stringDecoder.cxx"
 #include "textFont.cxx"
 #include "textGlyph.cxx"
-#include "unicodeLatinMap.cxx"