Browse Source

dtoolutil: consistently use char32_t for Unicode code points

Unlike wchar_t, which is only 16 bits wide on some platforms (e.g. Windows), char32_t is guaranteed to be large enough to hold any Unicode code point (a full UTF-32 code unit).
rdb 7 years ago
parent
commit
51f5124048

+ 9 - 9
dtool/src/dtoolutil/textEncoder.I

@@ -220,7 +220,7 @@ get_unicode_char(size_t index) const {
  * according to set_encoding().
  * according to set_encoding().
  */
  */
 INLINE void TextEncoder::
 INLINE void TextEncoder::
-set_unicode_char(size_t index, int character) {
+set_unicode_char(size_t index, char32_t character) {
   get_wtext();
   get_wtext();
   if (index < _wtext.length()) {
   if (index < _wtext.length()) {
     _wtext[index] = character;
     _wtext[index] = character;
@@ -283,7 +283,7 @@ reencode_text(const std::string &text, TextEncoder::Encoding from,
  * otherwise.  This is akin to ctype's isalpha(), extended to Unicode.
  * otherwise.  This is akin to ctype's isalpha(), extended to Unicode.
  */
  */
 INLINE bool TextEncoder::
 INLINE bool TextEncoder::
-unicode_isalpha(int character) {
+unicode_isalpha(char32_t character) {
   const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
   const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
   if (entry == nullptr) {
   if (entry == nullptr) {
     return false;
     return false;
@@ -297,7 +297,7 @@ unicode_isalpha(int character) {
  * otherwise.  This is akin to ctype's isdigit(), extended to Unicode.
  * otherwise.  This is akin to ctype's isdigit(), extended to Unicode.
  */
  */
 INLINE bool TextEncoder::
 INLINE bool TextEncoder::
-unicode_isdigit(int character) {
+unicode_isdigit(char32_t character) {
   const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
   const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
   if (entry == nullptr) {
   if (entry == nullptr) {
     // The digits aren't actually listed in the map.
     // The digits aren't actually listed in the map.
@@ -312,7 +312,7 @@ unicode_isdigit(int character) {
  * otherwise.  This is akin to ctype's ispunct(), extended to Unicode.
  * otherwise.  This is akin to ctype's ispunct(), extended to Unicode.
  */
  */
 INLINE bool TextEncoder::
 INLINE bool TextEncoder::
-unicode_ispunct(int character) {
+unicode_ispunct(char32_t character) {
   const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
   const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
   if (entry == nullptr) {
   if (entry == nullptr) {
     // Some punctuation marks aren't listed in the map.
     // Some punctuation marks aren't listed in the map.
@@ -326,7 +326,7 @@ unicode_ispunct(int character) {
  * otherwise.  This is akin to ctype's isupper(), extended to Unicode.
  * otherwise.  This is akin to ctype's isupper(), extended to Unicode.
  */
  */
 INLINE bool TextEncoder::
 INLINE bool TextEncoder::
-unicode_isupper(int character) {
+unicode_isupper(char32_t character) {
   const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
   const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
   if (entry == nullptr) {
   if (entry == nullptr) {
     return false;
     return false;
@@ -339,7 +339,7 @@ unicode_isupper(int character) {
  * otherwise.  This is akin to ctype's isspace(), extended to Unicode.
  * otherwise.  This is akin to ctype's isspace(), extended to Unicode.
  */
  */
 INLINE bool TextEncoder::
 INLINE bool TextEncoder::
-unicode_isspace(int character) {
+unicode_isspace(char32_t character) {
   switch (character) {
   switch (character) {
   case ' ':
   case ' ':
   case '\t':
   case '\t':
@@ -356,7 +356,7 @@ unicode_isspace(int character) {
  * otherwise.  This is akin to ctype's islower(), extended to Unicode.
  * otherwise.  This is akin to ctype's islower(), extended to Unicode.
  */
  */
 INLINE bool TextEncoder::
 INLINE bool TextEncoder::
-unicode_islower(int character) {
+unicode_islower(char32_t character) {
   const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
   const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
   if (entry == nullptr) {
   if (entry == nullptr) {
     return false;
     return false;
@@ -369,7 +369,7 @@ unicode_islower(int character) {
  * akin to ctype's toupper(), extended to Unicode.
  * akin to ctype's toupper(), extended to Unicode.
  */
  */
 INLINE int TextEncoder::
 INLINE int TextEncoder::
-unicode_toupper(int character) {
+unicode_toupper(char32_t character) {
   const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
   const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
   if (entry == nullptr) {
   if (entry == nullptr) {
     return character;
     return character;
@@ -382,7 +382,7 @@ unicode_toupper(int character) {
  * akin to ctype's tolower(), extended to Unicode.
  * akin to ctype's tolower(), extended to Unicode.
  */
  */
 INLINE int TextEncoder::
 INLINE int TextEncoder::
-unicode_tolower(int character) {
+unicode_tolower(char32_t character) {
   const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
   const UnicodeLatinMap::Entry *entry = UnicodeLatinMap::look_up(character);
   if (entry == nullptr) {
   if (entry == nullptr) {
     return character;
     return character;

+ 10 - 10
dtool/src/dtoolutil/textEncoder.h

@@ -23,7 +23,7 @@ class StringDecoder;
 
 
 /**
 /**
  * This class can be used to convert text between multiple representations,
  * This class can be used to convert text between multiple representations,
- * e.g.  utf-8 to Unicode.  You may use it as a static class object, passing
+ * e.g.  UTF-8 to UTF-16.  You may use it as a static class object, passing
  * the encoding each time, or you may create an instance and use that object,
  * the encoding each time, or you may create an instance and use that object,
  * which will record the current encoding and retain the current string.
  * which will record the current encoding and retain the current string.
  *
  *
@@ -78,21 +78,21 @@ PUBLISHED:
   INLINE void append_unicode_char(char32_t character);
   INLINE void append_unicode_char(char32_t character);
   INLINE size_t get_num_chars() const;
   INLINE size_t get_num_chars() const;
   INLINE int get_unicode_char(size_t index) const;
   INLINE int get_unicode_char(size_t index) const;
-  INLINE void set_unicode_char(size_t index, int character);
+  INLINE void set_unicode_char(size_t index, char32_t character);
   INLINE std::string get_encoded_char(size_t index) const;
   INLINE std::string get_encoded_char(size_t index) const;
   INLINE std::string get_encoded_char(size_t index, Encoding encoding) const;
   INLINE std::string get_encoded_char(size_t index, Encoding encoding) const;
   INLINE std::string get_text_as_ascii() const;
   INLINE std::string get_text_as_ascii() const;
 
 
   INLINE static std::string reencode_text(const std::string &text, Encoding from, Encoding to);
   INLINE static std::string reencode_text(const std::string &text, Encoding from, Encoding to);
 
 
-  INLINE static bool unicode_isalpha(int character);
-  INLINE static bool unicode_isdigit(int character);
-  INLINE static bool unicode_ispunct(int character);
-  INLINE static bool unicode_islower(int character);
-  INLINE static bool unicode_isupper(int character);
-  INLINE static bool unicode_isspace(int character);
-  INLINE static int unicode_toupper(int character);
-  INLINE static int unicode_tolower(int character);
+  INLINE static bool unicode_isalpha(char32_t character);
+  INLINE static bool unicode_isdigit(char32_t character);
+  INLINE static bool unicode_ispunct(char32_t character);
+  INLINE static bool unicode_islower(char32_t character);
+  INLINE static bool unicode_isupper(char32_t character);
+  INLINE static bool unicode_isspace(char32_t character);
+  INLINE static int unicode_toupper(char32_t character);
+  INLINE static int unicode_tolower(char32_t character);
 
 
   INLINE static std::string upper(const std::string &source);
   INLINE static std::string upper(const std::string &source);
   INLINE static std::string upper(const std::string &source, Encoding encoding);
   INLINE static std::string upper(const std::string &source, Encoding encoding);

+ 1 - 1
dtool/src/dtoolutil/unicodeLatinMap.cxx

@@ -1378,7 +1378,7 @@ static const wchar_t combining_accent_map[] = {
  * Returns the Entry associated with the indicated character, if there is one.
  * Returns the Entry associated with the indicated character, if there is one.
  */
  */
 const UnicodeLatinMap::Entry *UnicodeLatinMap::
 const UnicodeLatinMap::Entry *UnicodeLatinMap::
-look_up(wchar_t character) {
+look_up(char32_t character) {
   if (!_initialized) {
   if (!_initialized) {
     init();
     init();
   }
   }

+ 5 - 5
dtool/src/dtoolutil/unicodeLatinMap.h

@@ -112,17 +112,17 @@ public:
 
 
   class Entry {
   class Entry {
   public:
   public:
-    wchar_t _character;
+    char32_t _character;
     CharType _char_type;
     CharType _char_type;
     char _ascii_equiv;
     char _ascii_equiv;
     char _ascii_additional;
     char _ascii_additional;
-    wchar_t _tolower_character;
-    wchar_t _toupper_character;
+    char32_t _tolower_character;
+    char32_t _toupper_character;
     AccentType _accent_type;
     AccentType _accent_type;
     int _additional_flags;
     int _additional_flags;
   };
   };
 
 
-  static const Entry *look_up(wchar_t character);
+  static const Entry *look_up(char32_t character);
 
 
   static wchar_t get_combining_accent(AccentType accent);
   static wchar_t get_combining_accent(AccentType accent);
 
 
@@ -130,7 +130,7 @@ private:
   static void init();
   static void init();
   static bool _initialized;
   static bool _initialized;
 
 
-  typedef phash_map<wchar_t, const Entry *, integer_hash<wchar_t> > ByCharacter;
+  typedef phash_map<char32_t, const Entry *, integer_hash<char32_t> > ByCharacter;
   static ByCharacter *_by_character;
   static ByCharacter *_by_character;
   enum { max_direct_chars = 256 };
   enum { max_direct_chars = 256 };
   static const Entry *_direct_chars[max_direct_chars];
   static const Entry *_direct_chars[max_direct_chars];