20 years ago · 56b25dafe6
--- a/panda/src/express/textEncoder.I
+++ b/panda/src/express/textEncoder.I
@@ -371,6 +371,26 @@ unicode_isupper(int character) {
 
				   return entry->_char_type == UnicodeLatinMap::CT_upper;
			
 
				 }
			
 
				 
			
 
				+////////////////////////////////////////////////////////////////////
				+//     Function: TextEncoder::unicode_isspace
				+//       Access: Published, Static
				+//  Description: Returns true if the indicated character is a
				+//               whitespace character, false otherwise.  This is akin
				+//               to ctype's isspace(), extended to Unicode.
				+//
				+//               All six characters accepted by isspace() in the "C"
				+//               locale are recognized (space, \t, \n, \v, \f, \r),
				+//               along with the breaking Unicode space characters.
				+//               The non-breaking spaces (U+00A0, U+2007, U+202F) are
				+//               deliberately not reported as whitespace, so that
				+//               they are never treated as a word boundary.
				+////////////////////////////////////////////////////////////////////
				+INLINE bool TextEncoder::
				+unicode_isspace(int character) {
				+  switch (character) {
				+  case ' ':
				+  case '\t':
				+  case '\n':
				+  case '\v':
				+  case '\f':
				+  case '\r':
				+  case 0x1680:  // Ogham space mark
				+  case 0x2028:  // line separator
				+  case 0x2029:  // paragraph separator
				+  case 0x205f:  // medium mathematical space
				+  case 0x3000:  // ideographic space
				+    return true;
				+
				+  default:
				+    // U+2000 through U+200A are the fixed-width typographic spaces;
				+    // of these, only U+2007 (figure space) is non-breaking.
				+    return (character >= 0x2000 && character <= 0x200a &&
				+            character != 0x2007);
				+  }
				+}
			
 
				+
			
 
				 ////////////////////////////////////////////////////////////////////
			
 
				 //     Function: TextEncoder::unicode_islower
			
 
				 //       Access: Published, Static
			
--- a/panda/src/express/textEncoder.h
+++ b/panda/src/express/textEncoder.h
@@ -79,6 +79,7 @@ PUBLISHED:
 
				   INLINE static bool unicode_ispunct(int character);
			
 
				   INLINE static bool unicode_islower(int character);
			
 
				   INLINE static bool unicode_isupper(int character);
			
 
				   // Whitespace test for a single character code; the inline
				   // definition lives in textEncoder.I.
				+  INLINE static bool unicode_isspace(int character);
			
 
				   INLINE static int unicode_toupper(int character);
			
 
				   INLINE static int unicode_tolower(int character);
			
 
				 
			
--- a/panda/src/putil/string_utils.cxx
+++ b/panda/src/putil/string_utils.cxx
@@ -17,6 +17,7 @@
 
				 ////////////////////////////////////////////////////////////////////
			
 
				 
			
 
				 #include "string_utils.h"
			
 
				+#include "textEncoder.h"
			
 
				 
			
 
				 #include <ctype.h>
			
 
				 
			
@@ -131,6 +132,40 @@ extract_words(const string &str, vector_string &words) {
 
				   return num_words;
			
 
				 }
			
 
				 
			
 
				+////////////////////////////////////////////////////////////////////
				+//     Function: extract_words
				+//  Description: Splits the wide string into whitespace-separated
				+//               words, using TextEncoder::unicode_isspace() to
				+//               decide what counts as whitespace.  Each word found
				+//               is appended to the words vector, which is not
				+//               cleared first; the caller should empty it beforehand
				+//               if only the words of this string are wanted.
				+//
				+//               Returns the number of words that were appended.
				+////////////////////////////////////////////////////////////////////
				+int
				+extract_words(const wstring &str, pvector<wstring> &words) {
				+  const size_t length = str.length();
				+  int count = 0;
				+  size_t cursor = 0;
				+
				+  for (;;) {
				+    // Step over the run of whitespace that precedes the next word.
				+    while (cursor < length && TextEncoder::unicode_isspace(str[cursor])) {
				+      ++cursor;
				+    }
				+    if (cursor >= length) {
				+      break;
				+    }
				+
				+    // The cursor now rests on a non-whitespace character; consume
				+    // characters until whitespace or the end of the string.
				+    const size_t first = cursor;
				+    do {
				+      ++cursor;
				+    } while (cursor < length && !TextEncoder::unicode_isspace(str[cursor]));
				+
				+    words.push_back(str.substr(first, cursor - first));
				+    ++count;
				+  }
				+
				+  return count;
				+}
			
 
				+
			
 
				 ////////////////////////////////////////////////////////////////////
			
 
				 //     Function: tokenize
			
 
				 //  Description: Chops the source string up into pieces delimited by
			
@@ -158,6 +193,33 @@ tokenize(const string &str, vector_string &words, const string &delimiters) {
 
				   words.push_back(string());
			
 
				 }
			
 
				 
			
 
				+////////////////////////////////////////////////////////////////////
				+//     Function: tokenize
				+//  Description: Chops the source string up into pieces delimited by
				+//               any of the characters specified in delimiters.
				+//               Repeated delimiter characters represent zero-length
				+//               tokens.
				+//
				+//               An empty source string yields a single empty token,
				+//               and a source string that ends with a delimiter
				+//               yields an empty final token.
				+//
				+//               It is the user's responsibility to ensure the output
				+//               vector is cleared before calling this function; the
				+//               results will simply be appended to the end of the
				+//               vector.
				+////////////////////////////////////////////////////////////////////
				+void
				+tokenize(const wstring &str, pvector<wstring> &words, const wstring &delimiters) {
				+  size_t p = 0;
				+  while (p < str.length()) {
				+    size_t q = str.find_first_of(delimiters, p);
				+    // The search is on a wstring, so compare against wstring's own
				+    // "not found" sentinel rather than string's.
				+    if (q == wstring::npos) {
				+      // No further delimiter: the remainder is the last token.
				+      words.push_back(str.substr(p));
				+      return;
				+    }
				+    words.push_back(str.substr(p, q - p));
				+    p = q + 1;
				+  }
				+  // Reached only when the string is empty or its last character is
				+  // a delimiter; either way there is one trailing empty token.
				+  words.push_back(wstring());
				+}
			
 
				+
			
 
				 ////////////////////////////////////////////////////////////////////
			
 
				 //     Function: trim_left
			
 
				 //  Description: Returns a new string representing the contents of the
			
@@ -173,6 +235,21 @@ trim_left(const string &str) {
 
				   return str.substr(begin);
			
 
				 }
			
 
				 
			
 
				+////////////////////////////////////////////////////////////////////
				+//     Function: trim_left
				+//  Description: Returns a copy of the given wide string with any
				+//               leading whitespace, as classified by
				+//               TextEncoder::unicode_isspace(), stripped off.
				+////////////////////////////////////////////////////////////////////
				+wstring
				+trim_left(const wstring &str) {
				+  wstring::const_iterator first = str.begin();
				+  for (; first != str.end(); ++first) {
				+    if (!TextEncoder::unicode_isspace(*first)) {
				+      break;
				+    }
				+  }
				+
				+  return wstring(first, str.end());
				+}
			
 
				+
			
 
				 ////////////////////////////////////////////////////////////////////
			
 
				 //     Function: trim_right
			
 
				 //  Description: Returns a new string representing the contents of the
			
@@ -189,6 +266,22 @@ trim_right(const string &str) {
 
				   return str.substr(begin, end - begin);
			
 
				 }
			
 
				 
			
 
				+////////////////////////////////////////////////////////////////////
				+//     Function: trim_right
				+//  Description: Returns a copy of the given wide string with any
				+//               trailing whitespace, as classified by
				+//               TextEncoder::unicode_isspace(), stripped off.
				+////////////////////////////////////////////////////////////////////
				+wstring
				+trim_right(const wstring &str) {
				+  // Number of leading characters to keep.
				+  size_t keep = str.size();
				+  for (; keep != 0; --keep) {
				+    if (!TextEncoder::unicode_isspace(str[keep - 1])) {
				+      break;
				+    }
				+  }
				+
				+  return str.substr(0, keep);
				+}
			
 
				+
			
 
				 ////////////////////////////////////////////////////////////////////
			
 
				 //     Function: trim
			
 
				 //  Description: Returns a new string representing the contents of the
			
@@ -210,6 +303,27 @@ trim(const string &str) {
 
				   return str.substr(begin, end - begin);
			
 
				 }
			
 
				 
			
 
				+////////////////////////////////////////////////////////////////////
				+//     Function: trim
				+//  Description: Returns a copy of the given wide string with the
				+//               whitespace at both ends stripped off.  Whitespace is
				+//               whatever TextEncoder::unicode_isspace() reports.
				+////////////////////////////////////////////////////////////////////
				+wstring
				+trim(const wstring &str) {
				+  const size_t length = str.size();
				+
				+  // Index of the first character to keep.
				+  size_t first = 0;
				+  for (; first < length; ++first) {
				+    if (!TextEncoder::unicode_isspace(str[first])) {
				+      break;
				+    }
				+  }
				+
				+  // One past the last character to keep; never moves below first.
				+  size_t last = length;
				+  for (; last > first; --last) {
				+    if (!TextEncoder::unicode_isspace(str[last - 1])) {
				+      break;
				+    }
				+  }
				+
				+  return str.substr(first, last - first);
				+}
			
 
				+
			
 
				 ////////////////////////////////////////////////////////////////////
			
 
				 //     Function: string_to_int
			
 
				 //  Description: A string-interface wrapper around the C library
			
--- a/panda/src/putil/string_utils.h
+++ b/panda/src/putil/string_utils.h
@@ -39,15 +39,21 @@ EXPCL_PANDA string upcase(const string &s);
 
				 
			
 
				 // Separates the string into words according to whitespace.  The
				 // return value is the number of words extracted.  The wstring
				 // overload appends to words without clearing it first, and uses
				 // TextEncoder::unicode_isspace() to recognize whitespace.
			
 
				 EXPCL_PANDA int extract_words(const string &str, vector_string &words);
			
 
				+EXPCL_PANDA int extract_words(const wstring &str, pvector<wstring> &words);
			
 
				 
			
 
				 // Separates the string into words according to the indicated delimiters.
				 // Any one of the characters in delimiters ends a token.  For the
				 // wstring overload, repeated delimiters produce zero-length tokens
				 // and the tokens are appended to words without clearing it first.
			
 
				 EXPCL_PANDA void tokenize(const string &str, vector_string &words,
			
 
				                           const string &delimiters);
			
 
				+EXPCL_PANDA void tokenize(const wstring &str, pvector<wstring> &words,
			
 
				+                          const wstring &delimiters);
			
 
				 
			
 
				 // Trims leading and/or trailing whitespace from the string.  Each
				 // function returns a new string; the argument is left unmodified.
			
 
				 EXPCL_PANDA string trim_left(const string &str);
			
 
				+EXPCL_PANDA wstring trim_left(const wstring &str);
			
 
				 EXPCL_PANDA string trim_right(const string &str);
			
 
				+EXPCL_PANDA wstring trim_right(const wstring &str);
			
 
				 EXPCL_PANDA string trim(const string &str);
			
 
				+EXPCL_PANDA wstring trim(const wstring &str);
			
 
				 
			
 
				 // Functions to parse numeric values out of a string.
			
 
				 EXPCL_PANDA int string_to_int(const string &str, string &tail);