// Filename: tokenize.cxx
// Created by: drose (25Sep00)
//
////////////////////////////////////////////////////////////////////
- #include "tokenize.h"
- #include <ctype.h>
////////////////////////////////////////////////////////////////////
//     Function: tokenize
//  Description: Splits the source string at every occurrence of any
//               character found in delimiters, appending each piece
//               to the tokens vector.  Adjacent delimiters produce
//               zero-length tokens between them, a trailing
//               delimiter produces a trailing zero-length token,
//               and an empty source produces exactly one
//               zero-length token.
//
//               The tokens vector is never cleared here; new pieces
//               are appended after whatever it already holds, so
//               the caller should empty it first if only the new
//               results are wanted.
////////////////////////////////////////////////////////////////////
void
tokenize(const string &source, vector<string> &tokens,
         const string &delimiters) {
  size_t start = 0;
  size_t stop = source.find_first_of(delimiters, start);

  // Every delimiter found closes off one token.
  while (stop != string::npos) {
    tokens.push_back(source.substr(start, stop - start));
    start = stop + 1;
    // Searching from a position at or past the end simply yields npos.
    stop = source.find_first_of(delimiters, start);
  }

  // Whatever follows the last delimiter (possibly nothing at all) is
  // the final token.
  tokens.push_back(source.substr(start));
}
////////////////////////////////////////////////////////////////////
//     Function: tokenize_whitespace
//  Description: Chops the source string up into pieces delimited by
//               whitespace characters (as classified by isspace()).
//               It is different from tokenize() in that repeated
//               whitespace characters are not significant: leading,
//               trailing, and consecutive whitespace never produce
//               zero-length tokens.  A source that is empty or
//               entirely whitespace appends nothing.
//
//               It is the user's responsibility to ensure the output
//               vector is cleared before calling this function; the
//               results will simply be appended to the end of the
//               vector.
////////////////////////////////////////////////////////////////////
void
tokenize_whitespace(const string &source, vector<string> &tokens) {
  const size_t length = source.length();
  size_t p = 0;

  while (p < length) {
    // Skip over any whitespace preceding the next word.  The cast to
    // unsigned char is required: handing isspace() a negative value
    // (any byte >= 0x80 where plain char is signed) is undefined
    // behavior.
    while (p < length && isspace(static_cast<unsigned char>(source[p]))) {
      ++p;
    }
    if (p == length) {
      // Nothing but whitespace remained.
      break;
    }

    // Now scan to the end of the word.
    size_t q = p;
    while (q < length && !isspace(static_cast<unsigned char>(source[q]))) {
      ++q;
    }
    tokens.push_back(source.substr(p, q - p));
    p = q;
  }
}
////////////////////////////////////////////////////////////////////
//     Function: repaste
//  Description: Joins the given tokens, in order, into one string,
//               inserting the separator string between each pair of
//               neighboring tokens.  No separator is written before
//               the first token or after the last one; an empty
//               token list yields an empty string.
////////////////////////////////////////////////////////////////////
string
repaste(const vector<string> &tokens, const string &separator) {
  string joined;
  for (size_t i = 0; i < tokens.size(); ++i) {
    // Only tokens after the first are preceded by a separator.
    if (i != 0) {
      joined += separator;
    }
    joined += tokens[i];
  }
  return joined;
}
////////////////////////////////////////////////////////////////////
//     Function: trim_blanks
//  Description: Returns a new string, equivalent to the source
//               string, but with the leading and trailing whitespace
//               (as classified by isspace()) removed.  Whitespace in
//               the interior of the string is preserved.  A string
//               that is empty or entirely whitespace yields an
//               empty string.
////////////////////////////////////////////////////////////////////
string
trim_blanks(const string &str) {
  // Advance past the leading whitespace.  The cast to unsigned char is
  // required: handing isspace() a negative value (any byte >= 0x80
  // where plain char is signed) is undefined behavior.
  size_t p = 0;
  while (p < str.length() && isspace(static_cast<unsigned char>(str[p]))) {
    ++p;
  }

  // Back up over the trailing whitespace, never crossing p so that an
  // all-whitespace string collapses to an empty range.
  size_t q = str.length();
  while (q > p && isspace(static_cast<unsigned char>(str[q - 1]))) {
    --q;
  }

  return str.substr(p, q - p);
}
////////////////////////////////////////////////////////////////////
//     Function: contains_whitespace
//  Description: Returns true if the string contains any whitespace
//               characters (as classified by isspace()), false if
//               it does not.  An empty string returns false.
////////////////////////////////////////////////////////////////////
bool
contains_whitespace(const string &str) {
  for (string::const_iterator si = str.begin(); si != str.end(); ++si) {
    // The cast to unsigned char is required: handing isspace() a
    // negative value (any byte >= 0x80 where plain char is signed) is
    // undefined behavior.
    if (isspace(static_cast<unsigned char>(*si))) {
      return true;
    }
  }
  return false;
}
|