tokenize.cxx 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134
  1. // Filename: tokenize.cxx
  2. // Created by: drose (25Sep00)
  3. //
  4. ////////////////////////////////////////////////////////////////////
  5. #include "tokenize.h"
  6. #include <ctype.h>
  ////////////////////////////////////////////////////////////////////
  //     Function: tokenize
  //  Description: Splits the source string at every occurrence of any
  //               character found in delimiters, and appends the
  //               resulting pieces, in order, to the tokens vector.
  //
  //               Every delimiter character ends a token, so two
  //               adjacent delimiters (or a delimiter at the very
  //               beginning or end of the string) produce a
  //               zero-length token.  A source string containing N
  //               delimiter characters therefore always yields N + 1
  //               tokens; in particular, an empty source string
  //               yields a single empty token.
  //
  //               The tokens vector is not cleared first; the caller
  //               should empty it beforehand if only the new tokens
  //               are wanted.
  ////////////////////////////////////////////////////////////////////
  void
  tokenize(const string &source, vector<string> &tokens,
           const string &delimiters) {
    size_t start = 0;
    size_t end = source.find_first_of(delimiters, start);

    // Emit one token for each delimiter we can find.
    while (end != string::npos) {
      tokens.push_back(source.substr(start, end - start));
      start = end + 1;
      end = source.find_first_of(delimiters, start);
    }

    // Whatever follows the last delimiter is the final token.  This is
    // the empty string when the source is empty or ends on a delimiter
    // (start is then exactly source.length(), which substr() accepts).
    tokens.push_back(source.substr(start));
  }
  ////////////////////////////////////////////////////////////////////
  //     Function: tokenize_whitespace
  //  Description: Splits the source string into words separated by
  //               whitespace, as classified by isspace(), and appends
  //               the words, in order, to the tokens vector.
  //
  //               Unlike tokenize(), runs of whitespace count as a
  //               single separator, and leading or trailing
  //               whitespace is ignored, so no zero-length tokens are
  //               ever produced.  An empty or all-whitespace source
  //               string appends nothing.
  //
  //               The tokens vector is not cleared first; the caller
  //               should empty it beforehand if only the new tokens
  //               are wanted.
  ////////////////////////////////////////////////////////////////////
  void
  tokenize_whitespace(const string &source, vector<string> &tokens) {
    const size_t length = source.length();

    size_t p = 0;
    while (p < length) {
      // The <ctype.h> functions require a value representable as
      // unsigned char (or EOF).  Plain char may be signed, so bytes
      // 0x80 and above must be cast before the call to avoid
      // undefined behavior.
      if (isspace(static_cast<unsigned char>(source[p]))) {
        // Still in the gap between words.
        ++p;
        continue;
      }

      // p is at the first character of a word; scan to its end.
      size_t q = p + 1;
      while (q < length &&
             !isspace(static_cast<unsigned char>(source[q]))) {
        ++q;
      }
      tokens.push_back(source.substr(p, q - p));
      p = q;
    }
  }
  ////////////////////////////////////////////////////////////////////
  //     Function: repaste
  //  Description: Joins the given tokens back into a single string,
  //               inserting the separator string between each pair of
  //               adjacent tokens.  No separator is written before
  //               the first token or after the last one; an empty
  //               vector yields an empty string.
  ////////////////////////////////////////////////////////////////////
  string
  repaste(const vector<string> &tokens, const string &separator) {
    string joined;

    for (size_t i = 0; i < tokens.size(); ++i) {
      // Every token except the first is preceded by the separator.
      if (i != 0) {
        joined += separator;
      }
      joined += tokens[i];
    }

    return joined;
  }
  ////////////////////////////////////////////////////////////////////
  //     Function: trim_blanks
  //  Description: Returns a copy of the string with all leading and
  //               trailing whitespace, as classified by isspace(),
  //               removed.  Whitespace in the interior of the string
  //               is preserved.  An empty or all-whitespace string
  //               yields an empty string.
  ////////////////////////////////////////////////////////////////////
  string
  trim_blanks(const string &str) {
    // The <ctype.h> functions require a value representable as
    // unsigned char (or EOF).  Plain char may be signed, so each
    // character is cast before the call to avoid undefined behavior
    // on bytes 0x80 and above.

    // Advance past the leading whitespace.
    size_t first = 0;
    while (first < str.length() &&
           isspace(static_cast<unsigned char>(str[first]))) {
      ++first;
    }

    // Back up over the trailing whitespace, never crossing first, so
    // an all-whitespace string collapses to a zero-length range.
    size_t last = str.length();
    while (last > first &&
           isspace(static_cast<unsigned char>(str[last - 1]))) {
      --last;
    }

    return str.substr(first, last - first);
  }
  ////////////////////////////////////////////////////////////////////
  //     Function: contains_whitespace
  //  Description: Returns true if at least one character of the
  //               string is whitespace, as classified by isspace(),
  //               or false if none is.  An empty string contains no
  //               whitespace.
  ////////////////////////////////////////////////////////////////////
  bool
  contains_whitespace(const string &str) {
    for (string::const_iterator si = str.begin(); si != str.end(); ++si) {
      // The <ctype.h> functions require a value representable as
      // unsigned char (or EOF).  Plain char may be signed, so cast
      // before the call to avoid undefined behavior on bytes 0x80
      // and above.
      if (isspace(static_cast<unsigned char>(*si))) {
        return true;
      }
    }
    return false;
  }