globPattern.cxx 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354
  1. // Filename: globPattern.cxx
  2. // Created by: drose (30May00)
  3. //
  4. ////////////////////////////////////////////////////////////////////
  5. //
  6. // PANDA 3D SOFTWARE
  7. // Copyright (c) Carnegie Mellon University. All rights reserved.
  8. //
  9. // All use of this software is subject to the terms of the revised BSD
  10. // license. You should have received a copy of this license along
  11. // with this source code in a file named "LICENSE."
  12. //
  13. ////////////////////////////////////////////////////////////////////
  14. #include "globPattern.h"
  15. #include <ctype.h>
  16. ////////////////////////////////////////////////////////////////////
  17. // Function: GlobPattern::has_glob_characters
  18. // Access: Public
  19. // Description: Returns true if the pattern includes any special
  20. // globbing characters, or false if it is just a literal
  21. // string.
  22. ////////////////////////////////////////////////////////////////////
  23. bool GlobPattern::
  24. has_glob_characters() const {
  25. string::const_iterator pi;
  26. pi = _pattern.begin();
  27. while (pi != _pattern.end()) {
  28. switch (*pi) {
  29. case '*':
  30. case '?':
  31. case '[':
  32. return true;
  33. case '\\':
  34. ++pi;
  35. if (pi == _pattern.end()) {
  36. return false;
  37. }
  38. }
  39. ++pi;
  40. }
  41. return false;
  42. }
  43. ////////////////////////////////////////////////////////////////////
  44. // Function: GlobPattern::get_const_prefix
  45. // Access: Public
  46. // Description: Returns the initial part of the pattern before the
  47. // first glob character. Since many glob patterns begin
  48. // with a sequence of static characters and end with one
  49. // or more glob characters, this can be used to
  50. // optimized searches through sorted indices.
  51. ////////////////////////////////////////////////////////////////////
  52. string GlobPattern::
  53. get_const_prefix() const {
  54. string prefix;
  55. size_t p = 0; // current point
  56. size_t q = 0; // starting point
  57. while (p < _pattern.size()) {
  58. switch (_pattern[p]) {
  59. case '*':
  60. case '?':
  61. case '[':
  62. return prefix + _pattern.substr(q, p - q);
  63. case '\\':
  64. // Skip over the backslash.
  65. prefix += _pattern.substr(q, p - q);
  66. ++p;
  67. q = p;
  68. }
  69. ++p;
  70. }
  71. return prefix += _pattern.substr(q, p - q);
  72. }
  73. ////////////////////////////////////////////////////////////////////
  74. // Function: GlobPattern::match_files
  75. // Access: Public
  76. // Description: Treats the GlobPattern as a filename pattern, and
  77. // returns a list of any actual files that match the
  78. // pattern. This is the behavior of the standard Posix
  79. // glob() function. Any part of the filename may
  80. // contain glob characters, including intermediate
  81. // directory names.
  82. //
  83. // If cwd is specified, it is the directory that
  84. // relative filenames are taken to be relative to;
  85. // otherwise, the actual current working directory is
  86. // assumed.
  87. //
  88. // The return value is the number of files matched,
  89. // which are added to the results vector.
  90. ////////////////////////////////////////////////////////////////////
  91. int GlobPattern::
  92. match_files(vector_string &results, const Filename &cwd) {
  93. string prefix, pattern, suffix;
  94. string source = _pattern;
  95. if (!source.empty() && source[0] == '/') {
  96. // If the first character is a slash, that becomes the prefix.
  97. prefix = "/";
  98. source = source.substr(1);
  99. }
  100. size_t slash = source.find('/');
  101. if (slash == string::npos) {
  102. pattern = source;
  103. } else {
  104. pattern = source.substr(0, slash);
  105. suffix = source.substr(slash + 1);
  106. }
  107. GlobPattern glob(pattern);
  108. return glob.r_match_files(prefix, suffix, results, cwd);
  109. }
  110. ////////////////////////////////////////////////////////////////////
  111. // Function: GlobPattern::r_match_files
  112. // Access: Private
  113. // Description: The recursive implementation of match_files().
  114. ////////////////////////////////////////////////////////////////////
  115. int GlobPattern::
  116. r_match_files(const Filename &prefix, const string &suffix,
  117. vector_string &results, const Filename &cwd) {
  118. string next_pattern, next_suffix;
  119. size_t slash = suffix.find('/');
  120. if (slash == string::npos) {
  121. next_pattern = suffix;
  122. } else {
  123. next_pattern = suffix.substr(0, slash);
  124. next_suffix = suffix.substr(slash + 1);
  125. }
  126. Filename parent_dir;
  127. if (prefix.is_local() && !cwd.empty()) {
  128. parent_dir = Filename(cwd, prefix);
  129. } else {
  130. parent_dir = prefix;
  131. }
  132. GlobPattern next_glob(next_pattern);
  133. if (!has_glob_characters()) {
  134. // If there are no special characters in the pattern, it's a
  135. // literal match.
  136. if (suffix.empty()) {
  137. // Time to stop.
  138. Filename single_filename(parent_dir, _pattern);
  139. if (single_filename.exists()) {
  140. results.push_back(Filename(prefix, _pattern));
  141. return 1;
  142. }
  143. return 0;
  144. }
  145. return next_glob.r_match_files(Filename(prefix, _pattern),
  146. next_suffix, results, cwd);
  147. }
  148. // If there *are* special glob characters, we must attempt to
  149. // match the pattern against the files in this directory.
  150. vector_string dir_files;
  151. if (!parent_dir.scan_directory(dir_files)) {
  152. // Not a directory, or unable to read directory; stop here.
  153. return 0;
  154. }
  155. // Now go through each file in the directory looking for one that
  156. // matches the pattern.
  157. int num_matched = 0;
  158. vector_string::const_iterator fi;
  159. for (fi = dir_files.begin(); fi != dir_files.end(); ++fi) {
  160. const string &local_file = (*fi);
  161. if (_pattern[0] == '.' || (local_file.empty() || local_file[0] != '.')) {
  162. if (matches(local_file)) {
  163. // We have a match; continue.
  164. if (suffix.empty()) {
  165. results.push_back(Filename(prefix, local_file));
  166. num_matched++;
  167. } else {
  168. num_matched += next_glob.r_match_files(Filename(prefix, local_file),
  169. next_suffix, results, cwd);
  170. }
  171. }
  172. }
  173. }
  174. return num_matched;
  175. }
  176. ////////////////////////////////////////////////////////////////////
  177. // Function: GlobPattern::matches_substr
  178. // Access: Private
  179. // Description: The recursive implementation of matches(). This
  180. // returns true if the pattern substring [pi, pend)
  181. // matches the candidate substring [ci, cend), false
  182. // otherwise.
  183. ////////////////////////////////////////////////////////////////////
  184. bool GlobPattern::
  185. matches_substr(string::const_iterator pi, string::const_iterator pend,
  186. string::const_iterator ci, string::const_iterator cend) const {
  187. // If we run out of pattern or candidate string, it's a match only
  188. // if they both ran out at the same time.
  189. if (pi == pend || ci == cend) {
  190. // A special exception: we allow ci to reach the end before pi,
  191. // only if pi is one character before the end and that last
  192. // character is '*'.
  193. if ((ci == cend) && (std::distance(pi, pend) == 1) && (*pi) == '*') {
  194. return true;
  195. }
  196. return (pi == pend && ci == cend);
  197. }
  198. switch (*pi) {
  199. case '*':
  200. // A '*' in the pattern string means to match any sequence of zero
  201. // or more characters in the candidate string. This means we have
  202. // to recurse twice: either consume one character of the candidate
  203. // string and continue to try matching the *, or stop trying to
  204. // match the * here.
  205. if (_nomatch_chars.find(*ci) == string::npos) {
  206. return
  207. matches_substr(pi, pend, ci + 1, cend) ||
  208. matches_substr(pi + 1, pend, ci, cend);
  209. } else {
  210. // On the other hand, if this is one of the nomatch chars, we
  211. // can only stop here.
  212. return matches_substr(pi + 1, pend, ci, cend);
  213. }
  214. case '?':
  215. // A '?' in the pattern string means to match exactly one
  216. // character in the candidate string. That's easy.
  217. return matches_substr(pi + 1, pend, ci + 1, cend);
  218. case '[':
  219. // An open square bracket begins a set.
  220. ++pi;
  221. if ((*pi) == '!') {
  222. ++pi;
  223. if (matches_set(pi, pend, *ci)) {
  224. return false;
  225. }
  226. } else {
  227. if (!matches_set(pi, pend, *ci)) {
  228. return false;
  229. }
  230. }
  231. if (pi == pend) {
  232. // Oops, there wasn't a closing square bracket.
  233. return false;
  234. }
  235. return matches_substr(pi + 1, pend, ci + 1, cend);
  236. case '\\':
  237. // A backslash escapes the next special character.
  238. ++pi;
  239. if (pi == pend) {
  240. return false;
  241. }
  242. // fall through.
  243. default:
  244. // Anything else means to match exactly that.
  245. if (_case_sensitive) {
  246. if ((*pi) != (*ci)) {
  247. return false;
  248. }
  249. } else {
  250. if (tolower(*pi) != tolower(*ci)) {
  251. return false;
  252. }
  253. }
  254. return matches_substr(pi + 1, pend, ci + 1, cend);
  255. }
  256. }
  257. ////////////////////////////////////////////////////////////////////
  258. // Function: GlobPattern::matches_set
  259. // Access: Private
  260. // Description: Called when an unescaped open square bracked is
  261. // scanned, this is called with pi positioned after the
  262. // opening square bracket, scans the set sequence,
  263. // leaving pi positioned on the closing square bracket,
  264. // and returns true if the indicated character matches
  265. // the set of characters indicated, false otherwise.
  266. ////////////////////////////////////////////////////////////////////
  267. bool GlobPattern::
  268. matches_set(string::const_iterator &pi, string::const_iterator pend,
  269. char ch) const {
  270. bool matched = false;
  271. while (pi != pend && (*pi) != ']') {
  272. if ((*pi) == '\\') {
  273. // Backslash escapes the next character.
  274. ++pi;
  275. if (pi == pend) {
  276. return false;
  277. }
  278. }
  279. if (ch == (*pi)) {
  280. matched = true;
  281. }
  282. // Maybe it's an a-z style range?
  283. char start = (*pi);
  284. ++pi;
  285. if (pi != pend && (*pi) == '-') {
  286. ++pi;
  287. if (pi != pend && (*pi) != ']') {
  288. // Yes, we have a range: start-end.
  289. if ((*pi) == '\\') {
  290. // Backslash escapes.
  291. ++pi;
  292. if (pi == pend) {
  293. return false;
  294. }
  295. }
  296. char end = (*pi);
  297. ++pi;
  298. if (ch >= start && ch <= end ||
  299. (!_case_sensitive &&
  300. ((tolower(ch) >= start && tolower(ch) <= end) ||
  301. (toupper(ch) >= start && toupper(ch) <= end)))) {
  302. matched = true;
  303. }
  304. } else {
  305. // This was a - at the end of the string.
  306. if (ch == '-') {
  307. matched = true;
  308. }
  309. }
  310. }
  311. }
  312. return matched;
  313. }