Browse Source

dtoolutil: add GlobPattern::matches_file() and unit test

This is used similarly to matches(), but for file paths: it does not match slashes for * characters, has special support for /**/, and ignores // and /./ patterns.
rdb 7 years ago
parent
commit
825058805a

+ 122 - 3
dtool/src/dtoolutil/globPattern.cxx

@@ -12,6 +12,7 @@
  */
 
 #include "globPattern.h"
+#include "string_utils.h"
 #include <ctype.h>
 
 using std::string;
@@ -199,9 +200,7 @@ r_match_files(const Filename &prefix, const string &suffix,
     next_glob = *this;
   }
 
-  vector_string::const_iterator fi;
-  for (fi = dir_files.begin(); fi != dir_files.end(); ++fi) {
-    const string &local_file = (*fi);
+  for (const string &local_file : dir_files) {
     if (_pattern[0] == '.' || (local_file.empty() || local_file[0] != '.')) {
       if (matches(local_file)) {
         // We have a match; continue.
@@ -227,6 +226,126 @@ r_match_files(const Filename &prefix, const string &suffix,
   return num_matched;
 }
 
+/**
+ * Treats the GlobPattern as a filename pattern, and returns true if the given
+ * filename matches the pattern.  Unlike matches(), a single asterisk will not
+ * match a slash, a "**" component matches any number of path components, and
+ * "//" and "/./" sequences are ignored in both pattern and filename.
+ */
+bool GlobPattern::
+matches_file(Filename candidate) const {
+  if (_pattern.empty()) {
+    // Special case: an empty pattern only matches an empty filename.
+    return candidate.empty();
+  }
+
+  // Either both must be absolute, or both must be relative.
+  if ((_pattern[0] != '/') != candidate.is_local()) {
+    return false;
+  }
+
+  return r_matches_file(_pattern, candidate);
+}
+
+/**
+ * Recursive core of matches_file(): matches one component, then recurses.
+ */
+bool GlobPattern::
+r_matches_file(const string &pattern, const Filename &candidate) const {
+  // Split off the next slash-delimited component of the pattern.
+  std::string next_pattern;
+  GlobPattern glob;
+  glob.set_case_sensitive(get_case_sensitive());
+  glob.set_nomatch_chars(get_nomatch_chars());
+
+  bool pattern_end;
+  size_t slash = pattern.find('/');
+  if (slash == string::npos) {
+    glob.set_pattern(pattern);
+    pattern_end = true;
+  } else {
+    glob.set_pattern(pattern.substr(0, slash));
+    next_pattern = pattern.substr(slash + 1);
+    pattern_end = false;
+
+    if (slash == 0 || (slash == 1 && pattern[0] == '.')) {
+      // Ignore // and /./ in patterns by skipping the empty or "." component.
+      return r_matches_file(next_pattern, candidate);
+    }
+  }
+
+  // Also split off the next component in the candidate filename.
+  std::string part;
+  Filename next_candidate;
+
+  bool candidate_end;
+  size_t fn_slash = ((const std::string &)candidate).find('/');
+  if (fn_slash == string::npos) {
+    part = candidate;
+    candidate_end = true;
+  } else {
+    part = candidate.substr(0, fn_slash);
+    next_candidate = candidate.substr(fn_slash + 1);
+    candidate_end = false;
+
+    // Ignore // and /./ in filenames.
+    if (fn_slash == 0 || part == ".") {
+      return r_matches_file(pattern, next_candidate);
+    }
+  }
+
+  // Now check if the current part matches the current pattern.
+  bool part_matches;
+  if (glob.get_pattern() == "**") {
+    // A globstar matches any number of parts, including none.
+    if (pattern_end) {
+      // We might as well stop checking here; it matches whatever might come.
+      return true;
+    }
+    // Three options: we match nothing, this part only, or this part and maybe
+    // more.  NOTE(review): the middle option looks implied by the other two.
+    return r_matches_file(next_pattern, candidate)
+        || (!candidate_end && r_matches_file(next_pattern, next_candidate))
+        || (!candidate_end && r_matches_file(pattern, next_candidate));
+  }
+  else if (glob.get_pattern() == "*" && _nomatch_chars.empty()) {
+    // Matches any part (faster version of below)
+    part_matches = true;
+  }
+  else if ((glob.get_pattern() == "." && part.empty())
+        || (glob.get_pattern().empty() && part == ".")) {
+    // So that /path/. matches /path/, and vice versa.
+    part_matches = true;
+  }
+  else if (glob.has_glob_characters()) {
+    part_matches = glob.matches(part);
+  }
+  else if (get_case_sensitive()) {
+    part_matches = (part == glob.get_pattern());
+  }
+  else {
+    part_matches = (cmp_nocase(part, glob.get_pattern()) == 0);
+  }
+
+  if (!part_matches) {
+    // It doesn't match, so we end our search here.
+    return false;
+  }
+
+  if (candidate_end && pattern_end) {
+    // We've reached the end of both candidate and pattern, so it matches.
+    return true;
+  }
+
+  if (pattern_end != candidate_end) {
+    // One of them has ended, but the other hasn't, so it's not a match.
+    return false;
+  }
+
+  // It matches; move on to the next part.
+  return r_matches_file(next_pattern, next_candidate);
+}
+
 /**
  * The recursive implementation of matches().  This returns true if the
  * pattern substring [pi, pend) matches the candidate substring [ci, cend),

+ 2 - 0
dtool/src/dtoolutil/globPattern.h

@@ -52,6 +52,7 @@ PUBLISHED:
   MAKE_PROPERTY(nomatch_chars, get_nomatch_chars, set_nomatch_chars);
 
   INLINE bool matches(const std::string &candidate) const;
+  bool matches_file(Filename candidate) const;
 
   INLINE void output(std::ostream &out) const;
 
@@ -74,6 +75,7 @@ private:
 
   int r_match_files(const Filename &prefix, const std::string &suffix,
                     vector_string &results, const Filename &cwd);
+  bool r_matches_file(const std::string &suffix, const Filename &candidate) const;
 
   std::string _pattern;
   bool _case_sensitive;

+ 85 - 0
tests/dtoolutil/test_globpattern.py

@@ -0,0 +1,85 @@
+from panda3d.core import GlobPattern
+
+
+def test_globpattern_matches_file():
+    patt = GlobPattern('/a/b/c')
+    assert patt.matches_file('/a/b/c')
+    assert patt.matches_file('///a////b//c')
+    assert patt.matches_file('/a/b/././c')
+    assert not patt.matches_file('')
+    assert not patt.matches_file('/')
+    assert not patt.matches_file('/a/b/d')
+    assert not patt.matches_file('/A/b/c')
+    assert not patt.matches_file('/a/b/c/')
+    assert not patt.matches_file('/a/b/c/.')
+    assert not patt.matches_file('a/b/c')
+    assert not patt.matches_file('./a/b/c')
+
+    # Test regular single-component pattern; matching is case-sensitive here
+    patt = GlobPattern('*a')
+    assert patt.matches_file('a')
+    assert patt.matches_file('aa')
+    assert patt.matches_file('xa')
+    assert not patt.matches_file('A')
+    assert not patt.matches_file('ax')
+    assert not patt.matches_file('xax')
+
+    # Test patterns ending in a directory (trailing slash or trailing /.)
+    for patt in GlobPattern('/a/b/c/'), \
+                GlobPattern('/a/b/c/.'), \
+                GlobPattern('/a/b//c//'), \
+                GlobPattern('/a/b/./c/./'):
+        assert patt.matches_file('/a/b/c/')
+        assert patt.matches_file('///a////b//c//')
+        assert patt.matches_file('/a/b/././c/')
+        assert patt.matches_file('/a/b/c/.')
+        assert not patt.matches_file('/a/b/c')
+        assert not patt.matches_file('/a/b/c/./d')
+        assert not patt.matches_file('a/b/c/')
+        assert not patt.matches_file('./a/b/c/')
+
+    # Test globstar in middle; it may match zero or more path components
+    for patt in GlobPattern('/a/**/c'), GlobPattern('/a/**/**/c'):
+        assert patt.matches_file('/a/c')
+        assert patt.matches_file('/a/b/c')
+        assert patt.matches_file('/a/b/d/c')
+        assert not patt.matches_file('/a/b/c/d')
+        assert not patt.matches_file('/d/b/c')
+        assert not patt.matches_file('/a/b/d')
+
+    # Test globstar at the beginning
+    for patt in GlobPattern('/**/b/c'), GlobPattern('/**/**/**/b/c'):
+        assert patt.matches_file('/a/b/c')
+        assert patt.matches_file('/a/d/b/c')
+        assert patt.matches_file('/a/b/c')
+        assert patt.matches_file('/a/b/c/./b//c')
+        assert not patt.matches_file('/a/b/c/d')
+        assert not patt.matches_file('/a/c')
+        assert not patt.matches_file('/a/b/d')
+
+    # Test globstar at the end; it matches everything underneath /a/b/
+    for patt in GlobPattern('/a/b/**'), \
+                GlobPattern('/a/b/**/**'), \
+                GlobPattern('/a/b//**//**/**'):
+        assert patt.matches_file('/a/b/')
+        assert patt.matches_file('/a/b/.')
+        assert patt.matches_file('/a/b//')
+        assert patt.matches_file('/a/b/c')
+        assert patt.matches_file('/a/b/c/d/e/f/g/h')
+        assert patt.matches_file('/a/b/d/c')
+        assert not patt.matches_file('/a/')
+        assert not patt.matches_file('/a/c/b')
+
+    # Test multiple globstars at multiple locations
+    patt = GlobPattern('/a/**/b/**/c')
+    assert patt.matches_file('/a/b/c')
+    assert patt.matches_file('/a/./b/./c')
+    assert patt.matches_file('/a//b//c')
+    assert patt.matches_file('/a/x/y/b/c')
+    assert patt.matches_file('/a/b/x/y/c')
+    assert patt.matches_file('/a/b/c/a/b/c')
+    assert patt.matches_file('/a/x/y/b/x/y/c')
+    assert not patt.matches_file('/a/b/x')
+    assert not patt.matches_file('/a/b/c/x')
+    assert not patt.matches_file('/a/b/c/')
+    assert not patt.matches_file('/a/b/c/.')