Browse Source

improve version_str detection

Nick Sweeting 1 year ago
parent
commit
80d7a30f13
2 changed files with 19 additions and 8 deletions
  1. + 15 - 4
      archivebox/config.py
  2. + 4 - 4
      archivebox/core/settings.py

+ 15 - 4
archivebox/config.py

@@ -886,7 +886,7 @@ def hint(text: Union[Tuple[str, ...], List[str], str], prefix='    ', config: Op
 
 
 # Dependency Metadata Helpers
-def bin_version(binary: Optional[str]) -> Optional[str]:
+def bin_version(binary: Optional[str], cmd: Optional[str]=None) -> Optional[str]:
     """check the presence and return valid version line of a specified binary"""
 
     abspath = bin_path(binary)
@@ -895,11 +895,22 @@ def bin_version(binary: Optional[str]) -> Optional[str]:
 
     try:
         bin_env = os.environ | {'LANG': 'C'}
-        version_str = run([abspath, "--version"], stdout=PIPE, env=bin_env).stdout.strip().decode()
+        is_cmd_str = cmd and isinstance(cmd, str)
+        version_str = run(cmd or [abspath, "--version"], shell=is_cmd_str, stdout=PIPE, stderr=STDOUT, env=bin_env).stdout.strip().decode()
         if not version_str:
-            version_str = run([abspath, "--version"], stdout=PIPE).stdout.strip().decode()
+            version_str = run(cmd or [abspath, "--version"], shell=is_cmd_str, stdout=PIPE, stderr=STDOUT).stdout.strip().decode()
+        
         # take first 3 columns of first line of version info
-        return ' '.join(version_str.split('\n')[0].strip().split()[:3])
+        version_ptn = re.compile(r"\d+?\.\d+?\.?\d*", re.MULTILINE)
+        try:
+            version_nums = version_ptn.findall(version_str.split('\n')[0])[0]
+            if version_nums:
+                return version_nums
+            else:
+                raise IndexError
+        except IndexError:
+            # take first 3 columns of first line of version info
+            return ' '.join(version_str.split('\n')[0].strip().split()[:3])
     except OSError:
         pass
         # stderr(f'[X] Unable to find working version of dependency: {binary}', color='red')

+ 4 - 4
archivebox/core/settings.py

@@ -380,21 +380,21 @@ IGNORABLE_404_URLS = [
 ]
 
 class NoisyRequestsFilter(logging.Filter):
-    def filter(self, record):
+    def filter(self, record) -> bool:
         logline = record.getMessage()
 
         # ignore harmless 404s for the patterns in IGNORABLE_404_URLS
         for ignorable_url_pattern in IGNORABLE_404_URLS:
             ignorable_log_pattern = re.compile(f'^"GET /.*/?{ignorable_url_pattern.pattern[:-1]} HTTP/.*" (200|30.|404) .+$', re.I | re.M)
             if ignorable_log_pattern.match(logline):
-                return 0
+                return False
 
         # ignore staticfile requests that 200 or 30*
         ignoreable_200_log_pattern = re.compile(r'"GET /static/.* HTTP/.*" (200|30.) .+', re.I | re.M)
         if ignoreable_200_log_pattern.match(logline):
-            return 0
+            return False
 
-        return 1
+        return True
 
 if CONFIG.LOGS_DIR.exists():
     ERROR_LOG = (CONFIG.LOGS_DIR / 'errors.log')