Browse Source

fix extractor path calculation

Nick Sweeting 1 year ago
parent
commit
276a505cae
2 changed files with 4 additions and 4 deletions
  1. archivebox/core/settings.py (+1 -1)
  2. archivebox/plugins_extractor/wget/apps.py (+3 -3)

+ 1 - 1
archivebox/core/settings.py

@@ -40,7 +40,7 @@ BUILTIN_PLUGIN_DIRS = {
     'plugins_extractor':       PACKAGE_DIR / 'plugins_extractor',
 }
 USER_PLUGIN_DIRS = {
-    'user_plugins': DATA_DIR / 'user_plugins',
+    'user_plugins':            DATA_DIR / 'user_plugins',
 }
 
 BUILTIN_PLUGINS = abx.get_plugins_in_dirs(BUILTIN_PLUGIN_DIRS)

+ 3 - 3
archivebox/plugins_extractor/wget/apps.py

@@ -86,7 +86,7 @@ WGET_BINARY = WgetBinary()
 
 class WgetExtractor(BaseExtractor):
     name: ExtractorName = 'wget'
-    binary: str = WGET_BINARY.name
+    binary: BinName = WGET_BINARY.name
 
     def get_output_path(self, snapshot) -> Path | None:
         wget_index_path = wget_output_path(snapshot.as_link())
@@ -99,10 +99,10 @@ WGET_EXTRACTOR = WgetExtractor()
 
 class WarcExtractor(BaseExtractor):
     name: ExtractorName = 'warc'
-    binary: str = WGET_BINARY.name
+    binary: BinName = WGET_BINARY.name
 
     def get_output_path(self, snapshot) -> Path | None:
-        warc_files = (snapshot.link_dir / 'warc').glob('*.warc.gz')
+        warc_files = list((Path(snapshot.link_dir) / 'warc').glob('*.warc.gz'))
         if warc_files:
             return sorted(warc_files, key=lambda x: x.stat().st_size, reverse=True)[0]
         return None