first attempt to migrate to Pathlib

apkallum · 5 years ago
commit 594d9e49ce

archivebox/config/__init__.py (+28 -28)

@@ -222,15 +222,15 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
     'USER':                     {'default': lambda c: getpass.getuser() or os.getlogin()},
     'ANSI':                     {'default': lambda c: DEFAULT_CLI_COLORS if c['USE_COLOR'] else {k: '' for k in DEFAULT_CLI_COLORS.keys()}},

-    'REPO_DIR':                 {'default': lambda c: os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..'))},
-    'PYTHON_DIR':               {'default': lambda c: os.path.join(c['REPO_DIR'], PYTHON_DIR_NAME)},
-    'TEMPLATES_DIR':            {'default': lambda c: os.path.join(c['PYTHON_DIR'], TEMPLATES_DIR_NAME, 'legacy')},
-
-    'OUTPUT_DIR':               {'default': lambda c: os.path.abspath(os.path.expanduser(c['OUTPUT_DIR'])) if c['OUTPUT_DIR'] else os.path.abspath(os.curdir)},
-    'ARCHIVE_DIR':              {'default': lambda c: os.path.join(c['OUTPUT_DIR'], ARCHIVE_DIR_NAME)},
-    'SOURCES_DIR':              {'default': lambda c: os.path.join(c['OUTPUT_DIR'], SOURCES_DIR_NAME)},
-    'LOGS_DIR':                 {'default': lambda c: os.path.join(c['OUTPUT_DIR'], LOGS_DIR_NAME)},
-    'CONFIG_FILE':              {'default': lambda c: os.path.abspath(os.path.expanduser(c['CONFIG_FILE'])) if c['CONFIG_FILE'] else os.path.join(c['OUTPUT_DIR'], CONFIG_FILENAME)},
+    'REPO_DIR':                 {'default': lambda c: Path(os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..')))},
+    'PYTHON_DIR':               {'default': lambda c: Path.joinpath(Path(c['REPO_DIR']), PYTHON_DIR_NAME)},
+    'TEMPLATES_DIR':            {'default': lambda c: Path.joinpath(c['PYTHON_DIR'], TEMPLATES_DIR_NAME, 'legacy')},
+
+    'OUTPUT_DIR':               {'default': lambda c: Path(os.path.abspath(os.path.expanduser(c['OUTPUT_DIR'])) if c['OUTPUT_DIR'] else os.path.abspath(os.curdir))},
+    'ARCHIVE_DIR':              {'default': lambda c: Path.joinpath(c['OUTPUT_DIR'], ARCHIVE_DIR_NAME)},
+    'SOURCES_DIR':              {'default': lambda c: Path.joinpath(c['OUTPUT_DIR'], SOURCES_DIR_NAME)},
+    'LOGS_DIR':                 {'default': lambda c: Path.joinpath(c['OUTPUT_DIR'], LOGS_DIR_NAME)},
+    'CONFIG_FILE':              {'default': lambda c: os.path.abspath(os.path.expanduser(c['CONFIG_FILE'])) if c['CONFIG_FILE'] else Path.joinpath(c['OUTPUT_DIR'], CONFIG_FILENAME)},
     'COOKIES_FILE':             {'default': lambda c: c['COOKIES_FILE'] and os.path.abspath(os.path.expanduser(c['COOKIES_FILE']))},
     'CHROME_USER_DATA_DIR':     {'default': lambda c: find_chrome_data_dir() if c['CHROME_USER_DATA_DIR'] is None else (os.path.abspath(os.path.expanduser(c['CHROME_USER_DATA_DIR'])) or None)},
     'URL_BLACKLIST_PTN':        {'default': lambda c: c['URL_BLACKLIST'] and re.compile(c['URL_BLACKLIST'] or '', re.IGNORECASE | re.UNICODE | re.MULTILINE)},
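
A note on the new joinpath spelling: Path.joinpath is an instance method, so calling it through the class only works when the first argument is already a Path instance, as it is for the derived config values above. A minimal standalone sketch (not part of the commit) of the equivalent spellings and the failure mode:

    from pathlib import Path

    # These are equivalent; for the class-level call, the first argument
    # must already be a Path for the method to bind correctly:
    Path.joinpath(Path('/data'), 'sources')   # PosixPath('/data/sources')
    Path('/data').joinpath('sources')         # PosixPath('/data/sources')
    Path('/data') / 'sources'                 # PosixPath('/data/sources')

    # This raises at runtime (a plain str has no Path machinery):
    # Path.joinpath('/data', 'sources')       # AttributeError
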
@@ -348,7 +348,7 @@ def load_config_file(out_dir: str=None) -> Optional[Dict[str, str]]:
     """load the ini-formatted config file from OUTPUT_DIR/Archivebox.conf"""
     """load the ini-formatted config file from OUTPUT_DIR/Archivebox.conf"""
 
 
     out_dir = out_dir or os.path.abspath(os.getenv('OUTPUT_DIR', '.'))
     out_dir = out_dir or os.path.abspath(os.getenv('OUTPUT_DIR', '.'))
-    config_path = os.path.join(out_dir, CONFIG_FILENAME)
+    config_path = Path.joinpath(Path(out_dir), CONFIG_FILENAME)
     if os.path.exists(config_path):
         config_file = ConfigParser()
         config_file.optionxform = str
@@ -371,7 +371,7 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:
     from ..system import atomic_write

     out_dir = out_dir or os.path.abspath(os.getenv('OUTPUT_DIR', '.'))
-    config_path = os.path.join(out_dir, CONFIG_FILENAME)
+    config_path = Path.joinpath(Path(out_dir), CONFIG_FILENAME)
     
     if not os.path.exists(config_path):
         atomic_write(config_path, CONFIG_HEADER)
@@ -611,17 +611,17 @@ def get_code_locations(config: ConfigDict) -> SimpleConfigValueDict:
         'REPO_DIR': {
             'path': os.path.abspath(config['REPO_DIR']),
             'enabled': True,
-            'is_valid': os.path.exists(os.path.join(config['REPO_DIR'], 'archivebox')),
+            'is_valid': os.path.exists(Path.joinpath(config['REPO_DIR'], 'archivebox')),
         },
         'PYTHON_DIR': {
             'path': os.path.abspath(config['PYTHON_DIR']),
             'enabled': True,
-            'is_valid': os.path.exists(os.path.join(config['PYTHON_DIR'], '__main__.py')),
+            'is_valid': os.path.exists(Path.joinpath(config['PYTHON_DIR'], '__main__.py')),
         },
         'TEMPLATES_DIR': {
             'path': os.path.abspath(config['TEMPLATES_DIR']),
             'enabled': True,
-            'is_valid': os.path.exists(os.path.join(config['TEMPLATES_DIR'], 'static')),
+            'is_valid': os.path.exists(Path.joinpath(config['TEMPLATES_DIR'], 'static')),
         },
     }

@@ -645,7 +645,7 @@ def get_data_locations(config: ConfigDict) -> ConfigValue:
         'OUTPUT_DIR': {
             'path': os.path.abspath(config['OUTPUT_DIR']),
             'enabled': True,
-            'is_valid': os.path.exists(os.path.join(config['OUTPUT_DIR'], JSON_INDEX_FILENAME)),
+            'is_valid': os.path.exists(Path.joinpath(config['OUTPUT_DIR'], JSON_INDEX_FILENAME)),
         },
         'SOURCES_DIR': {
             'path': os.path.abspath(config['SOURCES_DIR']),
@@ -668,19 +668,19 @@ def get_data_locations(config: ConfigDict) -> ConfigValue:
             'is_valid': os.path.exists(config['CONFIG_FILE']),
         },
         'SQL_INDEX': {
-            'path': os.path.abspath(os.path.join(config['OUTPUT_DIR'], SQL_INDEX_FILENAME)),
+            'path': os.path.abspath(Path.joinpath(config['OUTPUT_DIR'], SQL_INDEX_FILENAME)),
             'enabled': True,
-            'is_valid': os.path.exists(os.path.join(config['OUTPUT_DIR'], SQL_INDEX_FILENAME)),
+            'is_valid': os.path.exists(Path.joinpath(config['OUTPUT_DIR'], SQL_INDEX_FILENAME)),
         },
         'JSON_INDEX': {
-            'path': os.path.abspath(os.path.join(config['OUTPUT_DIR'], JSON_INDEX_FILENAME)),
+            'path': os.path.abspath(Path.joinpath(config['OUTPUT_DIR'], JSON_INDEX_FILENAME)),
             'enabled': True,
-            'is_valid': os.path.exists(os.path.join(config['OUTPUT_DIR'], JSON_INDEX_FILENAME)),
+            'is_valid': os.path.exists(Path.joinpath(config['OUTPUT_DIR'], JSON_INDEX_FILENAME)),
         },
         'HTML_INDEX': {
-            'path': os.path.abspath(os.path.join(config['OUTPUT_DIR'], HTML_INDEX_FILENAME)),
+            'path': os.path.abspath(Path.joinpath(config['OUTPUT_DIR'], HTML_INDEX_FILENAME)),
             'enabled': True,
-            'is_valid': os.path.exists(os.path.join(config['OUTPUT_DIR'], HTML_INDEX_FILENAME)),
+            'is_valid': os.path.exists(Path.joinpath(config['OUTPUT_DIR'], HTML_INDEX_FILENAME)),
         },
     }

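The surrounding os.path.abspath/os.path.exists calls keep working with the new Path config values because the os.path functions accept any path-like object (PEP 519). A quick standalone check, with a hypothetical output dir and example index filename:

    import os
    from pathlib import Path

    out_dir = Path('/data')                        # hypothetical OUTPUT_DIR value
    os.path.abspath(out_dir)                       # '/data' -- accepts Path via os.fspath()
    os.path.exists(out_dir / 'index.sqlite3')      # works; example index filename
    os.fspath(out_dir)                             # '/data' -- the str the os layer sees
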
@@ -877,9 +877,9 @@ def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None:
         
 def check_data_folder(out_dir: Optional[str]=None, config: ConfigDict=CONFIG) -> None:
     output_dir = out_dir or config['OUTPUT_DIR']
-    assert isinstance(output_dir, str)
+    assert isinstance(output_dir, (str, Path))

-    sql_index_exists = os.path.exists(os.path.join(output_dir, SQL_INDEX_FILENAME))
+    sql_index_exists = (Path(output_dir) / SQL_INDEX_FILENAME).exists()
     if not sql_index_exists:
         stderr('[X] No archivebox index found in the current directory.', color='red')
         stderr(f'    {output_dir}', color='lightyellow')
@@ -909,7 +909,7 @@ def check_data_folder(out_dir: Optional[str]=None, config: ConfigDict=CONFIG) ->
         stderr('        archivebox init')
         raise SystemExit(3)

-    sources_dir = os.path.join(output_dir, SOURCES_DIR_NAME)
+    sources_dir = Path(output_dir) / SOURCES_DIR_NAME
     if not os.path.exists(sources_dir):
         os.makedirs(sources_dir)

@@ -920,17 +920,17 @@ def setup_django(out_dir: str=None, check_db=False, config: ConfigDict=CONFIG) -
     
     output_dir = out_dir or config['OUTPUT_DIR']

-    assert isinstance(output_dir, str) and isinstance(config['PYTHON_DIR'], str)
+    assert isinstance(output_dir, (Path, str)) and isinstance(config['PYTHON_DIR'], Path)

     try:
         import django
-        sys.path.append(config['PYTHON_DIR'])
-        os.environ.setdefault('OUTPUT_DIR', output_dir)
+        sys.path.append(str(config['PYTHON_DIR']))
+        os.environ.setdefault('OUTPUT_DIR', str(output_dir))
         os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
         django.setup()

         if check_db:
-            sql_index_path = os.path.join(output_dir, SQL_INDEX_FILENAME)
+            sql_index_path = Path(output_dir) / SQL_INDEX_FILENAME
             assert os.path.exists(sql_index_path), (
                 f'No database file {SQL_INDEX_FILENAME} found in OUTPUT_DIR: {config["OUTPUT_DIR"]}')
     except KeyboardInterrupt:
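
The str() wrapping in setup_django is the important part of this hunk: sys.path entries and os.environ values must be plain strings, so Path objects have to be converted at these OS-facing boundaries. A standalone sketch (paths are hypothetical):

    import os
    import sys
    from pathlib import Path

    python_dir = Path('/repo/archivebox')          # hypothetical PYTHON_DIR

    sys.path.append(str(python_dir))               # import machinery expects str entries
    os.environ.setdefault('OUTPUT_DIR', str(Path.cwd()))

    # os.environ['OUTPUT_DIR'] = Path.cwd()        # would raise TypeError: str expected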

archivebox/index/__init__.py (+15 -14)

@@ -6,6 +6,7 @@ import json as pyjson
 from pathlib import Path

 from itertools import chain
+from pathlib import Path
 from typing import List, Tuple, Dict, Optional, Iterable
 from collections import OrderedDict
 from contextlib import contextmanager
@@ -224,7 +225,7 @@ def timed_index_update(out_path: str):


 @enforce_types
-def write_main_index(links: List[Link], out_dir: str=OUTPUT_DIR, finished: bool=False) -> None:
+def write_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR, finished: bool=False) -> None:
     """create index.html file for a given list of links"""
     """create index.html file for a given list of links"""
 
 
     log_indexing_process_started(len(links))
     log_indexing_process_started(len(links))
@@ -260,7 +261,7 @@ def get_empty_snapshot_queryset(out_dir: str=OUTPUT_DIR):
     return Snapshot.objects.none()

 @enforce_types
-def load_main_index(out_dir: str=OUTPUT_DIR, warn: bool=True) -> List[Link]:
+def load_main_index(out_dir: Path=OUTPUT_DIR, warn: bool=True) -> List[Link]:
     """parse and load existing index with any new links from import_path merged in"""
     """parse and load existing index with any new links from import_path merged in"""
     setup_django(out_dir, check_db=True)
     setup_django(out_dir, check_db=True)
     from core.models import Snapshot
     from core.models import Snapshot
@@ -271,7 +272,7 @@ def load_main_index(out_dir: str=OUTPUT_DIR, warn: bool=True) -> List[Link]:
         raise SystemExit(0)

 @enforce_types
-def load_main_index_meta(out_dir: str=OUTPUT_DIR) -> Optional[dict]:
+def load_main_index_meta(out_dir: Path=OUTPUT_DIR) -> Optional[dict]:
     index_path = os.path.join(out_dir, JSON_INDEX_FILENAME)
     if os.path.exists(index_path):
         with open(index_path, 'r', encoding='utf-8') as f:
@@ -392,7 +393,7 @@ def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type
     return snapshots.filter(q_filter)


-def get_indexed_folders(snapshots, out_dir: str=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
+def get_indexed_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
     """indexed links without checking archive status or data directory validity"""
     """indexed links without checking archive status or data directory validity"""
     links = [snapshot.as_link() for snapshot in snapshots.iterator()]
     links = [snapshot.as_link() for snapshot in snapshots.iterator()]
     return {
     return {
@@ -400,7 +401,7 @@ def get_indexed_folders(snapshots, out_dir: str=OUTPUT_DIR) -> Dict[str, Optiona
         for link in links
     }

-def get_archived_folders(snapshots, out_dir: str=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
+def get_archived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
     """indexed links that are archived with a valid data directory"""
     """indexed links that are archived with a valid data directory"""
     links = [snapshot.as_link() for snapshot in snapshots.iterator()]
     links = [snapshot.as_link() for snapshot in snapshots.iterator()]
     return {
     return {
@@ -408,7 +409,7 @@ def get_archived_folders(snapshots, out_dir: str=OUTPUT_DIR) -> Dict[str, Option
         for link in filter(is_archived, links)
     }

-def get_unarchived_folders(snapshots, out_dir: str=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
+def get_unarchived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
     """indexed links that are unarchived with no data directory or an empty data directory"""
     """indexed links that are unarchived with no data directory or an empty data directory"""
     links = [snapshot.as_link() for snapshot in snapshots.iterator()]
     links = [snapshot.as_link() for snapshot in snapshots.iterator()]
     return {
     return {
@@ -416,7 +417,7 @@ def get_unarchived_folders(snapshots, out_dir: str=OUTPUT_DIR) -> Dict[str, Opti
         for link in filter(is_unarchived, links)
     }

-def get_present_folders(_snapshots, out_dir: str=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
+def get_present_folders(_snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
     """dirs that actually exist in the archive/ folder"""
     """dirs that actually exist in the archive/ folder"""
 
 
     all_folders = {}
     all_folders = {}
@@ -433,7 +434,7 @@ def get_present_folders(_snapshots, out_dir: str=OUTPUT_DIR) -> Dict[str, Option

     return all_folders

-def get_valid_folders(snapshots, out_dir: str=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
+def get_valid_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
     """dirs with a valid index matched to the main index and archived content"""
     """dirs with a valid index matched to the main index and archived content"""
     links = [snapshot.as_link() for snapshot in snapshots.iterator()]
     links = [snapshot.as_link() for snapshot in snapshots.iterator()]
     return {
     return {
@@ -441,7 +442,7 @@ def get_valid_folders(snapshots, out_dir: str=OUTPUT_DIR) -> Dict[str, Optional[
         for link in filter(is_valid, links)
     }

-def get_invalid_folders(snapshots, out_dir: str=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
+def get_invalid_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
     """dirs that are invalid for any reason: corrupted/duplicate/orphaned/unrecognized"""
     """dirs that are invalid for any reason: corrupted/duplicate/orphaned/unrecognized"""
     duplicate = get_duplicate_folders(snapshots, out_dir=OUTPUT_DIR)
     duplicate = get_duplicate_folders(snapshots, out_dir=OUTPUT_DIR)
     orphaned = get_orphaned_folders(snapshots, out_dir=OUTPUT_DIR)
     orphaned = get_orphaned_folders(snapshots, out_dir=OUTPUT_DIR)
@@ -450,7 +451,7 @@ def get_invalid_folders(snapshots, out_dir: str=OUTPUT_DIR) -> Dict[str, Optiona
     return {**duplicate, **orphaned, **corrupted, **unrecognized}


-def get_duplicate_folders(snapshots, out_dir: str=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
+def get_duplicate_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
     """dirs that conflict with other directories that have the same link URL or timestamp"""
     """dirs that conflict with other directories that have the same link URL or timestamp"""
     by_url = {}
     by_url = {}
     by_timestamp = {}
     by_timestamp = {}
@@ -484,7 +485,7 @@ def get_duplicate_folders(snapshots, out_dir: str=OUTPUT_DIR) -> Dict[str, Optio
                 duplicate_folders[path] = link
     return duplicate_folders

-def get_orphaned_folders(snapshots, out_dir: str=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
+def get_orphaned_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
     """dirs that contain a valid index but aren't listed in the main index"""
     """dirs that contain a valid index but aren't listed in the main index"""
     orphaned_folders = {}
     orphaned_folders = {}
 
 
@@ -502,7 +503,7 @@ def get_orphaned_folders(snapshots, out_dir: str=OUTPUT_DIR) -> Dict[str, Option

     return orphaned_folders

-def get_corrupted_folders(snapshots, out_dir: str=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
+def get_corrupted_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
     """dirs that don't contain a valid index and aren't listed in the main index"""
     """dirs that don't contain a valid index and aren't listed in the main index"""
     corrupted = {}
     corrupted = {}
     for snapshot in snapshots.iterator():
     for snapshot in snapshots.iterator():
@@ -511,7 +512,7 @@ def get_corrupted_folders(snapshots, out_dir: str=OUTPUT_DIR) -> Dict[str, Optio
             corrupted[link.link_dir] = link
     return corrupted

-def get_unrecognized_folders(snapshots, out_dir: str=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
+def get_unrecognized_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
     """dirs that don't contain recognizable archive data and aren't listed in the main index"""
     """dirs that don't contain recognizable archive data and aren't listed in the main index"""
     unrecognized_folders: Dict[str, Optional[Link]] = {}
     unrecognized_folders: Dict[str, Optional[Link]] = {}
 
 
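One side effect of these signature changes: @enforce_types now rejects plain-str out_dir arguments for all of the folder helpers. If both forms need to be accepted, a tolerant annotation plus early normalization would cover it; a hypothetical helper (not in this commit):

    from pathlib import Path
    from typing import Union

    def normalize_out_dir(out_dir: Union[Path, str]) -> Path:
        """Accept either form at the boundary, then use Path everywhere inside."""
        return Path(out_dir).expanduser().resolve()

    # normalize_out_dir('~/archive')    -> e.g. PosixPath('/home/user/archive')
    # normalize_out_dir(Path('/data'))  -> PosixPath('/data')
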
@@ -580,7 +581,7 @@ def is_unarchived(link: Link) -> bool:
     return not link.is_archived


-def fix_invalid_folder_locations(out_dir: str=OUTPUT_DIR) -> Tuple[List[str], List[str]]:
+def fix_invalid_folder_locations(out_dir: Path=OUTPUT_DIR) -> Tuple[List[str], List[str]]:
     fixed = []
     cant_fix = []
     for entry in os.scandir(os.path.join(out_dir, ARCHIVE_DIR_NAME)):

archivebox/index/html.py (+3 -2)

@@ -5,6 +5,7 @@ import os
 from string import Template
 from datetime import datetime
 from typing import List, Optional, Iterator, Mapping
+from pathlib import Path

 from .schema import Link
 from ..system import atomic_write, copy_and_overwrite
@@ -40,7 +41,7 @@ TITLE_LOADING_MSG = 'Not yet archived...'
 ### Main Links Index

 @enforce_types
-def parse_html_main_index(out_dir: str=OUTPUT_DIR) -> Iterator[str]:
+def parse_html_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[str]:
     """parse an archive index html file and return the list of urls"""
     """parse an archive index html file and return the list of urls"""
 
 
     index_path = join(out_dir, HTML_INDEX_FILENAME)
     index_path = join(out_dir, HTML_INDEX_FILENAME)
@@ -52,7 +53,7 @@ def parse_html_main_index(out_dir: str=OUTPUT_DIR) -> Iterator[str]:
     return ()

 @enforce_types
-def write_html_main_index(links: List[Link], out_dir: str=OUTPUT_DIR, finished: bool=False) -> None:
+def write_html_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR, finished: bool=False) -> None:
     """write the html link index to a given path"""
     """write the html link index to a given path"""
 
 
     copy_and_overwrite(join(TEMPLATES_DIR, FAVICON_FILENAME), join(out_dir, FAVICON_FILENAME))
     copy_and_overwrite(join(TEMPLATES_DIR, FAVICON_FILENAME), join(out_dir, FAVICON_FILENAME))
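
The untouched join(out_dir, ...) calls here still work after the annotation change because os.path.join accepts Path arguments (Python 3.6+), though it always returns a str rather than a Path:

    from os.path import join
    from pathlib import Path

    p = join(Path('/data'), 'index.html')   # accepts a Path first argument
    type(p)                                 # <class 'str'> -- result is a str, not a Path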

archivebox/index/json.py (+5 -5)

@@ -6,7 +6,7 @@ import json as pyjson
 from pathlib import Path

 from datetime import datetime
-from typing import List, Optional, Iterator, Any
+from typing import List, Optional, Iterator, Any, Union

 from .schema import Link, ArchiveResult
 from ..system import atomic_write
@@ -42,7 +42,7 @@ MAIN_INDEX_HEADER = {
 ### Main Links Index

 @enforce_types
-def parse_json_main_index(out_dir: str=OUTPUT_DIR) -> Iterator[Link]:
+def parse_json_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]:
     """parse an archive index json file and return the list of links"""
     """parse an archive index json file and return the list of links"""
 
 
     index_path = os.path.join(out_dir, JSON_INDEX_FILENAME)
     index_path = os.path.join(out_dir, JSON_INDEX_FILENAME)
@@ -66,7 +66,7 @@ def parse_json_main_index(out_dir: str=OUTPUT_DIR) -> Iterator[Link]:
     return ()

 @enforce_types
-def write_json_main_index(links: List[Link], out_dir: str=OUTPUT_DIR) -> None:
+def write_json_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR) -> None:
     """write the json link index to a given path"""
     """write the json link index to a given path"""
 
 
     assert isinstance(links, List), 'Links must be a list, not a generator.'
     assert isinstance(links, List), 'Links must be a list, not a generator.'
@@ -101,7 +101,7 @@ def write_json_link_details(link: Link, out_dir: Optional[str]=None) -> None:


 @enforce_types
-def parse_json_link_details(out_dir: str, guess: Optional[bool]=False) -> Optional[Link]:
+def parse_json_link_details(out_dir: Union[Path, str], guess: Optional[bool]=False) -> Optional[Link]:
     """load the json link index from a given directory"""
     """load the json link index from a given directory"""
     existing_index = os.path.join(out_dir, JSON_INDEX_FILENAME)
     existing_index = os.path.join(out_dir, JSON_INDEX_FILENAME)
     if os.path.exists(existing_index):
     if os.path.exists(existing_index):
@@ -115,7 +115,7 @@ def parse_json_link_details(out_dir: str, guess: Optional[bool]=False) -> Option


 @enforce_types
-def parse_json_links_details(out_dir: str) -> Iterator[Link]:
+def parse_json_links_details(out_dir: Union[Path, str]) -> Iterator[Link]:
     """read through all the archive data folders and return the parsed links"""
     """read through all the archive data folders and return the parsed links"""
 
 
     for entry in os.scandir(os.path.join(out_dir, ARCHIVE_DIR_NAME)):
     for entry in os.scandir(os.path.join(out_dir, ARCHIVE_DIR_NAME)):
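
parse_json_link_details and parse_json_links_details use Union[Path, str] instead of Path alone, so callers that still pass strings keep working under @enforce_types. os.scandir below is similarly tolerant, though the entries it yields always expose str paths:

    import os
    from pathlib import Path

    archive_dir = Path('/data/archive')          # hypothetical archive dir
    if archive_dir.exists():
        for entry in os.scandir(archive_dir):    # os.scandir accepts a Path
            print(entry.path)                    # entry.path is always a plain str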

archivebox/index/sql.py (+8 -7)

@@ -1,6 +1,7 @@
 __package__ = 'archivebox.index'

 from io import StringIO
+from pathlib import Path
 from typing import List, Tuple, Iterator
 from django.db.models import QuerySet

@@ -12,7 +13,7 @@ from ..config import setup_django, OUTPUT_DIR
 ### Main Links Index

 @enforce_types
-def parse_sql_main_index(out_dir: str=OUTPUT_DIR) -> Iterator[Link]:
+def parse_sql_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]:
     setup_django(out_dir, check_db=True)
     from core.models import Snapshot

@@ -22,7 +23,7 @@ def parse_sql_main_index(out_dir: str=OUTPUT_DIR) -> Iterator[Link]:
     )

 @enforce_types
-def remove_from_sql_main_index(snapshots: QuerySet, out_dir: str=OUTPUT_DIR) -> None:
+def remove_from_sql_main_index(snapshots: QuerySet, out_dir: Path=OUTPUT_DIR) -> None:
     setup_django(out_dir, check_db=True)
     from django.db import transaction

@@ -43,7 +44,7 @@ def write_link_to_sql_index(link: Link):


 @enforce_types
-def write_sql_main_index(links: List[Link], out_dir: str=OUTPUT_DIR) -> None:
+def write_sql_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR) -> None:
     setup_django(out_dir, check_db=True)
     from django.db import transaction

@@ -53,7 +54,7 @@ def write_sql_main_index(links: List[Link], out_dir: str=OUTPUT_DIR) -> None:
             

 @enforce_types
-def write_sql_link_details(link: Link, out_dir: str=OUTPUT_DIR) -> None:
+def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR) -> None:
     setup_django(out_dir, check_db=True)
     from core.models import Snapshot
     from django.db import transaction
@@ -70,7 +71,7 @@ def write_sql_link_details(link: Link, out_dir: str=OUTPUT_DIR) -> None:


 @enforce_types
-def list_migrations(out_dir: str=OUTPUT_DIR) -> List[Tuple[bool, str]]:
+def list_migrations(out_dir: Path=OUTPUT_DIR) -> List[Tuple[bool, str]]:
     setup_django(out_dir, check_db=False)
     from django.core.management import call_command
     out = StringIO()
@@ -87,7 +88,7 @@ def list_migrations(out_dir: str=OUTPUT_DIR) -> List[Tuple[bool, str]]:
     return migrations

 @enforce_types
-def apply_migrations(out_dir: str=OUTPUT_DIR) -> List[str]:
+def apply_migrations(out_dir: Path=OUTPUT_DIR) -> List[str]:
     setup_django(out_dir, check_db=False)
     from django.core.management import call_command
     null, out = StringIO(), StringIO()
@@ -98,7 +99,7 @@ def apply_migrations(out_dir: str=OUTPUT_DIR) -> List[str]:
     return [line.strip() for line in out.readlines() if line.strip()]

 @enforce_types
-def get_admins(out_dir: str=OUTPUT_DIR) -> List[str]:
+def get_admins(out_dir: Path=OUTPUT_DIR) -> List[str]:
     setup_django(out_dir, check_db=False)
     from django.contrib.auth.models import User
     return User.objects.filter(is_superuser=True)

archivebox/main.py (+28 -28)

@@ -5,6 +5,7 @@ import sys
 import shutil
 from pathlib import Path

+from pathlib import Path
 from typing import Dict, List, Optional, Iterable, IO, Union
 from crontab import CronTab, CronSlices
 from django.db.models import QuerySet
@@ -130,7 +131,7 @@ ALLOWED_IN_OUTPUT_DIR = {
 }

 @enforce_types
-def help(out_dir: str=OUTPUT_DIR) -> None:
+def help(out_dir: Path=OUTPUT_DIR) -> None:
     """Print the ArchiveBox help message and usage"""
     """Print the ArchiveBox help message and usage"""
 
 
     all_subcommands = list_subcommands()
     all_subcommands = list_subcommands()
@@ -153,7 +154,7 @@ def help(out_dir: str=OUTPUT_DIR) -> None:
     )


-    if os.path.exists(os.path.join(out_dir, SQL_INDEX_FILENAME)):
+    if (Path(out_dir) / SQL_INDEX_FILENAME).exists():
         print('''{green}ArchiveBox v{}: The self-hosted internet archive.{reset}

 {lightred}Active data directory:{reset}
@@ -202,7 +203,7 @@ def help(out_dir: str=OUTPUT_DIR) -> None:

 @enforce_types
 def version(quiet: bool=False,
-            out_dir: str=OUTPUT_DIR) -> None:
+            out_dir: Path=OUTPUT_DIR) -> None:
     """Print the ArchiveBox version and dependency information"""
     """Print the ArchiveBox version and dependency information"""
 
 
     if quiet:
     if quiet:
@@ -239,7 +240,7 @@ def version(quiet: bool=False,
 def run(subcommand: str,
         subcommand_args: Optional[List[str]],
         stdin: Optional[IO]=None,
-        out_dir: str=OUTPUT_DIR) -> None:
+        out_dir: Path=OUTPUT_DIR) -> None:
     """Run a given ArchiveBox subcommand with the given list of args"""
     """Run a given ArchiveBox subcommand with the given list of args"""
     run_subcommand(
     run_subcommand(
         subcommand=subcommand,
         subcommand=subcommand,
@@ -250,9 +251,9 @@ def run(subcommand: str,


 @enforce_types
-def init(force: bool=False, out_dir: str=OUTPUT_DIR) -> None:
+def init(force: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
     """Initialize a new ArchiveBox collection in the current directory"""
     """Initialize a new ArchiveBox collection in the current directory"""
-    os.makedirs(out_dir, exist_ok=True)
+    Path(out_dir).mkdir(exist_ok=True)
     is_empty = not len(set(os.listdir(out_dir)) - ALLOWED_IN_OUTPUT_DIR)

     if (Path(out_dir) / JSON_INDEX_FILENAME).exists():
@@ -289,32 +290,31 @@ def init(force: bool=False, out_dir: str=OUTPUT_DIR) -> None:
     else:
         print('\n{green}[+] Building archive folder structure...{reset}'.format(**ANSI))
     
-    os.makedirs(SOURCES_DIR, exist_ok=True)
+    Path(SOURCES_DIR).mkdir(exist_ok=True)
     print(f'    √ {SOURCES_DIR}')
     
-    os.makedirs(ARCHIVE_DIR, exist_ok=True)
+    Path(ARCHIVE_DIR).mkdir(exist_ok=True)
     print(f'    √ {ARCHIVE_DIR}')

-    os.makedirs(LOGS_DIR, exist_ok=True)
+    Path(LOGS_DIR).mkdir(exist_ok=True)
     print(f'    √ {LOGS_DIR}')

     write_config_file({}, out_dir=out_dir)
     print(f'    √ {CONFIG_FILE}')
-    
-    if os.path.exists(os.path.join(out_dir, SQL_INDEX_FILENAME)):
+    if (Path(out_dir) / SQL_INDEX_FILENAME).exists():
         print('\n{green}[*] Verifying main SQL index and running migrations...{reset}'.format(**ANSI))
     else:
         print('\n{green}[+] Building main SQL index and running migrations...{reset}'.format(**ANSI))
     
     setup_django(out_dir, check_db=False)
-    DATABASE_FILE = os.path.join(out_dir, SQL_INDEX_FILENAME)
+    DATABASE_FILE = Path(out_dir) / SQL_INDEX_FILENAME
     print(f'    √ {DATABASE_FILE}')
     print()
     for migration_line in apply_migrations(out_dir):
         print(f'    {migration_line}')


-    assert os.path.exists(DATABASE_FILE)
+    assert DATABASE_FILE.exists()
     
     # from django.contrib.auth.models import User
     # if IS_TTY and not User.objects.filter(is_superuser=True).exists():
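
On the mkdir conversions in this hunk: os.makedirs(path, exist_ok=True) also creates missing parent directories, while Path.mkdir only does so with parents=True (hence the flag on the out_dir call above; the sources/archive/logs dirs sit directly inside the already-created out_dir, so they can omit it). A standalone comparison with a hypothetical path:

    from pathlib import Path

    out_dir = Path('/tmp/demo/nested/output')      # hypothetical, parents may not exist

    out_dir.mkdir(parents=True, exist_ok=True)     # same effect as os.makedirs(..., exist_ok=True)

    # out_dir.mkdir(exist_ok=True)                 # FileNotFoundError if /tmp/demo/nested is missing
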
@@ -391,7 +391,7 @@ def init(force: bool=False, out_dir: str=OUTPUT_DIR) -> None:


 @enforce_types
-def status(out_dir: str=OUTPUT_DIR) -> None:
+def status(out_dir: Path=OUTPUT_DIR) -> None:
     """Print out some info and statistics about the archive collection"""
     """Print out some info and statistics about the archive collection"""
 
 
     check_data_folder(out_dir=out_dir)
     check_data_folder(out_dir=out_dir)
@@ -491,7 +491,7 @@ def status(out_dir: str=OUTPUT_DIR) -> None:


 @enforce_types
-def oneshot(url: str, out_dir: str=OUTPUT_DIR):
+def oneshot(url: str, out_dir: Path=OUTPUT_DIR):
     """
     """
     Create a single URL archive folder with an index.json and index.html, and all the archive method outputs.
     Create a single URL archive folder with an index.json and index.html, and all the archive method outputs.
     You can run this to archive single pages without needing to create a whole collection with archivebox init.
     You can run this to archive single pages without needing to create a whole collection with archivebox init.
@@ -514,7 +514,7 @@ def add(urls: Union[str, List[str]],
         index_only: bool=False,
         overwrite: bool=False,
         init: bool=False,
-        out_dir: str=OUTPUT_DIR) -> List[Link]:
+        out_dir: Path=OUTPUT_DIR) -> List[Link]:
     """Add a new URL or list of URLs to your archive"""
     """Add a new URL or list of URLs to your archive"""
 
 
     assert depth in (0, 1), 'Depth must be 0 or 1 (depth >1 is not supported yet)'
     assert depth in (0, 1), 'Depth must be 0 or 1 (depth >1 is not supported yet)'
@@ -577,7 +577,7 @@ def remove(filter_str: Optional[str]=None,
            before: Optional[float]=None,
            yes: bool=False,
            delete: bool=False,
-           out_dir: str=OUTPUT_DIR) -> List[Link]:
+           out_dir: Path=OUTPUT_DIR) -> List[Link]:
     """Remove the specified URLs from the archive"""
     """Remove the specified URLs from the archive"""
     
     
     check_data_folder(out_dir=out_dir)
     check_data_folder(out_dir=out_dir)
@@ -658,7 +658,7 @@ def update(resume: Optional[float]=None,
            status: Optional[str]=None,
            after: Optional[str]=None,
            before: Optional[str]=None,
-           out_dir: str=OUTPUT_DIR) -> List[Link]:
+           out_dir: Path=OUTPUT_DIR) -> List[Link]:
     """Import any new links from subscriptions and retry any previously failed/skipped links"""
     """Import any new links from subscriptions and retry any previously failed/skipped links"""
 
 
     check_data_folder(out_dir=out_dir)
     check_data_folder(out_dir=out_dir)
@@ -714,7 +714,7 @@ def list_all(filter_patterns_str: Optional[str]=None,
              json: bool=False,
              html: bool=False,
              with_headers: bool=False,
-             out_dir: str=OUTPUT_DIR) -> Iterable[Link]:
+             out_dir: Path=OUTPUT_DIR) -> Iterable[Link]:
     """List, filter, and export information about archive entries"""
     """List, filter, and export information about archive entries"""
     
     
     check_data_folder(out_dir=out_dir)
     check_data_folder(out_dir=out_dir)
@@ -756,7 +756,7 @@ def list_links(snapshots: Optional[QuerySet]=None,
                filter_type: str='exact',
                after: Optional[float]=None,
                before: Optional[float]=None,
-               out_dir: str=OUTPUT_DIR) -> Iterable[Link]:
+               out_dir: Path=OUTPUT_DIR) -> Iterable[Link]:
     
     check_data_folder(out_dir=out_dir)

@@ -776,7 +776,7 @@ def list_links(snapshots: Optional[QuerySet]=None,
 @enforce_types
 def list_folders(links: List[Link],
                  status: str,
-                 out_dir: str=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
+                 out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
     
     check_data_folder(out_dir=out_dir)

@@ -805,7 +805,7 @@ def config(config_options_str: Optional[str]=None,
            get: bool=False,
            set: bool=False,
            reset: bool=False,
-           out_dir: str=OUTPUT_DIR) -> None:
+           out_dir: Path=OUTPUT_DIR) -> None:
     """Get and set your ArchiveBox project configuration values"""
     """Get and set your ArchiveBox project configuration values"""
 
 
     check_data_folder(out_dir=out_dir)
     check_data_folder(out_dir=out_dir)
@@ -905,12 +905,12 @@ def schedule(add: bool=False,
              every: Optional[str]=None,
              depth: int=0,
              import_path: Optional[str]=None,
-             out_dir: str=OUTPUT_DIR):
+             out_dir: Path=OUTPUT_DIR):
     """Set ArchiveBox to regularly import URLs at specific times using cron"""
     """Set ArchiveBox to regularly import URLs at specific times using cron"""
     
     
     check_data_folder(out_dir=out_dir)
     check_data_folder(out_dir=out_dir)
 
 
-    os.makedirs(os.path.join(out_dir, LOGS_DIR_NAME), exist_ok=True)
+    (Path(out_dir) / LOGS_DIR_NAME).mkdir(exist_ok=True)

     cron = CronTab(user=True)
     cron = dedupe_cron_jobs(cron)
@@ -932,7 +932,7 @@ def schedule(add: bool=False,
             quoted(ARCHIVEBOX_BINARY),
             *(['add', f'--depth={depth}', f'"{import_path}"'] if import_path else ['update']),
             '>',
-            quoted(os.path.join(LOGS_DIR, 'archivebox.log')),
+            quoted(Path(LOGS_DIR) / 'archivebox.log'),
             '2>&1',

         ]
@@ -1016,7 +1016,7 @@ def server(runserver_args: Optional[List[str]]=None,
            reload: bool=False,
            debug: bool=False,
            init: bool=False,
-           out_dir: str=OUTPUT_DIR) -> None:
+           out_dir: Path=OUTPUT_DIR) -> None:
     """Run the ArchiveBox HTTP server"""
     """Run the ArchiveBox HTTP server"""
 
 
     runserver_args = runserver_args or []
     runserver_args = runserver_args or []
@@ -1063,7 +1063,7 @@ def server(runserver_args: Optional[List[str]]=None,


 @enforce_types
-def manage(args: Optional[List[str]]=None, out_dir: str=OUTPUT_DIR) -> None:
+def manage(args: Optional[List[str]]=None, out_dir: Path=OUTPUT_DIR) -> None:
     """Run an ArchiveBox Django management command"""
     """Run an ArchiveBox Django management command"""
 
 
     check_data_folder(out_dir=out_dir)
     check_data_folder(out_dir=out_dir)
@@ -1079,7 +1079,7 @@ def manage(args: Optional[List[str]]=None, out_dir: str=OUTPUT_DIR) -> None:


 @enforce_types
-def shell(out_dir: str=OUTPUT_DIR) -> None:
+def shell(out_dir: Path=OUTPUT_DIR) -> None:
     """Enter an interactive ArchiveBox Django shell"""
     """Enter an interactive ArchiveBox Django shell"""
 
 
     check_data_folder(out_dir=out_dir)
     check_data_folder(out_dir=out_dir)

archivebox/parsers/__init__.py (+3 -2)

@@ -13,6 +13,7 @@ from io import StringIO

 from typing import IO, Tuple, List, Optional
 from datetime import datetime
+from pathlib import Path

 from ..system import atomic_write
 from ..config import (
@@ -125,7 +126,7 @@ def run_parser_functions(to_parse: IO[str], timer, root_url: Optional[str]=None)


 @enforce_types
-def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir: str=OUTPUT_DIR) -> str:
+def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir: Path=OUTPUT_DIR) -> str:
     ts = str(datetime.now().timestamp()).split('.', 1)[0]
     source_path = os.path.join(out_dir, SOURCES_DIR_NAME, filename.format(ts=ts))
     atomic_write(source_path, raw_text)
@@ -134,7 +135,7 @@ def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir:


 @enforce_types
-def save_file_as_source(path: str, timeout: int=TIMEOUT, filename: str='{ts}-{basename}.txt', out_dir: str=OUTPUT_DIR) -> str:
+def save_file_as_source(path: str, timeout: int=TIMEOUT, filename: str='{ts}-{basename}.txt', out_dir: Path=OUTPUT_DIR) -> str:
     """download a given url's content into output/sources/domain-<timestamp>.txt"""
     """download a given url's content into output/sources/domain-<timestamp>.txt"""
     ts = str(datetime.now().timestamp()).split('.', 1)[0]
     ts = str(datetime.now().timestamp()).split('.', 1)[0]
     source_path = os.path.join(OUTPUT_DIR, SOURCES_DIR_NAME, filename.format(basename=basename(path), ts=ts))
     source_path = os.path.join(OUTPUT_DIR, SOURCES_DIR_NAME, filename.format(basename=basename(path), ts=ts))