1 year ago · 4b6f08b0fe
--- a/archivebox/index/json.py
+++ b/archivebox/index/json.py
@@ -8,6 +8,8 @@ from pathlib import Path
 
															 from datetime import datetime, timezone
														
 
															 from typing import List, Optional, Iterator, Any, Union
														
 
															+import abx.archivebox.reads
														
 
															+
														
 
															 from archivebox.config import VERSION, DATA_DIR, CONSTANTS
														
 
															 from archivebox.config.common import SERVER_CONFIG, SHELL_CONFIG
														
@@ -19,8 +21,6 @@ from archivebox.misc.util import enforce_types
 
															 @enforce_types
														
 
															 def generate_json_index_from_links(links: List[Link], with_headers: bool):
														
 
															-    from django.conf import settings
														
 
															-    
														
 
															     MAIN_INDEX_HEADER = {
														
 
															         'info': 'This is an index of site data archived by ArchiveBox: The self-hosted web archive.',
														
 
															         'schema': 'archivebox.index.json',
														
@@ -33,11 +33,10 @@ def generate_json_index_from_links(links: List[Link], with_headers: bool):
 
															             'docs': 'https://github.com/ArchiveBox/ArchiveBox/wiki',
														
 
															             'source': 'https://github.com/ArchiveBox/ArchiveBox',
														
 
															             'issues': 'https://github.com/ArchiveBox/ArchiveBox/issues',
														
 
															-            'dependencies': settings.BINARIES.to_dict(),
														
 
															+            'dependencies': dict(abx.archivebox.reads.get_BINARIES()),
														
 
															         },
														
 
															     }
														
 
															-    
														
 
															     if with_headers:
														
 
															         output = {
														
 
															             **MAIN_INDEX_HEADER,
														
--- a/archivebox/main.py
+++ b/archivebox/main.py
@@ -1052,7 +1052,8 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
 
															     from rich import print
														
 
															     from django.conf import settings
														
 
															-    from archivebox import CONSTANTS
														
 
															+    
														
 
															+    import abx.archivebox.reads
														
 
															     from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
														
 
															     from archivebox.config.paths import get_or_create_working_lib_dir
														
@@ -1075,11 +1076,11 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
 
															     package_manager_names = ', '.join(
														
 
															         f'[yellow]{binprovider.name}[/yellow]'
														
 
															-        for binprovider in reversed(list(settings.BINPROVIDERS.values()))
														
 
															+        for binprovider in reversed(list(abx.archivebox.reads.get_BINPROVIDERS().values()))
														
 
															         if not binproviders or (binproviders and binprovider.name in binproviders)
														
 
															     )
														
 
															     print(f'[+] Setting up package managers {package_manager_names}...')
														
 
															-    for binprovider in reversed(list(settings.BINPROVIDERS.values())):
														
 
															+    for binprovider in reversed(list(abx.archivebox.reads.get_BINPROVIDERS().values())):
														
 
															         if binproviders and binprovider.name not in binproviders:
														
 
															             continue
														
 
															         try:
														
@@ -1092,7 +1093,7 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
 
															     print()
														
 
															-    for binary in reversed(list(settings.BINARIES.values())):
														
 
															+    for binary in reversed(list(abx.archivebox.reads.get_BINARIES().values())):
														
 
															         if binary.name in ('archivebox', 'django', 'sqlite', 'python'):
														
 
															             # obviously must already be installed if we are running
														
 
															             continue
														
--- a/archivebox/misc/util.py
+++ b/archivebox/misc/util.py
@@ -5,7 +5,7 @@ import requests
 
															 import json as pyjson
														
 
															 import http.cookiejar
														
 
															-from typing import List, Optional, Any
														
 
															+from typing import List, Optional, Any, Callable
														
 
															 from pathlib import Path
														
 
															 from inspect import signature
														
 
															 from functools import wraps
														
@@ -19,14 +19,13 @@ from requests.exceptions import RequestException, ReadTimeout
 
															 from base32_crockford import encode as base32_encode                            # type: ignore
														
 
															 from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding
														
 
															 try:
														
 
															-    import chardet
														
 
															+    import chardet    # type:ignore
														
 
															     detect_encoding = lambda rawdata: chardet.detect(rawdata)["encoding"]
														
 
															 except ImportError:
														
 
															     detect_encoding = lambda rawdata: "utf-8"
														
 
															-from archivebox.config import CONSTANTS
														
 
															-from archivebox.config.common import ARCHIVING_CONFIG
														
 
															+from archivebox.config.constants import CONSTANTS
														
 
															 from .logging import COLOR_DICT
														
@@ -187,11 +186,11 @@ def str_between(string: str, start: str, end: str=None) -> str:
 
															 @enforce_types
														
 
															-def parse_date(date: Any) -> Optional[datetime]:
														
 
															+def parse_date(date: Any) -> datetime:
														
 
															     """Parse unix timestamps, iso format, and human-readable strings"""
														
 
															     if date is None:
														
 
															-        return None
														
 
															+        return None    # type: ignore
														
 
															     if isinstance(date, datetime):
														
 
															         if date.tzinfo is None:
														
@@ -213,6 +212,8 @@ def parse_date(date: Any) -> Optional[datetime]:
 
															 def download_url(url: str, timeout: int=None) -> str:
														
 
															     """Download the contents of a remote url and return the text"""
														
 
															+    from archivebox.config.common import ARCHIVING_CONFIG
														
 
															+
														
 
															     timeout = timeout or ARCHIVING_CONFIG.TIMEOUT
														
 
															     session = requests.Session()
														
@@ -242,8 +243,12 @@ def download_url(url: str, timeout: int=None) -> str:
 
															         return url.rsplit('/', 1)[-1]
														
 
															 @enforce_types
														
 
															-def get_headers(url: str, timeout: int=None) -> str:
														
 
															+def get_headers(url: str, timeout: int | None=None) -> str:
														
 
															     """Download the contents of a remote url and return the headers"""
														
 
															+    # TODO: get rid of this and use an abx pluggy hook instead
														
 
															+    
														
 
															+    from archivebox.config.common import ARCHIVING_CONFIG
														
 
															+    
														
 
															     timeout = timeout or ARCHIVING_CONFIG.TIMEOUT
														
 
															     try:
														
@@ -308,13 +313,13 @@ def ansi_to_html(text: str) -> str:
 
															 @enforce_types
														
 
															 def dedupe(options: List[str]) -> List[str]:
														
 
															     """
														
 
															-    Deduplicates the given options. Options that come later clobber earlier
														
 
															-    conflicting options.
														
 
															+    Deduplicates the given CLI args by key (the text before the first '='). Options that come later override earlier ones with the same key.
														
 
															     """
														
 
															     deduped = {}
														
 
															     for option in options:
														
 
															-        deduped[option.split('=')[0]] = option
														
 
															+        key = option.split('=')[0]
														
 
															+        deduped[key] = option
														
 
															     return list(deduped.values())
														
@@ -346,6 +351,9 @@ class ExtendedEncoder(pyjson.JSONEncoder):
 
															         elif cls_name in ('dict_items', 'dict_keys', 'dict_values'):
														
 
															             return tuple(obj)
														
 
															+        
														
 
															+        elif isinstance(obj, Callable):
														
 
															+            return str(obj)
														
 
															         return pyjson.JSONEncoder.default(self, obj)