
swap more direct settings.CONFIG access to abx getters

Nick Sweeting committed 1 year ago
commit 4b6f08b0fe
3 changed files with 26 additions and 18 deletions
  1. archivebox/index/json.py  (+3 -4)
  2. archivebox/main.py  (+5 -4)
  3. archivebox/misc/util.py  (+18 -10)

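The pattern being swapped, roughly (a before/after sketch based only on the hunks below; the getter functions shown are the ones this commit imports from abx.archivebox.reads, everything else about the surrounding setup is assumed):

    # before: read the plugin registries directly off Django settings
    from django.conf import settings
    binaries = settings.BINARIES.to_dict()
    binproviders = settings.BINPROVIDERS

    # after: go through the abx getter functions instead
    import abx.archivebox.reads
    binaries = dict(abx.archivebox.reads.get_BINARIES())
    binproviders = abx.archivebox.reads.get_BINPROVIDERS()
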
+ 3 - 4
archivebox/index/json.py

@@ -8,6 +8,8 @@ from pathlib import Path
 from datetime import datetime, timezone
 from typing import List, Optional, Iterator, Any, Union
 
+import abx.archivebox.reads
+
 from archivebox.config import VERSION, DATA_DIR, CONSTANTS
 from archivebox.config.common import SERVER_CONFIG, SHELL_CONFIG
 
@@ -19,8 +21,6 @@ from archivebox.misc.util import enforce_types
 
 @enforce_types
 def generate_json_index_from_links(links: List[Link], with_headers: bool):
-    from django.conf import settings
-    
     MAIN_INDEX_HEADER = {
         'info': 'This is an index of site data archived by ArchiveBox: The self-hosted web archive.',
         'schema': 'archivebox.index.json',
@@ -33,11 +33,10 @@ def generate_json_index_from_links(links: List[Link], with_headers: bool):
             'docs': 'https://github.com/ArchiveBox/ArchiveBox/wiki',
             'source': 'https://github.com/ArchiveBox/ArchiveBox',
             'issues': 'https://github.com/ArchiveBox/ArchiveBox/issues',
-            'dependencies': settings.BINARIES.to_dict(),
+            'dependencies': dict(abx.archivebox.reads.get_BINARIES()),
         },
     }
     
-    
     if with_headers:
         output = {
             **MAIN_INDEX_HEADER,

+ 5 - 4
archivebox/main.py

@@ -1052,7 +1052,8 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
     from rich import print
     from django.conf import settings
     
-    from archivebox import CONSTANTS
+    
+    import abx.archivebox.reads
     from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
     from archivebox.config.paths import get_or_create_working_lib_dir
 
@@ -1075,11 +1076,11 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
     
     package_manager_names = ', '.join(
         f'[yellow]{binprovider.name}[/yellow]'
-        for binprovider in reversed(list(settings.BINPROVIDERS.values()))
+        for binprovider in reversed(list(abx.archivebox.reads.get_BINPROVIDERS().values()))
         if not binproviders or (binproviders and binprovider.name in binproviders)
     )
     print(f'[+] Setting up package managers {package_manager_names}...')
-    for binprovider in reversed(list(settings.BINPROVIDERS.values())):
+    for binprovider in reversed(list(abx.archivebox.reads.get_BINPROVIDERS().values())):
         if binproviders and binprovider.name not in binproviders:
             continue
         try:
@@ -1092,7 +1093,7 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
     
     print()
     
-    for binary in reversed(list(settings.BINARIES.values())):
+    for binary in reversed(list(abx.archivebox.reads.get_BINARIES().values())):
         if binary.name in ('archivebox', 'django', 'sqlite', 'python'):
             # obviously must already be installed if we are running
             continue

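In main.py the same getters replace settings.BINPROVIDERS / settings.BINARIES inside install(). A minimal standalone sketch of the new lookup-and-filter idiom (the helper name and its use outside install() are illustrative, not part of this commit):

    from typing import List, Optional

    import abx.archivebox.reads

    def selected_binproviders(names: Optional[List[str]] = None) -> list:
        # same filter install() applies to its binproviders argument
        providers = reversed(list(abx.archivebox.reads.get_BINPROVIDERS().values()))
        return [p for p in providers if not names or p.name in names]
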
+ 18 - 10
archivebox/misc/util.py

@@ -5,7 +5,7 @@ import requests
 import json as pyjson
 import http.cookiejar
 
-from typing import List, Optional, Any
+from typing import List, Optional, Any, Callable
 from pathlib import Path
 from inspect import signature
 from functools import wraps
@@ -19,14 +19,13 @@ from requests.exceptions import RequestException, ReadTimeout
 from base32_crockford import encode as base32_encode                            # type: ignore
 from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding
 try:
-    import chardet
+    import chardet    # type:ignore
     detect_encoding = lambda rawdata: chardet.detect(rawdata)["encoding"]
 except ImportError:
     detect_encoding = lambda rawdata: "utf-8"
 
 
-from archivebox.config import CONSTANTS
-from archivebox.config.common import ARCHIVING_CONFIG
+from archivebox.config.constants import CONSTANTS
 
 from .logging import COLOR_DICT
 
@@ -187,11 +186,11 @@ def str_between(string: str, start: str, end: str=None) -> str:
 
 
 @enforce_types
-def parse_date(date: Any) -> Optional[datetime]:
+def parse_date(date: Any) -> datetime:
     """Parse unix timestamps, iso format, and human-readable strings"""
     
     if date is None:
-        return None
+        return None    # type: ignore
 
     if isinstance(date, datetime):
         if date.tzinfo is None:
@@ -213,6 +212,8 @@ def parse_date(date: Any) -> Optional[datetime]:
 def download_url(url: str, timeout: int=None) -> str:
     """Download the contents of a remote url and return the text"""
 
+    from archivebox.config.common import ARCHIVING_CONFIG
+
     timeout = timeout or ARCHIVING_CONFIG.TIMEOUT
     session = requests.Session()
 
@@ -242,8 +243,12 @@ def download_url(url: str, timeout: int=None) -> str:
         return url.rsplit('/', 1)[-1]
 
 @enforce_types
-def get_headers(url: str, timeout: int=None) -> str:
+def get_headers(url: str, timeout: int | None=None) -> str:
     """Download the contents of a remote url and return the headers"""
+    # TODO: get rid of this and use an abx pluggy hook instead
+    
+    from archivebox.config.common import ARCHIVING_CONFIG
+    
     timeout = timeout or ARCHIVING_CONFIG.TIMEOUT
 
     try:
@@ -308,13 +313,13 @@ def ansi_to_html(text: str) -> str:
 @enforce_types
 def dedupe(options: List[str]) -> List[str]:
     """
-    Deduplicates the given options. Options that come later clobber earlier
-    conflicting options.
+    Deduplicates the given CLI args by key=value. Options that come later override earlier.
     """
     deduped = {}
 
     for option in options:
-        deduped[option.split('=')[0]] = option
+        key = option.split('=')[0]
+        deduped[key] = option
 
     return list(deduped.values())
 
@@ -346,6 +351,9 @@ class ExtendedEncoder(pyjson.JSONEncoder):
         
         elif cls_name in ('dict_items', 'dict_keys', 'dict_values'):
             return tuple(obj)
+        
+        elif isinstance(obj, Callable):
+            return str(obj)
 
         return pyjson.JSONEncoder.default(self, obj)
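
Two behavior notes on the util.py changes, for reference: dedupe() keeps its key=value semantics (later options override earlier ones for the same key), just split over two lines now, and the new Callable branch in ExtendedEncoder falls back to str() for callables that reach default(). A quick sketch, assuming archivebox.misc.util is importable; the exact repr shown in the comment depends on the callable and on which earlier branches of default() it passes through:

    import json as pyjson

    from archivebox.misc.util import ExtendedEncoder, dedupe

    # the later --timeout wins, order of first appearance is kept
    assert dedupe(['--timeout=60', '--depth=1', '--timeout=120']) == ['--timeout=120', '--depth=1']

    # callables now serialize as their str() form instead of raising TypeError
    print(pyjson.dumps({'handler': print}, cls=ExtendedEncoder))   # e.g. {"handler": "<built-in function print>"}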