swap more direct settings.CONFIG access to abx getters

Nick Sweeting, 1 year ago
parent
commit
4b6f08b0fe
3 changed files with 26 additions and 18 deletions
  1. archivebox/index/json.py   +3 −4
  2. archivebox/main.py         +5 −4
  3. archivebox/misc/util.py    +18 −10
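
The change repeated across all three files is the same swap: direct reads of settings.BINARIES / settings.BINPROVIDERS off the Django settings object are replaced with the abx getter functions. A minimal before/after sketch of that pattern, assuming abx.archivebox.reads exposes get_BINARIES() and get_BINPROVIDERS() exactly as used in the hunks below:

    # before: read binaries straight off the Django settings object
    from django.conf import settings
    binaries = settings.BINARIES.to_dict()

    # after: go through the abx plugin-system getters instead
    import abx.archivebox.reads
    binaries = dict(abx.archivebox.reads.get_BINARIES())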

archivebox/index/json.py   +3 −4

@@ -8,6 +8,8 @@ from pathlib import Path
 from datetime import datetime, timezone
 from typing import List, Optional, Iterator, Any, Union
 
+import abx.archivebox.reads
+
 from archivebox.config import VERSION, DATA_DIR, CONSTANTS
 from archivebox.config.common import SERVER_CONFIG, SHELL_CONFIG
 
@@ -19,8 +21,6 @@ from archivebox.misc.util import enforce_types
 
 @enforce_types
 def generate_json_index_from_links(links: List[Link], with_headers: bool):
-    from django.conf import settings
-    
     MAIN_INDEX_HEADER = {
         'info': 'This is an index of site data archived by ArchiveBox: The self-hosted web archive.',
         'schema': 'archivebox.index.json',
@@ -33,11 +33,10 @@ def generate_json_index_from_links(links: List[Link], with_headers: bool):
             'docs': 'https://github.com/ArchiveBox/ArchiveBox/wiki',
             'source': 'https://github.com/ArchiveBox/ArchiveBox',
             'issues': 'https://github.com/ArchiveBox/ArchiveBox/issues',
-            'dependencies': settings.BINARIES.to_dict(),
+            'dependencies': dict(abx.archivebox.reads.get_BINARIES()),
         },
     }
     
-    
     if with_headers:
         output = {
             **MAIN_INDEX_HEADER,

archivebox/main.py   +5 −4

@@ -1052,7 +1052,8 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
     from rich import print
     from django.conf import settings
     
-    from archivebox import CONSTANTS
+    
+    import abx.archivebox.reads
     from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
     from archivebox.config.paths import get_or_create_working_lib_dir
 
@@ -1075,11 +1076,11 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
     
     package_manager_names = ', '.join(
         f'[yellow]{binprovider.name}[/yellow]'
-        for binprovider in reversed(list(settings.BINPROVIDERS.values()))
+        for binprovider in reversed(list(abx.archivebox.reads.get_BINPROVIDERS().values()))
         if not binproviders or (binproviders and binprovider.name in binproviders)
     )
     print(f'[+] Setting up package managers {package_manager_names}...')
-    for binprovider in reversed(list(settings.BINPROVIDERS.values())):
+    for binprovider in reversed(list(abx.archivebox.reads.get_BINPROVIDERS().values())):
         if binproviders and binprovider.name not in binproviders:
             continue
         try:
@@ -1092,7 +1093,7 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
     
     print()
     
-    for binary in reversed(list(settings.BINARIES.values())):
+    for binary in reversed(list(abx.archivebox.reads.get_BINARIES().values())):
         if binary.name in ('archivebox', 'django', 'sqlite', 'python'):
             # obviously must already be installed if we are running
             continue

archivebox/misc/util.py   +18 −10

@@ -5,7 +5,7 @@ import requests
 import json as pyjson
 import http.cookiejar
 
-from typing import List, Optional, Any
+from typing import List, Optional, Any, Callable
 from pathlib import Path
 from inspect import signature
 from functools import wraps
@@ -19,14 +19,13 @@ from requests.exceptions import RequestException, ReadTimeout
 from base32_crockford import encode as base32_encode                            # type: ignore
 from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding
 try:
-    import chardet
+    import chardet    # type:ignore
     detect_encoding = lambda rawdata: chardet.detect(rawdata)["encoding"]
 except ImportError:
     detect_encoding = lambda rawdata: "utf-8"
 
 
-from archivebox.config import CONSTANTS
-from archivebox.config.common import ARCHIVING_CONFIG
+from archivebox.config.constants import CONSTANTS
 
 from .logging import COLOR_DICT
 
@@ -187,11 +186,11 @@ def str_between(string: str, start: str, end: str=None) -> str:
 
 
 @enforce_types
-def parse_date(date: Any) -> Optional[datetime]:
+def parse_date(date: Any) -> datetime:
     """Parse unix timestamps, iso format, and human-readable strings"""
     
     if date is None:
-        return None
+        return None    # type: ignore
 
     if isinstance(date, datetime):
         if date.tzinfo is None:
@@ -213,6 +212,8 @@ def parse_date(date: Any) -> Optional[datetime]:
 def download_url(url: str, timeout: int=None) -> str:
     """Download the contents of a remote url and return the text"""
 
+    from archivebox.config.common import ARCHIVING_CONFIG
+
     timeout = timeout or ARCHIVING_CONFIG.TIMEOUT
     session = requests.Session()
 
@@ -242,8 +243,12 @@ def download_url(url: str, timeout: int=None) -> str:
         return url.rsplit('/', 1)[-1]
 
 @enforce_types
-def get_headers(url: str, timeout: int=None) -> str:
+def get_headers(url: str, timeout: int | None=None) -> str:
     """Download the contents of a remote url and return the headers"""
+    # TODO: get rid of this and use an abx pluggy hook instead
+    
+    from archivebox.config.common import ARCHIVING_CONFIG
+    
     timeout = timeout or ARCHIVING_CONFIG.TIMEOUT
 
     try:
@@ -308,13 +313,13 @@ def ansi_to_html(text: str) -> str:
 @enforce_types
 def dedupe(options: List[str]) -> List[str]:
     """
-    Deduplicates the given options. Options that come later clobber earlier
-    conflicting options.
+    Deduplicates the given CLI args by key=value. Options that come later override earlier.
     """
     deduped = {}
 
     for option in options:
-        deduped[option.split('=')[0]] = option
+        key = option.split('=')[0]
+        deduped[key] = option
 
     return list(deduped.values())
 
@@ -346,6 +351,9 @@ class ExtendedEncoder(pyjson.JSONEncoder):
         
         elif cls_name in ('dict_items', 'dict_keys', 'dict_values'):
             return tuple(obj)
+        
+        elif isinstance(obj, Callable):
+            return str(obj)
 
         return pyjson.JSONEncoder.default(self, obj)
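
For the clarified dedupe() docstring above, a short usage sketch of the key=value override behavior it describes (assuming the function is imported from archivebox.misc.util as in this diff):

    from archivebox.misc.util import dedupe

    # the key is everything before '='; later options with the same key win
    dedupe(['--quiet', '--timeout=10', '--timeout=30'])
    # -> ['--quiet', '--timeout=30']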