ソースを参照

speed up startup time, add rich startup progressbar, split logging and checks into misc, fix search index import backend bug

Nick Sweeting 1 年間 前
コミット
64c7100cf9

+ 4 - 0
archivebox/__init__.py

@@ -1,5 +1,7 @@
 __package__ = 'archivebox'
 __package__ = 'archivebox'
 
 
+# print('INSTALLING MONKEY PATCHES')
+
 from .monkey_patches import *
 from .monkey_patches import *
 
 
 import os
 import os
@@ -28,3 +30,5 @@ def _detect_installed_version():
 
 
 
 
 __version__ = _detect_installed_version()
 __version__ = _detect_installed_version()
+
+# print('DONE INSTALLING MONKEY PATCHES')

+ 106 - 56
archivebox/cli/__init__.py

@@ -1,16 +1,20 @@
 __package__ = 'archivebox.cli'
 __package__ = 'archivebox.cli'
 __command__ = 'archivebox'
 __command__ = 'archivebox'
 
 
-import os
 import sys
 import sys
 import argparse
 import argparse
 import threading
 import threading
+import archivebox
+
 from time import sleep
 from time import sleep
+from collections.abc import Mapping
 
 
-from typing import Optional, Dict, List, IO, Union, Iterable
+from typing import Optional, List, IO, Union, Iterable
 from pathlib import Path
 from pathlib import Path
 
 
-from ..config import OUTPUT_DIR, check_data_folder, check_migrations, stderr
+
+from ..misc.checks import check_data_folder, check_migrations
+from ..misc.logging import stderr
 
 
 from importlib import import_module
 from importlib import import_module
 
 
@@ -18,13 +22,46 @@ BUILTIN_LIST = list
 
 
 CLI_DIR = Path(__file__).resolve().parent
 CLI_DIR = Path(__file__).resolve().parent
 
 
-# these common commands will appear sorted before any others for ease-of-use
-meta_cmds = ('help', 'version')                               # dont require valid data folder at all
-main_cmds = ('init', 'config', 'setup')                       # dont require existing db present
-archive_cmds = ('add', 'remove', 'update', 'list', 'status')  # require existing db present
-fake_db = ("oneshot",)                                        # use fake in-memory db
 
 
-display_first = (*meta_cmds, *main_cmds, *archive_cmds)
+# def list_subcommands() -> Dict[str, str]:
+#     """find and import all valid archivebox_<subcommand>.py files in CLI_DIR"""
+#     COMMANDS = []
+#     for filename in os.listdir(CLI_DIR):
+#         if is_cli_module(filename):
+#             subcommand = filename.replace('archivebox_', '').replace('.py', '')
+#             module = import_module('.archivebox_{}'.format(subcommand), __package__)
+#             assert is_valid_cli_module(module, subcommand)
+#             COMMANDS.append((subcommand, module.main.__doc__))
+#             globals()[subcommand] = module.main
+#     display_order = lambda cmd: (
+#         display_first.index(cmd[0])
+#         if cmd[0] in display_first else
+#         100 + len(cmd[0])
+#     )
+#     return dict(sorted(COMMANDS, key=display_order))
+
# Static registry mapping each CLI subcommand to its implementing module name.
# Hardcoded (instead of scanning CLI_DIR at import time) because it is much
# faster at startup; insertion order doubles as the display order in --help.
SUBCOMMAND_MODULES = {
    # work without any data folder at all
    'help':     'archivebox_help',
    'version':  'archivebox_version',

    # work without an existing index/db present
    'init':     'archivebox_init',
    'config':   'archivebox_config',
    'setup':    'archivebox_setup',

    # require an existing index/db present
    'add':      'archivebox_add',
    'remove':   'archivebox_remove',
    'update':   'archivebox_update',
    'list':     'archivebox_list',
    'status':   'archivebox_status',

    'schedule': 'archivebox_schedule',
    'server':   'archivebox_server',
    'shell':    'archivebox_shell',
    'manage':   'archivebox_manage',

    # uses a fake in-memory db
    'oneshot':  'archivebox_oneshot',
}
 
 
 # every imported command module must have these properties in order to be valid
 # every imported command module must have these properties in order to be valid
 required_attrs = ('__package__', '__command__', 'main')
 required_attrs = ('__package__', '__command__', 'main')
@@ -36,6 +73,38 @@ is_valid_cli_module = lambda module, subcommand: (
     and module.__command__.split(' ')[-1] == subcommand
     and module.__command__.split(' ')[-1] == subcommand
 )
 )
 
 
class LazySubcommands(Mapping):
    """Read-only mapping of subcommand name -> that subcommand's main() function.

    Each archivebox_<name> module is imported on first access instead of at
    package import time, keeping CLI startup fast.
    """

    def __getitem__(self, key):
        # Lazily import .archivebox_<key> and hand back its main() entrypoint.
        module = import_module(f'.{SUBCOMMAND_MODULES[key]}', __package__)
        assert is_valid_cli_module(module, key)
        return module.main

    def __iter__(self):
        return iter(SUBCOMMAND_MODULES)

    def __len__(self):
        return len(SUBCOMMAND_MODULES)

    def keys(self):
        return SUBCOMMAND_MODULES.keys()

    def values(self):
        # NOTE: this eagerly imports every subcommand module.
        return [self[key] for key in self]

    def items(self):
        # NOTE: this eagerly imports every subcommand module.
        return [(key, self[key]) for key in self]

# Shared singleton used wherever the CLI needs the subcommand table.
CLI_SUBCOMMANDS = LazySubcommands()
+
+
+# these common commands will appear sorted before any others for ease-of-use
+meta_cmds = ('help', 'version')                               # dont require valid data folder at all
+main_cmds = ('init', 'config', 'setup')                       # dont require existing db present
+archive_cmds = ('add', 'remove', 'update', 'list', 'status')  # require existing db present
+fake_db = ("oneshot",)                                        # use fake in-memory db
+
+display_first = (*meta_cmds, *main_cmds, *archive_cmds)
+
 
 
 IGNORED_BG_THREADS = ('MainThread', 'ThreadPoolExecutor', 'IPythonHistorySavingThread', 'Scheduler')  # threads we dont have to wait for before exiting
 IGNORED_BG_THREADS = ('MainThread', 'ThreadPoolExecutor', 'IPythonHistorySavingThread', 'Scheduler')  # threads we dont have to wait for before exiting
 
 
@@ -71,29 +140,9 @@ def wait_for_bg_threads_to_exit(thread_names: Iterable[str]=(), ignore_names: It
     raise Exception(f'Background threads failed to exit after {tries}s: {threads_summary}')
     raise Exception(f'Background threads failed to exit after {tries}s: {threads_summary}')
 
 
 
 
-def list_subcommands() -> Dict[str, str]:
-    """find and import all valid archivebox_<subcommand>.py files in CLI_DIR"""
-
-    COMMANDS = []
-    for filename in os.listdir(CLI_DIR):
-        if is_cli_module(filename):
-            subcommand = filename.replace('archivebox_', '').replace('.py', '')
-            module = import_module('.archivebox_{}'.format(subcommand), __package__)
-            assert is_valid_cli_module(module, subcommand)
-            COMMANDS.append((subcommand, module.main.__doc__))
-            globals()[subcommand] = module.main
-
-    display_order = lambda cmd: (
-        display_first.index(cmd[0])
-        if cmd[0] in display_first else
-        100 + len(cmd[0])
-    )
-
-    return dict(sorted(COMMANDS, key=display_order))
-
 
 
 def run_subcommand(subcommand: str,
 def run_subcommand(subcommand: str,
-                   subcommand_args: List[str]=None,
+                   subcommand_args: List[str] | None = None,
                    stdin: Optional[IO]=None,
                    stdin: Optional[IO]=None,
                    pwd: Union[Path, str, None]=None) -> None:
                    pwd: Union[Path, str, None]=None) -> None:
     """Run a given ArchiveBox subcommand with the given list of args"""
     """Run a given ArchiveBox subcommand with the given list of args"""
@@ -101,18 +150,18 @@ def run_subcommand(subcommand: str,
     subcommand_args = subcommand_args or []
     subcommand_args = subcommand_args or []
 
 
     if subcommand not in meta_cmds:
     if subcommand not in meta_cmds:
-        from ..config import setup_django
+        from ..config import setup_django, CONFIG
 
 
         cmd_requires_db = subcommand in archive_cmds
         cmd_requires_db = subcommand in archive_cmds
         init_pending = '--init' in subcommand_args or '--quick-init' in subcommand_args
         init_pending = '--init' in subcommand_args or '--quick-init' in subcommand_args
 
 
         if cmd_requires_db:
         if cmd_requires_db:
-            check_data_folder(pwd)
+            check_data_folder(CONFIG)
 
 
         setup_django(in_memory_db=subcommand in fake_db, check_db=cmd_requires_db and not init_pending)
         setup_django(in_memory_db=subcommand in fake_db, check_db=cmd_requires_db and not init_pending)
 
 
         if cmd_requires_db:
         if cmd_requires_db:
-            check_migrations()
+            check_migrations(CONFIG)
 
 
     module = import_module('.archivebox_{}'.format(subcommand), __package__)
     module = import_module('.archivebox_{}'.format(subcommand), __package__)
     module.main(args=subcommand_args, stdin=stdin, pwd=pwd)    # type: ignore
     module.main(args=subcommand_args, stdin=stdin, pwd=pwd)    # type: ignore
@@ -121,17 +170,28 @@ def run_subcommand(subcommand: str,
     wait_for_bg_threads_to_exit(timeout=60)
     wait_for_bg_threads_to_exit(timeout=60)
 
 
 
 
-SUBCOMMANDS = list_subcommands()
+
+
 
 
 class NotProvided:
 class NotProvided:
-    pass
+    def __len__(self):
+        return 0
+    def __bool__(self):
+        return False
+    def __repr__(self):
+        return '<not provided>'
+
+Omitted = Union[None, NotProvided]
 
 
+OMITTED = NotProvided()
 
 
-def main(args: Optional[List[str]]=NotProvided, stdin: Optional[IO]=NotProvided, pwd: Optional[str]=None) -> None:
-    args = sys.argv[1:] if args is NotProvided else args
-    stdin = sys.stdin if stdin is NotProvided else stdin
 
 
-    subcommands = list_subcommands()
+def main(args: List[str] | Omitted=OMITTED, stdin: IO | Omitted=OMITTED, pwd: str | None=None) -> None:
+    # print('STARTING CLI MAIN ENTRYPOINT')
+    
+    args = sys.argv[1:] if args is OMITTED else args
+    stdin = sys.stdin if stdin is OMITTED else stdin
+
     parser = argparse.ArgumentParser(
     parser = argparse.ArgumentParser(
         prog=__command__,
         prog=__command__,
         description='ArchiveBox: The self-hosted internet archive',
         description='ArchiveBox: The self-hosted internet archive',
@@ -141,19 +201,19 @@ def main(args: Optional[List[str]]=NotProvided, stdin: Optional[IO]=NotProvided,
     group.add_argument(
     group.add_argument(
         '--help', '-h',
         '--help', '-h',
         action='store_true',
         action='store_true',
-        help=subcommands['help'],
+        help=CLI_SUBCOMMANDS['help'].__doc__,
     )
     )
     group.add_argument(
     group.add_argument(
         '--version',
         '--version',
         action='store_true',
         action='store_true',
-        help=subcommands['version'],
+        help=CLI_SUBCOMMANDS['version'].__doc__,
     )
     )
     group.add_argument(
     group.add_argument(
         "subcommand",
         "subcommand",
         type=str,
         type=str,
         help= "The name of the subcommand to run",
         help= "The name of the subcommand to run",
         nargs='?',
         nargs='?',
-        choices=subcommands.keys(),
+        choices=CLI_SUBCOMMANDS.keys(),
         default=None,
         default=None,
     )
     )
     parser.add_argument(
     parser.add_argument(
@@ -174,23 +234,13 @@ def main(args: Optional[List[str]]=NotProvided, stdin: Optional[IO]=NotProvided,
         log_cli_command(
         log_cli_command(
             subcommand=command.subcommand,
             subcommand=command.subcommand,
             subcommand_args=command.subcommand_args,
             subcommand_args=command.subcommand_args,
-            stdin=stdin,
-            pwd=pwd or OUTPUT_DIR
+            stdin=stdin or None,
+            pwd=pwd or archivebox.DATA_DIR,
         )
         )
 
 
     run_subcommand(
     run_subcommand(
         subcommand=command.subcommand,
         subcommand=command.subcommand,
         subcommand_args=command.subcommand_args,
         subcommand_args=command.subcommand_args,
-        stdin=stdin,
-        pwd=pwd or OUTPUT_DIR,
+        stdin=stdin or None,
+        pwd=pwd or archivebox.DATA_DIR,
     )
     )
-
-
-__all__ = (
-    'SUBCOMMANDS',
-    'list_subcommands',
-    'run_subcommand',
-    *SUBCOMMANDS.keys(),
-)
-
-

+ 125 - 288
archivebox/config.py

@@ -28,21 +28,19 @@ import sys
 import json
 import json
 import inspect
 import inspect
 import getpass
 import getpass
-import platform
 import shutil
 import shutil
 import requests
 import requests
 
 
 from hashlib import md5
 from hashlib import md5
 from pathlib import Path
 from pathlib import Path
-from benedict import benedict
 from datetime import datetime, timezone
 from datetime import datetime, timezone
-from typing import Optional, Type, Tuple, Dict, Union, List
+from typing import Optional, Type, Tuple, Dict
 from subprocess import run, PIPE, DEVNULL, STDOUT, TimeoutExpired
 from subprocess import run, PIPE, DEVNULL, STDOUT, TimeoutExpired
 from configparser import ConfigParser
 from configparser import ConfigParser
-from collections import defaultdict
 import importlib.metadata
 import importlib.metadata
 
 
 from pydantic_pkgr import SemVer
 from pydantic_pkgr import SemVer
+from rich.progress import Progress
 
 
 import django
 import django
 from django.db.backends.sqlite3.base import Database as sqlite3
 from django.db.backends.sqlite3.base import Database as sqlite3
@@ -56,6 +54,17 @@ from .config_stubs import (
     ConfigDefaultDict,
     ConfigDefaultDict,
 )
 )
 
 
+from .misc.logging import (
+    CONSOLE,
+    SHOW_PROGRESS,
+    DEFAULT_CLI_COLORS,
+    ANSI,
+    COLOR_DICT,
+    stderr,
+    hint,
+)
+from .misc.checks import check_system_config
+
 # print('STARTING CONFIG LOADING')
 # print('STARTING CONFIG LOADING')
 
 
 # load fallback libraries from vendor dir
 # load fallback libraries from vendor dir
@@ -70,7 +79,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
     'SHELL_CONFIG': {
     'SHELL_CONFIG': {
         'IS_TTY':                   {'type': bool,  'default': lambda _: sys.stdout.isatty()},
         'IS_TTY':                   {'type': bool,  'default': lambda _: sys.stdout.isatty()},
         'USE_COLOR':                {'type': bool,  'default': lambda c: c['IS_TTY']},
         'USE_COLOR':                {'type': bool,  'default': lambda c: c['IS_TTY']},
-        'SHOW_PROGRESS':            {'type': bool,  'default': lambda c: (c['IS_TTY'] and platform.system() != 'Darwin')},  # progress bars are buggy on mac, disable for now
+        'SHOW_PROGRESS':            {'type': bool,  'default': lambda c: c['IS_TTY']},  # rich progress bars work cross-platform now, so no macOS exclusion needed
         'IN_DOCKER':                {'type': bool,  'default': False},
         'IN_DOCKER':                {'type': bool,  'default': False},
         'IN_QEMU':                  {'type': bool,  'default': False},
         'IN_QEMU':                  {'type': bool,  'default': False},
         'PUID':                     {'type': int,   'default': os.getuid()},
         'PUID':                     {'type': int,   'default': os.getuid()},
@@ -306,32 +315,7 @@ ROBOTS_TXT_FILENAME = 'robots.txt'
 FAVICON_FILENAME = 'favicon.ico'
 FAVICON_FILENAME = 'favicon.ico'
 CONFIG_FILENAME = 'ArchiveBox.conf'
 CONFIG_FILENAME = 'ArchiveBox.conf'
 
 
-DEFAULT_CLI_COLORS = benedict(
-    {
-        "reset": "\033[00;00m",
-        "lightblue": "\033[01;30m",
-        "lightyellow": "\033[01;33m",
-        "lightred": "\033[01;35m",
-        "red": "\033[01;31m",
-        "green": "\033[01;32m",
-        "blue": "\033[01;34m",
-        "white": "\033[01;37m",
-        "black": "\033[01;30m",
-    }
-)
-ANSI = AttrDict({k: '' for k in DEFAULT_CLI_COLORS.keys()})
-
-COLOR_DICT = defaultdict(lambda: [(0, 0, 0), (0, 0, 0)], {
-    '00': [(0, 0, 0), (0, 0, 0)],
-    '30': [(0, 0, 0), (0, 0, 0)],
-    '31': [(255, 0, 0), (128, 0, 0)],
-    '32': [(0, 200, 0), (0, 128, 0)],
-    '33': [(255, 255, 0), (128, 128, 0)],
-    '34': [(0, 0, 255), (0, 0, 128)],
-    '35': [(255, 0, 255), (128, 0, 128)],
-    '36': [(0, 255, 255), (0, 128, 128)],
-    '37': [(255, 255, 255), (255, 255, 255)],
-})
+
 
 
 STATICFILE_EXTENSIONS = {
 STATICFILE_EXTENSIONS = {
     # 99.999% of the time, URLs ending in these extensions are static files
     # 99.999% of the time, URLs ending in these extensions are static files
@@ -880,37 +864,6 @@ def parse_version_string(version: str) -> Tuple[int, int, int]:
     return tuple(int(part) for part in base.split('.'))[:3]
     return tuple(int(part) for part in base.split('.'))[:3]
 
 
 
 
-# Logging Helpers
-def stdout(*args, color: Optional[str]=None, prefix: str='', config: Optional[ConfigDict]=None) -> None:
-    ansi = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI
-
-    if color:
-        strs = [ansi[color], ' '.join(str(a) for a in args), ansi['reset'], '\n']
-    else:
-        strs = [' '.join(str(a) for a in args), '\n']
-
-    sys.stdout.write(prefix + ''.join(strs))
-
-def stderr(*args, color: Optional[str]=None, prefix: str='', config: Optional[ConfigDict]=None) -> None:
-    ansi = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI
-
-    if color:
-        strs = [ansi[color], ' '.join(str(a) for a in args), ansi['reset'], '\n']
-    else:
-        strs = [' '.join(str(a) for a in args), '\n']
-
-    sys.stderr.write(prefix + ''.join(strs))
-
-def hint(text: Union[Tuple[str, ...], List[str], str], prefix='    ', config: Optional[ConfigDict]=None) -> None:
-    ansi = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI
-
-    if isinstance(text, str):
-        stderr('{}{lightred}Hint:{reset} {}'.format(prefix, text, **ansi))
-    else:
-        stderr('{}{lightred}Hint:{reset} {}'.format(prefix, text[0], **ansi))
-        for line in text[1:]:
-            stderr('{}      {}'.format(prefix, line))
-
 
 
 # Dependency Metadata Helpers
 # Dependency Metadata Helpers
 def bin_version(binary: Optional[str], cmd: Optional[str]=None, timeout: int=3) -> Optional[str]:
 def bin_version(binary: Optional[str], cmd: Optional[str]=None, timeout: int=3) -> Optional[str]:
@@ -919,6 +872,10 @@ def bin_version(binary: Optional[str], cmd: Optional[str]=None, timeout: int=3)
     abspath = bin_path(binary)
     abspath = bin_path(binary)
     if not binary or not abspath:
     if not binary or not abspath:
         return None
         return None
+    
+    return '999.999.999'
+
+    # Now handled by new BinProvider plugin system, no longer needed:
 
 
     try:
     try:
         bin_env = os.environ | {'LANG': 'C'}
         bin_env = os.environ | {'LANG': 'C'}
@@ -960,6 +917,9 @@ def bin_path(binary: Optional[str]) -> Optional[str]:
     return shutil.which(str(Path(binary).expanduser())) or shutil.which(str(binary)) or binary
     return shutil.which(str(Path(binary).expanduser())) or shutil.which(str(binary)) or binary
 
 
 def bin_hash(binary: Optional[str]) -> Optional[str]:
 def bin_hash(binary: Optional[str]) -> Optional[str]:
+    return 'UNUSED'
+    # DEPRECATED: now handled by new BinProvider plugin system, no longer needed:
+
     if binary is None:
     if binary is None:
         return None
         return None
     abs_path = bin_path(binary)
     abs_path = bin_path(binary)
@@ -1329,246 +1289,123 @@ if not CONFIG['CHECK_SSL_VALIDITY']:
 
 
 ########################### Config Validity Checkers ###########################
 ########################### Config Validity Checkers ###########################
 
 
+INITIAL_STARTUP_PROGRESS = None
+INITIAL_STARTUP_PROGRESS_TASK = 0
 
 
-def check_system_config(config: ConfigDict=CONFIG) -> None:
-    ### Check system environment
-    if config['USER'] == 'root' or str(config['PUID']) == "0":
-        stderr('[!] ArchiveBox should never be run as root!', color='red')
-        stderr('    For more information, see the security overview documentation:')
-        stderr('        https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root')
-        
-        if config['IN_DOCKER']:
-            attempted_command = ' '.join(sys.argv[:3])
-            stderr('')
-            stderr('    {lightred}Hint{reset}: When using Docker, you must run commands with {green}docker run{reset} instead of {lightyellow}docker exec{reset}, e.g.:'.format(**config['ANSI']))
-            stderr(f'        docker compose run archivebox {attempted_command}')
-            stderr(f'        docker run -it -v $PWD/data:/data archivebox/archivebox {attempted_command}')
-            stderr('        or:')
-            stderr(f'        docker compose exec --user=archivebox archivebox /bin/bash -c "archivebox {attempted_command}"')
-            stderr(f'        docker exec -it --user=archivebox <container id> /bin/bash -c "archivebox {attempted_command}"')
-        
-        raise SystemExit(2)
-
-    ### Check Python environment
-    if sys.version_info[:3] < (3, 7, 0):
-        stderr(f'[X] Python version is not new enough: {config["PYTHON_VERSION"]} (>3.6 is required)', color='red')
-        stderr('    See https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#python for help upgrading your Python installation.')
-        raise SystemExit(2)
-
-    if int(CONFIG['DJANGO_VERSION'].split('.')[0]) < 3:
-        stderr(f'[X] Django version is not new enough: {config["DJANGO_VERSION"]} (>3.0 is required)', color='red')
-        stderr('    Upgrade django using pip or your system package manager: pip3 install --upgrade django')
-        raise SystemExit(2)
-
-    if config['PYTHON_ENCODING'] not in ('UTF-8', 'UTF8'):
-        stderr(f'[X] Your system is running python3 scripts with a bad locale setting: {config["PYTHON_ENCODING"]} (it should be UTF-8).', color='red')
-        stderr('    To fix it, add the line "export PYTHONIOENCODING=UTF-8" to your ~/.bashrc file (without quotes)')
-        stderr('    Or if you\'re using ubuntu/debian, run "dpkg-reconfigure locales"')
-        stderr('')
-        stderr('    Confirm that it\'s fixed by opening a new shell and running:')
-        stderr('        python3 -c "import sys; print(sys.stdout.encoding)"   # should output UTF-8')
-        raise SystemExit(2)
-
-    # stderr('[i] Using Chrome binary: {}'.format(shutil.which(CHROME_BINARY) or CHROME_BINARY))
-    # stderr('[i] Using Chrome data dir: {}'.format(os.path.abspath(CHROME_USER_DATA_DIR)))
-    if config['CHROME_USER_DATA_DIR'] is not None and Path(config['CHROME_USER_DATA_DIR']).exists():
-        if not (Path(config['CHROME_USER_DATA_DIR']) / 'Default').exists():
-            stderr('[X] Could not find profile "Default" in CHROME_USER_DATA_DIR.', color='red')
-            stderr(f'    {config["CHROME_USER_DATA_DIR"]}')
-            stderr('    Make sure you set it to a Chrome user data directory containing a Default profile folder.')
-            stderr('    For more info see:')
-            stderr('        https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#CHROME_USER_DATA_DIR')
-            if '/Default' in str(config['CHROME_USER_DATA_DIR']):
-                stderr()
-                stderr('    Try removing /Default from the end e.g.:')
-                stderr('        CHROME_USER_DATA_DIR="{}"'.format(str(config['CHROME_USER_DATA_DIR']).split('/Default')[0]))
-            
-            # hard error is too annoying here, instead just set it to nothing
-            # raise SystemExit(2)
-            config['CHROME_USER_DATA_DIR'] = None
-    else:
-        config['CHROME_USER_DATA_DIR'] = None
-
-
-def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None:
-    invalid_dependencies = [
-        (name, info) for name, info in config['DEPENDENCIES'].items()
-        if info['enabled'] and not info['is_valid']
-    ]
-    if invalid_dependencies and show_help:
-        stderr(f'[!] Warning: Missing {len(invalid_dependencies)} recommended dependencies', color='lightyellow')
-        for dependency, info in invalid_dependencies:
-            stderr(
-                '    ! {}: {} ({})'.format(
-                    dependency,
-                    info['path'] or 'unable to find binary',
-                    info['version'] or 'unable to detect version',
-                )
-            )
-            if dependency in ('YOUTUBEDL_BINARY', 'CHROME_BINARY', 'SINGLEFILE_BINARY', 'READABILITY_BINARY', 'MERCURY_BINARY'):
-                hint(('To install all packages automatically run: archivebox setup',
-                    f'or to disable it and silence this warning: archivebox config --set SAVE_{dependency.rsplit("_", 1)[0]}=False',
-                    ''), prefix='      ')
-        stderr('')
-
-    if config['TIMEOUT'] < 5:
-        stderr(f'[!] Warning: TIMEOUT is set too low! (currently set to TIMEOUT={config["TIMEOUT"]} seconds)', color='red')
-        stderr('    You must allow *at least* 5 seconds for indexing and archive methods to run succesfully.')
-        stderr('    (Setting it to somewhere between 30 and 3000 seconds is recommended)')
-        stderr()
-        stderr('    If you want to make ArchiveBox run faster, disable specific archive methods instead:')
-        stderr('        https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#archive-method-toggles')
-        stderr()
-
-    elif config['USE_CHROME'] and config['TIMEOUT'] < 15:
-        stderr(f'[!] Warning: TIMEOUT is set too low! (currently set to TIMEOUT={config["TIMEOUT"]} seconds)', color='red')
-        stderr('    Chrome will fail to archive all sites if set to less than ~15 seconds.')
-        stderr('    (Setting it to somewhere between 30 and 300 seconds is recommended)')
-        stderr()
-        stderr('    If you want to make ArchiveBox run faster, disable specific archive methods instead:')
-        stderr('        https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#archive-method-toggles')
-        stderr()
-
-    if config['USE_YOUTUBEDL'] and config['MEDIA_TIMEOUT'] < 20:
-        stderr(f'[!] Warning: MEDIA_TIMEOUT is set too low! (currently set to MEDIA_TIMEOUT={config["MEDIA_TIMEOUT"]} seconds)', color='red')
-        stderr('    youtube-dl/yt-dlp will fail to archive any media if set to less than ~20 seconds.')
-        stderr('    (Setting it somewhere over 60 seconds is recommended)')
-        stderr()
-        stderr('    If you want to disable media archiving entirely, set SAVE_MEDIA=False instead:')
-        stderr('        https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#save_media')
-        stderr()
def bump_startup_progress_bar():
    """Advance the one-time startup progress bar by one step, if it is active."""
    # Only reads the module-level handles, so no `global` declaration is needed;
    # INITIAL_STARTUP_PROGRESS is None until setup_django() installs a Progress.
    progress = INITIAL_STARTUP_PROGRESS
    if progress:
        progress.update(INITIAL_STARTUP_PROGRESS_TASK, advance=1)   # type: ignore
 
 
-        
-def check_data_folder(out_dir: Union[str, Path, None]=None, config: ConfigDict=CONFIG) -> None:
-    output_dir = out_dir or config['OUTPUT_DIR']
-    assert isinstance(output_dir, (str, Path))
-
-    archive_dir_exists = (Path(output_dir) / ARCHIVE_DIR_NAME).exists()
-    if not archive_dir_exists:
-        stderr('[X] No archivebox index found in the current directory.', color='red')
-        stderr(f'    {output_dir}', color='lightyellow')
-        stderr()
-        stderr('    {lightred}Hint{reset}: Are you running archivebox in the right folder?'.format(**config['ANSI']))
-        stderr('        cd path/to/your/archive/folder')
-        stderr('        archivebox [command]')
-        stderr()
-        stderr('    {lightred}Hint{reset}: To create a new archive collection or import existing data in this folder, run:'.format(**config['ANSI']))
-        stderr('        archivebox init')
-        raise SystemExit(2)
-
-
-def check_migrations(out_dir: Union[str, Path, None]=None, config: ConfigDict=CONFIG):
-    output_dir = out_dir or config['OUTPUT_DIR']
-    from .index.sql import list_migrations
-
-    pending_migrations = [name for status, name in list_migrations() if not status]
-
-    if pending_migrations:
-        stderr('[X] This collection was created with an older version of ArchiveBox and must be upgraded first.', color='lightyellow')
-        stderr(f'    {output_dir}')
-        stderr()
-        stderr(f'    To upgrade it to the latest version and apply the {len(pending_migrations)} pending migrations, run:')
-        stderr('        archivebox init')
-        raise SystemExit(3)
-
-    (Path(output_dir) / SOURCES_DIR_NAME).mkdir(exist_ok=True)
-    (Path(output_dir) / LOGS_DIR_NAME).mkdir(exist_ok=True)
-    (Path(output_dir) / CACHE_DIR_NAME).mkdir(exist_ok=True)
-    (Path(output_dir) / LIB_DIR_NAME / 'bin').mkdir(exist_ok=True, parents=True)
-    (Path(output_dir) / PERSONAS_DIR_NAME / 'Default').mkdir(exist_ok=True, parents=True)
+def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG, in_memory_db=False) -> None:
+    global INITIAL_STARTUP_PROGRESS
+    global INITIAL_STARTUP_PROGRESS_TASK
+    
+    with Progress(transient=True, expand=True, console=CONSOLE) as INITIAL_STARTUP_PROGRESS:
+        INITIAL_STARTUP_PROGRESS_TASK = INITIAL_STARTUP_PROGRESS.add_task("[green]Loading modules...", total=25)
+        check_system_config(config)
 
 
+        output_dir = out_dir or Path(config['OUTPUT_DIR'])
 
 
+        assert isinstance(output_dir, Path) and isinstance(config['PACKAGE_DIR'], Path)
 
 
-def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG, in_memory_db=False) -> None:
-    check_system_config()
+        bump_startup_progress_bar()
+        try:
+            from django.core.management import call_command
 
 
-    output_dir = out_dir or Path(config['OUTPUT_DIR'])
+            sys.path.append(str(config['PACKAGE_DIR']))
+            os.environ.setdefault('OUTPUT_DIR', str(output_dir))
+            assert (config['PACKAGE_DIR'] / 'core' / 'settings.py').exists(), 'settings.py was not found at archivebox/core/settings.py'
+            os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
 
 
-    assert isinstance(output_dir, Path) and isinstance(config['PACKAGE_DIR'], Path)
+            # Check to make sure JSON extension is available in our Sqlite3 instance
+            try:
+                cursor = sqlite3.connect(':memory:').cursor()
+                cursor.execute('SELECT JSON(\'{"a": "b"}\')')
+            except sqlite3.OperationalError as exc:
+                stderr(f'[X] Your SQLite3 version is missing the required JSON1 extension: {exc}', color='red')
+                hint([
+                    'Upgrade your Python version or install the extension manually:',
+                    'https://code.djangoproject.com/wiki/JSON1Extension'
+                ])
+                
+            bump_startup_progress_bar()
+
+            if in_memory_db:
+                # some commands (e.g. oneshot) dont store a long-lived sqlite3 db file on disk.
+                # in those cases we create a temporary in-memory db and run the migrations
+                # immediately to get a usable in-memory-database at startup
+                os.environ.setdefault("ARCHIVEBOX_DATABASE_NAME", ":memory:")
+                django.setup()
+                
+                bump_startup_progress_bar()
+                call_command("migrate", interactive=False, verbosity=0)
+            else:
+                # Otherwise use default sqlite3 file-based database and initialize django
+                # without running migrations automatically (user runs them manually by calling init)
+                django.setup()
+            
+            bump_startup_progress_bar()
 
 
-    try:
-        from django.core.management import call_command
+            from django.conf import settings
 
 
-        sys.path.append(str(config['PACKAGE_DIR']))
-        os.environ.setdefault('OUTPUT_DIR', str(output_dir))
-        assert (config['PACKAGE_DIR'] / 'core' / 'settings.py').exists(), 'settings.py was not found at archivebox/core/settings.py'
-        os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
+            # log startup message to the error log
+            with open(settings.ERROR_LOG, "a", encoding='utf-8') as f:
+                command = ' '.join(sys.argv)
+                ts = datetime.now(timezone.utc).strftime('%Y-%m-%d__%H:%M:%S')
+                f.write(f"\n> {command}; TS={ts} VERSION={config['VERSION']} IN_DOCKER={config['IN_DOCKER']} IS_TTY={config['IS_TTY']}\n")
 
 
-        # Check to make sure JSON extension is available in our Sqlite3 instance
-        try:
-            cursor = sqlite3.connect(':memory:').cursor()
-            cursor.execute('SELECT JSON(\'{"a": "b"}\')')
-        except sqlite3.OperationalError as exc:
-            stderr(f'[X] Your SQLite3 version is missing the required JSON1 extension: {exc}', color='red')
-            hint([
-                'Upgrade your Python version or install the extension manually:',
-                'https://code.djangoproject.com/wiki/JSON1Extension'
-            ])
-
-        if in_memory_db:
-            # some commands (e.g. oneshot) dont store a long-lived sqlite3 db file on disk.
-            # in those cases we create a temporary in-memory db and run the migrations
-            # immediately to get a usable in-memory-database at startup
-            os.environ.setdefault("ARCHIVEBOX_DATABASE_NAME", ":memory:")
-            django.setup()
-            call_command("migrate", interactive=False, verbosity=0)
-        else:
-            # Otherwise use default sqlite3 file-based database and initialize django
-            # without running migrations automatically (user runs them manually by calling init)
-            django.setup()
-
-        from django.conf import settings
-
-        # log startup message to the error log
-        with open(settings.ERROR_LOG, "a", encoding='utf-8') as f:
-            command = ' '.join(sys.argv)
-            ts = datetime.now(timezone.utc).strftime('%Y-%m-%d__%H:%M:%S')
-            f.write(f"\n> {command}; TS={ts} VERSION={config['VERSION']} IN_DOCKER={config['IN_DOCKER']} IS_TTY={config['IS_TTY']}\n")
-
-        if check_db:
-            # Enable WAL mode in sqlite3
-            from django.db import connection
-            with connection.cursor() as cursor:
-
-                # Set Journal mode to WAL to allow for multiple writers
-                current_mode = cursor.execute("PRAGMA journal_mode")
-                if current_mode != 'wal':
-                    cursor.execute("PRAGMA journal_mode=wal;")
-
-                # Set max blocking delay for concurrent writes and write sync mode
-                # https://litestream.io/tips/#busy-timeout
-                cursor.execute("PRAGMA busy_timeout = 5000;")
-                cursor.execute("PRAGMA synchronous = NORMAL;")
-
-            # Create cache table in DB if needed
-            try:
-                from django.core.cache import cache
-                cache.get('test', None)
-            except django.db.utils.OperationalError:
-                call_command("createcachetable", verbosity=0)
+            if check_db:
+                # Enable WAL mode in sqlite3
+                from django.db import connection
+                with connection.cursor() as cursor:
+
+                    # Set Journal mode to WAL to allow for multiple writers
+                    current_mode = cursor.execute("PRAGMA journal_mode")
+                    if current_mode != 'wal':
+                        cursor.execute("PRAGMA journal_mode=wal;")
+
+                    # Set max blocking delay for concurrent writes and write sync mode
+                    # https://litestream.io/tips/#busy-timeout
+                    cursor.execute("PRAGMA busy_timeout = 5000;")
+                    cursor.execute("PRAGMA synchronous = NORMAL;")
+
+                # Create cache table in DB if needed
+                try:
+                    from django.core.cache import cache
+                    cache.get('test', None)
+                except django.db.utils.OperationalError:
+                    call_command("createcachetable", verbosity=0)
 
 
-            # if archivebox gets imported multiple times, we have to close
-            # the sqlite3 whenever we init from scratch to avoid multiple threads
-            # sharing the same connection by accident
-            from django.db import connections
-            for conn in connections.all():
-                conn.close_if_unusable_or_obsolete()
+                bump_startup_progress_bar()
 
 
-            sql_index_path = Path(output_dir) / SQL_INDEX_FILENAME
-            assert sql_index_path.exists(), (
-                f'No database file {SQL_INDEX_FILENAME} found in: {config["OUTPUT_DIR"]} (Are you in an ArchiveBox collection directory?)')
+                # if archivebox gets imported multiple times, we have to close
+                # the sqlite3 whenever we init from scratch to avoid multiple threads
+                # sharing the same connection by accident
+                from django.db import connections
+                for conn in connections.all():
+                    conn.close_if_unusable_or_obsolete()
 
 
+                sql_index_path = Path(output_dir) / SQL_INDEX_FILENAME
+                assert sql_index_path.exists(), (
+                    f'No database file {SQL_INDEX_FILENAME} found in: {config["OUTPUT_DIR"]} (Are you in an ArchiveBox collection directory?)')
 
 
-            # https://docs.pydantic.dev/logfire/integrations/django/ Logfire Debugging
-            if settings.DEBUG_LOGFIRE:
-                from opentelemetry.instrumentation.sqlite3 import SQLite3Instrumentor
-                SQLite3Instrumentor().instrument()
+                bump_startup_progress_bar()
 
 
-                import logfire
+                # https://docs.pydantic.dev/logfire/integrations/django/ Logfire Debugging
+                if settings.DEBUG_LOGFIRE:
+                    from opentelemetry.instrumentation.sqlite3 import SQLite3Instrumentor
+                    SQLite3Instrumentor().instrument()
 
 
-                logfire.configure()
-                logfire.instrument_django(is_sql_commentor_enabled=True)
-                logfire.info(f'Started ArchiveBox v{CONFIG.VERSION}', argv=sys.argv)
+                    import logfire
+
+                    logfire.configure()
+                    logfire.instrument_django(is_sql_commentor_enabled=True)
+                    logfire.info(f'Started ArchiveBox v{CONFIG.VERSION}', argv=sys.argv)
+
+        except KeyboardInterrupt:
+            raise SystemExit(2)
 
 
-    except KeyboardInterrupt:
-        raise SystemExit(2)
+    INITIAL_STARTUP_PROGRESS = None
+    INITIAL_STARTUP_PROGRESS_TASK = None

+ 2 - 0
archivebox/core/settings.py

@@ -170,6 +170,7 @@ STATICFILES_DIRS = [
     *[
     *[
         str(plugin_dir / 'static')
         str(plugin_dir / 'static')
         for plugin_dir in PLUGIN_DIRS.values()
         for plugin_dir in PLUGIN_DIRS.values()
+        if (plugin_dir / 'static').is_dir()
     ],
     ],
     str(PACKAGE_DIR / CONFIG.TEMPLATES_DIR_NAME / 'static'),
     str(PACKAGE_DIR / CONFIG.TEMPLATES_DIR_NAME / 'static'),
 ]
 ]
@@ -179,6 +180,7 @@ TEMPLATE_DIRS = [
     *[
     *[
         str(plugin_dir / 'templates')
         str(plugin_dir / 'templates')
         for plugin_dir in PLUGIN_DIRS.values()
         for plugin_dir in PLUGIN_DIRS.values()
+        if (plugin_dir / 'templates').is_dir()
     ],
     ],
     str(PACKAGE_DIR / CONFIG.TEMPLATES_DIR_NAME / 'core'),
     str(PACKAGE_DIR / CONFIG.TEMPLATES_DIR_NAME / 'core'),
     str(PACKAGE_DIR / CONFIG.TEMPLATES_DIR_NAME / 'admin'),
     str(PACKAGE_DIR / CONFIG.TEMPLATES_DIR_NAME / 'admin'),

+ 5 - 0
archivebox/core/settings_logging.py

@@ -141,18 +141,22 @@ SETTINGS_LOGGING = {
         "api": {
         "api": {
             "handlers": ["default", "logfile"],
             "handlers": ["default", "logfile"],
             "level": "DEBUG",
             "level": "DEBUG",
+            "propagate": False,
         },
         },
         "checks": {
         "checks": {
             "handlers": ["default", "logfile"],
             "handlers": ["default", "logfile"],
             "level": "DEBUG",
             "level": "DEBUG",
+            "propagate": False,
         },
         },
         "core": {
         "core": {
             "handlers": ["default", "logfile"],
             "handlers": ["default", "logfile"],
             "level": "DEBUG",
             "level": "DEBUG",
+            "propagate": False,
         },
         },
         "plugins_extractor": {
         "plugins_extractor": {
             "handlers": ["default", "logfile"],
             "handlers": ["default", "logfile"],
             "level": "DEBUG",
             "level": "DEBUG",
+            "propagate": False,
         },
         },
         "httpx": {
         "httpx": {
             "handlers": ["outbound_webhooks"],
             "handlers": ["outbound_webhooks"],
@@ -164,6 +168,7 @@ SETTINGS_LOGGING = {
             "handlers": ["default", "logfile"],
             "handlers": ["default", "logfile"],
             "level": "INFO",
             "level": "INFO",
             "filters": ["noisyrequestsfilter"],
             "filters": ["noisyrequestsfilter"],
+            "propagate": False,
         },
         },
         "django.utils.autoreload": {
         "django.utils.autoreload": {
             "propagate": False,
             "propagate": False,

+ 3 - 3
archivebox/logging_util.py

@@ -230,7 +230,7 @@ def progress_bar(seconds: int, prefix: str='') -> None:
         print()
         print()
 
 
 
 
-def log_cli_command(subcommand: str, subcommand_args: List[str], stdin: Optional[str], pwd: str):
+def log_cli_command(subcommand: str, subcommand_args: List[str], stdin: Optional[str | IO], pwd: str):
     cmd = ' '.join(('archivebox', subcommand, *subcommand_args))
     cmd = ' '.join(('archivebox', subcommand, *subcommand_args))
     stderr('{black}[i] [{now}] ArchiveBox v{VERSION}: {cmd}{reset}'.format(
     stderr('{black}[i] [{now}] ArchiveBox v{VERSION}: {cmd}{reset}'.format(
         now=datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S'),
         now=datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S'),
@@ -526,11 +526,11 @@ def log_removal_finished(all_links: int, to_remove: int):
 
 
 
 
 def log_shell_welcome_msg():
 def log_shell_welcome_msg():
-    from .cli import list_subcommands
+    from .cli import CLI_SUBCOMMANDS
 
 
     print('{green}# ArchiveBox Imports{reset}'.format(**ANSI))
     print('{green}# ArchiveBox Imports{reset}'.format(**ANSI))
     print('{green}from core.models import Snapshot, ArchiveResult, Tag, User{reset}'.format(**ANSI))
     print('{green}from core.models import Snapshot, ArchiveResult, Tag, User{reset}'.format(**ANSI))
-    print('{green}from cli import *\n    {}{reset}'.format("\n    ".join(list_subcommands().keys()), **ANSI))
+    print('{green}from cli import *\n    {}{reset}'.format("\n    ".join(CLI_SUBCOMMANDS.keys()), **ANSI))
     print()
     print()
     print('[i] Welcome to the ArchiveBox Shell!')
     print('[i] Welcome to the ArchiveBox Shell!')
     print('    https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Shell-Usage')
     print('    https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Shell-Usage')

+ 19 - 21
archivebox/main.py

@@ -16,7 +16,7 @@ from django.db.models import QuerySet
 from django.utils import timezone
 from django.utils import timezone
 
 
 from .cli import (
 from .cli import (
-    list_subcommands,
+    CLI_SUBCOMMANDS,
     run_subcommand,
     run_subcommand,
     display_first,
     display_first,
     meta_cmds,
     meta_cmds,
@@ -66,9 +66,9 @@ from .index.html import (
 )
 )
 from .index.csv import links_to_csv
 from .index.csv import links_to_csv
 from .extractors import archive_links, archive_link, ignore_methods
 from .extractors import archive_links, archive_link, ignore_methods
+from .misc.logging import stderr, hint
+from .misc.checks import check_data_folder, check_dependencies
 from .config import (
 from .config import (
-    stderr,
-    hint,
     ConfigDict,
     ConfigDict,
     ANSI,
     ANSI,
     IS_TTY,
     IS_TTY,
@@ -98,8 +98,6 @@ from .config import (
     SEARCH_BACKEND_ENGINE,
     SEARCH_BACKEND_ENGINE,
     LDAP,
     LDAP,
     get_version,
     get_version,
-    check_dependencies,
-    check_data_folder,
     write_config_file,
     write_config_file,
     VERSION,
     VERSION,
     VERSIONS_AVAILABLE,
     VERSIONS_AVAILABLE,
@@ -146,7 +144,7 @@ from .logging_util import (
 def help(out_dir: Path=OUTPUT_DIR) -> None:
 def help(out_dir: Path=OUTPUT_DIR) -> None:
     """Print the ArchiveBox help message and usage"""
     """Print the ArchiveBox help message and usage"""
 
 
-    all_subcommands = list_subcommands()
+    all_subcommands = CLI_SUBCOMMANDS
     COMMANDS_HELP_TEXT = '\n    '.join(
     COMMANDS_HELP_TEXT = '\n    '.join(
         f'{cmd.ljust(20)} {summary}'
         f'{cmd.ljust(20)} {summary}'
         for cmd, summary in all_subcommands.items()
         for cmd, summary in all_subcommands.items()
@@ -281,7 +279,7 @@ def version(quiet: bool=False,
             print('{white}[i] Data locations:{reset} (not in a data directory)'.format(**ANSI))
             print('{white}[i] Data locations:{reset} (not in a data directory)'.format(**ANSI))
 
 
         print()
         print()
-        check_dependencies()
+        check_dependencies(CONFIG)
 
 
 
 
 @enforce_types
 @enforce_types
@@ -469,7 +467,7 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=
 def status(out_dir: Path=OUTPUT_DIR) -> None:
 def status(out_dir: Path=OUTPUT_DIR) -> None:
     """Print out some info and statistics about the archive collection"""
     """Print out some info and statistics about the archive collection"""
 
 
-    check_data_folder(out_dir=out_dir)
+    check_data_folder(CONFIG)
 
 
     from core.models import Snapshot
     from core.models import Snapshot
     from django.contrib.auth import get_user_model
     from django.contrib.auth import get_user_model
@@ -609,8 +607,8 @@ def add(urls: Union[str, List[str]],
         run_subcommand('init', stdin=None, pwd=out_dir)
         run_subcommand('init', stdin=None, pwd=out_dir)
 
 
     # Load list of links from the existing index
     # Load list of links from the existing index
-    check_data_folder(out_dir=out_dir)
-    check_dependencies()
+    check_data_folder(CONFIG)
+    check_dependencies(CONFIG)
     new_links: List[Link] = []
     new_links: List[Link] = []
     all_links = load_main_index(out_dir=out_dir)
     all_links = load_main_index(out_dir=out_dir)
 
 
@@ -705,7 +703,7 @@ def remove(filter_str: Optional[str]=None,
            out_dir: Path=OUTPUT_DIR) -> List[Link]:
            out_dir: Path=OUTPUT_DIR) -> List[Link]:
     """Remove the specified URLs from the archive"""
     """Remove the specified URLs from the archive"""
     
     
-    check_data_folder(out_dir=out_dir)
+    check_data_folder(CONFIG)
 
 
     if snapshots is None:
     if snapshots is None:
         if filter_str and filter_patterns:
         if filter_str and filter_patterns:
@@ -792,8 +790,8 @@ def update(resume: Optional[float]=None,
     from core.models import ArchiveResult
     from core.models import ArchiveResult
     from .search import index_links
     from .search import index_links
 
 
-    check_data_folder(out_dir=out_dir)
-    check_dependencies()
+    check_data_folder(CONFIG)
+    check_dependencies(CONFIG)
     new_links: List[Link] = [] # TODO: Remove input argument: only_new
     new_links: List[Link] = [] # TODO: Remove input argument: only_new
 
 
     extractors = extractors.split(",") if extractors else []
     extractors = extractors.split(",") if extractors else []
@@ -863,7 +861,7 @@ def list_all(filter_patterns_str: Optional[str]=None,
              out_dir: Path=OUTPUT_DIR) -> Iterable[Link]:
              out_dir: Path=OUTPUT_DIR) -> Iterable[Link]:
     """List, filter, and export information about archive entries"""
     """List, filter, and export information about archive entries"""
     
     
-    check_data_folder(out_dir=out_dir)
+    check_data_folder(CONFIG)
 
 
     if filter_patterns and filter_patterns_str:
     if filter_patterns and filter_patterns_str:
         stderr(
         stderr(
@@ -911,7 +909,7 @@ def list_links(snapshots: Optional[QuerySet]=None,
                before: Optional[float]=None,
                before: Optional[float]=None,
                out_dir: Path=OUTPUT_DIR) -> Iterable[Link]:
                out_dir: Path=OUTPUT_DIR) -> Iterable[Link]:
     
     
-    check_data_folder(out_dir=out_dir)
+    check_data_folder(CONFIG)
 
 
     if snapshots:
     if snapshots:
         all_snapshots = snapshots
         all_snapshots = snapshots
@@ -935,7 +933,7 @@ def list_folders(links: List[Link],
                  status: str,
                  status: str,
                  out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
                  out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
     
     
-    check_data_folder(out_dir=out_dir)
+    check_data_folder(CONFIG)
 
 
     STATUS_FUNCTIONS = {
     STATUS_FUNCTIONS = {
         "indexed": get_indexed_folders,
         "indexed": get_indexed_folders,
@@ -1080,7 +1078,7 @@ def config(config_options_str: Optional[str]=None,
            out_dir: Path=OUTPUT_DIR) -> None:
            out_dir: Path=OUTPUT_DIR) -> None:
     """Get and set your ArchiveBox project configuration values"""
     """Get and set your ArchiveBox project configuration values"""
 
 
-    check_data_folder(out_dir=out_dir)
+    check_data_folder(CONFIG)
 
 
     if config_options and config_options_str:
     if config_options and config_options_str:
         stderr(
         stderr(
@@ -1183,7 +1181,7 @@ def schedule(add: bool=False,
              out_dir: Path=OUTPUT_DIR):
              out_dir: Path=OUTPUT_DIR):
     """Set ArchiveBox to regularly import URLs at specific times using cron"""
     """Set ArchiveBox to regularly import URLs at specific times using cron"""
     
     
-    check_data_folder(out_dir=out_dir)
+    check_data_folder(CONFIG)
 
 
     Path(LOGS_DIR).mkdir(exist_ok=True)
     Path(LOGS_DIR).mkdir(exist_ok=True)
 
 
@@ -1324,7 +1322,7 @@ def server(runserver_args: Optional[List[str]]=None,
     config.SHOW_PROGRESS = False
     config.SHOW_PROGRESS = False
     config.DEBUG = config.DEBUG or debug
     config.DEBUG = config.DEBUG or debug
 
 
-    check_data_folder(out_dir=out_dir)
+    check_data_folder(CONFIG)
 
 
     from django.core.management import call_command
     from django.core.management import call_command
     from django.contrib.auth.models import User
     from django.contrib.auth.models import User
@@ -1417,7 +1415,7 @@ def server(runserver_args: Optional[List[str]]=None,
 def manage(args: Optional[List[str]]=None, out_dir: Path=OUTPUT_DIR) -> None:
 def manage(args: Optional[List[str]]=None, out_dir: Path=OUTPUT_DIR) -> None:
     """Run an ArchiveBox Django management command"""
     """Run an ArchiveBox Django management command"""
 
 
-    check_data_folder(out_dir=out_dir)
+    check_data_folder(CONFIG)
     from django.core.management import execute_from_command_line
     from django.core.management import execute_from_command_line
 
 
     if (args and "createsuperuser" in args) and (IN_DOCKER and not IS_TTY):
     if (args and "createsuperuser" in args) and (IN_DOCKER and not IS_TTY):
@@ -1432,7 +1430,7 @@ def manage(args: Optional[List[str]]=None, out_dir: Path=OUTPUT_DIR) -> None:
 def shell(out_dir: Path=OUTPUT_DIR) -> None:
 def shell(out_dir: Path=OUTPUT_DIR) -> None:
     """Enter an interactive ArchiveBox Django shell"""
     """Enter an interactive ArchiveBox Django shell"""
 
 
-    check_data_folder(out_dir=out_dir)
+    check_data_folder(CONFIG)
 
 
     from django.core.management import call_command
     from django.core.management import call_command
     call_command("shell_plus")
     call_command("shell_plus")

+ 1 - 1
archivebox/manage.py

@@ -7,7 +7,7 @@ if __name__ == '__main__':
     # versions of ./manage.py commands whenever possible. When that's not possible
     # versions of ./manage.py commands whenever possible. When that's not possible
     # (e.g. makemigrations), you can comment out this check temporarily
     # (e.g. makemigrations), you can comment out this check temporarily
 
 
-    allowed_commands = ['makemigrations', 'migrate', 'startapp','squashmigrations', 'generate_stubs']
+    allowed_commands = ['makemigrations', 'migrate', 'startapp','squashmigrations', 'generate_stubs', 'test']
 
 
     if not any(cmd in sys.argv for cmd in allowed_commands):
     if not any(cmd in sys.argv for cmd in allowed_commands):
         print("[X] Don't run ./manage.py directly (unless you are a developer running makemigrations):")
         print("[X] Don't run ./manage.py directly (unless you are a developer running makemigrations):")

+ 0 - 0
archivebox/misc/__init__.py


+ 159 - 0
archivebox/misc/checks.py

@@ -0,0 +1,159 @@
+__package__ = 'archivebox.misc'
+
+# TODO: migrate all of these to new plugantic/base_check.py Check system
+
+import sys
+from benedict import benedict
+from pathlib import Path
+
+from .logging import stderr, hint
+
+
+def check_system_config(config: benedict) -> None:
+    ### Check system environment
+    if config['USER'] == 'root' or str(config['PUID']) == "0":
+        stderr('[!] ArchiveBox should never be run as root!', color='red')
+        stderr('    For more information, see the security overview documentation:')
+        stderr('        https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root')
+        
+        if config['IN_DOCKER']:
+            attempted_command = ' '.join(sys.argv[:3])
+            stderr('')
+            stderr('    {lightred}Hint{reset}: When using Docker, you must run commands with {green}docker run{reset} instead of {lightyellow}docker exec{reset}, e.g.:'.format(**config['ANSI']))
+            stderr(f'        docker compose run archivebox {attempted_command}')
+            stderr(f'        docker run -it -v $PWD/data:/data archivebox/archivebox {attempted_command}')
+            stderr('        or:')
+            stderr(f'        docker compose exec --user=archivebox archivebox /bin/bash -c "archivebox {attempted_command}"')
+            stderr(f'        docker exec -it --user=archivebox <container id> /bin/bash -c "archivebox {attempted_command}"')
+        
+        raise SystemExit(2)
+
+    ### Check Python environment
+    if sys.version_info[:3] < (3, 7, 0):
+        stderr(f'[X] Python version is not new enough: {config["PYTHON_VERSION"]} (>3.6 is required)', color='red')
+        stderr('    See https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#python for help upgrading your Python installation.')
+        raise SystemExit(2)
+
+    if int(config['DJANGO_VERSION'].split('.')[0]) < 3:
+        stderr(f'[X] Django version is not new enough: {config["DJANGO_VERSION"]} (>3.0 is required)', color='red')
+        stderr('    Upgrade django using pip or your system package manager: pip3 install --upgrade django')
+        raise SystemExit(2)
+
+    if config['PYTHON_ENCODING'] not in ('UTF-8', 'UTF8'):
+        stderr(f'[X] Your system is running python3 scripts with a bad locale setting: {config["PYTHON_ENCODING"]} (it should be UTF-8).', color='red')
+        stderr('    To fix it, add the line "export PYTHONIOENCODING=UTF-8" to your ~/.bashrc file (without quotes)')
+        stderr('    Or if you\'re using ubuntu/debian, run "dpkg-reconfigure locales"')
+        stderr('')
+        stderr('    Confirm that it\'s fixed by opening a new shell and running:')
+        stderr('        python3 -c "import sys; print(sys.stdout.encoding)"   # should output UTF-8')
+        raise SystemExit(2)
+
+    # stderr('[i] Using Chrome binary: {}'.format(shutil.which(CHROME_BINARY) or CHROME_BINARY))
+    # stderr('[i] Using Chrome data dir: {}'.format(os.path.abspath(CHROME_USER_DATA_DIR)))
+    if config['CHROME_USER_DATA_DIR'] is not None and Path(config['CHROME_USER_DATA_DIR']).exists():
+        if not (Path(config['CHROME_USER_DATA_DIR']) / 'Default').exists():
+            stderr('[X] Could not find profile "Default" in CHROME_USER_DATA_DIR.', color='red')
+            stderr(f'    {config["CHROME_USER_DATA_DIR"]}')
+            stderr('    Make sure you set it to a Chrome user data directory containing a Default profile folder.')
+            stderr('    For more info see:')
+            stderr('        https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#CHROME_USER_DATA_DIR')
+            if '/Default' in str(config['CHROME_USER_DATA_DIR']):
+                stderr()
+                stderr('    Try removing /Default from the end e.g.:')
+                stderr('        CHROME_USER_DATA_DIR="{}"'.format(str(config['CHROME_USER_DATA_DIR']).split('/Default')[0]))
+            
+            # hard error is too annoying here, instead just set it to nothing
+            # raise SystemExit(2)
+            config['CHROME_USER_DATA_DIR'] = None
+    else:
+        config['CHROME_USER_DATA_DIR'] = None
+
+
+def check_dependencies(config: benedict, show_help: bool=True) -> None:
+    invalid_dependencies = [
+        (name, info) for name, info in config['DEPENDENCIES'].items()
+        if info['enabled'] and not info['is_valid']
+    ]
+    if invalid_dependencies and show_help:
+        stderr(f'[!] Warning: Missing {len(invalid_dependencies)} recommended dependencies', color='lightyellow')
+        for dependency, info in invalid_dependencies:
+            stderr(
+                '    ! {}: {} ({})'.format(
+                    dependency,
+                    info['path'] or 'unable to find binary',
+                    info['version'] or 'unable to detect version',
+                )
+            )
+            if dependency in ('YOUTUBEDL_BINARY', 'CHROME_BINARY', 'SINGLEFILE_BINARY', 'READABILITY_BINARY', 'MERCURY_BINARY'):
+                hint(('To install all packages automatically run: archivebox setup',
+                    f'or to disable it and silence this warning: archivebox config --set SAVE_{dependency.rsplit("_", 1)[0]}=False',
+                    ''), prefix='      ')
+        stderr('')
+
+    if config['TIMEOUT'] < 5:
+        stderr(f'[!] Warning: TIMEOUT is set too low! (currently set to TIMEOUT={config["TIMEOUT"]} seconds)', color='red')
+        stderr('    You must allow *at least* 5 seconds for indexing and archive methods to run succesfully.')
+        stderr('    (Setting it to somewhere between 30 and 3000 seconds is recommended)')
+        stderr()
+        stderr('    If you want to make ArchiveBox run faster, disable specific archive methods instead:')
+        stderr('        https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#archive-method-toggles')
+        stderr()
+
+    elif config['USE_CHROME'] and config['TIMEOUT'] < 15:
+        stderr(f'[!] Warning: TIMEOUT is set too low! (currently set to TIMEOUT={config["TIMEOUT"]} seconds)', color='red')
+        stderr('    Chrome will fail to archive all sites if set to less than ~15 seconds.')
+        stderr('    (Setting it to somewhere between 30 and 300 seconds is recommended)')
+        stderr()
+        stderr('    If you want to make ArchiveBox run faster, disable specific archive methods instead:')
+        stderr('        https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#archive-method-toggles')
+        stderr()
+
+    if config['USE_YOUTUBEDL'] and config['MEDIA_TIMEOUT'] < 20:
+        stderr(f'[!] Warning: MEDIA_TIMEOUT is set too low! (currently set to MEDIA_TIMEOUT={config["MEDIA_TIMEOUT"]} seconds)', color='red')
+        stderr('    youtube-dl/yt-dlp will fail to archive any media if set to less than ~20 seconds.')
+        stderr('    (Setting it somewhere over 60 seconds is recommended)')
+        stderr()
+        stderr('    If you want to disable media archiving entirely, set SAVE_MEDIA=False instead:')
+        stderr('        https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#save_media')
+        stderr()
+
+        
+
+
+def check_data_folder(config: benedict) -> None:
+    output_dir = config['OUTPUT_DIR']
+
+    archive_dir_exists = (Path(output_dir) / 'archive').exists()
+    if not archive_dir_exists:
+        stderr('[X] No archivebox index found in the current directory.', color='red')
+        stderr(f'    {output_dir}', color='lightyellow')
+        stderr()
+        stderr('    {lightred}Hint{reset}: Are you running archivebox in the right folder?'.format(**config['ANSI']))
+        stderr('        cd path/to/your/archive/folder')
+        stderr('        archivebox [command]')
+        stderr()
+        stderr('    {lightred}Hint{reset}: To create a new archive collection or import existing data in this folder, run:'.format(**config['ANSI']))
+        stderr('        archivebox init')
+        raise SystemExit(2)
+
+
+def check_migrations(config: benedict):
+    output_dir = config['OUTPUT_DIR']
+    
+    from ..index.sql import list_migrations
+
+    pending_migrations = [name for status, name in list_migrations() if not status]
+
+    if pending_migrations:
+        stderr('[X] This collection was created with an older version of ArchiveBox and must be upgraded first.', color='lightyellow')
+        stderr(f'    {output_dir}')
+        stderr()
+        stderr(f'    To upgrade it to the latest version and apply the {len(pending_migrations)} pending migrations, run:')
+        stderr('        archivebox init')
+        raise SystemExit(3)
+
+    (Path(output_dir) / config['SOURCES_DIR_NAME']).mkdir(exist_ok=True)
+    (Path(output_dir) / config['LOGS_DIR_NAME']).mkdir(exist_ok=True)
+    (Path(output_dir) / config['CACHE_DIR_NAME']).mkdir(exist_ok=True)
+    (Path(output_dir) / config['LIB_DIR_NAME'] / 'bin').mkdir(exist_ok=True, parents=True)
+    (Path(output_dir) / config['PERSONAS_DIR_NAME'] / 'Default').mkdir(exist_ok=True, parents=True)

+ 30 - 0
archivebox/misc/debugging.py

@@ -0,0 +1,30 @@
+from functools import wraps
+from time import time
+
+def timed_function(func):
+    """
+    Very simple profiling decorator for debugging.
+    Usage:
+        @timed_function
+        def my_func():
+            ...
+    
+    More advanced alternatives:
+        - viztracer ../.venv/bin/archivebox manage check          # https://viztracer.readthedocs.io/en/latest/filter.html
+        - python -m cProfile -o archivebox.prof ../.venv/bin/archivebox manage check; snakeviz archivebox.prof
+        - Django Debug Toolbar + django-debug-toolbar-flamegraph
+        + Django Requests Tracker (requests-tracker)
+    """
+    @wraps(func)
+    def wrap(*args, **kwargs):
+        if args and hasattr(args[0], '__module__'):
+            module = args[0].__module__
+        else:
+            module = func.__module__
+        ts_start = time()
+        result = func(*args, **kwargs)
+        ts_end = time()
+        ms_elapsed = int((ts_end-ts_start) * 1000)
+        print(f'[DEBUG][{ms_elapsed}ms] {module}.{func.__name__}(...)')
+        return result
+    return wrap

+ 77 - 0
archivebox/misc/logging.py

@@ -0,0 +1,77 @@
+__package__ = 'archivebox.misc'
+
+# TODO: merge/dedupe this file with archivebox/logging_util.py
+
+import os
+import sys
+from typing import Optional, Union, Tuple, List
+from collections import defaultdict
+from benedict import benedict
+from rich.console import Console
+
+from ..config_stubs import ConfigDict
+
+SHOW_PROGRESS = None
+if os.environ.get('SHOW_PROGRESS', 'None') in ('True', '1', 'true', 'yes'):
+    SHOW_PROGRESS = True
+
+CONSOLE = Console(force_interactive=SHOW_PROGRESS)
+SHOW_PROGRESS = CONSOLE.is_interactive if SHOW_PROGRESS is None else SHOW_PROGRESS
+
+DEFAULT_CLI_COLORS = benedict(
+    {
+        "reset": "\033[00;00m",
+        "lightblue": "\033[01;30m",
+        "lightyellow": "\033[01;33m",
+        "lightred": "\033[01;35m",
+        "red": "\033[01;31m",
+        "green": "\033[01;32m",
+        "blue": "\033[01;34m",
+        "white": "\033[01;37m",
+        "black": "\033[01;30m",
+    }
+)
+ANSI = benedict({k: '' for k in DEFAULT_CLI_COLORS.keys()})
+
+COLOR_DICT = defaultdict(lambda: [(0, 0, 0), (0, 0, 0)], {
+    '00': [(0, 0, 0), (0, 0, 0)],
+    '30': [(0, 0, 0), (0, 0, 0)],
+    '31': [(255, 0, 0), (128, 0, 0)],
+    '32': [(0, 200, 0), (0, 128, 0)],
+    '33': [(255, 255, 0), (128, 128, 0)],
+    '34': [(0, 0, 255), (0, 0, 128)],
+    '35': [(255, 0, 255), (128, 0, 128)],
+    '36': [(0, 255, 255), (0, 128, 128)],
+    '37': [(255, 255, 255), (255, 255, 255)],
+})
+
+# Logging Helpers
+def stdout(*args, color: Optional[str]=None, prefix: str='', config: Optional[ConfigDict]=None) -> None:
+    ansi = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI
+
+    if color:
+        strs = [ansi[color], ' '.join(str(a) for a in args), ansi['reset'], '\n']
+    else:
+        strs = [' '.join(str(a) for a in args), '\n']
+
+    sys.stdout.write(prefix + ''.join(strs))
+
+def stderr(*args, color: Optional[str]=None, prefix: str='', config: Optional[ConfigDict]=None) -> None:
+    ansi = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI
+
+    if color:
+        strs = [ansi[color], ' '.join(str(a) for a in args), ansi['reset'], '\n']
+    else:
+        strs = [' '.join(str(a) for a in args), '\n']
+
+    sys.stderr.write(prefix + ''.join(strs))
+
+def hint(text: Union[Tuple[str, ...], List[str], str], prefix='    ', config: Optional[ConfigDict]=None) -> None:
+    ansi = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI
+
+    if isinstance(text, str):
+        stderr('{}{lightred}Hint:{reset} {}'.format(prefix, text, **ansi))
+    else:
+        stderr('{}{lightred}Hint:{reset} {}'.format(prefix, text[0], **ansi))
+        for line in text[1:]:
+            stderr('{}      {}'.format(prefix, line))

+ 0 - 1
archivebox/monkey_patches.py

@@ -10,7 +10,6 @@ import datetime
 from django.utils import timezone
 from django.utils import timezone
 timezone.utc = datetime.timezone.utc
 timezone.utc = datetime.timezone.utc
 
 
-
 # monkey patch django-signals-webhooks to change how it shows up in Admin UI
 # monkey patch django-signals-webhooks to change how it shows up in Admin UI
 # from signal_webhooks.apps import DjangoSignalWebhooksConfig
 # from signal_webhooks.apps import DjangoSignalWebhooksConfig
 # DjangoSignalWebhooksConfig.verbose_name = 'API'
 # DjangoSignalWebhooksConfig.verbose_name = 'API'

+ 3 - 3
archivebox/package-lock.json

@@ -371,9 +371,9 @@
       "license": "Apache-2.0"
       "license": "Apache-2.0"
     },
     },
     "node_modules/bare-events": {
     "node_modules/bare-events": {
-      "version": "2.4.2",
-      "resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.4.2.tgz",
-      "integrity": "sha512-qMKFd2qG/36aA4GwvKq8MxnPgCQAmBWmSyLWsJcbn8v03wvIPQ/hG1Ms8bPzndZxMDoHpxez5VOS+gC9Yi24/Q==",
+      "version": "2.5.0",
+      "resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.5.0.tgz",
+      "integrity": "sha512-/E8dDe9dsbLyh2qrZ64PEPadOQ0F4gbl1sUJOrmph7xOiIxfY8vwab/4bFLh4Y88/Hk/ujKcrQKc+ps0mv873A==",
       "license": "Apache-2.0",
       "license": "Apache-2.0",
       "optional": true
       "optional": true
     },
     },

+ 3 - 3
archivebox/plugantic/base_binary.py

@@ -3,6 +3,7 @@ __package__ = "archivebox.plugantic"
 from typing import Dict, List
 from typing import Dict, List
 from typing_extensions import Self
 from typing_extensions import Self
 
 
+from benedict import benedict
 from pydantic import Field, InstanceOf, validate_call
 from pydantic import Field, InstanceOf, validate_call
 from pydantic_pkgr import (
 from pydantic_pkgr import (
     Binary,
     Binary,
@@ -17,7 +18,6 @@ from pydantic_pkgr import (
 from django.conf import settings
 from django.conf import settings
 
 
 from .base_hook import BaseHook, HookType
 from .base_hook import BaseHook, HookType
-from ..config_stubs import AttrDict
 
 
 
 
 class BaseBinProvider(BaseHook, BinProvider):
 class BaseBinProvider(BaseHook, BinProvider):
@@ -38,7 +38,7 @@ class BaseBinProvider(BaseHook, BinProvider):
     def register(self, settings, parent_plugin=None):
     def register(self, settings, parent_plugin=None):
         # self._plugin = parent_plugin                                      # for debugging only, never rely on this!
         # self._plugin = parent_plugin                                      # for debugging only, never rely on this!
 
 
-        settings.BINPROVIDERS = getattr(settings, "BINPROVIDERS", None) or AttrDict({})
+        settings.BINPROVIDERS = getattr(settings, "BINPROVIDERS", None) or benedict({})
         settings.BINPROVIDERS[self.id] = self
         settings.BINPROVIDERS[self.id] = self
 
 
         super().register(settings, parent_plugin=parent_plugin)
         super().register(settings, parent_plugin=parent_plugin)
@@ -58,7 +58,7 @@ class BaseBinary(BaseHook, Binary):
     def register(self, settings, parent_plugin=None):
     def register(self, settings, parent_plugin=None):
         # self._plugin = parent_plugin                                      # for debugging only, never rely on this!
         # self._plugin = parent_plugin                                      # for debugging only, never rely on this!
 
 
-        settings.BINARIES = getattr(settings, "BINARIES", None) or AttrDict({})
+        settings.BINARIES = getattr(settings, "BINARIES", None) or benedict({})
         settings.BINARIES[self.id] = self
         settings.BINARIES[self.id] = self
 
 
         super().register(settings, parent_plugin=parent_plugin)
         super().register(settings, parent_plugin=parent_plugin)

+ 2 - 5
archivebox/plugantic/base_check.py

@@ -28,7 +28,7 @@ class BaseCheck(BaseHook):
     def register(self, settings, parent_plugin=None):
     def register(self, settings, parent_plugin=None):
         # self._plugin = parent_plugin  # backref to parent is for debugging only, never rely on this!
         # self._plugin = parent_plugin  # backref to parent is for debugging only, never rely on this!
 
 
-        self.register_with_django_check_system()  # (SIDE EFFECT)
+        self.register_with_django_check_system(settings)  # (SIDE EFFECT)
 
 
         # install hook into settings.CHECKS
         # install hook into settings.CHECKS
         settings.CHECKS = getattr(settings, "CHECKS", None) or AttrDict({})
         settings.CHECKS = getattr(settings, "CHECKS", None) or AttrDict({})
@@ -37,12 +37,9 @@ class BaseCheck(BaseHook):
         # record installed hook in settings.HOOKS
         # record installed hook in settings.HOOKS
         super().register(settings, parent_plugin=parent_plugin)
         super().register(settings, parent_plugin=parent_plugin)
 
 
-    def register_with_django_check_system(self):
-
+    def register_with_django_check_system(self, settings):
         def run_check(app_configs, **kwargs) -> List[Warning]:
         def run_check(app_configs, **kwargs) -> List[Warning]:
-            from django.conf import settings
             import logging
             import logging
-
             return self.check(settings, logging.getLogger("checks"))
             return self.check(settings, logging.getLogger("checks"))
 
 
         run_check.__name__ = self.id
         run_check.__name__ = self.id

+ 2 - 3
archivebox/plugantic/base_hook.py

@@ -96,14 +96,13 @@ class BaseHook(BaseModel):
         # e.g. /admin/environment/config/LdapConfig/
         # e.g. /admin/environment/config/LdapConfig/
         return f"/admin/environment/{self.hook_type.lower()}/{self.id}/"
         return f"/admin/environment/{self.hook_type.lower()}/{self.id}/"
 
 
-
     def register(self, settings, parent_plugin=None):
     def register(self, settings, parent_plugin=None):
         """Load a record of an installed hook into global Django settings.HOOKS at runtime."""
         """Load a record of an installed hook into global Django settings.HOOKS at runtime."""
         self._plugin = parent_plugin         # for debugging only, never rely on this!
         self._plugin = parent_plugin         # for debugging only, never rely on this!
 
 
         # assert json.dumps(self.model_json_schema(), indent=4), f"Hook {self.hook_module} has invalid JSON schema."
         # assert json.dumps(self.model_json_schema(), indent=4), f"Hook {self.hook_module} has invalid JSON schema."
 
 
-        print('  -', self.hook_module, '.register()')
+        # print('  -', self.hook_module, '.register()')
 
 
         # record installed hook in settings.HOOKS
         # record installed hook in settings.HOOKS
         settings.HOOKS[self.id] = self
         settings.HOOKS[self.id] = self
@@ -118,7 +117,7 @@ class BaseHook(BaseModel):
     def ready(self, settings):
     def ready(self, settings):
         """Runs any runtime code needed when AppConfig.ready() is called (after all models are imported)."""
         """Runs any runtime code needed when AppConfig.ready() is called (after all models are imported)."""
 
 
-        print('  -', self.hook_module, '.ready()')
+        # print('  -', self.hook_module, '.ready()')
 
 
         assert self.id in settings.HOOKS, f"Tried to ready hook {self.hook_module} but it is not registered in settings.HOOKS."
         assert self.id in settings.HOOKS, f"Tried to ready hook {self.hook_module} but it is not registered in settings.HOOKS."
 
 

+ 14 - 11
archivebox/plugantic/base_plugin.py

@@ -1,6 +1,5 @@
 __package__ = 'archivebox.plugantic'
 __package__ = 'archivebox.plugantic'
 
 
-import json
 import inspect
 import inspect
 from pathlib import Path
 from pathlib import Path
 
 
@@ -18,10 +17,11 @@ from pydantic import (
     computed_field,
     computed_field,
     validate_call,
     validate_call,
 )
 )
+from benedict import benedict
 
 
 from .base_hook import BaseHook, HookType
 from .base_hook import BaseHook, HookType
 
 
-from ..config import AttrDict
+from ..config import bump_startup_progress_bar
 
 
 
 
 class BasePlugin(BaseModel):
 class BasePlugin(BaseModel):
@@ -90,7 +90,8 @@ class BasePlugin(BaseModel):
         
         
         assert self.app_label and self.app_label and self.verbose_name, f'{self.__class__.__name__} is missing .name or .app_label or .verbose_name'
         assert self.app_label and self.app_label and self.verbose_name, f'{self.__class__.__name__} is missing .name or .app_label or .verbose_name'
         
         
-        assert json.dumps(self.model_json_schema(), indent=4), f"Plugin {self.plugin_module} has invalid JSON schema."
+        # assert json.dumps(self.model_json_schema(), indent=4), f"Plugin {self.plugin_module} has invalid JSON schema."
+        
         return self
         return self
     
     
     @property
     @property
@@ -114,13 +115,13 @@ class BasePlugin(BaseModel):
 
 
     @property
     @property
     def HOOKS_BY_ID(self) -> Dict[str, InstanceOf[BaseHook]]:
     def HOOKS_BY_ID(self) -> Dict[str, InstanceOf[BaseHook]]:
-        return AttrDict({hook.id: hook for hook in self.hooks})
+        return benedict({hook.id: hook for hook in self.hooks})
 
 
     @property
     @property
     def HOOKS_BY_TYPE(self) -> Dict[HookType, Dict[str, InstanceOf[BaseHook]]]:
     def HOOKS_BY_TYPE(self) -> Dict[HookType, Dict[str, InstanceOf[BaseHook]]]:
-        hooks = AttrDict({})
+        hooks = benedict({})
         for hook in self.hooks:
         for hook in self.hooks:
-            hooks[hook.hook_type] = hooks.get(hook.hook_type) or AttrDict({})
+            hooks[hook.hook_type] = hooks.get(hook.hook_type) or benedict({})
             hooks[hook.hook_type][hook.id] = hook
             hooks[hook.hook_type][hook.id] = hook
         return hooks
         return hooks
 
 
@@ -131,10 +132,10 @@ class BasePlugin(BaseModel):
             from django.conf import settings as django_settings
             from django.conf import settings as django_settings
             settings = django_settings
             settings = django_settings
             
             
-        print()
-        print(self.plugin_module_full, '.register()')
+        # print()
+        # print(self.plugin_module_full, '.register()')
 
 
-        assert json.dumps(self.model_json_schema(), indent=4), f'Plugin {self.plugin_module} has invalid JSON schema.'
+        # assert json.dumps(self.model_json_schema(), indent=4), f'Plugin {self.plugin_module} has invalid JSON schema.'
 
 
         assert self.id not in settings.PLUGINS, f'Tried to register plugin {self.plugin_module} but it conflicts with existing plugin of the same name ({self.app_label}).'
         assert self.id not in settings.PLUGINS, f'Tried to register plugin {self.plugin_module} but it conflicts with existing plugin of the same name ({self.app_label}).'
 
 
@@ -149,6 +150,7 @@ class BasePlugin(BaseModel):
 
 
         settings.PLUGINS[self.id]._is_registered = True
         settings.PLUGINS[self.id]._is_registered = True
         # print('√ REGISTERED PLUGIN:', self.plugin_module)
         # print('√ REGISTERED PLUGIN:', self.plugin_module)
+        bump_startup_progress_bar()
 
 
     def ready(self, settings=None):
     def ready(self, settings=None):
         """Runs any runtime code needed when AppConfig.ready() is called (after all models are imported)."""
         """Runs any runtime code needed when AppConfig.ready() is called (after all models are imported)."""
@@ -157,8 +159,8 @@ class BasePlugin(BaseModel):
             from django.conf import settings as django_settings
             from django.conf import settings as django_settings
             settings = django_settings
             settings = django_settings
 
 
-        print()
-        print(self.plugin_module_full, '.ready()')
+        # print()
+        # print(self.plugin_module_full, '.ready()')
 
 
         assert (
         assert (
             self.id in settings.PLUGINS and settings.PLUGINS[self.id]._is_registered
             self.id in settings.PLUGINS and settings.PLUGINS[self.id]._is_registered
@@ -171,6 +173,7 @@ class BasePlugin(BaseModel):
             hook.ready(settings)
             hook.ready(settings)
         
         
         settings.PLUGINS[self.id]._is_ready = True
         settings.PLUGINS[self.id]._is_ready = True
+        bump_startup_progress_bar()
 
 
     # @validate_call
     # @validate_call
     # def install_binaries(self) -> Self:
     # def install_binaries(self) -> Self:

+ 0 - 335
archivebox/plugantic/ini_to_toml.py

@@ -83,338 +83,3 @@ class JSONSchemaWithLambdas(GenerateJsonSchema):
     # for computed_field properties render them like this instead:
     # for computed_field properties render them like this instead:
     # inspect.getsource(field.wrapped_property.fget).split('def ', 1)[-1].split('\n', 1)[-1].strip().strip('return '),
     # inspect.getsource(field.wrapped_property.fget).split('def ', 1)[-1].split('\n', 1)[-1].strip().strip('return '),
 
 
-
-
-### Basic Assertions
-
-# test_input = """
-# [SERVER_CONFIG]
-# IS_TTY=False
-# USE_COLOR=False
-# SHOW_PROGRESS=False
-# IN_DOCKER=False
-# IN_QEMU=False
-# PUID=501
-# PGID=20
-# OUTPUT_DIR=/opt/archivebox/data
-# CONFIG_FILE=/opt/archivebox/data/ArchiveBox.conf
-# ONLY_NEW=True
-# TIMEOUT=60
-# MEDIA_TIMEOUT=3600
-# OUTPUT_PERMISSIONS=644
-# RESTRICT_FILE_NAMES=windows
-# URL_DENYLIST=\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$
-# URL_ALLOWLIST=None
-# ADMIN_USERNAME=None
-# ADMIN_PASSWORD=None
-# ENFORCE_ATOMIC_WRITES=True
-# TAG_SEPARATOR_PATTERN=[,]
-# SECRET_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
-# BIND_ADDR=127.0.0.1:8000
-# ALLOWED_HOSTS=*
-# DEBUG=False
-# PUBLIC_INDEX=True
-# PUBLIC_SNAPSHOTS=True
-# PUBLIC_ADD_VIEW=False
-# FOOTER_INFO=Content is hosted for personal archiving purposes only.  Contact server owner for any takedown requests.
-# SNAPSHOTS_PER_PAGE=40
-# CUSTOM_TEMPLATES_DIR=None
-# TIME_ZONE=UTC
-# TIMEZONE=UTC
-# REVERSE_PROXY_USER_HEADER=Remote-User
-# REVERSE_PROXY_WHITELIST=
-# LOGOUT_REDIRECT_URL=/
-# PREVIEW_ORIGINALS=True
-# LDAP=False
-# LDAP_SERVER_URI=None
-# LDAP_BIND_DN=None
-# LDAP_BIND_PASSWORD=None
-# LDAP_USER_BASE=None
-# LDAP_USER_FILTER=None
-# LDAP_USERNAME_ATTR=None
-# LDAP_FIRSTNAME_ATTR=None
-# LDAP_LASTNAME_ATTR=None
-# LDAP_EMAIL_ATTR=None
-# LDAP_CREATE_SUPERUSER=False
-# SAVE_TITLE=True
-# SAVE_FAVICON=True
-# SAVE_WGET=True
-# SAVE_WGET_REQUISITES=True
-# SAVE_SINGLEFILE=True
-# SAVE_READABILITY=True
-# SAVE_MERCURY=True
-# SAVE_HTMLTOTEXT=True
-# SAVE_PDF=True
-# SAVE_SCREENSHOT=True
-# SAVE_DOM=True
-# SAVE_HEADERS=True
-# SAVE_WARC=True
-# SAVE_GIT=True
-# SAVE_MEDIA=True
-# SAVE_ARCHIVE_DOT_ORG=True
-# RESOLUTION=1440,2000
-# GIT_DOMAINS=github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht
-# CHECK_SSL_VALIDITY=True
-# MEDIA_MAX_SIZE=750m
-# USER_AGENT=None
-# CURL_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) curl/curl 8.4.0 (x86_64-apple-darwin23.0)
-# WGET_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) wget/GNU Wget 1.24.5
-# CHROME_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/)
-# COOKIES_FILE=None
-# CHROME_USER_DATA_DIR=None
-# CHROME_TIMEOUT=0
-# CHROME_HEADLESS=True
-# CHROME_SANDBOX=True
-# CHROME_EXTRA_ARGS=[]
-# YOUTUBEDL_ARGS=['--restrict-filenames', '--trim-filenames', '128', '--write-description', '--write-info-json', '--write-annotations', '--write-thumbnail', '--no-call-home', '--write-sub', '--write-auto-subs', '--convert-subs=srt', '--yes-playlist', '--continue', '--no-abort-on-error', '--ignore-errors', '--geo-bypass', '--add-metadata', '--format=(bv*+ba/b)[filesize<=750m][filesize_approx<=?750m]/(bv*+ba/b)']
-# YOUTUBEDL_EXTRA_ARGS=[]
-# WGET_ARGS=['--no-verbose', '--adjust-extension', '--convert-links', '--force-directories', '--backup-converted', '--span-hosts', '--no-parent', '-e', 'robots=off']
-# WGET_EXTRA_ARGS=[]
-# CURL_ARGS=['--silent', '--location', '--compressed']
-# CURL_EXTRA_ARGS=[]
-# GIT_ARGS=['--recursive']
-# SINGLEFILE_ARGS=[]
-# SINGLEFILE_EXTRA_ARGS=[]
-# MERCURY_ARGS=['--format=text']
-# MERCURY_EXTRA_ARGS=[]
-# FAVICON_PROVIDER=https://www.google.com/s2/favicons?domain={}
-# USE_INDEXING_BACKEND=True
-# USE_SEARCHING_BACKEND=True
-# SEARCH_BACKEND_ENGINE=ripgrep
-# SEARCH_BACKEND_HOST_NAME=localhost
-# SEARCH_BACKEND_PORT=1491
-# SEARCH_BACKEND_PASSWORD=SecretPassword
-# SEARCH_PROCESS_HTML=True
-# SONIC_COLLECTION=archivebox
-# SONIC_BUCKET=snapshots
-# SEARCH_BACKEND_TIMEOUT=90
-# FTS_SEPARATE_DATABASE=True
-# FTS_TOKENIZERS=porter unicode61 remove_diacritics 2
-# FTS_SQLITE_MAX_LENGTH=1000000000
-# USE_CURL=True
-# USE_WGET=True
-# USE_SINGLEFILE=True
-# USE_READABILITY=True
-# USE_MERCURY=True
-# USE_GIT=True
-# USE_CHROME=True
-# USE_NODE=True
-# USE_YOUTUBEDL=True
-# USE_RIPGREP=True
-# CURL_BINARY=curl
-# GIT_BINARY=git
-# WGET_BINARY=wget
-# SINGLEFILE_BINARY=single-file
-# READABILITY_BINARY=readability-extractor
-# MERCURY_BINARY=postlight-parser
-# YOUTUBEDL_BINARY=yt-dlp
-# NODE_BINARY=node
-# RIPGREP_BINARY=rg
-# CHROME_BINARY=chrome
-# POCKET_CONSUMER_KEY=None
-# USER=squash
-# PACKAGE_DIR=/opt/archivebox/archivebox
-# TEMPLATES_DIR=/opt/archivebox/archivebox/templates
-# ARCHIVE_DIR=/opt/archivebox/data/archive
-# SOURCES_DIR=/opt/archivebox/data/sources
-# LOGS_DIR=/opt/archivebox/data/logs
-# PERSONAS_DIR=/opt/archivebox/data/personas
-# URL_DENYLIST_PTN=re.compile('\\.(css|js|otf|ttf|woff|woff2|gstatic\\.com|googleapis\\.com/css)(\\?.*)?$', re.IGNORECASE|re.MULTILINE)
-# URL_ALLOWLIST_PTN=None
-# DIR_OUTPUT_PERMISSIONS=755
-# ARCHIVEBOX_BINARY=/opt/archivebox/.venv/bin/archivebox
-# VERSION=0.8.0
-# COMMIT_HASH=102e87578c6036bb0132dd1ebd17f8f05ffc880f
-# BUILD_TIME=2024-05-15 03:28:05 1715768885
-# VERSIONS_AVAILABLE=None
-# CAN_UPGRADE=False
-# PYTHON_BINARY=/opt/archivebox/.venv/bin/python3.10
-# PYTHON_ENCODING=UTF-8
-# PYTHON_VERSION=3.10.14
-# DJANGO_BINARY=/opt/archivebox/.venv/lib/python3.10/site-packages/django/__init__.py
-# DJANGO_VERSION=5.0.6 final (0)
-# SQLITE_BINARY=/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/sqlite3/dbapi2.py
-# SQLITE_VERSION=2.6.0
-# CURL_VERSION=curl 8.4.0 (x86_64-apple-darwin23.0)
-# WGET_VERSION=GNU Wget 1.24.5
-# WGET_AUTO_COMPRESSION=True
-# RIPGREP_VERSION=ripgrep 14.1.0
-# SINGLEFILE_VERSION=None
-# READABILITY_VERSION=None
-# MERCURY_VERSION=None
-# GIT_VERSION=git version 2.44.0
-# YOUTUBEDL_VERSION=2024.04.09
-# CHROME_VERSION=Google Chrome 124.0.6367.207
-# NODE_VERSION=v21.7.3
-# """
-
-
-# expected_output = TOML_HEADER + '''[SERVER_CONFIG]
-# IS_TTY = false
-# USE_COLOR = false
-# SHOW_PROGRESS = false
-# IN_DOCKER = false
-# IN_QEMU = false
-# PUID = 501
-# PGID = 20
-# OUTPUT_DIR = "/opt/archivebox/data"
-# CONFIG_FILE = "/opt/archivebox/data/ArchiveBox.conf"
-# ONLY_NEW = true
-# TIMEOUT = 60
-# MEDIA_TIMEOUT = 3600
-# OUTPUT_PERMISSIONS = 644
-# RESTRICT_FILE_NAMES = "windows"
-# URL_DENYLIST = "\\\\.(css|js|otf|ttf|woff|woff2|gstatic\\\\.com|googleapis\\\\.com/css)(\\\\?.*)?$"
-# URL_ALLOWLIST = null
-# ADMIN_USERNAME = null
-# ADMIN_PASSWORD = null
-# ENFORCE_ATOMIC_WRITES = true
-# TAG_SEPARATOR_PATTERN = "[,]"
-# SECRET_KEY = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
-# BIND_ADDR = "127.0.0.1:8000"
-# ALLOWED_HOSTS = "*"
-# DEBUG = false
-# PUBLIC_INDEX = true
-# PUBLIC_SNAPSHOTS = true
-# PUBLIC_ADD_VIEW = false
-# FOOTER_INFO = "Content is hosted for personal archiving purposes only.  Contact server owner for any takedown requests."
-# SNAPSHOTS_PER_PAGE = 40
-# CUSTOM_TEMPLATES_DIR = null
-# TIME_ZONE = "UTC"
-# TIMEZONE = "UTC"
-# REVERSE_PROXY_USER_HEADER = "Remote-User"
-# REVERSE_PROXY_WHITELIST = ""
-# LOGOUT_REDIRECT_URL = "/"
-# PREVIEW_ORIGINALS = true
-# LDAP = false
-# LDAP_SERVER_URI = null
-# LDAP_BIND_DN = null
-# LDAP_BIND_PASSWORD = null
-# LDAP_USER_BASE = null
-# LDAP_USER_FILTER = null
-# LDAP_USERNAME_ATTR = null
-# LDAP_FIRSTNAME_ATTR = null
-# LDAP_LASTNAME_ATTR = null
-# LDAP_EMAIL_ATTR = null
-# LDAP_CREATE_SUPERUSER = false
-# SAVE_TITLE = true
-# SAVE_FAVICON = true
-# SAVE_WGET = true
-# SAVE_WGET_REQUISITES = true
-# SAVE_SINGLEFILE = true
-# SAVE_READABILITY = true
-# SAVE_MERCURY = true
-# SAVE_HTMLTOTEXT = true
-# SAVE_PDF = true
-# SAVE_SCREENSHOT = true
-# SAVE_DOM = true
-# SAVE_HEADERS = true
-# SAVE_WARC = true
-# SAVE_GIT = true
-# SAVE_MEDIA = true
-# SAVE_ARCHIVE_DOT_ORG = true
-# RESOLUTION = [1440, 2000]
-# GIT_DOMAINS = "github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht"
-# CHECK_SSL_VALIDITY = true
-# MEDIA_MAX_SIZE = "750m"
-# USER_AGENT = null
-# CURL_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) curl/curl 8.4.0 (x86_64-apple-darwin23.0)"
-# WGET_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) wget/GNU Wget 1.24.5"
-# CHROME_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/)"
-# COOKIES_FILE = null
-# CHROME_USER_DATA_DIR = null
-# CHROME_TIMEOUT = false
-# CHROME_HEADLESS = true
-# CHROME_SANDBOX = true
-# CHROME_EXTRA_ARGS = []
-# YOUTUBEDL_ARGS = ["--restrict-filenames", "--trim-filenames", "128", "--write-description", "--write-info-json", "--write-annotations", "--write-thumbnail", "--no-call-home", "--write-sub", "--write-auto-subs", "--convert-subs=srt", "--yes-playlist", "--continue", "--no-abort-on-error", "--ignore-errors", "--geo-bypass", "--add-metadata", "--format=(bv*+ba/b)[filesize<=750m][filesize_approx<=?750m]/(bv*+ba/b)"]
-# YOUTUBEDL_EXTRA_ARGS = []
-# WGET_ARGS = ["--no-verbose", "--adjust-extension", "--convert-links", "--force-directories", "--backup-converted", "--span-hosts", "--no-parent", "-e", "robots=off"]
-# WGET_EXTRA_ARGS = []
-# CURL_ARGS = ["--silent", "--location", "--compressed"]
-# CURL_EXTRA_ARGS = []
-# GIT_ARGS = ["--recursive"]
-# SINGLEFILE_ARGS = []
-# SINGLEFILE_EXTRA_ARGS = []
-# MERCURY_ARGS = ["--format=text"]
-# MERCURY_EXTRA_ARGS = []
-# FAVICON_PROVIDER = "https://www.google.com/s2/favicons?domain={}"
-# USE_INDEXING_BACKEND = true
-# USE_SEARCHING_BACKEND = true
-# SEARCH_BACKEND_ENGINE = "ripgrep"
-# SEARCH_BACKEND_HOST_NAME = "localhost"
-# SEARCH_BACKEND_PORT = 1491
-# SEARCH_BACKEND_PASSWORD = "SecretPassword"
-# SEARCH_PROCESS_HTML = true
-# SONIC_COLLECTION = "archivebox"
-# SONIC_BUCKET = "snapshots"
-# SEARCH_BACKEND_TIMEOUT = 90
-# FTS_SEPARATE_DATABASE = true
-# FTS_TOKENIZERS = "porter unicode61 remove_diacritics 2"
-# FTS_SQLITE_MAX_LENGTH = 1000000000
-# USE_CURL = true
-# USE_WGET = true
-# USE_SINGLEFILE = true
-# USE_READABILITY = true
-# USE_MERCURY = true
-# USE_GIT = true
-# USE_CHROME = true
-# USE_NODE = true
-# USE_YOUTUBEDL = true
-# USE_RIPGREP = true
-# CURL_BINARY = "curl"
-# GIT_BINARY = "git"
-# WGET_BINARY = "wget"
-# SINGLEFILE_BINARY = "single-file"
-# READABILITY_BINARY = "readability-extractor"
-# MERCURY_BINARY = "postlight-parser"
-# YOUTUBEDL_BINARY = "yt-dlp"
-# NODE_BINARY = "node"
-# RIPGREP_BINARY = "rg"
-# CHROME_BINARY = "chrome"
-# POCKET_CONSUMER_KEY = null
-# USER = "squash"
-# PACKAGE_DIR = "/opt/archivebox/archivebox"
-# TEMPLATES_DIR = "/opt/archivebox/archivebox/templates"
-# ARCHIVE_DIR = "/opt/archivebox/data/archive"
-# SOURCES_DIR = "/opt/archivebox/data/sources"
-# LOGS_DIR = "/opt/archivebox/data/logs"
-# PERSONAS_DIR = "/opt/archivebox/data/personas"
-# URL_DENYLIST_PTN = "re.compile(\'\\\\.(css|js|otf|ttf|woff|woff2|gstatic\\\\.com|googleapis\\\\.com/css)(\\\\?.*)?$\', re.IGNORECASE|re.MULTILINE)"
-# URL_ALLOWLIST_PTN = null
-# DIR_OUTPUT_PERMISSIONS = 755
-# ARCHIVEBOX_BINARY = "/opt/archivebox/.venv/bin/archivebox"
-# VERSION = "0.8.0"
-# COMMIT_HASH = "102e87578c6036bb0132dd1ebd17f8f05ffc880f"
-# BUILD_TIME = "2024-05-15 03:28:05 1715768885"
-# VERSIONS_AVAILABLE = null
-# CAN_UPGRADE = false
-# PYTHON_BINARY = "/opt/archivebox/.venv/bin/python3.10"
-# PYTHON_ENCODING = "UTF-8"
-# PYTHON_VERSION = "3.10.14"
-# DJANGO_BINARY = "/opt/archivebox/.venv/lib/python3.10/site-packages/django/__init__.py"
-# DJANGO_VERSION = "5.0.6 final (0)"
-# SQLITE_BINARY = "/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/sqlite3/dbapi2.py"
-# SQLITE_VERSION = "2.6.0"
-# CURL_VERSION = "curl 8.4.0 (x86_64-apple-darwin23.0)"
-# WGET_VERSION = "GNU Wget 1.24.5"
-# WGET_AUTO_COMPRESSION = true
-# RIPGREP_VERSION = "ripgrep 14.1.0"
-# SINGLEFILE_VERSION = null
-# READABILITY_VERSION = null
-# MERCURY_VERSION = null
-# GIT_VERSION = "git version 2.44.0"
-# YOUTUBEDL_VERSION = "2024.04.09"
-# CHROME_VERSION = "Google Chrome 124.0.6367.207"
-# NODE_VERSION = "v21.7.3"'''
-
-
-# first_output = convert(test_input)      # make sure ini -> toml parses correctly
-# second_output = convert(first_output)   # make sure toml -> toml parses/dumps consistently
-# assert first_output == second_output == expected_output  # make sure parsing is idempotent
-
-# # DEBUGGING
-# import sys
-# import difflib
-# sys.stdout.writelines(difflib.context_diff(first_output, second_output, fromfile='first', tofile='second'))
-# print(repr(second_output))

+ 9 - 31
archivebox/plugins_extractor/chrome/apps.py

@@ -1,3 +1,5 @@
+__package__ = 'archivebox.plugins_extractor.chrome'
+
 import platform
 import platform
 from pathlib import Path
 from pathlib import Path
 from typing import List, Optional, Dict, ClassVar
 from typing import List, Optional, Dict, ClassVar
@@ -77,40 +79,16 @@ def create_macos_app_symlink(target: Path, shortcut: Path):
 ###################### Config ##########################
 ###################### Config ##########################
 
 
 
 
-class ChromeDependencyConfigs(BaseConfigSet):
+class ChromeConfig(BaseConfigSet):
     section: ClassVar[ConfigSectionName] = "DEPENDENCY_CONFIG"
     section: ClassVar[ConfigSectionName] = "DEPENDENCY_CONFIG"
 
 
-    CHROME_BINARY: str = Field(default='chrome')
-    CHROME_ARGS: Optional[List[str]] = Field(default=None)
-    CHROME_EXTRA_ARGS: List[str] = []
-    CHROME_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
-    
-    # def load(self) -> Self:
-    #     # for each field in the model, load its value
-    #     # load from each source in order of precedence (lowest to highest):
-    #     # - schema default
-    #     # - ArchiveBox.conf INI file
-    #     # - environment variables
-    #     # - command-line arguments
-        
-    #     LOADED_VALUES: Dict[str, Any] = {}
-
-    #     for field_name, field in self.__fields__.items():
-    #         def_value   = field.default_factory() if field.default_factory else field.default
-    #         ini_value   = settings.INI_CONFIG.get_value(field_name)
-    #         env_value   = settings.ENV_CONFIG.get_value(field_name)
-    #         cli_value   = settings.CLI_CONFIG.get_value(field_name)
-    #         run_value   = settings.RUN_CONFIG.get_value(field_name)
-    #         value = run_value or cli_value or env_value or ini_value or def_value
-
-class ChromeConfigs(ChromeDependencyConfigs):
-    # section: ConfigSectionName = 'ALL_CONFIGS'
-    pass
+    CHROME_BINARY: str                      = Field(default='chrome')
+    CHROME_ARGS: List[str] | None           = Field(default=None)
+    CHROME_EXTRA_ARGS: List[str]            = Field(default=[])
+    CHROME_DEFAULT_ARGS: List[str]          = Field(default=lambda: ['--timeout={TIMEOUT-10}'])
 
 
-DEFAULT_GLOBAL_CONFIG = {
-}
 
 
-CHROME_CONFIG = ChromeConfigs(**DEFAULT_GLOBAL_CONFIG)
+CHROME_CONFIG = ChromeConfig()
 
 
 
 
 class ChromeBinary(BaseBinary):
 class ChromeBinary(BaseBinary):
@@ -133,6 +111,7 @@ class ChromeBinary(BaseBinary):
     def symlink_to_lib(binary, bin_dir=settings.CONFIG.BIN_DIR) -> None:
     def symlink_to_lib(binary, bin_dir=settings.CONFIG.BIN_DIR) -> None:
         if not (binary.abspath and binary.abspath.exists()):
         if not (binary.abspath and binary.abspath.exists()):
             return
             return
+        
         bin_dir.mkdir(parents=True, exist_ok=True)
         bin_dir.mkdir(parents=True, exist_ok=True)
         symlink = bin_dir / binary.name
         symlink = bin_dir / binary.name
         
         
@@ -146,7 +125,6 @@ class ChromeBinary(BaseBinary):
 
 
 CHROME_BINARY = ChromeBinary()
 CHROME_BINARY = ChromeBinary()
 
 
-PLUGIN_BINARIES = [CHROME_BINARY]
 
 
 class ChromePlugin(BasePlugin):
 class ChromePlugin(BasePlugin):
     app_label: str = 'chrome'
     app_label: str = 'chrome'

+ 1 - 0
archivebox/plugins_pkg/pip/apps.py

@@ -149,6 +149,7 @@ class CheckUserIsNotRoot(BaseCheck):
             )
             )
         logger.debug('[√] UID is not root')
         logger.debug('[√] UID is not root')
         return errors
         return errors
+
     
     
 class CheckPipEnvironment(BaseCheck):
 class CheckPipEnvironment(BaseCheck):
     label: str = "CheckPipEnvironment"
     label: str = "CheckPipEnvironment"

+ 1 - 1
archivebox/search/__init__.py

@@ -14,7 +14,7 @@ from .utils import get_indexable_content, log_index_started
 
 
 
 
 def import_backend():
 def import_backend():
-    for backend in settings.SEARCH_BACKENDS:
+    for backend in settings.SEARCH_BACKENDS.values():
         if backend.name == settings.CONFIGS.SearchBackendConfig.SEARCH_BACKEND_ENGINE:
         if backend.name == settings.CONFIGS.SearchBackendConfig.SEARCH_BACKEND_ENGINE:
             return backend
             return backend
     raise Exception(f'Could not load {settings.CONFIGS.SearchBackendConfig.SEARCH_BACKEND_ENGINE} as search backend')
     raise Exception(f'Could not load {settings.CONFIGS.SearchBackendConfig.SEARCH_BACKEND_ENGINE} as search backend')