v0.8.6-rc: Moving plugins to independent python packages with finite state machine interfaces (#1576)

Nick Sweeting, 1 year ago
commit b7b3addbab
100 changed files with 1998 additions and 1414 deletions
  1. 1 1
      .github/workflows/test.yml
  2. 0 6
      .gitmodules
  3. 1 1
      archivebox/.flake8
  4. 46 3
      archivebox/__init__.py
  5. 1 1
      archivebox/abid_utils/models.py
  6. 0 131
      archivebox/abx/__init__.py
  7. 0 30
      archivebox/abx/archivebox/__init__.py
  8. 0 106
      archivebox/abx/archivebox/base_binary.py
  9. 0 219
      archivebox/abx/archivebox/base_extractor.py
  10. 0 25
      archivebox/abx/archivebox/base_replayer.py
  11. 0 25
      archivebox/abx/archivebox/base_searchbackend.py
  12. 0 52
      archivebox/abx/archivebox/hookspec.py
  13. 0 160
      archivebox/abx/archivebox/reads.py
  14. 0 1
      archivebox/abx/django/__init__.py
  15. 0 13
      archivebox/abx/django/apps.py
  16. 0 125
      archivebox/abx/django/hookspec.py
  17. 0 101
      archivebox/abx/django/use.py
  18. 0 22
      archivebox/abx/hookspec.py
  19. 0 30
      archivebox/abx/manager.py
  20. 0 1
      archivebox/abx/pydantic_pkgr/__init__.py
  21. 0 13
      archivebox/abx/pydantic_pkgr/hookspec.py
  22. 0 0
      archivebox/actors/__init__.py
  23. 313 0
      archivebox/actors/actor.py
  24. 3 0
      archivebox/actors/admin.py
  25. 6 0
      archivebox/actors/apps.py
  26. 0 0
      archivebox/actors/migrations/__init__.py
  27. 3 0
      archivebox/actors/models.py
  28. 244 0
      archivebox/actors/orchestrator.py
  29. 286 0
      archivebox/actors/statemachine.py
  30. 3 0
      archivebox/actors/tests.py
  31. 3 0
      archivebox/actors/views.py
  32. 24 30
      archivebox/config/__init__.py
  33. 10 10
      archivebox/config/collection.py
  34. 1 3
      archivebox/config/common.py
  35. 17 2
      archivebox/config/constants.py
  36. 2 2
      archivebox/config/django.py
  37. 8 4
      archivebox/config/version.py
  38. 12 16
      archivebox/config/views.py
  39. 29 0
      archivebox/core/__init__.py
  40. 73 0
      archivebox/core/actors.py
  41. 2 2
      archivebox/core/admin_archiveresults.py
  42. 2 2
      archivebox/core/admin_site.py
  43. 4 9
      archivebox/core/apps.py
  44. 61 10
      archivebox/core/models.py
  45. 19 58
      archivebox/core/settings.py
  46. 0 5
      archivebox/core/settings_logging.py
  47. 115 0
      archivebox/core/statemachines.py
  48. 25 18
      archivebox/core/views.py
  49. 69 0
      archivebox/crawls/actors.py
  50. 48 5
      archivebox/crawls/models.py
  51. 48 0
      archivebox/crawls/statemachines.py
  52. 16 30
      archivebox/extractors/__init__.py
  53. 7 4
      archivebox/index/html.py
  54. 3 4
      archivebox/index/json.py
  55. 6 3
      archivebox/index/schema.py
  56. 15 14
      archivebox/machine/models.py
  57. 33 30
      archivebox/main.py
  58. 3 0
      archivebox/misc/checks.py
  59. 1 2
      archivebox/misc/shell_welcome_message.py
  60. 20 10
      archivebox/misc/util.py
  61. 2 5
      archivebox/parsers/generic_jsonl.py
  62. 6 5
      archivebox/parsers/pocket_api.py
  63. 16 9
      archivebox/parsers/readwise_reader_api.py
  64. 39 0
      archivebox/pkgs/__init__.py
  65. 0 0
      archivebox/pkgs/abx-plugin-archivedotorg/README.md
  66. 21 0
      archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/__init__.py
  67. 0 0
      archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/archive_org.py
  68. 1 4
      archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/config.py
  69. 18 0
      archivebox/pkgs/abx-plugin-archivedotorg/pyproject.toml
  70. 0 0
      archivebox/pkgs/abx-plugin-chrome/README.md
  71. 34 0
      archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/__init__.py
  72. 23 21
      archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/binaries.py
  73. 13 13
      archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/config.py
  74. 0 0
      archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/dom.py
  75. 0 0
      archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/pdf.py
  76. 0 0
      archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/screenshot.py
  77. 18 0
      archivebox/pkgs/abx-plugin-chrome/pyproject.toml
  78. 0 0
      archivebox/pkgs/abx-plugin-curl/README.md
  79. 18 0
      archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/__init__.py
  80. 4 4
      archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/binaries.py
  81. 2 2
      archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/config.py
  82. 0 0
      archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/headers.py
  83. 18 0
      archivebox/pkgs/abx-plugin-curl/pyproject.toml
  84. 0 0
      archivebox/pkgs/abx-plugin-default-binproviders/README.md
  85. 23 0
      archivebox/pkgs/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py
  86. 18 0
      archivebox/pkgs/abx-plugin-default-binproviders/pyproject.toml
  87. 0 0
      archivebox/pkgs/abx-plugin-favicon/README.md
  88. 29 0
      archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/__init__.py
  89. 1 4
      archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/config.py
  90. 0 0
      archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/favicon.py
  91. 18 0
      archivebox/pkgs/abx-plugin-favicon/pyproject.toml
  92. 0 0
      archivebox/pkgs/abx-plugin-git/README.md
  93. 29 0
      archivebox/pkgs/abx-plugin-git/abx_plugin_git/__init__.py
  94. 4 4
      archivebox/pkgs/abx-plugin-git/abx_plugin_git/binaries.py
  95. 2 2
      archivebox/pkgs/abx-plugin-git/abx_plugin_git/config.py
  96. 15 0
      archivebox/pkgs/abx-plugin-git/abx_plugin_git/extractors.py
  97. 2 2
      archivebox/pkgs/abx-plugin-git/abx_plugin_git/git.py
  98. 19 0
      archivebox/pkgs/abx-plugin-git/pyproject.toml
  99. 0 0
      archivebox/pkgs/abx-plugin-htmltotext/README.md
  100. 22 0
      archivebox/pkgs/abx-plugin-htmltotext/abx_plugin_htmltotext/__init__.py

+ 1 - 1
.github/workflows/test.yml

@@ -102,7 +102,7 @@ jobs:
         # TODO: remove this exception for windows once we get tests passing on that platform
         if: ${{ !contains(matrix.os, 'windows') }}
         run: |
-          python -m pytest -s --basetemp=tests/out --ignore=archivebox/vendor --ignore=deb_dist --ignore=pip_dist --ignore=brew_dist
+          python -m pytest -s --basetemp=tests/out --ignore=archivebox/pkgs
 
   docker_tests:
     runs-on: ubuntu-latest

+ 0 - 6
.gitmodules

@@ -1,9 +1,3 @@
 [submodule "docs"]
     path = docs
     url = https://github.com/ArchiveBox/ArchiveBox.wiki.git
-[submodule "archivebox/vendor/pocket"]
-	path = archivebox/vendor/pocket
-	url = https://github.com/tapanpandita/pocket
-[submodule "archivebox/vendor/pydantic-pkgr"]
-	path = archivebox/vendor/pydantic-pkgr
-	url = https://github.com/ArchiveBox/pydantic-pkgr

+ 1 - 1
archivebox/.flake8

@@ -3,4 +3,4 @@ ignore = D100,D101,D102,D103,D104,D105,D202,D203,D205,D400,E131,E241,E252,E266,E
 select = F,E9,W
 max-line-length = 130
 max-complexity = 10
-exclude = migrations,tests,node_modules,vendor,venv,.venv,.venv2,.docker-venv
+exclude = migrations,tests,node_modules,vendor,venv,.venv,.venv2,.docker-venv,data,data*

+ 46 - 3
archivebox/__init__.py

@@ -13,8 +13,8 @@ __package__ = 'archivebox'
 
 import os
 import sys
-
 from pathlib import Path
+from typing import cast
 
 ASCII_LOGO = """
  █████╗ ██████╗  ██████╗██╗  ██╗██╗██╗   ██╗███████╗ ██████╗  ██████╗ ██╗  ██╗
@@ -47,11 +47,54 @@ from .monkey_patches import *                    # noqa
 
 
 # print('LOADING VENDORED LIBRARIES')
-from .vendor import load_vendored_libs           # noqa
-load_vendored_libs()
+from .pkgs import load_vendored_pkgs             # noqa
+load_vendored_pkgs()
 # print('DONE LOADING VENDORED LIBRARIES')
 
+# Load ABX Plugin Specifications + Default Implementations
+import abx                                       # noqa
+import abx_spec_archivebox                       # noqa
+import abx_spec_config                           # noqa
+import abx_spec_pydantic_pkgr                    # noqa
+import abx_spec_django                           # noqa
+import abx_spec_searchbackend                    # noqa
+
+abx.pm.add_hookspecs(abx_spec_config.PLUGIN_SPEC)
+abx.pm.register(abx_spec_config.PLUGIN_SPEC())
+
+abx.pm.add_hookspecs(abx_spec_pydantic_pkgr.PLUGIN_SPEC)
+abx.pm.register(abx_spec_pydantic_pkgr.PLUGIN_SPEC())
+
+abx.pm.add_hookspecs(abx_spec_django.PLUGIN_SPEC)
+abx.pm.register(abx_spec_django.PLUGIN_SPEC())
+
+abx.pm.add_hookspecs(abx_spec_searchbackend.PLUGIN_SPEC)
+abx.pm.register(abx_spec_searchbackend.PLUGIN_SPEC())
+
+# Cast to ArchiveBoxPluginSpec to enable static type checking of pm.hook.call() methods
+abx.pm = cast(abx.ABXPluginManager[abx_spec_archivebox.ArchiveBoxPluginSpec], abx.pm)
+pm = abx.pm
+
+
+# Load all pip-installed ABX-compatible plugins
+ABX_ECOSYSTEM_PLUGINS = abx.get_pip_installed_plugins(group='abx')
+
+# Load all built-in ArchiveBox plugins
+ARCHIVEBOX_BUILTIN_PLUGINS = {
+    'config': PACKAGE_DIR / 'config',
+    'core': PACKAGE_DIR / 'core',
+    # 'search': PACKAGE_DIR / 'search',
+    # 'core': PACKAGE_DIR / 'core',
+}
+
+# Load all user-defined ArchiveBox plugins
+USER_PLUGINS = abx.find_plugins_in_dir(Path(os.getcwd()) / 'user_plugins')
+
+# Import all plugins and register them with ABX Plugin Manager
+ALL_PLUGINS = {**ABX_ECOSYSTEM_PLUGINS, **ARCHIVEBOX_BUILTIN_PLUGINS, **USER_PLUGINS}
+LOADED_PLUGINS = abx.load_plugins(ALL_PLUGINS)
 
+# Setup basic config, constants, paths, and version
 from .config.constants import CONSTANTS                         # noqa
 from .config.paths import PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR    # noqa
 from .config.version import VERSION                             # noqa
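
For context on the new loading flow above: plugins are now discovered three ways: pip-installed packages that register an entrypoint in the 'abx' group, the built-in packages listed in ARCHIVEBOX_BUILTIN_PLUGINS, and any packages dropped into a user_plugins/ directory under the current working directory. A minimal sketch of what a user plugin module could look like, assuming the new abx package still exposes the hookimpl marker and the get_CONFIG hookspec seen elsewhere in this commit (the plugin name and body here are hypothetical):

    # user_plugins/example_plugin/__init__.py  (hypothetical sketch)
    import abx

    __label__ = 'Example Plugin'     # optional metadata surfaced by plugin introspection
    __version__ = '0.0.1'

    @abx.hookimpl
    def get_CONFIG():
        # implements the get_CONFIG hookspec from abx_spec_config:
        # should return a mapping of {config_id: configset}
        return {}

Since load_plugins() just imports each discovered module and registers it with the plugin manager, a package like this should get picked up automatically the next time archivebox is imported.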

+ 1 - 1
archivebox/abid_utils/models.py

@@ -175,7 +175,7 @@ class ABIDModel(models.Model):
             'uri': self.abid_uri_src,
             'subtype': self.abid_subtype_src,
             'rand': self.abid_rand_src,
-            'salt': 'self.abid_salt',               # defined as static class vars at build time
+            'salt': 'self.abid_salt',                 # defined as static class vars at build time
         }
 
     @property

+ 0 - 131
archivebox/abx/__init__.py

@@ -1,131 +0,0 @@
-__package__ = 'abx'
-
-import importlib
-from pathlib import Path
-from typing import Dict, Callable, List
-
-from . import hookspec as base_spec
-from abx.hookspec import hookimpl, hookspec           # noqa
-from abx.manager import pm, PluginManager             # noqa
-
-
-pm.add_hookspecs(base_spec)
-
-
-###### PLUGIN DISCOVERY AND LOADING ########################################################
-
-def get_plugin_order(plugin_entrypoint: Path):
-    order = 999
-    try:
-        # if .plugin_order file exists, use it to set the load priority
-        order = int((plugin_entrypoint.parent / '.plugin_order').read_text())
-    except FileNotFoundError:
-        pass
-    return (order, plugin_entrypoint)
-
-def register_hookspecs(hookspecs: List[str]):
-    """
-    Register all the hookspecs from a list of module names.
-    """
-    for hookspec_import_path in hookspecs:
-        hookspec_module = importlib.import_module(hookspec_import_path)
-        pm.add_hookspecs(hookspec_module)
-
-
-def find_plugins_in_dir(plugins_dir: Path, prefix: str) -> Dict[str, Path]:
-    """
-    Find all the plugins in a given directory. Just looks for an __init__.py file.
-    """
-    return {
-        f"{prefix}.{plugin_entrypoint.parent.name}": plugin_entrypoint.parent
-        for plugin_entrypoint in sorted(plugins_dir.glob("*/__init__.py"), key=get_plugin_order)
-        if plugin_entrypoint.parent.name != 'abx'
-    }   # "plugins_pkg.pip": "/app/archivebox/plugins_pkg/pip"
-
-
-def get_pip_installed_plugins(group='abx'):
-    """replaces pm.load_setuptools_entrypoints("abx"), finds plugins that registered entrypoints via pip"""
-    import importlib.metadata
-
-    DETECTED_PLUGINS = {}   # module_name: module_dir_path
-    for dist in list(importlib.metadata.distributions()):
-        for entrypoint in dist.entry_points:
-            if entrypoint.group != group or pm.is_blocked(entrypoint.name):
-                continue
-            DETECTED_PLUGINS[entrypoint.name] = Path(entrypoint.load().__file__).parent
-            # pm.register(plugin, name=ep.name)
-            # pm._plugin_distinfo.append((plugin, DistFacade(dist)))
-    return DETECTED_PLUGINS
-
-
-def get_plugins_in_dirs(plugin_dirs: Dict[str, Path]):
-    """
-    Get the mapping of dir_name: {plugin_id: plugin_dir} for all plugins in the given directories.
-    """
-    DETECTED_PLUGINS = {}
-    for plugin_prefix, plugin_dir in plugin_dirs.items():
-        DETECTED_PLUGINS.update(find_plugins_in_dir(plugin_dir, prefix=plugin_prefix))
-    return DETECTED_PLUGINS
-
-
-# Load all plugins from pip packages, archivebox built-ins, and user plugins
-
-def load_plugins(plugins_dict: Dict[str, Path]):
-    """
-    Load all the plugins from a dictionary of module names and directory paths.
-    """
-    LOADED_PLUGINS = {}
-    for plugin_module, plugin_dir in plugins_dict.items():
-        # print(f'Loading plugin: {plugin_module} from {plugin_dir}')
-        plugin_module_loaded = importlib.import_module(plugin_module)
-        pm.register(plugin_module_loaded)
-        LOADED_PLUGINS[plugin_module] = plugin_module_loaded.PLUGIN
-        # print(f'    √ Loaded plugin: {plugin_module}')
-    return LOADED_PLUGINS
-
-def get_registered_plugins():
-    """
-    Get all the plugins registered with Pluggy.
-    """
-    plugins = {}
-    plugin_to_distinfo = dict(pm.list_plugin_distinfo())
-    for plugin in pm.get_plugins():
-        plugin_info = {
-            "name": plugin.__name__,
-            "hooks": [h.name for h in pm.get_hookcallers(plugin) or ()],
-        }
-        distinfo = plugin_to_distinfo.get(plugin)
-        if distinfo:
-            plugin_info["version"] = distinfo.version
-            plugin_info["name"] = (
-                getattr(distinfo, "name", None) or distinfo.project_name
-            )
-        plugins[plugin_info["name"]] = plugin_info
-    return plugins
-
-
-
-
-def get_plugin_hooks(plugin_pkg: str | None) -> Dict[str, Callable]:
-    """
-    Get all the functions marked with @hookimpl on a module.
-    """
-    if not plugin_pkg:
-        return {}
-    
-    hooks = {}
-    
-    plugin_module = importlib.import_module(plugin_pkg)
-    for attr_name in dir(plugin_module):
-        if attr_name.startswith('_'):
-            continue
-        try:
-            attr = getattr(plugin_module, attr_name)
-            if isinstance(attr, Callable):
-                hooks[attr_name] = None
-                pm.parse_hookimpl_opts(plugin_module, attr_name)
-                hooks[attr_name] = attr
-        except Exception as e:
-            print(f'Error getting hookimpls for {plugin_pkg}: {e}')
-
-    return hooks

+ 0 - 30
archivebox/abx/archivebox/__init__.py

@@ -1,30 +0,0 @@
-__package__ = 'abx.archivebox'
-
-import os
-import importlib
-
-from typing import Dict
-from pathlib import Path
-
-
-def load_archivebox_plugins(pm, plugins_dict: Dict[str, Path]):
-    """Load archivebox plugins, very similar to abx.load_plugins but it looks for a pydantic PLUGIN model + hooks in apps.py"""
-    LOADED_PLUGINS = {}
-    for plugin_module, plugin_dir in reversed(plugins_dict.items()):
-        # print(f'Loading plugin: {plugin_module} from {plugin_dir}')
-        
-        # 1. register the plugin module directly in case it contains any loose hookimpls (e.g. in __init__.py)
-        try:
-            plugin_module_loaded = importlib.import_module(plugin_module)
-            pm.register(plugin_module_loaded)
-        except Exception as e:
-            print(f'Error registering plugin: {plugin_module} - {e}')
-            
-        
-        # 2. then try to import plugin_module.apps as well
-        if os.access(plugin_dir / 'apps.py', os.R_OK):
-            plugin_apps = importlib.import_module(plugin_module + '.apps')
-            pm.register(plugin_apps)                                           # register the whole .apps  in case it contains loose hookimpls (not in a class)
-            
-        # print(f'    √ Loaded plugin: {plugin_module} {len(archivebox_plugins_found) * "🧩"}')
-    return LOADED_PLUGINS

+ 0 - 106
archivebox/abx/archivebox/base_binary.py

@@ -1,106 +0,0 @@
-__package__ = "abx.archivebox"
-
-import os
-from typing import Optional, cast
-from typing_extensions import Self
-
-from pydantic import validate_call
-from pydantic_pkgr import (
-    Binary,
-    BinProvider,
-    BinProviderName,
-    AptProvider,
-    BrewProvider,
-    EnvProvider,
-)
-
-from archivebox.config.permissions import ARCHIVEBOX_USER
-
-
-class BaseBinProvider(BinProvider):
-    
-    # TODO: add install/load/load_or_install methods as abx.hookimpl methods
-    
-    @property
-    def admin_url(self) -> str:
-        # e.g. /admin/environment/binproviders/NpmBinProvider/   TODO
-        return "/admin/environment/binaries/"
-
-class BaseBinary(Binary):
-
-    @staticmethod
-    def symlink_to_lib(binary, bin_dir=None) -> None:
-        from archivebox.config.common import STORAGE_CONFIG
-        bin_dir = bin_dir or STORAGE_CONFIG.LIB_DIR / 'bin'
-        
-        if not (binary.abspath and os.access(binary.abspath, os.R_OK)):
-            return
-        
-        try:
-            bin_dir.mkdir(parents=True, exist_ok=True)
-            symlink = bin_dir / binary.name
-            symlink.unlink(missing_ok=True)
-            symlink.symlink_to(binary.abspath)
-            symlink.chmod(0o777)   # make sure its executable by everyone
-        except Exception as err:
-            # print(f'[red]:warning: Failed to symlink {symlink} -> {binary.abspath}[/red] {err}')
-            # not actually needed, we can just run without it
-            pass
-        
-    @validate_call
-    def load(self, fresh=False, **kwargs) -> Self:
-        from archivebox.config.common import STORAGE_CONFIG
-        if fresh:
-            binary = super().load(**kwargs)
-            self.symlink_to_lib(binary=binary, bin_dir=STORAGE_CONFIG.LIB_DIR / 'bin')
-        else:
-            # get cached binary from db
-            try:
-                from machine.models import InstalledBinary
-                installed_binary = InstalledBinary.objects.get_from_db_or_cache(self)    # type: ignore
-                binary = InstalledBinary.load_from_db(installed_binary)
-            except Exception:
-                # maybe we are not in a DATA dir so there is no db, fallback to reading from fs
-                # (e.g. when archivebox version is run outside of a DATA dir)
-                binary = super().load(**kwargs)
-        return cast(Self, binary)
-    
-    @validate_call
-    def install(self, **kwargs) -> Self:
-        from archivebox.config.common import STORAGE_CONFIG
-        binary = super().install(**kwargs)
-        self.symlink_to_lib(binary=binary, bin_dir=STORAGE_CONFIG.LIB_DIR / 'bin')
-        return binary
-    
-    @validate_call
-    def load_or_install(self, fresh=False, **kwargs) -> Self:
-        from archivebox.config.common import STORAGE_CONFIG
-        try:
-            binary = self.load(fresh=fresh)
-            if binary and binary.version:
-                self.symlink_to_lib(binary=binary, bin_dir=STORAGE_CONFIG.LIB_DIR / 'bin')
-                return binary
-        except Exception:
-            pass
-        return self.install(**kwargs)
-    
-    @property
-    def admin_url(self) -> str:
-        # e.g. /admin/environment/config/LdapConfig/
-        return f"/admin/environment/binaries/{self.name}/"
-
-
-class AptBinProvider(AptProvider, BaseBinProvider):
-    name: BinProviderName = "apt"
-    
-class BrewBinProvider(BrewProvider, BaseBinProvider):
-    name: BinProviderName = "brew"
-    
-class EnvBinProvider(EnvProvider, BaseBinProvider):
-    name: BinProviderName = "env"
-    
-    euid: Optional[int] = ARCHIVEBOX_USER
-
-apt = AptBinProvider()
-brew = BrewBinProvider()
-env = EnvBinProvider()

+ 0 - 219
archivebox/abx/archivebox/base_extractor.py

@@ -1,219 +0,0 @@
-__package__ = 'abx.archivebox'
-
-import json
-import os
-
-from typing import Optional, List, Literal, Annotated, Dict, Any, Tuple
-from typing_extensions import Self
-from pathlib import Path
-
-from pydantic import model_validator, AfterValidator
-from pydantic_pkgr import BinName
-from django.utils.functional import cached_property
-from django.utils import timezone
-
-import abx
-
-from .base_binary import BaseBinary
-
-
-def no_empty_args(args: List[str]) -> List[str]:
-    assert all(len(arg) for arg in args)
-    return args
-
-ExtractorName = Literal['wget', 'warc', 'media', 'singlefile'] | str
-
-HandlerFuncStr = Annotated[str, AfterValidator(lambda s: s.startswith('self.'))]
-CmdArgsList = Annotated[List[str] | Tuple[str, ...], AfterValidator(no_empty_args)]
-
-
-class BaseExtractor:
-    
-    name: ExtractorName
-    binary: BinName
-
-    output_path_func: HandlerFuncStr = 'self.get_output_path'
-    should_extract_func: HandlerFuncStr = 'self.should_extract'
-    extract_func: HandlerFuncStr = 'self.extract'
-    exec_func: HandlerFuncStr = 'self.exec'
-
-    default_args: CmdArgsList = []
-    extra_args: CmdArgsList = []
-    args: Optional[CmdArgsList] = None
-
-    @model_validator(mode='after')
-    def validate_model(self) -> Self:
-        if self.args is None:
-            self.args = [*self.default_args, *self.extra_args]
-        return self
-
-
-    def get_output_path(self, snapshot) -> Path:
-        return Path(self.__class__.__name__.lower())
-
-    def should_extract(self, uri: str, config: dict | None=None) -> bool:
-        try:
-            assert self.detect_installed_binary().version
-        except Exception:
-            raise
-            # could not load binary
-            return False
-        
-        # output_dir = self.get_output_path(snapshot)
-        # if output_dir.glob('*.*'):
-        #     return False
-        return True
-
-    @abx.hookimpl
-    def extract(self, snapshot_id: str) -> Dict[str, Any]:
-        from core.models import Snapshot
-        from archivebox import CONSTANTS
-        
-        snapshot = Snapshot.objects.get(id=snapshot_id)
-        
-        if not self.should_extract(snapshot):
-            return {}
-        
-        status = 'failed'
-        start_ts = timezone.now()
-        uplink = self.detect_network_interface()
-        installed_binary = self.detect_installed_binary()
-        machine = installed_binary.machine
-        assert uplink.machine == installed_binary.machine  # it would be *very* weird if this wasn't true
-        
-        output_dir = CONSTANTS.DATA_DIR / '.tmp' / 'extractors' / self.name / str(snapshot.abid)
-        output_dir.mkdir(parents=True, exist_ok=True)
-
-        # execute the extractor binary with the given args
-        args = [snapshot.url, *self.args] if self.args is not None else [snapshot.url, *self.default_args, *self.extra_args]
-        cmd = [str(installed_binary.abspath), *args]
-        proc = self.exec(installed_binary=installed_binary, args=args, cwd=output_dir)
-
-        # collect the output
-        end_ts = timezone.now()
-        output_files = list(str(path.relative_to(output_dir)) for path in output_dir.glob('**/*.*'))
-        stdout = proc.stdout.strip()
-        stderr = proc.stderr.strip()
-        output_json = None
-        output_text = stdout
-        try:
-            output_json = json.loads(stdout.strip())
-            output_text = None
-        except json.JSONDecodeError:
-            pass
-        
-        errors = []
-        if proc.returncode == 0:
-            status = 'success'
-        else:
-            errors.append(f'{installed_binary.name} returned non-zero exit code: {proc.returncode}')   
-
-        # increment health stats counters
-        if status == 'success':
-            machine.record_health_success()
-            uplink.record_health_success()
-            installed_binary.record_health_success()
-        else:
-            machine.record_health_failure()
-            uplink.record_health_failure()
-            installed_binary.record_health_failure()
-
-        return {
-            'extractor': self.name,
-            
-            'snapshot': {
-                'id': snapshot.id,
-                'abid': snapshot.abid,
-                'url': snapshot.url,
-                'created_by_id': snapshot.created_by_id,
-            },
-            
-            'machine': {
-                'id': machine.id,
-                'abid': machine.abid,
-                'guid': machine.guid,
-                'hostname': machine.hostname,
-                'hw_in_docker': machine.hw_in_docker,
-                'hw_in_vm': machine.hw_in_vm,
-                'hw_manufacturer': machine.hw_manufacturer,
-                'hw_product': machine.hw_product,
-                'hw_uuid': machine.hw_uuid,
-                'os_arch': machine.os_arch,
-                'os_family': machine.os_family,
-                'os_platform': machine.os_platform,
-                'os_release': machine.os_release,
-                'os_kernel': machine.os_kernel,
-            },
-            
-            'uplink': { 
-                'id': uplink.id,
-                'abid': uplink.abid,
-                'mac_address': uplink.mac_address,
-                'ip_public': uplink.ip_public,
-                'ip_local': uplink.ip_local,
-                'dns_server': uplink.dns_server,
-                'hostname': uplink.hostname,
-                'iface': uplink.iface,
-                'isp': uplink.isp,
-                'city': uplink.city,
-                'region': uplink.region,
-                'country': uplink.country,
-            },
-            
-            'binary': {
-                'id': installed_binary.id,
-                'abid': installed_binary.abid,
-                'name': installed_binary.name,
-                'binprovider': installed_binary.binprovider,
-                'abspath': installed_binary.abspath,
-                'version': installed_binary.version,
-                'sha256': installed_binary.sha256,
-            },
-
-            'cmd': cmd,
-            'stdout': stdout,
-            'stderr': stderr,
-            'returncode': proc.returncode,
-            'start_ts': start_ts,
-            'end_ts': end_ts,
-            
-            'status': status,
-            'errors': errors,
-            'output_dir': str(output_dir.relative_to(CONSTANTS.DATA_DIR)),
-            'output_files': output_files,
-            'output_json': output_json or {},
-            'output_text': output_text or '',
-        }
-
-    # TODO: move this to a hookimpl
-    def exec(self, args: CmdArgsList=(), cwd: Optional[Path]=None, installed_binary=None):
-        cwd = cwd or Path(os.getcwd())
-        binary = self.load_binary(installed_binary=installed_binary)
-        
-        return binary.exec(cmd=args, cwd=cwd)
-    
-    @cached_property
-    def BINARY(self) -> BaseBinary:
-        import abx.archivebox.reads
-        for binary in abx.archivebox.reads.get_BINARIES().values():
-            if binary.name == self.binary:
-                return binary
-        raise ValueError(f'Binary {self.binary} not found')
-    
-    def detect_installed_binary(self):
-        from machine.models import InstalledBinary
-        # hydrates binary from DB/cache if record of installed version is recent enough
-        # otherwise it finds it from scratch by detecting installed version/abspath/sha256 on host
-        return InstalledBinary.objects.get_from_db_or_cache(self.BINARY)
-
-    def load_binary(self, installed_binary=None) -> BaseBinary:
-        installed_binary = installed_binary or self.detect_installed_binary()
-        return installed_binary.load_from_db()
-    
-    def detect_network_interface(self):
-        from machine.models import NetworkInterface
-        return NetworkInterface.objects.current()
-
-    @abx.hookimpl
-    def get_EXTRACTORS(self):
-        return [self]

+ 0 - 25
archivebox/abx/archivebox/base_replayer.py

@@ -1,25 +0,0 @@
-__package__ = 'abx.archivebox'
-
-import abx
-
-
-class BaseReplayer:
-    """Describes how to render an ArchiveResult in several contexts"""
-    
-    url_pattern: str = '*'
-
-    row_template: str = 'plugins/generic_replayer/templates/row.html'
-    embed_template: str = 'plugins/generic_replayer/templates/embed.html'
-    fullpage_template: str = 'plugins/generic_replayer/templates/fullpage.html'
-
-    # row_view: LazyImportStr = 'plugins.generic_replayer.views.row_view'
-    # embed_view: LazyImportStr = 'plugins.generic_replayer.views.embed_view'
-    # fullpage_view: LazyImportStr = 'plugins.generic_replayer.views.fullpage_view'
-    # icon_view: LazyImportStr = 'plugins.generic_replayer.views.get_icon'
-    # thumbnail_view: LazyImportStr = 'plugins.generic_replayer.views.get_icon'
-
-    @abx.hookimpl
-    def get_REPLAYERS(self):
-        return [self]
-
-    # TODO: add hookimpl methods for get_row_template, get_embed_template, get_fullpage_template, etc...

+ 0 - 25
archivebox/abx/archivebox/base_searchbackend.py

@@ -1,25 +0,0 @@
-__package__ = 'abx.archivebox'
-
-from typing import Iterable, List
-import abc
-
-
-
-class BaseSearchBackend(abc.ABC):
-    name: str
-
-    @staticmethod
-    @abc.abstractmethod
-    def index(snapshot_id: str, texts: List[str]):
-        return
-
-    @staticmethod
-    @abc.abstractmethod
-    def flush(snapshot_ids: Iterable[str]):
-        return
-
-    @staticmethod
-    @abc.abstractmethod
-    def search(text: str) -> List[str]:
-        raise NotImplementedError("search method must be implemented by subclass")
-

+ 0 - 52
archivebox/abx/archivebox/hookspec.py

@@ -1,52 +0,0 @@
-__package__ = 'abx.archivebox'
-
-from typing import Dict, Any
-
-from .. import hookspec
-
-from .base_binary import BaseBinary, BaseBinProvider
-from .base_configset import BaseConfigSet
-from .base_extractor import BaseExtractor
-from .base_searchbackend import BaseSearchBackend
-
-
-@hookspec
-def get_PLUGIN() -> Dict[str, Dict[str, Any]]:
-    return {}
-
-@hookspec
-def get_CONFIG() -> Dict[str, BaseConfigSet]:
-    return {}
-
-
-
-@hookspec
-def get_EXTRACTORS() -> Dict[str, BaseExtractor]:
-    return {}
-
-@hookspec
-def get_SEARCHBACKENDS() -> Dict[str, BaseSearchBackend]:
-    return {}
-
-# @hookspec
-# def get_REPLAYERS() -> Dict[str, BaseReplayer]:
-#     return {}
-
-# @hookspec
-# def get_ADMINDATAVIEWS():
-#     return {}
-
-# @hookspec
-# def get_QUEUES():
-#     return {}
-
-
-##############################################################
-# provided by abx.pydantic_pkgr.hookspec:
-# @hookspec
-# def get_BINARIES() -> Dict[str, BaseBinary]:
-#     return {}
-
-# @hookspec
-# def get_BINPROVIDERS() -> Dict[str, BaseBinProvider]:
-#     return {}

+ 0 - 160
archivebox/abx/archivebox/reads.py

@@ -1,160 +0,0 @@
-__package__ = 'abx.archivebox'
-
-import importlib
-from typing import Dict, Set, Any, TYPE_CHECKING
-
-from benedict import benedict
-
-import abx
-from .. import pm
-
-if TYPE_CHECKING:
-    from .base_configset import BaseConfigSet
-    from .base_binary import BaseBinary, BaseBinProvider
-    from .base_extractor import BaseExtractor
-    from .base_searchbackend import BaseSearchBackend
-    # from .base_replayer import BaseReplayer
-    # from .base_queue import BaseQueue
-    # from .base_admindataview import BaseAdminDataView
-
-# API exposed to ArchiveBox code
-
-def get_PLUGINS() -> Dict[str, Dict[str, Any]]:
-    return benedict({
-        plugin_id: plugin
-        for plugin_dict in pm.hook.get_PLUGIN()
-            for plugin_id, plugin in plugin_dict.items()
-    })
-
-def get_PLUGIN(plugin_id: str) -> Dict[str, Any]:
-    plugin_info = get_PLUGINS().get(plugin_id, {})
-    package = plugin_info.get('package', plugin_info.get('PACKAGE', None))
-    if not package:
-        return {'id': plugin_id, 'hooks': {}}
-    module = importlib.import_module(package)
-    hooks = abx.get_plugin_hooks(module.__package__)
-    assert plugin_info and (plugin_info.get('id') or plugin_info.get('ID') or hooks)
-    
-    return benedict({
-        'id': plugin_id,
-        'label': getattr(module, '__label__', plugin_id),
-        'module': module,
-        'package': module.__package__,
-        'hooks': hooks,
-        'version': getattr(module, '__version__', '999.999.999'),
-        'author': getattr(module, '__author__', 'Unknown'),
-        'homepage': getattr(module, '__homepage__', 'https://github.com/ArchiveBox/ArchiveBox'),
-        'dependencies': getattr(module, '__dependencies__', []),
-        'source_code': module.__file__,
-        **plugin_info,
-    })
-    
-
-def get_HOOKS() -> Set[str]:
-    return {
-        hook_name
-        for plugin_id in get_PLUGINS().keys()
-            for hook_name in get_PLUGIN(plugin_id).hooks
-    }
-
-def get_CONFIGS() -> Dict[str, 'BaseConfigSet']:
-    return benedict({
-        config_id: configset
-        for plugin_configs in pm.hook.get_CONFIG()
-            for config_id, configset in plugin_configs.items()
-    })
-
-
-def get_FLAT_CONFIG() -> Dict[str, Any]:
-    return benedict({
-        key: value
-        for configset in get_CONFIGS().values()
-            for key, value in configset.model_dump().items()
-    })
-
-def get_BINPROVIDERS() -> Dict[str, 'BaseBinProvider']:
-    # TODO: move these to plugins
-    from abx.archivebox.base_binary import apt, brew, env
-    builtin_binproviders = {
-        'env': env,
-        'apt': apt,
-        'brew': brew,
-    }
-    
-    return benedict({
-        binprovider_id: binprovider
-        for plugin_binproviders in [builtin_binproviders, *pm.hook.get_BINPROVIDERS()]
-            for binprovider_id, binprovider in plugin_binproviders.items()
-    })
-
-def get_BINARIES() -> Dict[str, 'BaseBinary']:
-    return benedict({
-        binary_id: binary
-        for plugin_binaries in pm.hook.get_BINARIES()
-            for binary_id, binary in plugin_binaries.items()
-    })
-
-def get_EXTRACTORS() -> Dict[str, 'BaseExtractor']:
-    return benedict({
-        extractor_id: extractor
-        for plugin_extractors in pm.hook.get_EXTRACTORS()
-            for extractor_id, extractor in plugin_extractors.items()
-    })
-
-# def get_REPLAYERS() -> Dict[str, 'BaseReplayer']:
-#     return benedict({
-#         replayer.id: replayer
-#         for plugin_replayers in pm.hook.get_REPLAYERS()
-#             for replayer in plugin_replayers
-#     })
-
-# def get_ADMINDATAVIEWS() -> Dict[str, 'BaseAdminDataView']:
-#     return benedict({
-#         admin_dataview.id: admin_dataview
-#         for plugin_admin_dataviews in pm.hook.get_ADMINDATAVIEWS()
-#             for admin_dataview in plugin_admin_dataviews
-#     })
-
-# def get_QUEUES() -> Dict[str, 'BaseQueue']:
-#     return benedict({
-#         queue.id: queue
-#         for plugin_queues in pm.hook.get_QUEUES()
-#             for queue in plugin_queues
-#     })
-
-def get_SEARCHBACKENDS() -> Dict[str, 'BaseSearchBackend']:
-    return benedict({
-        searchbackend_id: searchbackend
-        for plugin_searchbackends in pm.hook.get_SEARCHBACKENDS()
-            for searchbackend_id,searchbackend in plugin_searchbackends.items()
-    })
-
-
-
-def get_scope_config(defaults: benedict | None = None, persona=None, seed=None, crawl=None, snapshot=None, archiveresult=None, extra_config=None):
-    """Get all the relevant config for the given scope, in correct precedence order"""
-    
-    from django.conf import settings
-    default_config: benedict = defaults or settings.CONFIG
-    
-    snapshot = snapshot or (archiveresult and archiveresult.snapshot)
-    crawl = crawl or (snapshot and snapshot.crawl)
-    seed = seed or (crawl and crawl.seed)
-    persona = persona or (crawl and crawl.persona)
-    
-    persona_config = persona.config if persona else {}
-    seed_config = seed.config if seed else {}
-    crawl_config = crawl.config if crawl else {}
-    snapshot_config = snapshot.config if snapshot else {}
-    archiveresult_config = archiveresult.config if archiveresult else {}
-    extra_config = extra_config or {}
-    
-    return {
-        **default_config,               # defaults / config file / environment variables
-        **persona_config,               # lowest precedence
-        **seed_config,
-        **crawl_config,
-        **snapshot_config,
-        **archiveresult_config,
-        **extra_config,                 # highest precedence
-    }

+ 0 - 1
archivebox/abx/django/__init__.py

@@ -1 +0,0 @@
-__package__ = 'abx.django'

+ 0 - 13
archivebox/abx/django/apps.py

@@ -1,13 +0,0 @@
-__package__ = 'abx.django'
-
-from django.apps import AppConfig
-
-
-class ABXConfig(AppConfig):
-    name = 'abx'
-
-    def ready(self):
-        import abx
-        from django.conf import settings
-        
-        abx.pm.hook.ready(settings=settings)

+ 0 - 125
archivebox/abx/django/hookspec.py

@@ -1,125 +0,0 @@
-__package__ = 'abx.django'
-
-from ..hookspec import hookspec
-
-
-###########################################################################################
-
-@hookspec
-def get_INSTALLED_APPS():
-    """Return a list of apps to add to INSTALLED_APPS"""
-    # e.g. ['your_plugin_type.plugin_name']
-    return []
-
-# @hookspec
-# def register_INSTALLED_APPS(INSTALLED_APPS):
-#     """Mutate INSTALLED_APPS in place to add your app in a specific position"""
-#     # idx_of_contrib = INSTALLED_APPS.index('django.contrib.auth')
-#     # INSTALLED_APPS.insert(idx_of_contrib + 1, 'your_plugin_type.plugin_name')
-#     pass
-
-
-@hookspec
-def get_TEMPLATE_DIRS():
-    return []     # e.g. ['your_plugin_type/plugin_name/templates']
-
-# @hookspec
-# def register_TEMPLATE_DIRS(TEMPLATE_DIRS):
-#     """Install django settings"""
-#     # e.g. TEMPLATE_DIRS.insert(0, 'your_plugin_type/plugin_name/templates')
-#     pass
-
-
-@hookspec
-def get_STATICFILES_DIRS():
-    return []     # e.g. ['your_plugin_type/plugin_name/static']
-
-# @hookspec
-# def register_STATICFILES_DIRS(STATICFILES_DIRS):
-#     """Mutate STATICFILES_DIRS in place to add your static dirs in a specific position"""
-#     # e.g. STATICFILES_DIRS.insert(0, 'your_plugin_type/plugin_name/static')
-#     pass
-
-
-@hookspec
-def get_MIDDLEWARE():
-    return []     # e.g. ['your_plugin_type.plugin_name.middleware.YourMiddleware']
-
-# @hookspec
-# def register_MIDDLEWARE(MIDDLEWARE):
-#     """Mutate MIDDLEWARE in place to add your middleware in a specific position"""
-#     # e.g. MIDDLEWARE.insert(0, 'your_plugin_type.plugin_name.middleware.YourMiddleware')
-#     pass
-
-
-@hookspec
-def get_AUTHENTICATION_BACKENDS():
-    return []     # e.g. ['django_auth_ldap.backend.LDAPBackend']
-
-# @hookspec
-# def register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS):
-#     """Mutate AUTHENTICATION_BACKENDS in place to add your auth backends in a specific position"""
-#     # e.g. AUTHENTICATION_BACKENDS.insert(0, 'your_plugin_type.plugin_name.backend.YourBackend')
-#     pass
-
-@hookspec
-def get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME):
-    return []     # e.g. [{'name': 'your_plugin_type.plugin_name', 'HUEY': {...}}]
-
-# @hookspec
-# def register_DJANGO_HUEY(DJANGO_HUEY):
-#     """Mutate DJANGO_HUEY in place to add your huey queues in a specific position"""
-#     # e.g. DJANGO_HUEY['queues']['some_queue_name']['some_setting'] = 'some_value'
-#     pass
-
-
-@hookspec
-def get_ADMIN_DATA_VIEWS_URLS():
-    return []
-
-# @hookspec
-# def register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS):
-#     """Mutate ADMIN_DATA_VIEWS in place to add your admin data views in a specific position"""
-#     # e.g. ADMIN_DATA_VIEWS['URLS'].insert(0, 'your_plugin_type/plugin_name/admin_data_views.py')
-#     pass
-
-
-# @hookspec
-# def register_settings(settings):
-#     """Mutate settings in place to add your settings / modify existing settings"""
-#     # settings.SOME_KEY = 'some_value'
-#     pass
-
-
-###########################################################################################
-
-@hookspec
-def get_urlpatterns():
-    return []     # e.g. [path('your_plugin_type/plugin_name/url.py', your_view)]
-
-# @hookspec
-# def register_urlpatterns(urlpatterns):
-#     """Mutate urlpatterns in place to add your urlpatterns in a specific position"""
-#     # e.g. urlpatterns.insert(0, path('your_plugin_type/plugin_name/url.py', your_view))
-#     pass
-
-###########################################################################################
-
-@hookspec
-def register_checks():
-    """Register django checks with django system checks system"""
-    pass
-
-@hookspec
-def register_admin(admin_site):
-    """Register django admin views/models with the main django admin site instance"""
-    pass
-
-
-###########################################################################################
-
-
-@hookspec
-def ready():
-    """Called when Django apps app.ready() are triggered"""
-    pass

+ 0 - 101
archivebox/abx/django/use.py

@@ -1,101 +0,0 @@
-__package__ = 'abx.django'
-
-import itertools
-# from benedict import benedict
-
-from .. import pm
-
-
-def get_INSTALLED_APPS():
-    return itertools.chain(*reversed(pm.hook.get_INSTALLED_APPS()))
-
-# def register_INSTALLLED_APPS(INSTALLED_APPS):
-#     pm.hook.register_INSTALLED_APPS(INSTALLED_APPS=INSTALLED_APPS)
-
-
-def get_MIDDLEWARES():
-    return itertools.chain(*reversed(pm.hook.get_MIDDLEWARE()))
-
-# def register_MIDDLEWARES(MIDDLEWARE):
-#     pm.hook.register_MIDDLEWARE(MIDDLEWARE=MIDDLEWARE)
-
-
-def get_AUTHENTICATION_BACKENDS():
-    return itertools.chain(*reversed(pm.hook.get_AUTHENTICATION_BACKENDS()))
-
-# def register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS):
-#     pm.hook.register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS=AUTHENTICATION_BACKENDS)
-
-
-def get_STATICFILES_DIRS():
-    return itertools.chain(*reversed(pm.hook.get_STATICFILES_DIRS()))
-
-# def register_STATICFILES_DIRS(STATICFILES_DIRS):
-#     pm.hook.register_STATICFILES_DIRS(STATICFILES_DIRS=STATICFILES_DIRS)
-
-
-def get_TEMPLATE_DIRS():
-    return itertools.chain(*reversed(pm.hook.get_TEMPLATE_DIRS()))
-
-# def register_TEMPLATE_DIRS(TEMPLATE_DIRS):
-#     pm.hook.register_TEMPLATE_DIRS(TEMPLATE_DIRS=TEMPLATE_DIRS)
-
-def get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME='queue.sqlite3'):
-    HUEY_QUEUES = {}
-    for plugin_result in pm.hook.get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME=QUEUE_DATABASE_NAME):
-        HUEY_QUEUES.update(plugin_result)
-    return HUEY_QUEUES
-
-# def register_DJANGO_HUEY(DJANGO_HUEY):
-#     pm.hook.register_DJANGO_HUEY(DJANGO_HUEY=DJANGO_HUEY)
-
-def get_ADMIN_DATA_VIEWS_URLS():
-    return itertools.chain(*reversed(pm.hook.get_ADMIN_DATA_VIEWS_URLS()))
-
-# def register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS):
-#     pm.hook.register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS=ADMIN_DATA_VIEWS)
-
-
-# def register_settings(settings):
-#     # convert settings dict to an benedict so we can set values using settings.attr = xyz notation
-#     settings_as_obj = benedict(settings, keypath_separator=None)
-    
-#     # set default values for settings that are used by plugins
-#     # settings_as_obj.INSTALLED_APPS = settings_as_obj.get('INSTALLED_APPS', [])
-#     # settings_as_obj.MIDDLEWARE = settings_as_obj.get('MIDDLEWARE', [])
-#     # settings_as_obj.AUTHENTICATION_BACKENDS = settings_as_obj.get('AUTHENTICATION_BACKENDS', [])
-#     # settings_as_obj.STATICFILES_DIRS = settings_as_obj.get('STATICFILES_DIRS', [])
-#     # settings_as_obj.TEMPLATE_DIRS = settings_as_obj.get('TEMPLATE_DIRS', [])
-#     # settings_as_obj.DJANGO_HUEY = settings_as_obj.get('DJANGO_HUEY', {'queues': {}})
-#     # settings_as_obj.ADMIN_DATA_VIEWS = settings_as_obj.get('ADMIN_DATA_VIEWS', {'URLS': []})
-    
-#     # # call all the hook functions to mutate the settings values in-place
-#     # register_INSTALLLED_APPS(settings_as_obj.INSTALLED_APPS)
-#     # register_MIDDLEWARES(settings_as_obj.MIDDLEWARE)
-#     # register_AUTHENTICATION_BACKENDS(settings_as_obj.AUTHENTICATION_BACKENDS)
-#     # register_STATICFILES_DIRS(settings_as_obj.STATICFILES_DIRS)
-#     # register_TEMPLATE_DIRS(settings_as_obj.TEMPLATE_DIRS)
-#     # register_DJANGO_HUEY(settings_as_obj.DJANGO_HUEY)
-#     # register_ADMIN_DATA_VIEWS(settings_as_obj.ADMIN_DATA_VIEWS)
-    
-#     # calls Plugin.settings(settings) on each registered plugin
-#     pm.hook.register_settings(settings=settings_as_obj)
-    
-#     # then finally update the settings globals() object will all the new settings
-#     # settings.update(settings_as_obj)
-
-
-def get_urlpatterns():
-    return list(itertools.chain(*pm.hook.urlpatterns()))
-
-def register_urlpatterns(urlpatterns):
-    pm.hook.register_urlpatterns(urlpatterns=urlpatterns)
-
-
-def register_checks():
-    """register any django system checks"""
-    pm.hook.register_checks()
-
-def register_admin(admin_site):
-    """register any django admin models/views with the main django admin site instance"""
-    pm.hook.register_admin(admin_site=admin_site)

+ 0 - 22
archivebox/abx/hookspec.py

@@ -1,22 +0,0 @@
-from pathlib import Path
-
-from pluggy import HookimplMarker
-from pluggy import HookspecMarker
-
-spec = hookspec = HookspecMarker("abx")
-impl = hookimpl = HookimplMarker("abx")
-
-
-@hookspec
-@hookimpl
-def get_system_user() -> str:
-    # Beware $HOME may not match current EUID, UID, PUID, SUID, there are edge cases
-    # - sudo (EUD != UID != SUID)
-    # - running with an autodetected UID based on data dir ownership
-    #   but mapping of UID:username is broken because it was created
-    #   by a different host system, e.g. 911's $HOME outside of docker
-    #   might be /usr/lib/lxd instead of /home/archivebox
-    # - running as a user that doesn't have a home directory
-    # - home directory is set to a path that doesn't exist, or is inside a dir we can't read
-    return Path('~').expanduser().name
-

+ 0 - 30
archivebox/abx/manager.py

@@ -1,30 +0,0 @@
-import inspect
-
-import pluggy
-
-
-class PluginManager(pluggy.PluginManager):
-    """
-    Patch to fix pluggy's PluginManager to work with pydantic models.
-    See: https://github.com/pytest-dev/pluggy/pull/536
-    """
-    def parse_hookimpl_opts(self, plugin, name: str) -> pluggy.HookimplOpts | None:
-        # IMPORTANT: @property methods can have side effects, and are never hookimpl
-        # if attr is a property, skip it in advance
-        plugin_class = plugin if inspect.isclass(plugin) else type(plugin)
-        if isinstance(getattr(plugin_class, name, None), property):
-            return None
-
-        # pydantic model fields are like attrs and also can never be hookimpls
-        plugin_is_pydantic_obj = hasattr(plugin, "__pydantic_core_schema__")
-        if plugin_is_pydantic_obj and name in getattr(plugin, "model_fields", {}):
-            # pydantic models mess with the class and attr __signature__
-            # so inspect.isroutine(...) throws exceptions and cant be used
-            return None
-        
-        try:
-            return super().parse_hookimpl_opts(plugin, name)
-        except AttributeError:
-            return super().parse_hookimpl_opts(type(plugin), name)
-
-pm = PluginManager("abx")

+ 0 - 1
archivebox/abx/pydantic_pkgr/__init__.py

@@ -1 +0,0 @@
-__package__ = 'abx.pydantic_pkgr'

+ 0 - 13
archivebox/abx/pydantic_pkgr/hookspec.py

@@ -1,13 +0,0 @@
-
-from ..hookspec import hookspec
-
-###########################################################################################
-
-@hookspec
-def get_BINPROVIDERS():
-    return {}
-
-@hookspec
-def get_BINARIES():
-    return {}
-

+ 0 - 0
archivebox/plugins_auth/__init__.py → archivebox/actors/__init__.py


+ 313 - 0
archivebox/actors/actor.py

@@ -0,0 +1,313 @@
+__package__ = 'archivebox.actors'
+
+import os
+import time
+from abc import ABC, abstractmethod
+from typing import ClassVar, Generic, TypeVar, Any, cast, Literal, Type
+from django.utils.functional import classproperty
+
+from rich import print
+import psutil
+
+from django import db
+from django.db import models
+from django.db.models import QuerySet
+from multiprocessing import Process, cpu_count
+from threading import Thread, get_native_id
+
+# from archivebox.logging_util import TimedProgress
+
+LaunchKwargs = dict[str, Any]
+
+ModelType = TypeVar('ModelType', bound=models.Model)
+
+class ActorType(ABC, Generic[ModelType]):
+    """
+    Base class for all actors. Usage:
+    class FaviconActor(ActorType[ArchiveResult]):
+        QUERYSET: ClassVar[QuerySet] = ArchiveResult.objects.filter(status='queued', extractor='favicon')
+        CLAIM_WHERE: ClassVar[str] = 'status = "queued" AND extractor = "favicon"'
+        CLAIM_ORDER: ClassVar[str] = 'created_at DESC'
+        ATOMIC: ClassVar[bool] = True
+
+        def claim_sql_set(self, obj: ArchiveResult) -> str:
+            # SQL fields to update atomically while claiming an object from the queue
+            retry_at = datetime.now() + timedelta(seconds=self.MAX_TICK_TIME)
+            return f"status = 'started', locked_by = {self.pid}, retry_at = {retry_at}"
+
+        def tick(self, obj: ArchiveResult) -> None:
+            run_favicon_extractor(obj)
+            ArchiveResult.objects.filter(pk=obj.pk, status='started').update(status='success')
+    """
+    pid: int
+    idle_count: int = 0
+    launch_kwargs: LaunchKwargs = {}
+    mode: Literal['thread', 'process'] = 'process'
+    
+    MAX_CONCURRENT_ACTORS: ClassVar[int] = min(max(2, int(cpu_count() * 0.6)), 8)   # min 2, max 8, up to 60% of available cpu cores
+    MAX_TICK_TIME: ClassVar[int] = 60                          # maximum duration in seconds to process a single object
+    
+    QUERYSET: ClassVar[QuerySet]                      # the QuerySet to claim objects from
+    CLAIM_WHERE: ClassVar[str] = 'status = "queued"'  # the WHERE clause to filter the objects when atomically getting the next object from the queue
+    CLAIM_SET: ClassVar[str] = 'status = "started"'   # the SET clause to claim the object when atomically getting the next object from the queue
+    CLAIM_ORDER: ClassVar[str] = 'created_at DESC'    # the ORDER BY clause to sort the objects with when atomically getting the next object from the queue
+    CLAIM_FROM_TOP: ClassVar[int] = MAX_CONCURRENT_ACTORS * 10  # the number of objects to consider when atomically getting the next object from the queue
+    ATOMIC: ClassVar[bool] = True                     # whether to atomically fetch+claim the next object in one step, or fetch and lock it in two steps
+    
+    # model_type: Type[ModelType]
+    
+    _SPAWNED_ACTOR_PIDS: ClassVar[list[psutil.Process]] = []   # record all the pids of Actors spawned by this class
+    
+    def __init__(self, mode: Literal['thread', 'process']|None=None, **launch_kwargs: LaunchKwargs):
+        self.mode = mode or self.mode
+        self.launch_kwargs = launch_kwargs or dict(self.launch_kwargs)
+    
+    @classproperty
+    def name(cls) -> str:
+        return cls.__name__  # type: ignore
+    
+    def __str__(self) -> str:
+        return self.__repr__()
+    
+    def __repr__(self) -> str:
+        """FaviconActor[pid=1234]"""
+        label = 'pid' if self.mode == 'process' else 'tid'
+        return f'[underline]{self.name}[/underline]\\[{label}={self.pid}]'
+    
+    ### Class Methods: Called by Orchestrator on ActorType class before it has been spawned
+    
+    @classmethod
+    def get_running_actors(cls) -> list[int]:
+        """returns a list of pids of all running actors of this type"""
+        # WARNING: only works for process actors, not thread actors
+        if cls.mode == 'thread':
+            raise NotImplementedError('get_running_actors() is not implemented for thread actors')
+        return [
+            proc.pid for proc in cls._SPAWNED_ACTOR_PIDS
+            if proc.is_running() and proc.status() != 'zombie'
+        ]
+        
+    @classmethod
+    def get_actors_to_spawn(cls, queue: QuerySet, running_actors: list[int]) -> list[LaunchKwargs]:
+        """Get a list of launch kwargs for the number of actors to spawn based on the queue and currently running actors"""
+        queue_length = queue.count()
+        if not queue_length:                                      # queue is empty, spawn 0 actors
+            return []
+        
+        actors_to_spawn: list[LaunchKwargs] = []
+        max_spawnable = cls.MAX_CONCURRENT_ACTORS - len(running_actors)
+        
+        # spawning new actors is expensive, avoid spawning all the actors at once. To stagger them,
+        # let the next orchestrator tick handle starting another 2 on the next tick()
+        # if queue_length > 10:                                   # queue is long, spawn as many as possible
+        #   actors_to_spawn += max_spawnable * [{}]
+        
+        if queue_length > 4:                                    # queue is medium, spawn 1 or 2 actors
+            actors_to_spawn += min(2, max_spawnable) * [{**cls.launch_kwargs}]
+        else:                                                     # queue is short, spawn 1 actor
+            actors_to_spawn += min(1, max_spawnable) * [{**cls.launch_kwargs}]
+        return actors_to_spawn
+        
+    @classmethod
+    def start(cls, mode: Literal['thread', 'process']='process', **launch_kwargs: LaunchKwargs) -> int:
+        if mode == 'thread':
+            return cls.fork_actor_as_thread(**launch_kwargs)
+        elif mode == 'process':
+            return cls.fork_actor_as_process(**launch_kwargs)
+        raise ValueError(f'Invalid actor mode: {mode} must be "thread" or "process"')
+        
+    @classmethod
+    def fork_actor_as_thread(cls, **launch_kwargs: LaunchKwargs) -> int:
+        """Spawn a new background thread running the actor's runloop"""
+        actor = cls(mode='thread', **launch_kwargs)
+        bg_actor_thread = Thread(target=actor.runloop)
+        bg_actor_thread.start()
+        assert bg_actor_thread.native_id is not None
+        return bg_actor_thread.native_id
+    
+    @classmethod
+    def fork_actor_as_process(cls, **launch_kwargs: LaunchKwargs) -> int:
+        """Spawn a new background process running the actor's runloop"""
+        actor = cls(mode='process', **launch_kwargs)
+        bg_actor_process = Process(target=actor.runloop)
+        bg_actor_process.start()
+        assert bg_actor_process.pid is not None
+        cls._SPAWNED_ACTOR_PIDS.append(psutil.Process(pid=bg_actor_process.pid))
+        return bg_actor_process.pid
+    
+    @classmethod
+    def get_model(cls) -> Type[ModelType]:
+        # wish this was a @classproperty but Generic[ModelType] return type can't be statically inferred for @classproperty
+        return cls.QUERYSET.model
+    
+    @classmethod
+    def get_queue(cls) -> QuerySet:
+        """override this to provide your queryset as the queue"""
+        # return ArchiveResult.objects.filter(status='queued', extractor__in=('pdf', 'dom', 'screenshot'))
+        return cls.QUERYSET
+    
+    ### Instance Methods: Called by Actor after it has been spawned (i.e. forked as a thread or process)
+    
+    def runloop(self):
+        """The main runloop that starts running when the actor is spawned (as subprocess or thread) and exits when the queue is empty"""
+        self.on_startup()
+        try:
+            while True:
+                obj_to_process: ModelType | None = None
+                try:
+                    obj_to_process = cast(ModelType, self.get_next(atomic=self.ATOMIC))
+                except Exception:
+                    pass
+                
+                if obj_to_process:
+                    self.idle_count = 0   # reset idle count if we got an object
+                else:
+                    if self.idle_count >= 30:
+                        break             # stop looping and exit if queue is empty and we have idled for 30sec
+                    else:
+                        # print('Actor runloop()', f'pid={self.pid}', 'queue empty, rechecking...')
+                        self.idle_count += 1
+                        time.sleep(1)
+                        continue
+                
+                self.on_tick_start(obj_to_process)
+                
+                # Process the object
+                try:
+                    self.tick(obj_to_process)
+                except Exception as err:
+                    print(f'[red]🏃‍♂️ ERROR: {self}.tick()[/red]', err)
+                    db.connections.close_all()                         # always reset the db connection after an exception to clear any pending transactions
+                    self.on_tick_exception(obj_to_process, err)
+                finally:
+                    self.on_tick_end(obj_to_process)
+            
+            self.on_shutdown(err=None)
+        except BaseException as err:
+            if isinstance(err, KeyboardInterrupt):
+                print()
+            else:
+                print(f'\n[red]🏃‍♂️ {self}.runloop() FATAL:[/red]', err.__class__.__name__, err)
+            self.on_shutdown(err=err)
+    
+    def get_next(self, atomic: bool | None=None) -> ModelType | None:
+        """get the next object from the queue, atomically locking it if self.atomic=True"""
+        if atomic is None:
+            atomic = self.ATOMIC
+
+        if atomic:
+            # fetch and claim the next object from the queue in one atomic query
+            obj = self.get_next_atomic()
+        else:
+            # two-step claim: fetch the next object and lock it in a separate query
+            obj = self.get_queue().last()
+            assert obj and self.lock_next(obj), f'Unable to fetch+lock the next {self.get_model().__name__} object from {self}.QUERYSET'
+        return obj
+    
+    def lock_next(self, obj: ModelType) -> bool:
+        """override this to implement a custom two-step (non-atomic)lock mechanism"""
+        # For example:
+        # assert obj._model.objects.filter(pk=obj.pk, status='queued').update(status='started', locked_by=self.pid)
+        # Not needed if using get_next_and_lock() to claim the object atomically
+        # print(f'[blue]🏃‍♂️ {self}.lock()[/blue]', obj.abid or obj.id)
+        return True
+    
+    def claim_sql_where(self) -> str:
+        """override this to implement a custom WHERE clause for the atomic claim step e.g. "status = 'queued' AND locked_by = NULL" """
+        return self.CLAIM_WHERE
+    
+    def claim_sql_set(self) -> str:
+        """override this to implement a custom SET clause for the atomic claim step e.g. "status = 'started' AND locked_by = {self.pid}" """
+        return self.CLAIM_SET
+    
+    def claim_sql_order(self) -> str:
+        """override this to implement a custom ORDER BY clause for the atomic claim step e.g. "created_at DESC" """
+        return self.CLAIM_ORDER
+    
+    def claim_from_top(self) -> int:
+        """override this to implement a custom number of objects to consider when atomically claiming the next object from the top of the queue"""
+        return self.CLAIM_FROM_TOP
+        
+    def get_next_atomic(self, shallow: bool=True) -> ModelType | None:
+        """
+        claim a random object from among the top n=50 objects in the queue (atomically updates status=queued->started for the claimed object)
+        optimized to minimize contention on the queue with other actors selecting from the same list
+        slightly faster than claim_any_obj(), which selects randomly from the entire queue but needs to know the total count
+        """
+        Model = self.get_model()                                     # e.g. ArchiveResult
+        table = f'{Model._meta.app_label}_{Model._meta.model_name}'  # e.g. core_archiveresult
+        
+        where_sql = self.claim_sql_where()
+        set_sql = self.claim_sql_set()
+        order_by_sql = self.claim_sql_order()
+        choose_from_top = self.claim_from_top()
+        
+        with db.connection.cursor() as cursor:
+            # the subquery selects the pool of the top N (claim_from_top) candidates sorted by the claim order
+            # the main query atomically claims a random one from that pool
+            cursor.execute(f"""
+                UPDATE {table} 
+                SET {set_sql}
+                WHERE {where_sql} AND id = (
+                    SELECT id FROM (
+                        SELECT id FROM {table}
+                        WHERE {where_sql}
+                        ORDER BY {order_by_sql}
+                        LIMIT {choose_from_top}
+                    ) candidates
+                    ORDER BY RANDOM()
+                    LIMIT 1
+                )
+                RETURNING id;
+            """)
+            result = cursor.fetchone()
+            
+            if result is None:
+                return None           # If no rows were claimed, return None
+
+            if shallow:
+                # shallow: faster, but returns a potentially incomplete object instance missing some django auto-populated fields:
+                columns = [col[0] for col in cursor.description] if cursor.description else ['id']
+                return Model(**dict(zip(columns, result)))
+
+            # if not shallow do one extra query to get a more complete object instance (load it fully from scratch)
+            return Model.objects.get(id=result[0])
+
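+    # With SnapshotActor's default claim clauses (see core/actors.py) the query above renders roughly as:
+    #   UPDATE core_snapshot
+    #   SET status = "started", retry_at = '...'
+    #   WHERE status = "queued" AND id = (
+    #       SELECT id FROM (SELECT id FROM core_snapshot WHERE status = "queued"
+    #                       ORDER BY created_at DESC LIMIT 50) candidates
+    #       ORDER BY RANDOM() LIMIT 1)
+    #   RETURNING id;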
+    @abstractmethod
+    def tick(self, obj: ModelType) -> None:
+        """override this to process the object"""
+        print(f'[blue]🏃‍♂️ {self}.tick()[/blue]', obj.abid or obj.id)
+        # For example:
+        # do_some_task(obj)
+        # do_something_else(obj)
+        # obj._model.objects.filter(pk=obj.pk, status='started').update(status='success')
+        raise NotImplementedError('tick() must be implemented by the Actor subclass')
+    
+    def on_startup(self) -> None:
+        if self.mode == 'thread':
+            self.pid = get_native_id()  # thread id
+            print(f'[green]🏃‍♂️ {self}.on_startup() STARTUP (THREAD)[/green]')
+        else:
+            self.pid = os.getpid()      # process id
+            print(f'[green]🏃‍♂️ {self}.on_startup() STARTUP (PROCESS)[/green]')
+        # abx.pm.hook.on_actor_startup(self)
+        
+    def on_shutdown(self, err: BaseException | None=None) -> None:
+        print(f'[grey53]🏃‍♂️ {self}.on_shutdown() SHUTTING DOWN[/grey53]', err or '[green](gracefully)[/green]')
+        # abx.pm.hook.on_actor_shutdown(self)
+        
+    def on_tick_start(self, obj: ModelType) -> None:
+        # print(f'🏃‍♂️ {self}.on_tick_start()', obj.abid or obj.id)
+        # abx.pm.hook.on_actor_tick_start(self, obj_to_process)
+        # self.timer = TimedProgress(self.MAX_TICK_TIME, prefix='      ')
+        pass
+    
+    def on_tick_end(self, obj: ModelType) -> None:
+        # print(f'🏃‍♂️ {self}.on_tick_end()', obj.abid or obj.id)
+        # abx.pm.hook.on_actor_tick_end(self, obj_to_process)
+        # self.timer.end()
+        pass
+    
+    def on_tick_exception(self, obj: ModelType, err: BaseException) -> None:
+        print(f'[red]🏃‍♂️ {self}.on_tick_exception()[/red]', obj.abid or obj.id, err)
+        # abx.pm.hook.on_actor_tick_exception(self, obj_to_process, err)
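+
+
+# Minimal subclass sketch (hypothetical TitleActor; see FaviconActor in orchestrator.py for a real example):
+#
+#   class TitleActor(ActorType[ArchiveResult]):
+#       QUERYSET = ArchiveResult.objects.filter(status='queued', extractor='title')
+#       CLAIM_WHERE = 'status = "queued" AND extractor = "title"'
+#       CLAIM_SET = 'status = "started"'
+#
+#       def tick(self, obj: ArchiveResult) -> None:
+#           ...   # run the extractor, then mark obj succeeded/failed
+#
+#   TitleActor.start(mode='thread')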

+ 3 - 0
archivebox/actors/admin.py

@@ -0,0 +1,3 @@
+from django.contrib import admin
+
+# Register your models here.

+ 6 - 0
archivebox/actors/apps.py

@@ -0,0 +1,6 @@
+from django.apps import AppConfig
+
+
+class ActorsConfig(AppConfig):
+    default_auto_field = "django.db.models.BigAutoField"
+    name = "actors"

+ 0 - 0
archivebox/plugins_extractor/__init__.py → archivebox/actors/migrations/__init__.py


+ 3 - 0
archivebox/actors/models.py

@@ -0,0 +1,3 @@
+from django.db import models
+
+# Create your models here.

+ 244 - 0
archivebox/actors/orchestrator.py

@@ -0,0 +1,244 @@
+__package__ = 'archivebox.actors'
+
+import os
+import time
+import itertools
+from typing import Dict, Type, Literal, ClassVar
+from django.utils.functional import classproperty
+
+from multiprocessing import Process, cpu_count
+from threading import Thread, get_native_id
+
+
+from rich import print
+
+from django.db.models import QuerySet
+
+from django.apps import apps
+from .actor import ActorType
+
+class Orchestrator:
+    pid: int
+    idle_count: int = 0
+    actor_types: Dict[str, Type[ActorType]]
+    mode: Literal['thread', 'process'] = 'process'
+
+    def __init__(self, actor_types: Dict[str, Type[ActorType]] | None = None, mode: Literal['thread', 'process'] | None=None):
+        self.actor_types = actor_types or getattr(self, 'actor_types', None) or self.autodiscover_actor_types()
+        self.mode = mode or self.mode
+
+    def __repr__(self) -> str:
+        label = 'tid' if self.mode == 'thread' else 'pid'
+        return f'[underline]{self.name}[/underline]\\[{label}={self.pid}]'
+    
+    def __str__(self) -> str:
+        return self.__repr__()
+    
+    @classproperty
+    def name(cls) -> str:
+        return cls.__name__   # type: ignore
+    
+    def fork_as_thread(self):
+        self.thread = Thread(target=self.runloop)
+        self.thread.start()
+        assert self.thread.native_id is not None
+        return self.thread.native_id
+    
+    def fork_as_process(self):
+        self.process = Process(target=self.runloop)
+        self.process.start()
+        assert self.process.pid is not None
+        return self.process.pid
+
+    def start(self) -> int:
+        if self.mode == 'thread':
+            return self.fork_as_thread()
+        elif self.mode == 'process':
+            return self.fork_as_process()
+        raise ValueError(f'Invalid orchestrator mode: {self.mode}')
+    
+    @classmethod
+    def autodiscover_actor_types(cls) -> Dict[str, Type[ActorType]]:
+        # returns a Dict of all discovered {actor_type_id: ActorType} across the codebase
+        # override this method in a subclass to customize the actor types that are used
+        # return {'Snapshot': SnapshotActorType, 'ArchiveResult_chrome': ChromeActorType, ...}
+        return {
+            # look through all models and find all classes that inherit from ActorType
+            # actor_type.__name__: actor_type
+            # for actor_type in abx.pm.hook.get_all_ACTORS_TYPES().values()
+        }
+    
+    @classmethod
+    def get_orphaned_objects(cls, all_queues) -> list:
+        # returns a list of stale objects: anything whose retry_at has passed but that is not in any actor type's queue
+        all_queued_ids = itertools.chain(*[queue.values_list('id', flat=True) for queue in all_queues.values()])
+        orphaned_objects = []
+        for model in apps.get_models():
+            if hasattr(model, 'retry_at'):
+                orphaned_objects.extend(model.objects.filter(retry_at__lt=timezone.now()).exclude(id__in=all_queued_ids))
+        return orphaned_objects
+    
+    def on_startup(self):
+        if self.mode == 'thread':
+            self.pid = get_native_id()
+            print(f'[green]👨‍✈️ {self}.on_startup() STARTUP (THREAD)[/green]')
+        elif self.mode == 'process':
+            self.pid = os.getpid()
+            print(f'[green]👨‍✈️ {self}.on_startup() STARTUP (PROCESS)[/green]')
+        # abx.pm.hook.on_orchestrator_startup(self)
+    
+    def on_shutdown(self, err: BaseException | None = None):
+        print(f'[grey53]👨‍✈️ {self}.on_shutdown() SHUTTING DOWN[/grey53]', err or '[green](gracefully)[/green]')
+        # abx.pm.hook.on_orchestrator_shutdown(self)
+        
+    def on_tick_started(self, all_queues):
+        # total_pending = sum(queue.count() for queue in all_queues.values())
+        # print(f'👨‍✈️ {self}.on_tick_started()', f'total_pending={total_pending}')
+        # abx.pm.hook.on_orchestrator_tick_started(self, actor_types, all_queues)
+        pass
+    
+    def on_tick_finished(self, all_queues, all_existing_actors, all_spawned_actors):
+        if all_spawned_actors:
+            total_queue_length = sum(queue.count() for queue in all_queues.values())
+            print(f'[grey53]👨‍✈️ {self}.on_tick_finished() queue={total_queue_length} existing_actors={len(all_existing_actors)} spawned_actors={len(all_spawned_actors)}[/grey53]')
+        # abx.pm.hook.on_orchestrator_tick_finished(self, actor_types, all_queues)
+
+    def on_idle(self, all_queues):
+        # print(f'👨‍✈️ {self}.on_idle()')
+        # abx.pm.hook.on_orchestrator_idle(self)
+        # check for orphaned objects left behind
+        if self.idle_count == 60:
+            orphaned_objects = self.get_orphaned_objects(all_queues)
+            if orphaned_objects:
+                print('[red]👨‍✈️ WARNING: some objects may not be processed, no actor has claimed them after 60s:[/red]', orphaned_objects)
+
+    def runloop(self):
+        self.on_startup()
+        try:
+            while True:
+                all_queues = {
+                    actor_type: actor_type.get_queue()
+                    for actor_type in self.actor_types.values()
+                }
+                if not all_queues:
+                    raise Exception('Failed to find any actor_types to process')
+
+                self.on_tick_started(all_queues)
+
+                all_existing_actors = []
+                all_spawned_actors = []
+
+                for actor_type, queue in all_queues.items():
+                    try:
+                        existing_actors = actor_type.get_running_actors()
+                        all_existing_actors.extend(existing_actors)
+                        actors_to_spawn = actor_type.get_actors_to_spawn(queue, existing_actors)
+                        for launch_kwargs in actors_to_spawn:
+                            new_actor_pid = actor_type.start(mode='process', **launch_kwargs)
+                            all_spawned_actors.append(new_actor_pid)
+                    except Exception as err:
+                        print(f'[red]👨‍✈️ ERROR: {self} Failed to get {actor_type} queue & running actors[/red]', err)
+                    except BaseException:
+                        raise
+
+                if not any(queue.exists() for queue in all_queues.values()):
+                    self.on_idle(all_queues)
+                    self.idle_count += 1
+                    time.sleep(1)
+                else:
+                    self.idle_count = 0
+                    
+                self.on_tick_finished(all_queues, all_existing_actors, all_spawned_actors)
+                time.sleep(1)
+
+        except BaseException as err:
+            if isinstance(err, KeyboardInterrupt):
+                print()
+            else:
+                print(f'\n[red]👨‍✈️ {self}.runloop() FATAL:[/red]', err.__class__.__name__, err)
+            self.on_shutdown(err=err)
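+
+# Usage sketch (assuming the actor types registered below):
+#   orchestrator = Orchestrator(actor_types={'FaviconActor': FaviconActor}, mode='thread')
+#   orchestrator.start()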
+
+
+
+from archivebox.config.django import setup_django
+
+setup_django()
+
+from core.models import ArchiveResult, Snapshot
+
+from django.utils import timezone
+
+from django import db
+from django.db import connection
+
+
+from crawls.actors import CrawlActor
+from .actor_snapshot import SnapshotActor
+
+from abx_plugin_singlefile.actors import SinglefileActor
+
+
+class FaviconActor(ActorType[ArchiveResult]):
+    CLAIM_ORDER: ClassVar[str] = 'created_at DESC'
+    CLAIM_WHERE: ClassVar[str] = 'status = "queued" AND extractor = "favicon"'
+    CLAIM_SET: ClassVar[str] = 'status = "started"'
+    
+    @classproperty
+    def QUERYSET(cls) -> QuerySet:
+        return ArchiveResult.objects.filter(status='failed', extractor='favicon')
+
+    def tick(self, obj: ArchiveResult):
+        print(f'[grey53]{self}.tick({obj.abid or obj.id}, status={obj.status}) remaining:[/grey53]', self.get_queue().count())
+        updated = ArchiveResult.objects.filter(id=obj.id, status='started').update(status='succeeded') == 1
+        if not updated:
+            raise Exception(f'Failed to update {obj.abid or obj.id}, interrupted by another actor writing to the same object')
+        obj.refresh_from_db()
+        obj.save()
+
+
+class ExtractorsOrchestrator(Orchestrator):
+    actor_types = {
+        'CrawlActor': CrawlActor,
+        'SnapshotActor': SnapshotActor,
+        'FaviconActor': FaviconActor,
+        'SinglefileActor': SinglefileActor,
+    }
+
+
+if __name__ == '__main__':    
+    orchestrator = ExtractorsOrchestrator()
+    orchestrator.start()
+    
+    snap = Snapshot.objects.last()
+    assert snap is not None
+    created = 0
+    while True:
+        time.sleep(0.05)
+        # try:
+        #     ArchiveResult.objects.bulk_create([
+        #         ArchiveResult(
+        #             id=uuid.uuid4(),
+        #             snapshot=snap,
+        #             status='failed',
+        #             extractor='favicon',
+        #             cmd=['echo', '"hello"'],
+        #             cmd_version='1.0',
+        #             pwd='.',
+        #             start_ts=timezone.now(),
+        #             end_ts=timezone.now(),
+        #             created_at=timezone.now(),
+        #             modified_at=timezone.now(),
+        #             created_by_id=1,
+        #         )
+        #         for _ in range(100)
+        #     ])
+        #     created += 100
+        #     if created % 1000 == 0:
+        #         print(f'[blue]Created {created} ArchiveResults...[/blue]')
+        #         time.sleep(25)
+        # except Exception as err:
+        #     print(err)
+        #     db.connections.close_all()
+        # except BaseException as err:
+        #     print(err)
+        #     break

+ 286 - 0
archivebox/actors/statemachine.py

@@ -0,0 +1,286 @@
+from statemachine import State, StateMachine
+from django.db import models, transaction
+from django.db.models import Q
+from django.utils import timezone
+from datetime import timedelta
+from multiprocessing import Process
+import abx
+import psutil
+import time
+import os
+
+# State Machine Definitions
+#################################################
+
+class SnapshotMachine(StateMachine):
+    """State machine for managing Snapshot lifecycle."""
+    
+    # States
+    queued = State(initial=True)
+    started = State()
+    sealed = State(final=True)
+    
+    # Transitions
+    start = queued.to(started, cond='can_start')
+    seal = started.to(sealed, cond='is_finished')
+    
+    # Events
+    tick = (
+        queued.to.itself(unless='can_start') |
+        queued.to(started, cond='can_start') |
+        started.to.itself(unless='is_finished') |
+        started.to(sealed, cond='is_finished')
+    )
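+    # python-statemachine tries these tick transitions in declaration order and fires the
+    # first one whose cond/unless guards pass; the .itself() fallbacks keep tick() from
+    # raising TransitionNotAllowed while the snapshot isn't ready to advance yet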
+    
+    def __init__(self, snapshot):
+        self.snapshot = snapshot
+        super().__init__()
+        
+    def can_start(self):
+        return True
+        
+    def is_finished(self):
+        return not self.snapshot.has_pending_archiveresults()
+        
+    def before_start(self):
+        """Pre-start validation and setup."""
+        self.snapshot.cleanup_dir()
+        
+    def after_start(self):
+        """Post-start side effects."""
+        self.snapshot.create_pending_archiveresults()
+        self.snapshot.update_indices()
+        self.snapshot.bump_retry_at(seconds=10)
+        
+    def before_seal(self):
+        """Pre-seal validation and cleanup."""
+        self.snapshot.cleanup_dir()
+        
+    def after_seal(self):
+        """Post-seal actions."""
+        self.snapshot.update_indices()
+        self.snapshot.seal_dir()
+        self.snapshot.upload_dir()
+        self.snapshot.retry_at = None
+        self.snapshot.save()
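+
+# Usage sketch (assuming `snapshot` is an instance of the Snapshot model below):
+#   machine = SnapshotMachine(snapshot)
+#   machine.tick()    # queued -> started (runs before_start/after_start side effects)
+#   machine.tick()    # started -> sealed once has_pending_archiveresults() is False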
+
+
+class ArchiveResultMachine(StateMachine):
+    """State machine for managing ArchiveResult lifecycle."""
+    
+    # States
+    queued = State(initial=True)
+    started = State()
+    succeeded = State(final=True)
+    backoff = State()
+    failed = State(final=True)
+    
+    # Transitions
+    start = queued.to(started, cond='can_start')
+    succeed = started.to(succeeded, cond='extractor_succeeded')
+    enter_backoff = started.to(backoff, unless='extractor_succeeded', event='backoff')  # attr renamed so it doesn't shadow the backoff State above
+    retry = backoff.to(queued, cond='can_retry')
+    fail = backoff.to(failed, unless='can_retry')
+    
+    # Events
+    tick = (
+        queued.to.itself(unless='can_start') |
+        queued.to(started, cond='can_start') |
+        started.to.itself(cond='extractor_still_running') |
+        started.to(succeeded, cond='extractor_succeeded') |
+        started.to(backoff, unless='extractor_succeeded') |
+        backoff.to.itself(cond='still_waiting_to_retry') |
+        backoff.to(queued, cond='can_retry') |
+        backoff.to(failed, unless='can_retry')
+    )
+    
+    def __init__(self, archiveresult):
+        self.archiveresult = archiveresult
+        super().__init__()
+    
+    def can_start(self):
+        return True
+    
+    def extractor_still_running(self):
+        return self.archiveresult.start_ts > timezone.now() - timedelta(seconds=5)
+    
+    def extractor_succeeded(self):
+        # return check_if_extractor_succeeded(self.archiveresult)
+        return self.archiveresult.start_ts < timezone.now() - timedelta(seconds=5)
+    
+    def can_retry(self):
+        return self.archiveresult.retries < self.archiveresult.max_retries
+    
+    def still_waiting_to_retry(self):
+        # guard referenced by the tick event above; assumes retry_at is set while in backoff
+        return self.archiveresult.retry_at and self.archiveresult.retry_at > timezone.now()
+        
+    def before_start(self):
+        """Pre-start initialization."""
+        self.archiveresult.retries += 1
+        self.archiveresult.start_ts = timezone.now()
+        self.archiveresult.output = None
+        self.archiveresult.error = None
+        
+    def after_start(self):
+        """Post-start execution."""
+        self.archiveresult.bump_retry_at(seconds=self.archiveresult.timeout + 5)
+        execute_extractor(self.archiveresult)
+        self.archiveresult.snapshot.bump_retry_at(seconds=5)
+        
+    def before_succeed(self):
+        """Pre-success validation."""
+        self.archiveresult.output = get_archiveresult_output(self.archiveresult)
+        
+    def after_succeed(self):
+        """Post-success cleanup."""
+        self.archiveresult.end_ts = timezone.now()
+        self.archiveresult.retry_at = None
+        self.archiveresult.update_indices()
+        
+    def before_backoff(self):
+        """Pre-backoff error capture."""
+        self.archiveresult.error = get_archiveresult_error(self.archiveresult)
+        
+    def after_backoff(self):
+        """Post-backoff retry scheduling."""
+        self.archiveresult.end_ts = timezone.now()
+        self.archiveresult.bump_retry_at(
+            seconds=self.archiveresult.timeout * self.archiveresult.retries
+        )
+        self.archiveresult.update_indices()
+        
+    def before_fail(self):
+        """Pre-failure finalization."""
+        self.archiveresult.retry_at = None
+        
+    def after_fail(self):
+        """Post-failure cleanup."""
+        self.archiveresult.update_indices()
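+
+# Lifecycle sketch: queued -> started -> succeeded,
+# or queued -> started -> backoff -> queued (retry) -> ... -> failed once retries >= max_retries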
+
+# Models
+#################################################
+
+class Snapshot(models.Model):
+    status = models.CharField(max_length=32, default='queued')
+    retry_at = models.DateTimeField(null=True)
+    
+    @property
+    def sm(self):
+        """Get the state machine for this snapshot."""
+        return SnapshotMachine(self)
+    
+    def get_machine(self):
+        # alias matching ArchiveResult.get_machine() so BaseActor.tick() can handle both models
+        return SnapshotMachine(self)
+    
+    def has_pending_archiveresults(self):
+        return self.archiveresult_set.exclude(
+            status__in=['succeeded', 'failed']
+        ).exists()
+    
+    def bump_retry_at(self, seconds):
+        self.retry_at = timezone.now() + timedelta(seconds=seconds)
+        self.save()
+        
+    def cleanup_dir(self):
+        cleanup_snapshot_dir(self)
+        
+    def create_pending_archiveresults(self):
+        create_snapshot_pending_archiveresults(self)
+        
+    def update_indices(self):
+        update_snapshot_index_json(self)
+        update_snapshot_index_html(self)
+        
+    def seal_dir(self):
+        seal_snapshot_dir(self)
+        
+    def upload_dir(self):
+        upload_snapshot_dir(self)
+
+
+class ArchiveResult(models.Model):
+    snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE)
+    status = models.CharField(max_length=32, default='queued')
+    retry_at = models.DateTimeField(null=True)
+    retries = models.IntegerField(default=0)
+    max_retries = models.IntegerField(default=3)
+    timeout = models.IntegerField(default=60)
+    start_ts = models.DateTimeField(null=True)
+    end_ts = models.DateTimeField(null=True)
+    output = models.TextField(null=True)
+    error = models.TextField(null=True)
+    
+    def get_machine(self):
+        return ArchiveResultMachine(self)
+    
+    def bump_retry_at(self, seconds):
+        self.retry_at = timezone.now() + timedelta(seconds=seconds)
+        self.save()
+        
+    def update_indices(self):
+        update_archiveresult_index_json(self)
+        update_archiveresult_index_html(self)
+
+
+# Actor System
+#################################################
+
+class BaseActor:
+    MAX_TICK_TIME = 60
+    
+    def tick(self, obj):
+        """Process a single object through its state machine."""
+        machine = obj.get_machine()
+        
+        if machine.current_state.id == 'queued':
+            if machine.can_start():
+                machine.start()
+                
+        elif machine.current_state.id == 'started':
+            if machine.is_finished():
+                machine.seal()
+                
+        elif machine.current_state.id == 'backoff':
+            if machine.can_retry():
+                machine.retry()
+            else:
+                machine.fail()
+
+
+class Orchestrator:
+    """Main orchestrator that manages all actors."""
+    
+    def __init__(self):
+        self.pid = None
+        
+    @classmethod
+    def spawn(cls):
+        orchestrator = cls()
+        proc = Process(target=orchestrator.runloop)
+        proc.start()
+        return proc.pid
+        
+    def runloop(self):
+        self.pid = os.getpid()
+        abx.pm.hook.on_orchestrator_startup(self)
+        
+        try:
+            while True:
+                self.process_queue(Snapshot)
+                self.process_queue(ArchiveResult)
+                time.sleep(0.1)
+                
+        except (KeyboardInterrupt, SystemExit):
+            abx.pm.hook.on_orchestrator_shutdown(self)
+            
+    def process_queue(self, model):
+        retry_at_reached = Q(retry_at__isnull=True) | Q(retry_at__lte=timezone.now())
+        queue = model.objects.filter(retry_at_reached)
+        
+        if queue.exists():
+            actor = BaseActor()
+            for obj in queue:
+                try:
+                    with transaction.atomic():
+                        actor.tick(obj)
+                except Exception as e:
+                    abx.pm.hook.on_actor_tick_exception(actor, obj, e)
+
+
+# Periodic Tasks
+#################################################
+
+# NOTE: this sketch assumes a djhuey wrapper module exposing huey's periodic-task and crontab helpers
[email protected]_task(schedule=djhuey.crontab(minute='*'))
+def ensure_orchestrator_running():
+    """Ensure orchestrator is running, start if not."""
+    if not any(p.name().startswith('Orchestrator') for p in psutil.process_iter()):
+        Orchestrator.spawn()

+ 3 - 0
archivebox/actors/tests.py

@@ -0,0 +1,3 @@
+from django.test import TestCase
+
+# Create your tests here.

+ 3 - 0
archivebox/actors/views.py

@@ -0,0 +1,3 @@
+from django.shortcuts import render
+
+# Create your views here.

+ 24 - 30
archivebox/config/__init__.py

@@ -1,4 +1,5 @@
-__package__ = 'archivebox.config'
+__package__ = 'config'
+__order__ = 200
 
 from .paths import (
     PACKAGE_DIR,                                    # noqa
@@ -8,35 +9,28 @@ from .paths import (
 from .constants import CONSTANTS, CONSTANTS_CONFIG, PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR      # noqa
 from .version import VERSION                        # noqa
 
-
-import abx
-
+# import abx
 
 # @abx.hookimpl
-# def get_INSTALLED_APPS():
-#     return ['config']
-
+# def get_CONFIG():
+#     from .common import (
+#         SHELL_CONFIG,
+#         STORAGE_CONFIG,
+#         GENERAL_CONFIG,
+#         SERVER_CONFIG,
+#         ARCHIVING_CONFIG,
+#         SEARCH_BACKEND_CONFIG,
+#     )
+#     return {
+#         'SHELL_CONFIG': SHELL_CONFIG,
+#         'STORAGE_CONFIG': STORAGE_CONFIG,
+#         'GENERAL_CONFIG': GENERAL_CONFIG,
+#         'SERVER_CONFIG': SERVER_CONFIG,
+#         'ARCHIVING_CONFIG': ARCHIVING_CONFIG,
+#         'SEARCHBACKEND_CONFIG': SEARCH_BACKEND_CONFIG,
+#     }
 
[email protected]
-def get_CONFIG():
-    from .common import (
-        SHELL_CONFIG,
-        STORAGE_CONFIG,
-        GENERAL_CONFIG,
-        SERVER_CONFIG,
-        ARCHIVING_CONFIG,
-        SEARCH_BACKEND_CONFIG,
-    )
-    return {
-        'SHELL_CONFIG': SHELL_CONFIG,
-        'STORAGE_CONFIG': STORAGE_CONFIG,
-        'GENERAL_CONFIG': GENERAL_CONFIG,
-        'SERVER_CONFIG': SERVER_CONFIG,
-        'ARCHIVING_CONFIG': ARCHIVING_CONFIG,
-        'SEARCHBACKEND_CONFIG': SEARCH_BACKEND_CONFIG,
-    }
-
[email protected]
-def ready():
-    for config in get_CONFIG().values():
-        config.validate()
+# @abx.hookimpl
+# def ready():
+#     for config in get_CONFIG().values():
+#         config.validate()

+ 10 - 10
archivebox/config/configfile.py → archivebox/config/collection.py

@@ -9,16 +9,18 @@ from configparser import ConfigParser
 
 from benedict import benedict
 
+import archivebox
+
 from archivebox.config.constants import CONSTANTS
 
 from archivebox.misc.logging import stderr
 
 
 def get_real_name(key: str) -> str:
-    """get the current canonical name for a given deprecated config key"""
-    from django.conf import settings
+    """get the up-to-date canonical name for a given old alias or current key"""
+    CONFIGS = archivebox.pm.hook.get_CONFIGS()
     
-    for section in settings.CONFIGS.values():
+    for section in CONFIGS.values():
         try:
             return section.aliases[key]
         except KeyError:
@@ -115,17 +117,15 @@ def load_config_file() -> Optional[benedict]:
 
 
 def section_for_key(key: str) -> Any:
-    from django.conf import settings
-    for config_section in settings.CONFIGS.values():
+    for config_section in archivebox.pm.hook.get_CONFIGS().values():
         if hasattr(config_section, key):
             return config_section
-    return None
+    raise ValueError(f'No config section found for key: {key}')
 
 
 def write_config_file(config: Dict[str, str]) -> benedict:
     """load the ini-formatted config file from DATA_DIR/Archivebox.conf"""
 
-    import abx.archivebox.reads
     from archivebox.misc.system import atomic_write
 
     CONFIG_HEADER = (
@@ -175,7 +175,7 @@ def write_config_file(config: Dict[str, str]) -> benedict:
     updated_config = {}
     try:
         # validate the updated_config by attempting to re-parse it
-        updated_config = {**load_all_config(), **abx.archivebox.reads.get_FLAT_CONFIG()}
+        updated_config = {**load_all_config(), **archivebox.pm.hook.get_FLAT_CONFIG()}
     except BaseException:                                                       # lgtm [py/catch-base-exception]
         # something went horribly wrong, revert to the previous version
         with open(f'{config_path}.bak', 'r', encoding='utf-8') as old:
@@ -233,11 +233,11 @@ def load_config(defaults: Dict[str, Any],
     return benedict(extended_config)
 
 def load_all_config():
-    import abx.archivebox.reads
+    import abx
     
     flat_config = benedict()
     
-    for config_section in abx.archivebox.reads.get_CONFIGS().values():
+    for config_section in abx.pm.hook.get_CONFIGS().values():
         config_section.__init__()
         flat_config.update(config_section.model_dump())
         

+ 1 - 3
archivebox/config/common.py

@@ -10,7 +10,7 @@ from rich import print
 from pydantic import Field, field_validator
 from django.utils.crypto import get_random_string
 
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
 
 from .constants import CONSTANTS
 from .version import get_COMMIT_HASH, get_BUILD_TIME, VERSION
@@ -45,8 +45,6 @@ class ShellConfig(BaseConfigSet):
     def BUILD_TIME(self) -> str:
         return get_BUILD_TIME()
  
-    # def VERSIONS_AVAILABLE() -> bool             # .check_for_update.get_versions_available_on_github(c)},
-    # def CAN_UPGRADE() -> bool                    # .check_for_update.can_upgrade(c)},
 
 SHELL_CONFIG = ShellConfig()
 

+ 17 - 2
archivebox/config/constants.py

@@ -1,3 +1,15 @@
+"""
+Constants are for things that never change at runtime.
+(but they can change from run-to-run or machine-to-machine)
+
+DATA_DIR will never change at runtime, but you can run
+archivebox from inside a different DATA_DIR on the same machine.
+
+This is loaded very early in the archivebox startup flow, so nothing in this file 
+or imported from this file should import anything from archivebox.config.common, 
+django, other INSTALLED_APPS, or anything else that is not in the standard library.
+"""
+
 __package__ = 'archivebox.config'
 
 import re
@@ -197,10 +209,12 @@ class ConstantsDict(Mapping):
 
     @classmethod
     def __getitem__(cls, key: str):
+        # so that CONSTANTS['KEY'] behaves the same as CONSTANTS.KEY attribute access
         return getattr(cls, key)
     
     @classmethod
     def __benedict__(cls):
+        # when casting to benedict, only include uppercase keys that don't start with an underscore
         return benedict({key: value for key, value in cls.__dict__.items() if key.isupper() and not key.startswith('_')})
     
     @classmethod
@@ -214,5 +228,6 @@ class ConstantsDict(Mapping):
 CONSTANTS = ConstantsDict()
 CONSTANTS_CONFIG = CONSTANTS.__benedict__()
 
-# add all key: values to globals() for easier importing
-globals().update(CONSTANTS)
+# add all key: values to globals() for easier importing, e.g.:
+# from archivebox.config.constants import IS_ROOT, PERSONAS_DIR, ...
+# globals().update(CONSTANTS)

+ 2 - 2
archivebox/config/django.py

@@ -60,7 +60,7 @@ def setup_django(check_db=False, in_memory_db=False) -> None:
         return
 
     with Progress(transient=True, expand=True, console=STDERR) as INITIAL_STARTUP_PROGRESS:
-        INITIAL_STARTUP_PROGRESS_TASK = INITIAL_STARTUP_PROGRESS.add_task("[green]Loading modules...", total=25)
+        INITIAL_STARTUP_PROGRESS_TASK = INITIAL_STARTUP_PROGRESS.add_task("[green]Loading modules...", total=25, visible=False)
         
         from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, SudoPermission
     
@@ -97,7 +97,7 @@ def setup_django(check_db=False, in_memory_db=False) -> None:
                 except Exception as e:
                     bump_startup_progress_bar(advance=1000)
                     
-                    is_using_meta_cmd = any(ignored_subcommand in sys.argv for ignored_subcommand in ('help', 'version', '--help', '--version', 'init'))
+                    is_using_meta_cmd = any(ignored_subcommand in sys.argv for ignored_subcommand in ('help', 'version', '--help', '--version'))
                     if not is_using_meta_cmd:
                         # show error message to user only if they're not running a meta command / just trying to get help
                         STDERR.print()

+ 8 - 4
archivebox/config/version.py

@@ -45,7 +45,7 @@ def detect_installed_version(PACKAGE_DIR: Path=PACKAGE_DIR):
 @cache
 def get_COMMIT_HASH() -> Optional[str]:
     try:
-        git_dir = PACKAGE_DIR / '../.git'
+        git_dir = PACKAGE_DIR.parent / '.git'
         ref = (git_dir / 'HEAD').read_text().strip().split(' ')[-1]
         commit_hash = git_dir.joinpath(ref).read_text().strip()
         return commit_hash
@@ -53,7 +53,7 @@ def get_COMMIT_HASH() -> Optional[str]:
         pass
 
     try:
-        return list((PACKAGE_DIR / '../.git/refs/heads/').glob('*'))[0].read_text().strip()
+        return list((PACKAGE_DIR.parent / '.git/refs/heads/').glob('*'))[0].read_text().strip()
     except Exception:
         pass
     
@@ -62,8 +62,12 @@ def get_COMMIT_HASH() -> Optional[str]:
 @cache
 def get_BUILD_TIME() -> str:
     if IN_DOCKER:
-        docker_build_end_time = Path('/VERSION.txt').read_text().rsplit('BUILD_END_TIME=')[-1].split('\n', 1)[0]
-        return docker_build_end_time
+        try:
+            # if we're in the archivebox official docker image, /VERSION.txt will contain the build time
+            docker_build_end_time = Path('/VERSION.txt').read_text().rsplit('BUILD_END_TIME=')[-1].split('\n', 1)[0]
+            return docker_build_end_time
+        except Exception:
+            pass
 
     src_last_modified_unix_timestamp = (PACKAGE_DIR / 'README.md').stat().st_mtime
     return datetime.fromtimestamp(src_last_modified_unix_timestamp).strftime('%Y-%m-%d %H:%M:%S %s')

+ 12 - 16
archivebox/config/views.py

@@ -14,8 +14,8 @@ from django.utils.html import format_html, mark_safe
 from admin_data_views.typing import TableContext, ItemContext
 from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink
 
-import abx.archivebox.reads
-
+import abx
+import archivebox
 from archivebox.config import CONSTANTS
 from archivebox.misc.util import parse_date
 
@@ -65,7 +65,7 @@ def obj_to_yaml(obj: Any, indent: int=0) -> str:
 
 @render_with_table_view
 def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
-
+    FLAT_CONFIG = archivebox.pm.hook.get_FLAT_CONFIG()
     assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
 
     rows = {
@@ -81,12 +81,11 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
 
     relevant_configs = {
         key: val
-        for key, val in settings.FLAT_CONFIG.items()
+        for key, val in FLAT_CONFIG.items()
         if '_BINARY' in key or '_VERSION' in key
     }
 
-    for plugin_id, plugin in abx.archivebox.reads.get_PLUGINS().items():
-        plugin = abx.archivebox.reads.get_PLUGIN(plugin_id)
+    for plugin_id, plugin in abx.get_all_plugins().items():
         if not plugin.hooks.get('get_BINARIES'):
             continue
         
@@ -131,17 +130,16 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
 @render_with_item_view
 def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
 
-    assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
+    assert request.user and request.user.is_superuser, 'Must be a superuser to view configuration settings.'
 
     binary = None
     plugin = None
-    for plugin_id in abx.archivebox.reads.get_PLUGINS().keys():
-        loaded_plugin = abx.archivebox.reads.get_PLUGIN(plugin_id)
+    for plugin_id, plugin in abx.get_all_plugins().items():
         try:
-            for loaded_binary in loaded_plugin.hooks.get_BINARIES().values():
+            for loaded_binary in plugin['hooks'].get_BINARIES().values():
                 if loaded_binary.name == key:
                     binary = loaded_binary
-                    plugin = loaded_plugin
+                    # plugin is already bound to the matching plugin by the loop variable
                     # break  # last write wins
         except Exception as e:
             print(e)
@@ -161,7 +159,7 @@ def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
                 "name": binary.name,
                 "description": binary.abspath,
                 "fields": {
-                    'plugin': plugin.package,
+                    'plugin': plugin['package'],
                     'binprovider': binary.loaded_binprovider,
                     'abspath': binary.loaded_abspath,
                     'version': binary.loaded_version,
@@ -215,9 +213,7 @@ def plugins_list_view(request: HttpRequest, **kwargs) -> TableContext:
                 return color
         return 'black'
 
-    for plugin_id in settings.PLUGINS.keys():
-        
-        plugin = abx.archivebox.reads.get_PLUGIN(plugin_id)
+    for plugin_id, plugin in abx.get_all_plugins().items():
         plugin.hooks.get_BINPROVIDERS = plugin.hooks.get('get_BINPROVIDERS', lambda: {})
         plugin.hooks.get_BINARIES = plugin.hooks.get('get_BINARIES', lambda: {})
         plugin.hooks.get_CONFIG = plugin.hooks.get('get_CONFIG', lambda: {})
@@ -263,7 +259,7 @@ def plugin_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
 
     assert plugin_id, f'Could not find a plugin matching the specified name: {key}'
 
-    plugin = abx.archivebox.reads.get_PLUGIN(plugin_id)
+    plugin = abx.get_plugin(plugin_id)
 
     return ItemContext(
         slug=key,

+ 29 - 0
archivebox/core/__init__.py

@@ -1,2 +1,31 @@
 __package__ = 'archivebox.core'
 
+import abx
+
[email protected]
+def register_admin(admin_site):
+    """Register the core.models views (Snapshot, ArchiveResult, Tag, etc.) with the admin site"""
+    from core.admin import register_admin
+    register_admin(admin_site)
+
+
+
[email protected]
+def get_CONFIG():
+    from archivebox.config.common import (
+        SHELL_CONFIG,
+        STORAGE_CONFIG,
+        GENERAL_CONFIG,
+        SERVER_CONFIG,
+        ARCHIVING_CONFIG,
+        SEARCH_BACKEND_CONFIG,
+    )
+    return {
+        'SHELL_CONFIG': SHELL_CONFIG,
+        'STORAGE_CONFIG': STORAGE_CONFIG,
+        'GENERAL_CONFIG': GENERAL_CONFIG,
+        'SERVER_CONFIG': SERVER_CONFIG,
+        'ARCHIVING_CONFIG': ARCHIVING_CONFIG,
+        'SEARCHBACKEND_CONFIG': SEARCH_BACKEND_CONFIG,
+    }
+

+ 73 - 0
archivebox/core/actors.py

@@ -0,0 +1,73 @@
+__package__ = 'archivebox.core'
+
+from typing import ClassVar
+
+from rich import print
+
+from django.db.models import QuerySet
+from django.utils import timezone
+from datetime import timedelta
+from core.models import Snapshot
+
+from actors.actor import ActorType
+
+
+class SnapshotActor(ActorType[Snapshot]):
+    
+    QUERYSET: ClassVar[QuerySet] = Snapshot.objects.filter(status='queued')
+    CLAIM_WHERE: ClassVar[str] = 'status = "queued"'  # the WHERE clause to filter the objects when atomically getting the next object from the queue
+    CLAIM_SET: ClassVar[str] = 'status = "started"'   # the SET clause to claim the object when atomically getting the next object from the queue
+    CLAIM_ORDER: ClassVar[str] = 'created_at DESC'    # the ORDER BY clause to sort the objects with when atomically getting the next object from the queue
+    CLAIM_FROM_TOP: ClassVar[int] = 50                # the number of objects to consider when atomically getting the next object from the queue
+    
+    # model_type: Type[ModelType]
+    MAX_CONCURRENT_ACTORS: ClassVar[int] = 4               # min 2, max 8, up to 60% of available cpu cores
+    MAX_TICK_TIME: ClassVar[int] = 60                          # maximum duration in seconds to process a single object
+    
+    def claim_sql_where(self) -> str:
+        """override this to implement a custom WHERE clause for the atomic claim step e.g. "status = 'queued' AND locked_by = NULL" """
+        return self.CLAIM_WHERE
+    
+    def claim_sql_set(self) -> str:
+        """override this to implement a custom SET clause for the atomic claim step e.g. "status = 'started' AND locked_by = {self.pid}" """
+        retry_at = timezone.now() + timedelta(seconds=self.MAX_TICK_TIME)
+        # format as 2024-10-31 10:14:33.240903
+        retry_at_str = retry_at.strftime('%Y-%m-%d %H:%M:%S.%f')
+        return f'{self.CLAIM_SET}, retry_at = {retry_at_str}'
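+        # e.g. renders as: status = "started", retry_at = '2024-10-31 10:14:33.240903'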
+    
+    def claim_sql_order(self) -> str:
+        """override this to implement a custom ORDER BY clause for the atomic claim step e.g. "created_at DESC" """
+        return self.CLAIM_ORDER
+    
+    def claim_from_top(self) -> int:
+        """override this to implement a custom number of objects to consider when atomically claiming the next object from the top of the queue"""
+        return self.CLAIM_FROM_TOP
+        
+    def tick(self, obj: Snapshot) -> None:
+        """override this to process the object"""
+        print(f'[blue]🏃‍♂️ {self}.tick()[/blue]', obj.abid or obj.id)
+        # For example:
+        # do_some_task(obj)
+        # do_something_else(obj)
+        # obj._model.objects.filter(pk=obj.pk, status='started').update(status='success')
+        # raise NotImplementedError('tick() must be implemented by the Actor subclass')
+    
+    def on_shutdown(self, err: BaseException | None=None) -> None:
+        print(f'[grey53]🏃‍♂️ {self}.on_shutdown() SHUTTING DOWN[/grey53]', err or '[green](gracefully)[/green]')
+        # abx.pm.hook.on_actor_shutdown(self)
+        
+    def on_tick_start(self, obj: Snapshot) -> None:
+        # print(f'🏃‍♂️ {self}.on_tick_start()', obj.abid or obj.id)
+        # abx.pm.hook.on_actor_tick_start(self, obj_to_process)
+        # self.timer = TimedProgress(self.MAX_TICK_TIME, prefix='      ')
+        pass
+    
+    def on_tick_end(self, obj: Snapshot) -> None:
+        # print(f'🏃‍♂️ {self}.on_tick_end()', obj.abid or obj.id)
+        # abx.pm.hook.on_actor_tick_end(self, obj_to_process)
+        # self.timer.end()
+        pass
+    
+    def on_tick_exception(self, obj: Snapshot, err: BaseException) -> None:
+        print(f'[red]🏃‍♂️ {self}.on_tick_exception()[/red]', obj.abid or obj.id, err)
+        # abx.pm.hook.on_actor_tick_exception(self, obj_to_process, err)

+ 2 - 2
archivebox/core/admin_archiveresults.py

@@ -8,7 +8,7 @@ from django.utils.html import format_html, mark_safe
 from django.core.exceptions import ValidationError
 from django.urls import reverse, resolve
 from django.utils import timezone
-from django.forms import forms
+from django_jsonform.forms.fields import JSONFormField
 
 from huey_monitor.admin import TaskModel
 
@@ -83,7 +83,7 @@ class ArchiveResultInline(admin.TabularInline):
         formset.form.base_fields['cmd_version'].initial = '-'
         formset.form.base_fields['pwd'].initial = str(snapshot.link_dir)
         formset.form.base_fields['created_by'].initial = request.user
-        formset.form.base_fields['cmd'] = forms.JSONField(initial=['-'])
+        formset.form.base_fields['cmd'] = JSONFormField(initial=['-'])
         formset.form.base_fields['output'].initial = 'Manually recorded cmd output...'
         
         if obj is not None:

+ 2 - 2
archivebox/core/admin_site.py

@@ -2,7 +2,7 @@ __package__ = 'archivebox.core'
 
 from django.contrib import admin
 
-import abx.django.use
+import archivebox
 
 class ArchiveBoxAdmin(admin.AdminSite):
     site_header = 'ArchiveBox'
@@ -37,6 +37,6 @@ def register_admin_site():
     sites.site = archivebox_admin
     
     # register all plugins admin classes
-    abx.django.use.register_admin(archivebox_admin)
+    archivebox.pm.hook.register_admin(admin_site=archivebox_admin)
     
     return archivebox_admin

+ 4 - 9
archivebox/core/apps.py

@@ -2,7 +2,7 @@ __package__ = 'archivebox.core'
 
 from django.apps import AppConfig
 
-import abx
+import archivebox
 
 
 class CoreConfig(AppConfig):
@@ -10,16 +10,11 @@ class CoreConfig(AppConfig):
 
     def ready(self):
         """Register the archivebox.core.admin_site as the main django admin site"""
+        from django.conf import settings
+        archivebox.pm.hook.ready(settings=settings)
+        
         from core.admin_site import register_admin_site
         register_admin_site()
         
-        abx.pm.hook.ready()
-
-
 
 
[email protected]
-def register_admin(admin_site):
-    """Register the core.models views (Snapshot, ArchiveResult, Tag, etc.) with the admin site"""
-    from core.admin import register_admin
-    register_admin(admin_site)

+ 61 - 10
archivebox/core/models.py

@@ -8,21 +8,25 @@ import os
 import json
 
 from pathlib import Path
+from datetime import timedelta
 
 from django.db import models
 from django.utils.functional import cached_property
 from django.utils.text import slugify
+from django.utils import timezone
 from django.core.cache import cache
 from django.urls import reverse, reverse_lazy
 from django.db.models import Case, When, Value, IntegerField
 from django.contrib import admin
 from django.conf import settings
 
+from statemachine.mixins import MachineMixin
+
 from archivebox.config import CONSTANTS
 
 from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField
 from queues.tasks import bg_archive_snapshot
-# from crawls.models import Crawl
+from crawls.models import Crawl
 # from machine.models import Machine, NetworkInterface
 
 from archivebox.misc.system import get_dir_size
@@ -152,7 +156,7 @@ class SnapshotManager(models.Manager):
         return super().get_queryset().prefetch_related('tags', 'archiveresult_set')  # .annotate(archiveresult_count=models.Count('archiveresult')).distinct()
 
 
-class Snapshot(ABIDModel):
+class Snapshot(ABIDModel, MachineMixin):
     abid_prefix = 'snp_'
     abid_ts_src = 'self.created_at'
     abid_uri_src = 'self.url'
@@ -160,6 +164,17 @@ class Snapshot(ABIDModel):
     abid_rand_src = 'self.id'
     abid_drift_allowed = True
 
+    state_field_name = 'status'
+    state_machine_name = 'core.statemachines.SnapshotMachine'
+    state_machine_attr = 'sm'
+    
+    class SnapshotStatus(models.TextChoices):
+        QUEUED = 'queued', 'Queued'
+        STARTED = 'started', 'Started'
+        SEALED = 'sealed', 'Sealed'
+        
+    status = models.CharField(max_length=15, choices=SnapshotStatus.choices, default=SnapshotStatus.QUEUED, null=False, blank=False)
+
     id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
     abid = ABIDField(prefix=abid_prefix)
 
@@ -171,7 +186,7 @@ class Snapshot(ABIDModel):
     bookmarked_at = AutoDateTimeField(default=None, null=False, editable=True, db_index=True)
     downloaded_at = models.DateTimeField(default=None, null=True, editable=False, db_index=True, blank=True)
 
-    # crawl = models.ForeignKey(Crawl, on_delete=models.CASCADE, default=None, null=True, blank=True, related_name='snapshot_set')
+    crawl = models.ForeignKey(Crawl, on_delete=models.CASCADE, default=None, null=True, blank=True, related_name='snapshot_set')
 
     url = models.URLField(unique=True, db_index=True)
     timestamp = models.CharField(max_length=32, unique=True, db_index=True, editable=False)
@@ -396,6 +411,25 @@ class Snapshot(ABIDModel):
                 tags_id.append(Tag.objects.get_or_create(name=tag)[0].pk)
         self.tags.clear()
         self.tags.add(*tags_id)
+        
+    def has_pending_archiveresults(self) -> bool:
+        pending_statuses = [ArchiveResult.ArchiveResultStatus.QUEUED, ArchiveResult.ArchiveResultStatus.STARTED]
+        pending_archiveresults = self.archiveresult_set.filter(status__in=pending_statuses)
+        return pending_archiveresults.exists()
+    
+    def create_pending_archiveresults(self) -> list['ArchiveResult']:
+        archiveresults = []
+        for extractor in EXTRACTORS:
+            archiveresult, _created = ArchiveResult.objects.get_or_create(
+                snapshot=self,
+                extractor=extractor,
+                status=ArchiveResult.ArchiveResultStatus.QUEUED,
+            )
+            archiveresults.append(archiveresult)
+        return archiveresults
+    
+    def bump_retry_at(self, seconds: int = 10):
+        self.retry_at = timezone.now() + timedelta(seconds=seconds)
 
 
     # def get_storage_dir(self, create=True, symlink=True) -> Path:
@@ -452,6 +486,20 @@ class ArchiveResult(ABIDModel):
     abid_subtype_src = 'self.extractor'
     abid_rand_src = 'self.id'
     abid_drift_allowed = True
+    
+    state_field_name = 'status'
+    state_machine_name = 'core.statemachines.ArchiveResultMachine'
+    state_machine_attr = 'sm'
+
+    class ArchiveResultStatus(models.TextChoices):
+        QUEUED = 'queued', 'Queued'
+        STARTED = 'started', 'Started'
+        SUCCEEDED = 'succeeded', 'Succeeded'
+        FAILED = 'failed', 'Failed'
+        SKIPPED = 'skipped', 'Skipped'
+        BACKOFF = 'backoff', 'Waiting to retry'
+        
+    status = models.CharField(max_length=15, choices=ArchiveResultStatus.choices, default=ArchiveResultStatus.QUEUED, null=False, blank=False)
 
     EXTRACTOR_CHOICES = (
         ('htmltotext', 'htmltotext'),
@@ -469,11 +517,7 @@ class ArchiveResult(ABIDModel):
         ('title', 'title'),
         ('wget', 'wget'),
     )
-    STATUS_CHOICES = [
-        ("succeeded", "succeeded"),
-        ("failed", "failed"),
-        ("skipped", "skipped")
-    ]
+
 
     id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
     abid = ABIDField(prefix=abid_prefix)
@@ -491,7 +535,6 @@ class ArchiveResult(ABIDModel):
     output = models.CharField(max_length=1024)
     start_ts = models.DateTimeField(db_index=True)
     end_ts = models.DateTimeField()
-    status = models.CharField(max_length=16, choices=STATUS_CHOICES)
 
     # the network interface that was used to download this result
     # uplink = models.ForeignKey(NetworkInterface, on_delete=models.SET_NULL, null=True, blank=True, verbose_name='Network Interface Used')
@@ -552,7 +595,15 @@ class ArchiveResult(ABIDModel):
         return link.canonical_outputs().get(f'{self.extractor}_path')
 
     def output_exists(self) -> bool:
-        return os.access(self.output_path(), os.R_OK)
+        return os.path.exists(self.output_path())
+    
+    def bump_retry_at(self, seconds: int = 10):
+        self.retry_at = timezone.now() + timedelta(seconds=seconds)
+        
+    def create_output_dir(self):
+        snap_dir = self.snapshot_dir
+        snap_dir.mkdir(parents=True, exist_ok=True)
+        return snap_dir / self.output_path()
 
 
     # def get_storage_dir(self, create=True, symlink=True):

+ 19 - 58
archivebox/core/settings.py

@@ -9,13 +9,12 @@ from pathlib import Path
 from django.utils.crypto import get_random_string
 
 import abx
-import abx.archivebox
-import abx.archivebox.reads
-import abx.django.use
+import archivebox
 
-from archivebox.config import DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS
+from archivebox.config import DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS  # noqa
 from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG      # noqa
 
+
 IS_MIGRATING = 'makemigrations' in sys.argv[:3] or 'migrate' in sys.argv[:3]
 IS_TESTING = 'test' in sys.argv[:3] or 'PYTEST_CURRENT_TEST' in os.environ
 IS_SHELL = 'shell' in sys.argv[:3] or 'shell_plus' in sys.argv[:3]
@@ -25,45 +24,8 @@ IS_GETTING_VERSION_OR_HELP = 'version' in sys.argv or 'help' in sys.argv or '--v
 ### ArchiveBox Plugin Settings
 ################################################################################
 
-PLUGIN_HOOKSPECS = [
-    'abx.django.hookspec',
-    'abx.pydantic_pkgr.hookspec',
-    'abx.archivebox.hookspec',
-]
-abx.register_hookspecs(PLUGIN_HOOKSPECS)
-
-BUILTIN_PLUGIN_DIRS = {
-    'archivebox':              PACKAGE_DIR,
-    'plugins_pkg':             PACKAGE_DIR / 'plugins_pkg',
-    'plugins_auth':            PACKAGE_DIR / 'plugins_auth',
-    'plugins_search':          PACKAGE_DIR / 'plugins_search',
-    'plugins_extractor':       PACKAGE_DIR / 'plugins_extractor',
-}
-USER_PLUGIN_DIRS = {
-    # 'user_plugins':            DATA_DIR / 'user_plugins',
-}
-
-# Discover ArchiveBox plugins
-BUILTIN_PLUGINS = abx.get_plugins_in_dirs(BUILTIN_PLUGIN_DIRS)
-PIP_PLUGINS = abx.get_pip_installed_plugins(group='archivebox')
-USER_PLUGINS = abx.get_plugins_in_dirs(USER_PLUGIN_DIRS)
-ALL_PLUGINS = {**BUILTIN_PLUGINS, **PIP_PLUGINS, **USER_PLUGINS}
-
-# Load ArchiveBox plugins
-PLUGIN_MANAGER = abx.pm
-abx.archivebox.load_archivebox_plugins(PLUGIN_MANAGER, ALL_PLUGINS)
-PLUGINS = abx.archivebox.reads.get_PLUGINS()
-
-# Load ArchiveBox config from plugins
-CONFIGS = abx.archivebox.reads.get_CONFIGS()
-CONFIG = FLAT_CONFIG = abx.archivebox.reads.get_FLAT_CONFIG()
-BINPROVIDERS = abx.archivebox.reads.get_BINPROVIDERS()
-BINARIES = abx.archivebox.reads.get_BINARIES()
-EXTRACTORS = abx.archivebox.reads.get_EXTRACTORS()
-SEARCHBACKENDS = abx.archivebox.reads.get_SEARCHBACKENDS()
-# REPLAYERS = abx.archivebox.reads.get_REPLAYERS()
-# ADMINDATAVIEWS = abx.archivebox.reads.get_ADMINDATAVIEWS()
-
+ALL_PLUGINS = archivebox.ALL_PLUGINS
+LOADED_PLUGINS = archivebox.LOADED_PLUGINS
 
 ################################################################################
 ### Django Core Settings
@@ -102,7 +64,8 @@ INSTALLED_APPS = [
     # 'abid_utils',                # handles ABID ID creation, handling, and models
     'config',                    # ArchiveBox config settings (loaded as a plugin, don't need to add it here) 
     'machine',                   # handles collecting and storing information about the host machine, network interfaces, installed binaries, etc.
-    'queues',                    # handles starting and managing background workers and processes
+    'actors',                    # handles starting and managing background workers and processes (orchestrators and actors)
+    'queues',                    # handles starting and managing background workers and processes (supervisord)
     'seeds',                     # handles Seed model and URL source management
     'crawls',                    # handles Crawl and CrawlSchedule models and management
     'personas',                  # handles Persona and session management
@@ -110,7 +73,7 @@ INSTALLED_APPS = [
     'api',                       # Django-Ninja-based Rest API interfaces, config, APIToken model, etc.
 
     # ArchiveBox plugins
-    *abx.django.use.get_INSTALLED_APPS(),  # all plugin django-apps found in archivebox/plugins_* and data/user_plugins,
+    *abx.as_list(abx.pm.hook.get_INSTALLED_APPS()),  # all plugin django-apps found in archivebox/plugins_* and data/user_plugins,
 
     # 3rd-party apps from PyPI that need to be loaded last
     'admin_data_views',          # handles rendering some convenient automatic read-only views of data in Django admin
@@ -125,6 +88,7 @@ INSTALLED_APPS = [
 
 
 
+
 MIDDLEWARE = [
     'core.middleware.TimezoneMiddleware',
     'django.middleware.security.SecurityMiddleware',
@@ -135,7 +99,7 @@ MIDDLEWARE = [
     'core.middleware.ReverseProxyAuthMiddleware',
     'django.contrib.messages.middleware.MessageMiddleware',
     'core.middleware.CacheControlMiddleware',
-    *abx.django.use.get_MIDDLEWARES(),
+    *abx.as_list(abx.pm.hook.get_MIDDLEWARES()),
 ]
 
 
@@ -148,7 +112,7 @@ MIDDLEWARE = [
 AUTHENTICATION_BACKENDS = [
     'django.contrib.auth.backends.RemoteUserBackend',
     'django.contrib.auth.backends.ModelBackend',
-    *abx.django.use.get_AUTHENTICATION_BACKENDS(),
+    *abx.as_list(abx.pm.hook.get_AUTHENTICATION_BACKENDS()),
 ]
 
 
@@ -169,7 +133,7 @@ AUTHENTICATION_BACKENDS = [
 
 STATIC_URL = '/static/'
 TEMPLATES_DIR_NAME = 'templates'
-CUSTOM_TEMPLATES_ENABLED = os.access(CONSTANTS.CUSTOM_TEMPLATES_DIR, os.R_OK) and CONSTANTS.CUSTOM_TEMPLATES_DIR.is_dir()
+CUSTOM_TEMPLATES_ENABLED = os.path.isdir(CONSTANTS.CUSTOM_TEMPLATES_DIR) and os.access(CONSTANTS.CUSTOM_TEMPLATES_DIR, os.R_OK)
 STATICFILES_DIRS = [
     *([str(CONSTANTS.CUSTOM_TEMPLATES_DIR / 'static')] if CUSTOM_TEMPLATES_ENABLED else []),
     # *[
@@ -177,7 +141,7 @@ STATICFILES_DIRS = [
     #     for plugin_dir in PLUGIN_DIRS.values()
     #     if (plugin_dir / 'static').is_dir()
     # ],
-    *abx.django.use.get_STATICFILES_DIRS(),
+    *abx.as_list(abx.pm.hook.get_STATICFILES_DIRS()),
     str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'static'),
 ]
 
@@ -188,7 +152,7 @@ TEMPLATE_DIRS = [
     #     for plugin_dir in PLUGIN_DIRS.values()
     #     if (plugin_dir / 'templates').is_dir()
     # ],
-    *abx.django.use.get_TEMPLATE_DIRS(),
+    *abx.as_list(abx.pm.hook.get_TEMPLATE_DIRS()),
     str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'core'),
     str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'admin'),
     str(PACKAGE_DIR / TEMPLATES_DIR_NAME),
@@ -228,7 +192,7 @@ SQLITE_CONNECTION_OPTIONS = {
         # https://gcollazo.com/optimal-sqlite-settings-for-django/
         # https://litestream.io/tips/#busy-timeout
         # https://docs.djangoproject.com/en/5.1/ref/databases/#setting-pragma-options
-        "timeout": 5,
+        "timeout": 10,
         "check_same_thread": False,
         "transaction_mode": "IMMEDIATE",
         "init_command": (
@@ -267,7 +231,7 @@ if not IS_GETTING_VERSION_OR_HELP:             # dont create queue.sqlite3 file
     HUEY = {
         "huey_class": "huey.SqliteHuey",
         "filename": CONSTANTS.QUEUE_DATABASE_FILENAME,
-        "name": "system_tasks",
+        "name": "commands",
         "results": True,
         "store_none": True,
         "immediate": False,
@@ -288,11 +252,11 @@ if not IS_GETTING_VERSION_OR_HELP:             # dont create queue.sqlite3 file
     # https://huey.readthedocs.io/en/latest/contrib.html#setting-things-up
     # https://github.com/gaiacoop/django-huey
     DJANGO_HUEY = {
-        "default": "system_tasks",
+        "default": "commands",
         "queues": {
             HUEY["name"]: HUEY.copy(),
             # more registered here at plugin import-time by BaseQueue.register()
-            **abx.django.use.get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME=CONSTANTS.QUEUE_DATABASE_FILENAME),
+            **abx.as_dict(abx.pm.hook.get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME=CONSTANTS.QUEUE_DATABASE_FILENAME)),
         },
     }
 
@@ -517,7 +481,7 @@ ADMIN_DATA_VIEWS = {
                 "name": "log",
             },
         },
-        *abx.django.use.get_ADMIN_DATA_VIEWS_URLS(),
+        *abx.as_list(abx.pm.hook.get_ADMIN_DATA_VIEWS_URLS()),
     ],
 }
 
@@ -611,7 +575,4 @@ if DEBUG_REQUESTS_TRACKER:
 # JET_TOKEN = 'some-api-token-here'
 
 
-abx.django.use.register_checks()
-# abx.archivebox.reads.register_all_hooks(globals())
-
 # import ipdb; ipdb.set_trace()
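
Side note on the new hook-call style above: pluggy's `pm.hook.<name>()` returns one result per registered plugin hookimpl, so the settings now flatten or merge those per-plugin results with `abx.as_list()`/`abx.as_dict()`. A minimal sketch of the assumed behavior of these helpers (illustrative only; the real implementations live in the abx package):

    import itertools

    def as_list(hook_results):
        # [['app_a'], ['app_b', 'app_c']] -> ['app_a', 'app_b', 'app_c']
        return list(itertools.chain.from_iterable(hook_results))

    def as_dict(hook_results):
        # [{'q1': {...}}, {'q2': {...}}] -> {'q1': {...}, 'q2': {...}}
        merged = {}
        for result in hook_results:
            merged.update(result)
        return merged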

+ 0 - 5
archivebox/core/settings_logging.py

@@ -163,11 +163,6 @@ SETTINGS_LOGGING = {
             "level": "DEBUG",
             "propagate": False,
         },
-        "plugins_extractor": {
-            "handlers": ["default", "logfile"],
-            "level": "DEBUG",
-            "propagate": False,
-        },
         "httpx": {
             "handlers": ["outbound_webhooks"],
             "level": "INFO",

+ 115 - 0
archivebox/core/statemachines.py

@@ -0,0 +1,115 @@
+__package__ = 'archivebox.core'
+
+from django.utils import timezone
+
+from statemachine import State, StateMachine
+
+from core.models import Snapshot, ArchiveResult
+
+# State Machine Definitions
+#################################################
+
+
+class SnapshotMachine(StateMachine, strict_states=True):
+    """State machine for managing Snapshot lifecycle."""
+    
+    model: Snapshot
+    
+    # States
+    queued = State(value=Snapshot.SnapshotStatus.QUEUED, initial=True)
+    started = State(value=Snapshot.SnapshotStatus.STARTED)
+    sealed = State(value=Snapshot.SnapshotStatus.SEALED, final=True)
+    
+    # Tick Event
+    tick = (
+        queued.to.itself(unless='can_start', internal=True) |
+        queued.to(started, cond='can_start') |
+        started.to.itself(unless='is_finished', internal=True) |
+        started.to(sealed, cond='is_finished')
+    )
+    
+    def __init__(self, snapshot, *args, **kwargs):
+        self.snapshot = snapshot
+        super().__init__(snapshot, *args, **kwargs)
+        
+    def can_start(self) -> bool:
+        return bool(self.snapshot.seed and self.snapshot.seed.uri)
+        
+    def is_finished(self) -> bool:
+        return not self.snapshot.has_pending_archiveresults()
+        
+    def on_started(self):
+        self.snapshot.create_pending_archiveresults()
+        self.snapshot.bump_retry_at(seconds=60)
+        self.snapshot.save()
+        
+    def on_sealed(self):
+        self.snapshot.retry_at = None
+        self.snapshot.save()
+
+class ArchiveResultMachine(StateMachine, strict_states=True):
+    """State machine for managing ArchiveResult lifecycle."""
+    
+    model: ArchiveResult
+    
+    # States
+    queued = State(value=ArchiveResult.ArchiveResultStatus.QUEUED, initial=True)
+    started = State(value=ArchiveResult.ArchiveResultStatus.STARTED)
+    backoff = State(value=ArchiveResult.ArchiveResultStatus.BACKOFF)
+    succeeded = State(value=ArchiveResult.ArchiveResultStatus.SUCCEEDED, final=True)
+    failed = State(value=ArchiveResult.ArchiveResultStatus.FAILED, final=True)
+    
+    # Tick Event
+    tick = (
+        queued.to.itself(unless='can_start', internal=True) |
+        queued.to(started, cond='can_start') |
+        started.to.itself(unless='is_finished', internal=True) |
+        started.to(succeeded, cond='is_succeeded') |
+        started.to(failed, cond='is_failed') |
+        started.to(backoff, cond='is_backoff') |
+        backoff.to.itself(unless='can_start', internal=True) |
+        backoff.to(started, cond='can_start') |
+        backoff.to(succeeded, cond='is_succeeded') |
+        backoff.to(failed, cond='is_failed')
+    )
+
+    def __init__(self, archiveresult, *args, **kwargs):
+        self.archiveresult = archiveresult
+        super().__init__(archiveresult, *args, **kwargs)
+        
+    def can_start(self) -> bool:
+        return bool(self.archiveresult.snapshot and self.archiveresult.snapshot.is_started())
+    
+    def is_succeeded(self) -> bool:
+        return self.archiveresult.output_exists()
+    
+    def is_failed(self) -> bool:
+        return not self.archiveresult.output_exists()
+    
+    def is_backoff(self) -> bool:
+        return self.archiveresult.status == ArchiveResult.ArchiveResultStatus.BACKOFF
+
+    def on_started(self):
+        self.archiveresult.start_ts = timezone.now()
+        self.archiveresult.create_output_dir()
+        self.archiveresult.bump_retry_at(seconds=60)
+        self.archiveresult.save()
+
+    def on_backoff(self):
+        self.archiveresult.bump_retry_at(seconds=60)
+        self.archiveresult.save()
+
+    def on_succeeded(self):
+        self.archiveresult.end_ts = timezone.now()
+        self.archiveresult.save()
+
+    def on_failed(self):
+        self.archiveresult.end_ts = timezone.now()
+        self.archiveresult.save()
+        
+    def after_transition(self, event: str, source: State, target: State):
+        print(f"after '{event}' from '{source.id}' to '{target.id}'")
+        # self.archiveresult.save_merkle_index()
+        # self.archiveresult.save_html_index()
+        # self.archiveresult.save_json_index()
+        return "after_transition"

+ 25 - 18
archivebox/core/views.py

@@ -12,7 +12,6 @@ from django.views import View
 from django.views.generic.list import ListView
 from django.views.generic import FormView
 from django.db.models import Q
-from django.conf import settings
 from django.contrib import messages
 from django.contrib.auth.mixins import UserPassesTestMixin
 from django.views.decorators.csrf import csrf_exempt
@@ -21,6 +20,7 @@ from django.utils.decorators import method_decorator
 from admin_data_views.typing import TableContext, ItemContext
 from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink
 
+import archivebox
 
 from core.models import Snapshot
 from core.forms import AddLinkForm
@@ -32,9 +32,8 @@ from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG
 from archivebox.misc.util import base_url, htmlencode, ts_to_date_str
 from archivebox.misc.serve_static import serve_static_with_byterange_support
 
-from ..plugins_extractor.archivedotorg.config import ARCHIVEDOTORG_CONFIG
-from ..logging_util import printable_filesize
-from ..search import query_search_index
+from archivebox.logging_util import printable_filesize
+from archivebox.search import query_search_index
 
 
 class HomepageView(View):
@@ -69,7 +68,7 @@ class SnapshotView(View):
                 and embed_path
                 and os.access(abs_path, os.R_OK)
                 and abs_path.exists()):
-                if abs_path.is_dir() and not any(abs_path.glob('*.*')):
+                if os.path.isdir(abs_path) and not any(abs_path.glob('*.*')):
                     continue
 
                 result_info = {
@@ -103,7 +102,7 @@ class SnapshotView(View):
 
         # iterate through all the files in the snapshot dir and add the biggest ones to the result list
         snap_dir = Path(snapshot.link_dir)
-        assert os.access(snap_dir, os.R_OK) and os.access(snap_dir, os.X_OK)
+        assert os.path.isdir(snap_dir) and os.access(snap_dir, os.R_OK)
         
         for result_file in (*snap_dir.glob('*'), *snap_dir.glob('*/*')):
             extension = result_file.suffix.lstrip('.').lower()
@@ -154,7 +153,7 @@ class SnapshotView(View):
             'status_color': 'success' if link.is_archived else 'danger',
             'oldest_archive_date': ts_to_date_str(link.oldest_archive_date),
             'warc_path': warc_path,
-            'SAVE_ARCHIVE_DOT_ORG': ARCHIVEDOTORG_CONFIG.SAVE_ARCHIVE_DOT_ORG,
+            'SAVE_ARCHIVE_DOT_ORG': archivebox.pm.hook.get_FLAT_CONFIG().SAVE_ARCHIVE_DOT_ORG,
             'PREVIEW_ORIGINALS': SERVER_CONFIG.PREVIEW_ORIGINALS,
             'archiveresults': sorted(archiveresults.values(), key=lambda r: all_types.index(r['name']) if r['name'] in all_types else -r['size']),
             'best_result': best_result,
@@ -500,21 +499,25 @@ class HealthCheckView(View):
 
 
 def find_config_section(key: str) -> str:
+    CONFIGS = archivebox.pm.hook.get_CONFIGS()
+    
     if key in CONSTANTS_CONFIG:
         return 'CONSTANT'
     matching_sections = [
-        section_id for section_id, section in settings.CONFIGS.items() if key in section.model_fields
+        section_id for section_id, section in CONFIGS.items() if key in section.model_fields
     ]
     section = matching_sections[0] if matching_sections else 'DYNAMIC'
     return section
 
 def find_config_default(key: str) -> str:
+    CONFIGS = archivebox.pm.hook.get_CONFIGS()
+    
     if key in CONSTANTS_CONFIG:
         return str(CONSTANTS_CONFIG[key])
     
     default_val = None
 
-    for config in settings.CONFIGS.values():
+    for config in CONFIGS.values():
         if key in config.model_fields:
             default_val = config.model_fields[key].default
             break
@@ -530,7 +533,9 @@ def find_config_default(key: str) -> str:
     return default_val
 
 def find_config_type(key: str) -> str:
-    for config in settings.CONFIGS.values():
+    CONFIGS = archivebox.pm.hook.get_CONFIGS()
+    
+    for config in CONFIGS.values():
         if hasattr(config, key):
             type_hints = get_type_hints(config)
             try:
@@ -547,7 +552,8 @@ def key_is_safe(key: str) -> bool:
 
 @render_with_table_view
 def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
-
+    CONFIGS = archivebox.pm.hook.get_CONFIGS()
+    
     assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
 
     rows = {
@@ -560,7 +566,7 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
         # "Aliases": [],
     }
 
-    for section_id, section in reversed(list(settings.CONFIGS.items())):
+    for section_id, section in reversed(list(CONFIGS.items())):
         for key, field in section.model_fields.items():
             rows['Section'].append(section_id)   # section.replace('_', ' ').title().replace(' Config', '')
             rows['Key'].append(ItemLink(key, key=key))
@@ -570,7 +576,6 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
             # rows['Documentation'].append(mark_safe(f'Wiki: <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#{key.lower()}">{key}</a>'))
             # rows['Aliases'].append(', '.join(find_config_aliases(key)))
 
-   
     section = 'CONSTANT'
     for key in CONSTANTS_CONFIG.keys():
         rows['Section'].append(section)   # section.replace('_', ' ').title().replace(' Config', '')
@@ -589,7 +594,9 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
 
 @render_with_item_view
 def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
-
+    CONFIGS = archivebox.pm.hook.get_CONFIGS()
+    FLAT_CONFIG = archivebox.pm.hook.get_FLAT_CONFIG()
+    
     assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
 
     # aliases = USER_CONFIG.get(key, {}).get("aliases", [])
@@ -597,7 +604,7 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
 
     if key in CONSTANTS_CONFIG:
         section_header = mark_safe(f'[CONSTANTS]   &nbsp; <b><code style="color: lightgray">{key}</code></b> &nbsp; <small>(read-only, hardcoded by ArchiveBox)</small>')
-    elif key in settings.FLAT_CONFIG:
+    elif key in FLAT_CONFIG:
         section_header = mark_safe(f'data / ArchiveBox.conf &nbsp; [{find_config_section(key)}]  &nbsp; <b><code style="color: lightgray">{key}</code></b>')
     else:
         section_header = mark_safe(f'[DYNAMIC CONFIG]   &nbsp; <b><code style="color: lightgray">{key}</code></b> &nbsp; <small>(read-only, calculated at runtime)</small>')
@@ -613,7 +620,7 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
                 "fields": {
                     'Key': key,
                     'Type': find_config_type(key),
-                    'Value': settings.FLAT_CONFIG.get(key, settings.CONFIGS.get(key, None)) if key_is_safe(key) else '********',
+                    'Value': FLAT_CONFIG.get(key, CONFIGS.get(key, None)) if key_is_safe(key) else '********',
                 },
                 "help_texts": {
                     'Key': mark_safe(f'''
@@ -635,13 +642,13 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
                             <code>{find_config_default(key) or '↗️ See in ArchiveBox source code...'}</code>
                         </a>
                         <br/><br/>
-                        <p style="display: {"block" if key in settings.FLAT_CONFIG else "none"}">
+                        <p style="display: {"block" if key in FLAT_CONFIG else "none"}">
                             <i>To change this value, edit <code>data/ArchiveBox.conf</code> or run:</i>
                             <br/><br/>
                             <code>archivebox config --set {key}="{
                                 val.strip("'")
                                 if (val := find_config_default(key)) else
-                                (repr(settings.FLAT_CONFIG[key] if key_is_safe(key) else '********')).strip("'")
+                                (repr(FLAT_CONFIG[key] if key_is_safe(key) else '********')).strip("'")
                             }"</code>
                         </p>
                     '''),

+ 69 - 0
archivebox/crawls/actors.py

@@ -0,0 +1,69 @@
+__package__ = 'archivebox.crawls'
+
+from typing import ClassVar
+
+from rich import print
+
+from django.db.models import QuerySet
+
+from crawls.models import Crawl
+
+from actors.actor import ActorType
+
+
+class CrawlActor(ActorType[Crawl]):
+    
+    QUERYSET: ClassVar[QuerySet] = Crawl.objects.filter(status='queued')
+    CLAIM_WHERE: ClassVar[str] = 'status = "queued"'  # the WHERE clause to filter the objects when atomically getting the next object from the queue
+    CLAIM_SET: ClassVar[str] = 'status = "started"'   # the SET clause to claim the object when atomically getting the next object from the queue
+    CLAIM_ORDER: ClassVar[str] = 'created_at DESC'    # the ORDER BY clause to sort the objects with when atomically getting the next object from the queue
+    CLAIM_FROM_TOP: ClassVar[int] = 50                # the number of objects to consider when atomically getting the next object from the queue
+    
+    # model_type: Type[ModelType]
+    MAX_CONCURRENT_ACTORS: ClassVar[int] = 4          # min 2, max 8, up to 60% of available cpu cores
+    MAX_TICK_TIME: ClassVar[int] = 60                 # maximum duration in seconds to process a single object
+    
+    def claim_sql_where(self) -> str:
+        """override this to implement a custom WHERE clause for the atomic claim step e.g. "status = 'queued' AND locked_by = NULL" """
+        return self.CLAIM_WHERE
+    
+    def claim_sql_set(self) -> str:
+        """override this to implement a custom SET clause for the atomic claim step e.g. "status = 'started' AND locked_by = {self.pid}" """
+        return self.CLAIM_SET
+    
+    def claim_sql_order(self) -> str:
+        """override this to implement a custom ORDER BY clause for the atomic claim step e.g. "created_at DESC" """
+        return self.CLAIM_ORDER
+    
+    def claim_from_top(self) -> int:
+        """override this to implement a custom number of objects to consider when atomically claiming the next object from the top of the queue"""
+        return self.CLAIM_FROM_TOP
+        
+    def tick(self, obj: Crawl) -> None:
+        """override this to process the object"""
+        print(f'[blue]🏃‍♂️ {self}.tick()[/blue]', obj.abid or obj.id)
+        # For example:
+        # do_some_task(obj)
+        # do_something_else(obj)
+        # obj._model.objects.filter(pk=obj.pk, status='started').update(status='success')
+        # raise NotImplementedError('tick() must be implemented by the Actor subclass')
+    
+    def on_shutdown(self, err: BaseException | None=None) -> None:
+        print(f'[grey53]🏃‍♂️ {self}.on_shutdown() SHUTTING DOWN[/grey53]', err or '[green](gracefully)[/green]')
+        # abx.pm.hook.on_actor_shutdown(self)
+        
+    def on_tick_start(self, obj: Crawl) -> None:
+        # print(f'🏃‍♂️ {self}.on_tick_start()', obj.abid or obj.id)
+        # abx.pm.hook.on_actor_tick_start(self, obj_to_process)
+        # self.timer = TimedProgress(self.MAX_TICK_TIME, prefix='      ')
+        pass
+    
+    def on_tick_end(self, obj: Crawl) -> None:
+        # print(f'🏃‍♂️ {self}.on_tick_end()', obj.abid or obj.id)
+        # abx.pm.hook.on_actor_tick_end(self, obj_to_process)
+        # self.timer.end()
+        pass
+    
+    def on_tick_exception(self, obj: Crawl, err: BaseException) -> None:
+        print(f'[red]🏃‍♂️ {self}.on_tick_exception()[/red]', obj.abid or obj.id, err)
+        # abx.pm.hook.on_actor_tick_exception(self, obj_to_process, err)
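
One plausible shape for the atomic claim step that the CLAIM_* knobs above feed into (hypothetical; the real query lives in actors/actor.py, not shown in this listing): pick a random row among the top CLAIM_FROM_TOP matches so concurrent actors rarely contend for the same object:

    # 'table' is a placeholder for the model's db_table name
    claim_sql = f'''
        UPDATE {table}
        SET {self.claim_sql_set()}
        WHERE id = (
            SELECT id FROM (
                SELECT id FROM {table}
                WHERE {self.claim_sql_where()}
                ORDER BY {self.claim_sql_order()}
                LIMIT {self.claim_from_top()}
            )
            ORDER BY RANDOM()
            LIMIT 1
        )
        RETURNING id
    '''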

+ 48 - 5
archivebox/crawls/models.py

@@ -1,13 +1,20 @@
 __package__ = 'archivebox.crawls'
 
+from typing import TYPE_CHECKING
 from django_stubs_ext.db.models import TypedModelMeta
 
+from datetime import timedelta
+
 from django.db import models
-from django.db.models import Q
 from django.core.validators import MaxValueValidator, MinValueValidator 
 from django.conf import settings
-from django.utils import timezone
 from django.urls import reverse_lazy
+from django.utils import timezone
+
+from statemachine.mixins import MachineMixin
+
+if TYPE_CHECKING:
+    from core.models import Snapshot
 
 from seeds.models import Seed
 
@@ -41,8 +48,9 @@ class CrawlSchedule(ABIDModel, ModelWithHealthStats):
         """The base crawl that each new scheduled job should copy as a template"""
         return self.crawl_set.first()
 
+    
 
-class Crawl(ABIDModel, ModelWithHealthStats):
+class Crawl(ABIDModel, ModelWithHealthStats, MachineMixin):
     """
     A single session of URLs to archive starting from a given Seed and expanding outwards. An "archiving session" so to speak.
 
@@ -55,16 +63,29 @@ class Crawl(ABIDModel, ModelWithHealthStats):
     abid_prefix = 'crl_'
     abid_ts_src = 'self.created_at'
     abid_uri_src = 'self.seed.uri'
-    abid_subtype_src = 'self.persona_id'
+    abid_subtype_src = 'self.persona'
     abid_rand_src = 'self.id'
     abid_drift_allowed = True
+    
+    state_field_name = 'status'
+    state_machine_name = 'crawls.statemachines.CrawlMachine'
+    state_machine_attr = 'sm'
+    bind_events_as_methods = True
+
+    class CrawlStatus(models.TextChoices):
+        QUEUED = 'queued', 'Queued'
+        STARTED = 'started', 'Started'
+        SEALED = 'sealed', 'Sealed'
 
+    status = models.CharField(choices=CrawlStatus.choices, max_length=15, default=CrawlStatus.QUEUED, null=False, blank=False)
+    
     id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
     abid = ABIDField(prefix=abid_prefix)
 
     created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False, related_name='crawl_set')
     created_at = AutoDateTimeField(default=None, null=False, db_index=True)
     modified_at = models.DateTimeField(auto_now=True)
+    
 
     seed = models.ForeignKey(Seed, on_delete=models.PROTECT, related_name='crawl_set', null=False, blank=False)
     max_depth = models.PositiveSmallIntegerField(default=0, validators=[MinValueValidator(0), MaxValueValidator(4)])
@@ -79,7 +100,7 @@ class Crawl(ABIDModel, ModelWithHealthStats):
     # schedule = models.JSONField()
     # config = models.JSONField()
     
-    # snapshot_set: models.Manager['Snapshot']
+    snapshot_set: models.Manager['Snapshot']
     
 
     class Meta(TypedModelMeta):
@@ -102,6 +123,28 @@ class Crawl(ABIDModel, ModelWithHealthStats):
     @property
     def api_docs_url(self) -> str:
         return '/api/v1/docs#/Core%20Models/api_v1_core_get_crawl'
+    
+    def has_pending_archiveresults(self) -> bool:
+        from core.models import ArchiveResult
+        
+        pending_statuses = [ArchiveResult.ArchiveResultStatus.QUEUED, ArchiveResult.ArchiveResultStatus.STARTED]
+        
+        snapshot_ids = self.snapshot_set.values_list('id', flat=True)
+        pending_archiveresults = ArchiveResult.objects.filter(snapshot_id__in=snapshot_ids, status__in=pending_statuses)
+        return pending_archiveresults.exists()
+    
+    def create_root_snapshot(self) -> 'Snapshot':
+        from core.models import Snapshot
+        
+        root_snapshot, _ = Snapshot.objects.get_or_create(
+            crawl=self,
+            url=self.seed.uri,
+        )
+        return root_snapshot
+    
+    def bump_retry_at(self, seconds: int = 10):
+        self.retry_at = timezone.now() + timedelta(seconds=seconds)
+        self.save()
 
 
 class Outlink(models.Model):

+ 48 - 0
archivebox/crawls/statemachines.py

@@ -0,0 +1,48 @@
+__package__ = 'archivebox.crawls'
+
+from statemachine import State, StateMachine
+
+from crawls.models import Crawl
+
+# State Machine Definitions
+#################################################
+
+
+class CrawlMachine(StateMachine, strict_states=True):
+    """State machine for managing Crawl lifecycle."""
+    
+    model: Crawl
+    
+    # States
+    queued = State(value=Crawl.CrawlStatus.QUEUED, initial=True)
+    started = State(value=Crawl.CrawlStatus.STARTED)
+    sealed = State(value=Crawl.CrawlStatus.SEALED, final=True)
+    
+    # Tick Event
+    tick = (
+        queued.to.itself(unless='can_start', internal=True) |
+        queued.to(started, cond='can_start') |
+        started.to.itself(unless='is_finished', internal=True) |
+        started.to(sealed, cond='is_finished')
+    )
+    
+    def __init__(self, crawl, *args, **kwargs):
+        self.crawl = crawl
+        super().__init__(crawl, *args, **kwargs)
+        
+    def can_start(self) -> bool:
+        return bool(self.crawl.seed and self.crawl.seed.uri)
+        
+    def is_finished(self) -> bool:
+        return not self.crawl.has_pending_archiveresults()
+
+    def on_started(self):
+        self.crawl.create_root_snapshot()
+        self.crawl.bump_retry_at(seconds=10)
+        self.crawl.save()
+        
+    def on_sealed(self):
+        self.crawl.retry_at = None
+        self.crawl.save()
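
Given the MachineMixin config on the Crawl model above (state_field_name='status', state_machine_attr='sm', bind_events_as_methods=True), driving a crawl is presumably as simple as this illustrative sketch:

    crawl = Crawl.objects.filter(status=Crawl.CrawlStatus.QUEUED).first()
    crawl.tick()           # bound event method, delegates to crawl.sm.tick()
    print(crawl.status)    # 'started' once seed.uri is set; on_started() created the root Snapshot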

+ 16 - 30
archivebox/extractors/__init__.py

@@ -27,43 +27,29 @@ from ..logging_util import (
     log_archive_method_finished,
 )
 
-from .title import should_save_title, save_title
-from .favicon import should_save_favicon, save_favicon
-from .wget import should_save_wget, save_wget
-from .singlefile import should_save_singlefile, save_singlefile
-from .readability import should_save_readability, save_readability
-from .mercury import should_save_mercury, save_mercury
-from .htmltotext import should_save_htmltotext, save_htmltotext
-from .pdf import should_save_pdf, save_pdf
-from .screenshot import should_save_screenshot, save_screenshot
-from .dom import should_save_dom, save_dom
-from .git import should_save_git, save_git
-from .media import should_save_media, save_media
-from .archive_org import should_save_archive_dot_org, save_archive_dot_org
-from .headers import should_save_headers, save_headers
-
 
 ShouldSaveFunction = Callable[[Link, Optional[Path], Optional[bool]], bool]
 SaveFunction = Callable[[Link, Optional[Path], int], ArchiveResult]
 ArchiveMethodEntry = tuple[str, ShouldSaveFunction, SaveFunction]
 
 def get_default_archive_methods() -> List[ArchiveMethodEntry]:
+    # TODO: move to abx.pm.hook.get_EXTRACTORS()
     return [
-        ('favicon', should_save_favicon, save_favicon),
-        ('headers', should_save_headers, save_headers),
-        ('singlefile', should_save_singlefile, save_singlefile),
-        ('pdf', should_save_pdf, save_pdf),
-        ('screenshot', should_save_screenshot, save_screenshot),
-        ('dom', should_save_dom, save_dom),
-        ('wget', should_save_wget, save_wget),
-        # keep title, readability, and htmltotext below wget and singlefile, as they depend on them
-        ('title', should_save_title, save_title),
-        ('readability', should_save_readability, save_readability),
-        ('mercury', should_save_mercury, save_mercury),
-        ('htmltotext', should_save_htmltotext, save_htmltotext),
-        ('git', should_save_git, save_git),
-        ('media', should_save_media, save_media),
-        ('archive_org', should_save_archive_dot_org, save_archive_dot_org),
+        # ('favicon', should_save_favicon, save_favicon),
+        # ('headers', should_save_headers, save_headers),
+        # ('singlefile', should_save_singlefile, save_singlefile),
+        # ('pdf', should_save_pdf, save_pdf),
+        # ('screenshot', should_save_screenshot, save_screenshot),
+        # ('dom', should_save_dom, save_dom),
+        # ('wget', should_save_wget, save_wget),
+        # # keep title, readability, and htmltotext below wget and singlefile, as they depend on them
+        # ('title', should_save_title, save_title),
+        # ('readability', should_save_readability, save_readability),
+        # ('mercury', should_save_mercury, save_mercury),
+        # ('htmltotext', should_save_htmltotext, save_htmltotext),
+        # ('git', should_save_git, save_git),
+        # ('media', should_save_media, save_media),
+        # ('archive_org', should_save_archive_dot_org, save_archive_dot_org),
     ]
 
 ARCHIVE_METHODS_INDEXING_PRECEDENCE = [
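
A hypothetical sketch of the hook-based replacement the TODO above points at, assuming get_EXTRACTORS() returns a {name: extractor} mapping per plugin and each extractor exposes should_extract/extract callables (names illustrative, not confirmed by this commit):

    import abx

    def get_default_archive_methods():
        extractors = abx.as_dict(abx.pm.hook.get_EXTRACTORS())
        return [
            (name, extractor.should_extract, extractor.extract)
            for name, extractor in extractors.items()
        ]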

+ 7 - 4
archivebox/index/html.py

@@ -8,6 +8,8 @@ from typing import List, Optional, Iterator, Mapping
 from django.utils.html import format_html, mark_safe   # type: ignore
 from django.core.cache import cache
 
+import abx
+
 from archivebox.misc.system import atomic_write
 from archivebox.misc.util import (
     enforce_types,
@@ -19,7 +21,6 @@ from archivebox.misc.util import (
 from archivebox.config import CONSTANTS, DATA_DIR, VERSION
 from archivebox.config.common import SERVER_CONFIG
 from archivebox.config.version import get_COMMIT_HASH
-from archivebox.plugins_extractor.archivedotorg.config import ARCHIVEDOTORG_CONFIG
 
 from .schema import Link
 from ..logging_util import printable_filesize
@@ -79,8 +80,10 @@ def write_html_link_details(link: Link, out_dir: Optional[str]=None) -> None:
 
 @enforce_types
 def link_details_template(link: Link) -> str:
-
-    from ..extractors.wget import wget_output_path
+    
+    from abx_plugin_wget.wget import wget_output_path
+    
+    SAVE_ARCHIVE_DOT_ORG = abx.pm.hook.get_FLAT_CONFIG().SAVE_ARCHIVE_DOT_ORG
 
     link_info = link._asdict(extended=True)
 
@@ -102,7 +105,7 @@ def link_details_template(link: Link) -> str:
         'status': 'archived' if link.is_archived else 'not yet archived',
         'status_color': 'success' if link.is_archived else 'danger',
         'oldest_archive_date': ts_to_date_str(link.oldest_archive_date),
-        'SAVE_ARCHIVE_DOT_ORG': ARCHIVEDOTORG_CONFIG.SAVE_ARCHIVE_DOT_ORG,
+        'SAVE_ARCHIVE_DOT_ORG': SAVE_ARCHIVE_DOT_ORG,
         'PREVIEW_ORIGINALS': SERVER_CONFIG.PREVIEW_ORIGINALS,
     })
 

+ 3 - 4
archivebox/index/json.py

@@ -8,6 +8,8 @@ from pathlib import Path
 from datetime import datetime, timezone
 from typing import List, Optional, Iterator, Any, Union
 
+import abx
+
 from archivebox.config import VERSION, DATA_DIR, CONSTANTS
 from archivebox.config.common import SERVER_CONFIG, SHELL_CONFIG
 
@@ -19,8 +21,6 @@ from archivebox.misc.util import enforce_types
 
 @enforce_types
 def generate_json_index_from_links(links: List[Link], with_headers: bool):
-    from django.conf import settings
-    
     MAIN_INDEX_HEADER = {
         'info': 'This is an index of site data archived by ArchiveBox: The self-hosted web archive.',
         'schema': 'archivebox.index.json',
@@ -33,11 +33,10 @@ def generate_json_index_from_links(links: List[Link], with_headers: bool):
             'docs': 'https://github.com/ArchiveBox/ArchiveBox/wiki',
             'source': 'https://github.com/ArchiveBox/ArchiveBox',
             'issues': 'https://github.com/ArchiveBox/ArchiveBox/issues',
-            'dependencies': settings.BINARIES,
+            'dependencies': dict(abx.pm.hook.get_BINARIES()),
         },
     }
     
-    
     if with_headers:
         output = {
             **MAIN_INDEX_HEADER,

+ 6 - 3
archivebox/index/schema.py

@@ -17,9 +17,9 @@ from dataclasses import dataclass, asdict, field, fields
 
 from django.utils.functional import cached_property
 
-from archivebox.config import ARCHIVE_DIR, CONSTANTS
+import abx
 
-from plugins_extractor.favicon.config import FAVICON_CONFIG
+from archivebox.config import ARCHIVE_DIR, CONSTANTS
 
 from archivebox.misc.system import get_dir_size
 from archivebox.misc.util import ts_to_date_str, parse_date
@@ -426,7 +426,10 @@ class Link:
     def canonical_outputs(self) -> Dict[str, Optional[str]]:
         """predict the expected output paths that should be present after archiving"""
 
-        from ..extractors.wget import wget_output_path
+        from abx_plugin_wget.wget import wget_output_path
+        
+        FAVICON_CONFIG = abx.pm.hook.get_CONFIGS().favicon
+        
         # TODO: banish this awful duplication from the codebase and import these
         # from their respective extractor files
         canonical = {

+ 15 - 14
archivebox/machine/models.py

@@ -8,9 +8,10 @@ from django.db import models
 from django.utils import timezone
 from django.utils.functional import cached_property
 
-import abx.archivebox.reads
+import abx
+import archivebox
 
-from abx.archivebox.base_binary import BaseBinary, BaseBinProvider
+from pydantic_pkgr import Binary, BinProvider
 from archivebox.abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField, ModelWithHealthStats
 
 from .detect import get_host_guid, get_os_info, get_vm_info, get_host_network, get_host_stats
@@ -180,7 +181,7 @@ class NetworkInterface(ABIDModel, ModelWithHealthStats):
 
 
 class InstalledBinaryManager(models.Manager):
-    def get_from_db_or_cache(self, binary: BaseBinary) -> 'InstalledBinary':
+    def get_from_db_or_cache(self, binary: Binary) -> 'InstalledBinary':
         """Get or create an InstalledBinary record for a Binary on the local machine"""
         
         global _CURRENT_BINARIES
@@ -216,7 +217,7 @@ class InstalledBinaryManager(models.Manager):
             # if binary was not yet loaded from filesystem, do it now
             # this is expensive, we have to find its abspath, version, and sha256, but it's necessary
             # to make sure we have a good, up-to-date record of it in the DB & in-memory cache
-            binary = binary.load(fresh=True)
+            binary = archivebox.pm.hook.binary_load(binary=binary, fresh=True)
 
         assert binary.loaded_binprovider and binary.loaded_abspath and binary.loaded_version and binary.loaded_sha256, f'Failed to load binary {binary.name} abspath, version, and sha256'
         
@@ -291,8 +292,8 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats):
         if not hasattr(self, 'machine'):
             self.machine = Machine.objects.current()
         if not self.binprovider:
-            all_known_binproviders = list(abx.archivebox.reads.get_BINPROVIDERS().values())
-            binary = BaseBinary(name=self.name, binproviders=all_known_binproviders).load(fresh=True)
+            all_known_binproviders = list(abx.as_dict(archivebox.pm.hook.get_BINPROVIDERS()).values())
+            binary = archivebox.pm.hook.binary_load(binary=Binary(name=self.name, binproviders=all_known_binproviders), fresh=True)
             self.binprovider = binary.loaded_binprovider.name if binary.loaded_binprovider else None
         if not self.abspath:
             self.abspath = self.BINPROVIDER.get_abspath(self.name)
@@ -304,16 +305,16 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats):
         super().clean(*args, **kwargs)
 
     @cached_property
-    def BINARY(self) -> BaseBinary:
-        for binary in abx.archivebox.reads.get_BINARIES().values():
+    def BINARY(self) -> Binary:
+        for binary in abx.as_dict(archivebox.pm.hook.get_BINARIES()).values():
             if binary.name == self.name:
                 return binary
         raise Exception(f'Orphaned InstalledBinary {self.name} {self.binprovider} was found in DB, could not find any plugin that defines it')
         # TODO: we could technically reconstruct it from scratch, but why would we ever want to do that?
 
     @cached_property
-    def BINPROVIDER(self) -> BaseBinProvider:
-        for binprovider in abx.archivebox.reads.get_BINPROVIDERS().values():
+    def BINPROVIDER(self) -> BinProvider:
+        for binprovider in abx.as_dict(archivebox.pm.hook.get_BINPROVIDERS()).values():
             if binprovider.name == self.binprovider:
                 return binprovider
         raise Exception(f'Orphaned InstalledBinary(name={self.name}) was found in DB, could not find any plugin that defines BinProvider(name={self.binprovider})')
@@ -321,7 +322,7 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats):
     # maybe not a good idea to provide this? Binary in DB is a record of the binary's config
     # whereas a loaded binary is a not-yet saved instance that may not have the same config
     # why would we want to load a binary record from the db when it could be freshly loaded?
-    def load_from_db(self) -> BaseBinary:
+    def load_from_db(self) -> Binary:
         # TODO: implement defaults arg in pydantic_pkgr
         # return self.BINARY.load(defaults={
         #     'binprovider': self.BINPROVIDER,
@@ -330,7 +331,7 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats):
         #     'sha256': self.sha256,
         # })
         
-        return BaseBinary.model_validate({
+        return Binary.model_validate({
             **self.BINARY.model_dump(),
             'abspath': self.abspath and Path(self.abspath),
             'version': self.version,
@@ -340,5 +341,5 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats):
             'overrides': self.BINARY.overrides,
         })
 
-    def load_fresh(self) -> BaseBinary:
-        return self.BINARY.load(fresh=True)
+    def load_fresh(self) -> Binary:
+        return archivebox.pm.hook.binary_load(binary=self.BINARY, fresh=True)
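
The binary_load / binary_load_or_install calls above assume a pluggy hookimpl somewhere in the abx plugin wrapping pydantic_pkgr, roughly of this shape (a sketch, names inferred from the call sites):

    import abx
    from pydantic_pkgr import Binary

    @abx.hookimpl
    def binary_load(binary: Binary, **kwargs) -> Binary:
        # delegate to pydantic_pkgr's own loader; the hookspec is presumably
        # declared firstresult=True since call sites use the return value directly
        return binary.load(**kwargs)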

+ 33 - 30
archivebox/main.py

@@ -14,6 +14,10 @@ from crontab import CronTab, CronSlices
 from django.db.models import QuerySet
 from django.utils import timezone
 
+from pydantic_pkgr import Binary
+
+import abx
+import archivebox
 from archivebox.misc.checks import check_data_folder
 from archivebox.misc.util import enforce_types                         # type: ignore
 from archivebox.misc.system import get_dir_size, dedupe_cron_jobs, CRON_COMMENT
@@ -22,7 +26,7 @@ from archivebox.misc.logging import stderr, hint
 from archivebox.config import CONSTANTS, VERSION, DATA_DIR, ARCHIVE_DIR
 from archivebox.config.common import SHELL_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG, SERVER_CONFIG, ARCHIVING_CONFIG
 from archivebox.config.permissions import SudoPermission, IN_DOCKER
-from archivebox.config.configfile import (
+from archivebox.config.collection import (
     write_config_file,
     load_all_config,
     get_real_name,
@@ -195,15 +199,13 @@ def version(quiet: bool=False,
     console = Console()
     prnt = console.print
     
-    from django.conf import settings
-    
-    from abx.archivebox.base_binary import BaseBinary, apt, brew, env
+    from abx_plugin_default_binproviders import apt, brew, env
     
     from archivebox.config.version import get_COMMIT_HASH, get_BUILD_TIME
     from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, RUNNING_AS_UID, RUNNING_AS_GID
     from archivebox.config.paths import get_data_locations, get_code_locations
     
-    from plugins_auth.ldap.config import LDAP_CONFIG
+    LDAP_ENABLED = archivebox.pm.hook.get_SCOPE_CONFIG().LDAP_ENABLED
 
 
     # 0.7.1
@@ -242,7 +244,7 @@ def version(quiet: bool=False,
         f'SUDO={CONSTANTS.IS_ROOT}',
         f'ID={CONSTANTS.MACHINE_ID}:{CONSTANTS.COLLECTION_ID}',
         f'SEARCH_BACKEND={SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE}',
-        f'LDAP={LDAP_CONFIG.LDAP_ENABLED}',
+        f'LDAP={LDAP_ENABLED}',
         #f'DB=django.db.backends.sqlite3 (({CONFIG["SQLITE_JOURNAL_MODE"]})',  # add this if we have more useful info to show eventually
     )
     prnt()
@@ -264,7 +266,8 @@ def version(quiet: bool=False,
 
     prnt('[pale_green1][i] Binary Dependencies:[/pale_green1]')
     failures = []
-    for name, binary in list(settings.BINARIES.items()):
+    BINARIES = abx.as_dict(archivebox.pm.hook.get_BINARIES())
+    for name, binary in list(BINARIES.items()):
         if binary.name == 'archivebox':
             continue
         
@@ -295,14 +298,15 @@ def version(quiet: bool=False,
             
     prnt()
     prnt('[gold3][i] Package Managers:[/gold3]')
-    for name, binprovider in list(settings.BINPROVIDERS.items()):
+    BINPROVIDERS = abx.as_dict(archivebox.pm.hook.get_BINPROVIDERS())
+    for name, binprovider in list(BINPROVIDERS.items()):
         err = None
         
         if binproviders and binprovider.name not in binproviders:
             continue
         
         # TODO: implement a BinProvider.BINARY() method that gets the loaded binary for a binprovider's INSTALLER_BIN
-        loaded_bin = binprovider.INSTALLER_BINARY or BaseBinary(name=binprovider.INSTALLER_BIN, binproviders=[env, apt, brew])
+        loaded_bin = binprovider.INSTALLER_BINARY or Binary(name=binprovider.INSTALLER_BIN, binproviders=[env, apt, brew])
         
         abspath = None
         if loaded_bin.abspath:
@@ -1050,9 +1054,7 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
     #    - recommend user re-run with sudo if any deps need to be installed as root
 
     from rich import print
-    from django.conf import settings
     
-    from archivebox import CONSTANTS
     from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
     from archivebox.config.paths import get_or_create_working_lib_dir
 
@@ -1075,11 +1077,11 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
     
     package_manager_names = ', '.join(
         f'[yellow]{binprovider.name}[/yellow]'
-        for binprovider in list(settings.BINPROVIDERS.values())
+        for binprovider in reversed(list(abx.as_dict(abx.pm.hook.get_BINPROVIDERS()).values()))
         if not binproviders or (binproviders and binprovider.name in binproviders)
     )
     print(f'[+] Setting up package managers {package_manager_names}...')
-    for binprovider in list(settings.BINPROVIDERS.values()):
+    for binprovider in reversed(list(abx.as_dict(abx.pm.hook.get_BINPROVIDERS()).values())):
         if binproviders and binprovider.name not in binproviders:
             continue
         try:
@@ -1092,7 +1094,7 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
     
     print()
     
-    for binary in list(settings.BINARIES.values()):
+    for binary in reversed(list(abx.as_dict(abx.pm.hook.get_BINARIES()).values())):
         if binary.name in ('archivebox', 'django', 'sqlite', 'python'):
             # obviously must already be installed if we are running
             continue
@@ -1122,7 +1124,8 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
                                 result = binary.install(binproviders=[binprovider_name], dry_run=dry_run).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
                                 sys.stderr.write("\033[00m\n")     # reset
                             else:
-                                result = binary.load_or_install(binproviders=[binprovider_name], fresh=True, dry_run=dry_run, quiet=False).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
+                                loaded_binary = archivebox.pm.hook.binary_load_or_install(binary=binary, binproviders=[binprovider_name], fresh=True, dry_run=dry_run, quiet=False)
+                                result = loaded_binary.model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
                             if result and result['loaded_version']:
                                 break
                         except Exception as e:
@@ -1133,7 +1136,8 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
                         binary.install(dry_run=dry_run).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
                         sys.stderr.write("\033[00m\n")  # reset
                     else:
-                        binary.load_or_install(fresh=True, dry_run=dry_run).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
+                        loaded_binary = archivebox.pm.hook.binary_load_or_install(binary=binary, fresh=True, dry_run=dry_run)
+                        result = loaded_binary.model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
             if IS_ROOT and LIB_DIR:
                 with SudoPermission(uid=0):
                     if ARCHIVEBOX_USER == 0:
@@ -1157,7 +1161,7 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
     
     print('\n[green][√] Set up ArchiveBox and its dependencies successfully.[/green]\n', file=sys.stderr)
     
-    from plugins_pkg.pip.binaries import ARCHIVEBOX_BINARY
+    from abx_plugin_pip.binaries import ARCHIVEBOX_BINARY
     
     extra_args = []
     if binproviders:
@@ -1183,8 +1187,6 @@ def config(config_options_str: Optional[str]=None,
            out_dir: Path=DATA_DIR) -> None:
     """Get and set your ArchiveBox project configuration values"""
 
-    import abx.archivebox.reads
-
     from rich import print
 
     check_data_folder()
@@ -1198,7 +1200,8 @@ def config(config_options_str: Optional[str]=None,
     elif config_options_str:
         config_options = config_options_str.split('\n')
 
-    from django.conf import settings
+    FLAT_CONFIG = archivebox.pm.hook.get_FLAT_CONFIG()
+    CONFIGS = archivebox.pm.hook.get_CONFIGS()
     
     config_options = config_options or []
 
@@ -1208,8 +1211,8 @@ def config(config_options_str: Optional[str]=None,
     if search:
         if config_options:
             config_options = [get_real_name(key) for key in config_options]
-            matching_config = {key: settings.FLAT_CONFIG[key] for key in config_options if key in settings.FLAT_CONFIG}
-            for config_section in settings.CONFIGS.values():
+            matching_config = {key: FLAT_CONFIG[key] for key in config_options if key in FLAT_CONFIG}
+            for config_section in CONFIGS.values():
                 aliases = config_section.aliases
                 
                 for search_key in config_options:
@@ -1228,15 +1231,15 @@ def config(config_options_str: Optional[str]=None,
     elif get or no_args:
         if config_options:
             config_options = [get_real_name(key) for key in config_options]
-            matching_config = {key: settings.FLAT_CONFIG[key] for key in config_options if key in settings.FLAT_CONFIG}
-            failed_config = [key for key in config_options if key not in settings.FLAT_CONFIG]
+            matching_config = {key: FLAT_CONFIG[key] for key in config_options if key in FLAT_CONFIG}
+            failed_config = [key for key in config_options if key not in FLAT_CONFIG]
             if failed_config:
                 stderr()
                 stderr('[X] These options failed to get', color='red')
                 stderr('    {}'.format('\n    '.join(config_options)))
                 raise SystemExit(1)
         else:
-            matching_config = settings.FLAT_CONFIG
+            matching_config = FLAT_CONFIG
         
         print(printable_config(matching_config))
         raise SystemExit(not matching_config)
@@ -1257,20 +1260,20 @@ def config(config_options_str: Optional[str]=None,
             if key != raw_key:
                 stderr(f'[i] Note: The config option {raw_key} has been renamed to {key}, please use the new name going forwards.', color='lightyellow')
 
-            if key in settings.FLAT_CONFIG:
+            if key in FLAT_CONFIG:
                 new_config[key] = val.strip()
             else:
                 failed_options.append(line)
 
         if new_config:
-            before = settings.FLAT_CONFIG
+            before = FLAT_CONFIG
             matching_config = write_config_file(new_config)
-            after = {**load_all_config(), **abx.archivebox.reads.get_FLAT_CONFIG()}
+            after = {**load_all_config(), **archivebox.pm.hook.get_FLAT_CONFIG()}
             print(printable_config(matching_config))
 
             side_effect_changes = {}
             for key, val in after.items():
-                if key in settings.FLAT_CONFIG and (str(before[key]) != str(after[key])) and (key not in matching_config):
+                if key in FLAT_CONFIG and (str(before[key]) != str(after[key])) and (key not in matching_config):
                     side_effect_changes[key] = after[key]
                     # import ipdb; ipdb.set_trace()
 
@@ -1312,7 +1315,7 @@ def schedule(add: bool=False,
     """Set ArchiveBox to regularly import URLs at specific times using cron"""
     
     check_data_folder()
-    from archivebox.plugins_pkg.pip.binaries import ARCHIVEBOX_BINARY
+    from abx_plugin_pip.binaries import ARCHIVEBOX_BINARY
     from archivebox.config.permissions import USER
 
     Path(CONSTANTS.LOGS_DIR).mkdir(exist_ok=True)

+ 3 - 0
archivebox/misc/checks.py

@@ -201,6 +201,7 @@ def check_tmp_dir(tmp_dir=None, throw=False, quiet=False, must_exist=True):
 
 
 def check_lib_dir(lib_dir: Path | None = None, throw=False, quiet=False, must_exist=True):
+    import archivebox
     from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
     from archivebox.misc.logging import STDERR
     from archivebox.config.paths import dir_is_writable, get_or_create_working_lib_dir
@@ -209,6 +210,8 @@ def check_lib_dir(lib_dir: Path | None = None, throw=False, quiet=False, must_ex
     
     lib_dir = lib_dir or STORAGE_CONFIG.LIB_DIR
     
+    assert lib_dir == archivebox.pm.hook.get_LIB_DIR(), "lib_dir does not match the LIB_DIR reported by the abx get_LIB_DIR hook"
+    
     if not must_exist and not os.path.isdir(lib_dir):
         return True
     

+ 1 - 2
archivebox/misc/shell_welcome_message.py

@@ -23,7 +23,7 @@ from archivebox import CONSTANTS           # noqa
 from ..main import *                       # noqa
 from ..cli import CLI_SUBCOMMANDS
 
-CONFIG = settings.FLAT_CONFIG
+CONFIG = archivebox.pm.hook.get_FLAT_CONFIG()
 CLI_COMMAND_NAMES = ", ".join(CLI_SUBCOMMANDS.keys())
 
 if __name__ == '__main__':
@@ -55,6 +55,5 @@ if __name__ == '__main__':
     prnt('    add[blink][deep_sky_blue4]?[/deep_sky_blue4][/blink]                                                                        [grey53]# add ? after anything to get help[/]')
     prnt('    add("https://example.com/some/new/url")                                     [grey53]# call CLI methods from the shell[/]')
     prnt('    snap = Snapshot.objects.filter(url__contains="https://example.com").last()  [grey53]# query for individual snapshots[/]')
-    prnt('    archivebox.plugins_extractor.wget.apps.WGET_EXTRACTOR.extract(snap.id)      [grey53]# call an extractor directly[/]')
     prnt('    snap.archiveresult_set.all()                                                [grey53]# see extractor results[/]')
     prnt('    bool(re.compile(CONFIG.URL_DENYLIST).search("https://example.com/abc.exe")) [grey53]# test out a config change[/]')

+ 20 - 10
archivebox/misc/util.py

@@ -5,7 +5,7 @@ import requests
 import json as pyjson
 import http.cookiejar
 
-from typing import List, Optional, Any
+from typing import List, Optional, Any, Callable
 from pathlib import Path
 from inspect import signature
 from functools import wraps
@@ -19,14 +19,13 @@ from requests.exceptions import RequestException, ReadTimeout
 from base32_crockford import encode as base32_encode                            # type: ignore
 from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding
 try:
-    import chardet
+    import chardet    # type:ignore
     detect_encoding = lambda rawdata: chardet.detect(rawdata)["encoding"]
 except ImportError:
     detect_encoding = lambda rawdata: "utf-8"
 
 
-from archivebox.config import CONSTANTS
-from archivebox.config.common import ARCHIVING_CONFIG
+from archivebox.config.constants import CONSTANTS
 
 from .logging import COLOR_DICT
 
@@ -126,6 +125,7 @@ def is_static_file(url: str):
 def enforce_types(func):
     """
     Enforce function arg and kwarg types at runtime using its python3 type hints
+    A simpler version of pydantic's @validate_call decorator
     """
     # TODO: check return type as well
 
@@ -186,11 +186,11 @@ def str_between(string: str, start: str, end: str=None) -> str:
 
 
 @enforce_types
-def parse_date(date: Any) -> Optional[datetime]:
+def parse_date(date: Any) -> datetime:
     """Parse unix timestamps, iso format, and human-readable strings"""
     
     if date is None:
-        return None
+        return None    # type: ignore
 
     if isinstance(date, datetime):
         if date.tzinfo is None:
@@ -212,6 +212,8 @@ def parse_date(date: Any) -> Optional[datetime]:
 def download_url(url: str, timeout: int=None) -> str:
     """Download the contents of a remote url and return the text"""
 
+    from archivebox.config.common import ARCHIVING_CONFIG
+
     timeout = timeout or ARCHIVING_CONFIG.TIMEOUT
     session = requests.Session()
 
@@ -241,8 +243,12 @@ def download_url(url: str, timeout: int=None) -> str:
         return url.rsplit('/', 1)[-1]
 
 @enforce_types
-def get_headers(url: str, timeout: int=None) -> str:
+def get_headers(url: str, timeout: int | None=None) -> str:
     """Download the contents of a remote url and return the headers"""
+    # TODO: get rid of this and use an abx pluggy hook instead
+    
+    from archivebox.config.common import ARCHIVING_CONFIG
+    
     timeout = timeout or ARCHIVING_CONFIG.TIMEOUT
 
     try:
@@ -283,6 +289,7 @@ def get_headers(url: str, timeout: int=None) -> str:
 def ansi_to_html(text: str) -> str:
     """
     Based on: https://stackoverflow.com/questions/19212665/python-converting-ansi-color-codes-to-html
+    Simple way to render colored CLI stdout/stderr in HTML properly; Textual/rich is probably better though.
     """
 
     TEMPLATE = '<span style="color: rgb{}"><br>'
@@ -306,13 +313,13 @@ def ansi_to_html(text: str) -> str:
 @enforce_types
 def dedupe(options: List[str]) -> List[str]:
     """
-    Deduplicates the given options. Options that come later clobber earlier
-    conflicting options.
+    Deduplicates the given CLI args by key. Options that come later override earlier ones with the same key.
     """
     deduped = {}
 
     for option in options:
-        deduped[option.split('=')[0]] = option
+        key = option.split('=')[0]
+        deduped[key] = option
 
     return list(deduped.values())
 
@@ -344,6 +351,9 @@ class ExtendedEncoder(pyjson.JSONEncoder):
         
         elif cls_name in ('dict_items', 'dict_keys', 'dict_values'):
             return tuple(obj)
+        
+        elif isinstance(obj, Callable):
+            return str(obj)
 
         return pyjson.JSONEncoder.default(self, obj)
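
For example, the clarified dedupe() above keeps the last value for each repeated CLI flag while preserving first-seen key order:

    dedupe(['--timeout=5', '--depth=1', '--timeout=10'])
    # -> ['--timeout=10', '--depth=1']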
 

+ 2 - 5
archivebox/parsers/generic_jsonl.py

@@ -1,14 +1,11 @@
 __package__ = 'archivebox.parsers'
 
 import json
-
 from typing import IO, Iterable
 
-from ..index.schema import Link
-from archivebox.misc.util import (
-    enforce_types,
-)
+from archivebox.misc.util import enforce_types
 
+from ..index.schema import Link
 from .generic_json import jsonObjectToLink
 
 def parse_line(line: str):

+ 6 - 5
archivebox/parsers/pocket_api.py

@@ -6,8 +6,7 @@ import re
 from typing import IO, Iterable, Optional
 from configparser import ConfigParser
 
-from pocket import Pocket
-
+import archivebox
 from archivebox.config import CONSTANTS
 from archivebox.misc.util import enforce_types
 from archivebox.misc.system import atomic_write
@@ -22,7 +21,7 @@ API_DB_PATH = CONSTANTS.SOURCES_DIR / 'pocket_api.db'
 _BROKEN_PROTOCOL_RE = re.compile('^(http[s]?)(:/(?!/))')
 
 
-def get_pocket_articles(api: Pocket, since=None, page=0):
+def get_pocket_articles(api, since=None, page=0):
     body, headers = api.get(
         state='archive',
         sort='oldest',
@@ -94,7 +93,9 @@ def should_parse_as_pocket_api(text: str) -> bool:
 def parse_pocket_api_export(input_buffer: IO[str], **_kwargs) -> Iterable[Link]:
     """Parse bookmarks from the Pocket API"""
 
-    from archivebox.plugins_extractor.pocket.config import POCKET_CONFIG
+    from pocket import Pocket
+
+    FLAT_CONFIG = archivebox.pm.hook.get_FLAT_CONFIG()
 
     input_buffer.seek(0)
     pattern = re.compile(r"^pocket:\/\/(\w+)")
@@ -102,7 +103,7 @@ def parse_pocket_api_export(input_buffer: IO[str], **_kwargs) -> Iterable[Link]:
         if should_parse_as_pocket_api(line):
             
             username = pattern.search(line).group(1)
-            api = Pocket(POCKET_CONFIG.POCKET_CONSUMER_KEY, POCKET_CONFIG.POCKET_ACCESS_TOKENS[username])
+            api = Pocket(FLAT_CONFIG.POCKET_CONSUMER_KEY, FLAT_CONFIG.POCKET_ACCESS_TOKENS[username])
             api.last_since = None
     
             for article in get_pocket_articles(api, since=read_since(username)):

+ 16 - 9
archivebox/parsers/readwise_reader_api.py

@@ -8,9 +8,10 @@ from datetime import datetime
 from typing import IO, Iterable, Optional
 from configparser import ConfigParser
 
+import abx
+
 from archivebox.misc.util import enforce_types
 from archivebox.misc.system import atomic_write
-from archivebox.plugins_extractor.readwise.config import READWISE_CONFIG
 
 from ..index.schema import Link
 
@@ -62,26 +63,30 @@ def link_from_article(article: dict, sources: list):
 
 
 def write_cursor(username: str, since: str):
-    if not READWISE_CONFIG.READWISE_DB_PATH.exists():
-        atomic_write(READWISE_CONFIG.READWISE_DB_PATH, "")
+    READWISE_DB_PATH = abx.pm.hook.get_CONFIG().READWISE_DB_PATH
+    
+    if not READWISE_DB_PATH.exists():
+        atomic_write(READWISE_DB_PATH, "")
 
     since_file = ConfigParser()
     since_file.optionxform = str
-    since_file.read(READWISE_CONFIG.READWISE_DB_PATH)
+    since_file.read(READWISE_DB_PATH)
 
     since_file[username] = {"since": since}
 
-    with open(READWISE_CONFIG.READWISE_DB_PATH, "w+") as new:
+    with open(READWISE_DB_PATH, "w+") as new:
         since_file.write(new)
 
 
 def read_cursor(username: str) -> Optional[str]:
-    if not READWISE_CONFIG.READWISE_DB_PATH.exists():
-        atomic_write(READWISE_CONFIG.READWISE_DB_PATH, "")
+    READWISE_DB_PATH = abx.pm.hook.get_CONFIG().READWISE_DB_PATH
+    
+    if not READWISE_DB_PATH.exists():
+        atomic_write(READWISE_DB_PATH, "")
 
     config_file = ConfigParser()
     config_file.optionxform = str
-    config_file.read(READWISE_CONFIG.READWISE_DB_PATH)
+    config_file.read(READWISE_DB_PATH)
 
     return config_file.get(username, "since", fallback=None)
 
@@ -97,12 +102,14 @@ def should_parse_as_readwise_reader_api(text: str) -> bool:
 def parse_readwise_reader_api_export(input_buffer: IO[str], **_kwargs) -> Iterable[Link]:
     """Parse bookmarks from the Readwise Reader API"""
 
+    READWISE_READER_TOKENS = abx.pm.hook.get_CONFIG().READWISE_READER_TOKENS
+
     input_buffer.seek(0)
     pattern = re.compile(r"^readwise-reader:\/\/(\w+)")
     for line in input_buffer:
         if should_parse_as_readwise_reader_api(line):
             username = pattern.search(line).group(1)
-            api = ReadwiseReaderAPI(READWISE_CONFIG.READWISE_READER_TOKENS[username], cursor=read_cursor(username))
+            api = ReadwiseReaderAPI(READWISE_READER_TOKENS[username], cursor=read_cursor(username))
 
             for article in get_readwise_reader_articles(api):
                 yield link_from_article(article, sources=[line])
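
The cursor store that `write_cursor()`/`read_cursor()` manage is just an INI file with one section per username. A standalone round-trip sketch of the same ConfigParser logic (the /tmp path is a hypothetical stand-in for READWISE_DB_PATH):

```python
from configparser import ConfigParser
from pathlib import Path

db_path = Path('/tmp/readwise_reader_api.db')   # stand-in for READWISE_DB_PATH
db_path.touch(exist_ok=True)

# equivalent of write_cursor('alice', ...):
since_file = ConfigParser()
since_file.optionxform = str                    # keep option keys case-sensitive
since_file.read(db_path)
since_file['alice'] = {'since': '2024-10-28T00:00:00Z'}
with open(db_path, 'w+') as f:
    since_file.write(f)

# equivalent of read_cursor('alice'):
config_file = ConfigParser()
config_file.optionxform = str
config_file.read(db_path)
assert config_file.get('alice', 'since', fallback=None) == '2024-10-28T00:00:00Z'
```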

+ 39 - 0
archivebox/pkgs/__init__.py

@@ -0,0 +1,39 @@
+import sys
+import importlib
+from pathlib import Path
+
+PKGS_DIR = Path(__file__).parent
+
+VENDORED_PKGS = [
+    'abx',
+    # 'pydantic-pkgr',
+]
+
+# scan ./pkgs and add all dirs present to list of available VENDORED_PKGS
+for subdir in reversed(sorted(PKGS_DIR.iterdir())):
+    if subdir.is_dir() and subdir.name not in VENDORED_PKGS and not subdir.name.startswith('_'):
+        VENDORED_PKGS.append(subdir.name)
+
+
+def load_vendored_pkgs():
+    """Add archivebox/vendor to sys.path and import all vendored libraries present within"""
+    if str(PKGS_DIR) not in sys.path:
+        sys.path.append(str(PKGS_DIR))
+    
+    for pkg_name in VENDORED_PKGS:
+        pkg_dir = PKGS_DIR / pkg_name
+        assert pkg_dir.is_dir(), f'Required vendored pkg {pkg_name} could not be found in {pkg_dir}'
+
+        try:
+            lib = importlib.import_module(pkg_name)
+            # print(f"Successfully imported lib from environment {pkg_name}")
+        except ImportError:
+            sys.path.append(str(pkg_dir))
+            try:
+                lib = importlib.import_module(pkg_name)
+                # print(f"Successfully imported lib from vendored fallback {pkg_name}: {inspect.getfile(lib)}")
+            except ImportError as e:
+                print(f"Failed to import lib from environment or vendored fallback {pkg_name}: {e}", file=sys.stderr)
+                sys.exit(1)
+        
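
Because `load_vendored_pkgs()` prefers a pip-installed copy of each package and only falls back to the vendored source tree, it has to run before anything imports `abx` or the `abx_plugin_*` packages. A sketch of the intended call order (assuming this module is imported as `archivebox.pkgs`):

```python
from archivebox.pkgs import load_vendored_pkgs

# run this before `import abx` or any plugin import, otherwise those
# imports fail whenever the packages aren't installed into site-packages
load_vendored_pkgs()

import abx  # noqa: E402  # now resolvable from site-packages or the vendored fallback
```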
+

+ 0 - 0
archivebox/plugins_pkg/__init__.py → archivebox/pkgs/abx-plugin-archivedotorg/README.md


+ 21 - 0
archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/__init__.py

@@ -0,0 +1,21 @@
+__label__ = 'Archive.org'
+__homepage__ = 'https://archive.org'
+
+import abx
+
+@abx.hookimpl
+def get_CONFIG():
+    from .config import ARCHIVEDOTORG_CONFIG
+    
+    return {
+        'ARCHIVEDOTORG_CONFIG': ARCHIVEDOTORG_CONFIG
+    }
+
+
+# @abx.hookimpl
+# def get_EXTRACTORS():
+#     from .extractors import ARCHIVEDOTORG_EXTRACTOR
+#
+#     return {
+#         'archivedotorg': ARCHIVEDOTORG_EXTRACTOR,
+#     }
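
Every plugin package in this PR follows this shape: module-level metadata plus `@abx.hookimpl` functions whose imports are deferred until the hook actually fires. Roughly how a consumer sees it, assuming `abx.pm` is the shared pluggy PluginManager (a sketch of pluggy mechanics, not the exact ABX helper API):

```python
import abx
import abx_plugin_archivedotorg

# register manually only if it wasn't already loaded via entry points
abx.pm.register(abx_plugin_archivedotorg)

# pluggy returns one result per registered hookimpl; merge them into one mapping
CONFIGS = {}
for result in abx.pm.hook.get_CONFIG():
    CONFIGS.update(result)

print(CONFIGS['ARCHIVEDOTORG_CONFIG'])
```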

+ 0 - 0
archivebox/extractors/archive_org.py → archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/archive_org.py


+ 1 - 4
archivebox/plugins_extractor/archivedotorg/config.py → archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/config.py

@@ -1,7 +1,4 @@
-__package__ = 'plugins_extractor.archivedotorg'
-
-
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
 
 
 class ArchivedotorgConfig(BaseConfigSet):

+ 18 - 0
archivebox/pkgs/abx-plugin-archivedotorg/pyproject.toml

@@ -0,0 +1,18 @@
+[project]
+name = "abx-plugin-archivedotorg"
+version = "2024.10.28"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "abx>=0.1.0",
+    "abx-spec-config>=0.1.0",
+    "abx-plugin-curl>=2024.10.24",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_archivedotorg = "abx_plugin_archivedotorg"
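
The `[project.entry-points.abx]` table is what makes each plugin discoverable without being hard-coded into ArchiveBox: pluggy can scan installed distributions for that entry-point group. A sketch using pluggy's stock loader (assuming ABX registers its hookspecs under the `abx` project name):

```python
import pluggy

pm = pluggy.PluginManager('abx')
# imports every installed package that declares a [project.entry-points.abx]
# entry and registers its @abx.hookimpl functions
pm.load_setuptools_entrypoints('abx')
print([name for name, _plugin in pm.list_name_plugin()])
```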

+ 0 - 0
archivebox/plugins_search/__init__.py → archivebox/pkgs/abx-plugin-chrome/README.md


+ 34 - 0
archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/__init__.py

@@ -0,0 +1,34 @@
+__label__ = 'Chrome'
+__author__ = 'ArchiveBox'
+
+import abx
+
+@abx.hookimpl
+def get_CONFIG():
+    from .config import CHROME_CONFIG
+    
+    return {
+        'CHROME_CONFIG': CHROME_CONFIG
+    }
+
+@abx.hookimpl
+def get_BINARIES():
+    from .binaries import CHROME_BINARY
+    
+    return {
+        'chrome': CHROME_BINARY,
+    }
+
+@abx.hookimpl
+def ready():
+    from .config import CHROME_CONFIG
+    CHROME_CONFIG.validate()
+
+
+# @abx.hookimpl
+# def get_EXTRACTORS():
+#     return {
+#         'pdf': PDF_EXTRACTOR,
+#         'screenshot': SCREENSHOT_EXTRACTOR,
+#         'dom': DOM_EXTRACTOR,
+#     }

+ 23 - 21
archivebox/plugins_extractor/chrome/binaries.py → archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/binaries.py

@@ -1,5 +1,3 @@
-__package__ = 'plugins_extractor.chrome'
-
 import os
 import platform
 from pathlib import Path
@@ -7,21 +5,22 @@ from typing import List, Optional
 
 from pydantic import InstanceOf
 from pydantic_pkgr import (
+    Binary,
     BinProvider,
     BinName,
     BinaryOverrides,
     bin_abspath,
 )
 
-from abx.archivebox.base_binary import BaseBinary, env, apt, brew
+import abx
 
-# Depends on Other Plugins:
-from archivebox.config.common import SHELL_CONFIG
-from plugins_pkg.puppeteer.binproviders import PUPPETEER_BINPROVIDER
-from plugins_pkg.playwright.binproviders import PLAYWRIGHT_BINPROVIDER
+from abx_plugin_default_binproviders import apt, brew, env
+from abx_plugin_puppeteer.binproviders import PUPPETEER_BINPROVIDER
+from abx_plugin_playwright.binproviders import PLAYWRIGHT_BINPROVIDER
 
 
 from .config import CHROME_CONFIG
+
 CHROMIUM_BINARY_NAMES_LINUX = [
     "chromium",
     "chromium-browser",
@@ -48,12 +47,13 @@ CHROME_BINARY_NAMES_MACOS = [
 ]
 CHROME_BINARY_NAMES = CHROME_BINARY_NAMES_LINUX + CHROME_BINARY_NAMES_MACOS
 
-APT_DEPENDENCIES = [
-    'apt-transport-https', 'at-spi2-common', 'chromium-browser',
+CHROME_APT_DEPENDENCIES = [
+    'apt-transport-https', 'at-spi2-common',
     'fontconfig', 'fonts-freefont-ttf', 'fonts-ipafont-gothic', 'fonts-kacst', 'fonts-khmeros', 'fonts-liberation', 'fonts-noto', 'fonts-noto-color-emoji', 'fonts-symbola', 'fonts-thai-tlwg', 'fonts-tlwg-loma-otf', 'fonts-unifont', 'fonts-wqy-zenhei',
     'libasound2', 'libatk-bridge2.0-0', 'libatk1.0-0', 'libatspi2.0-0', 'libavahi-client3', 'libavahi-common-data', 'libavahi-common3', 'libcairo2', 'libcups2',
     'libdbus-1-3', 'libdrm2', 'libfontenc1', 'libgbm1', 'libglib2.0-0', 'libice6', 'libnspr4', 'libnss3', 'libsm6', 'libunwind8', 'libx11-6', 'libxaw7', 'libxcb1',
     'libxcomposite1', 'libxdamage1', 'libxext6', 'libxfixes3', 'libxfont2', 'libxkbcommon0', 'libxkbfile1', 'libxmu6', 'libxpm4', 'libxrandr2', 'libxt6', 'x11-utils', 'x11-xkb-utils', 'xfonts-encodings',
+    'chromium-browser',
 ]
 
 
@@ -80,7 +80,7 @@ def create_macos_app_symlink(target: Path, shortcut: Path):
 ###################### Config ##########################
 
 
-class ChromeBinary(BaseBinary):
+class ChromeBinary(Binary):
     name: BinName = CHROME_CONFIG.CHROME_BINARY
     binproviders_supported: List[InstanceOf[BinProvider]] = [PUPPETEER_BINPROVIDER, env, PLAYWRIGHT_BINPROVIDER, apt, brew]
     
@@ -95,7 +95,7 @@ class ChromeBinary(BaseBinary):
             'packages': ['chromium'],                   # playwright install chromium
         },
         apt.name: {
-            'packages': APT_DEPENDENCIES,
+            'packages': CHROME_APT_DEPENDENCIES,
         },
         brew.name: {
             'packages': ['--cask', 'chromium'] if platform.system().lower() == 'darwin' else [],
@@ -104,10 +104,9 @@ class ChromeBinary(BaseBinary):
 
     @staticmethod
     def symlink_to_lib(binary, bin_dir=None) -> None:
-        from archivebox.config.common import STORAGE_CONFIG
-        bin_dir = bin_dir or STORAGE_CONFIG.LIB_DIR / 'bin'
+        bin_dir = bin_dir or abx.pm.hook.get_BIN_DIR()
         
-        if not (binary.abspath and os.access(binary.abspath, os.F_OK)):
+        if not (binary.abspath and os.path.isfile(binary.abspath)):
             return
         
         bin_dir.mkdir(parents=True, exist_ok=True)
@@ -121,7 +120,7 @@ class ChromeBinary(BaseBinary):
                 # otherwise on linux we can symlink directly to binary executable
                 symlink.unlink(missing_ok=True)
                 symlink.symlink_to(binary.abspath)
-        except Exception as err:
+        except Exception:
             # print(f'[red]:warning: Failed to symlink {symlink} -> {binary.abspath}[/red] {err}')
             # not actually needed, we can just run without it
             pass
@@ -132,14 +131,17 @@ class ChromeBinary(BaseBinary):
         Cleans up any state or runtime files that chrome leaves behind when killed by
         a timeout or other error
         """
-        lock_file = Path("~/.config/chromium/SingletonLock").expanduser()
-
-        if SHELL_CONFIG.IN_DOCKER and os.access(lock_file, os.F_OK):
-            lock_file.unlink()
+        try:
+            linux_lock_file = Path("~/.config/chromium/SingletonLock").expanduser()
+            linux_lock_file.unlink(missing_ok=True)
+        except Exception:
+            pass
         
         if CHROME_CONFIG.CHROME_USER_DATA_DIR:
-            if os.access(CHROME_CONFIG.CHROME_USER_DATA_DIR / 'SingletonLock', os.F_OK):
-                lock_file.unlink()
+            try:
+                (CHROME_CONFIG.CHROME_USER_DATA_DIR / 'SingletonLock').unlink(missing_ok=True)
+            except Exception:
+                pass
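
With `ChromeBinary` now a plain `pydantic_pkgr.Binary`, resolution just walks `binproviders_supported` in order. A rough usage sketch (hedged: the exact pydantic_pkgr method names may vary by version):

```python
from abx_plugin_chrome.binaries import CHROME_BINARY, ChromeBinary

# tries puppeteer, env, playwright, apt, brew in order until one
# can find (or install) a working chrome/chromium binary
chrome = CHROME_BINARY.load_or_install()
print(chrome.abspath, chrome.version)

# then expose the resolved binary under the lib bin dir for other tools
ChromeBinary.symlink_to_lib(chrome)
```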
 
 
 

+ 13 - 13
archivebox/plugins_extractor/chrome/config.py → archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/config.py

@@ -1,5 +1,3 @@
-__package__ = 'plugins_extractor.chrome'
-
 import os
 from pathlib import Path
 from typing import List, Optional
@@ -7,8 +5,8 @@ from typing import List, Optional
 from pydantic import Field
 from pydantic_pkgr import bin_abspath
 
-from abx.archivebox.base_configset import BaseConfigSet
-from abx.archivebox.base_binary import env
+from abx_spec_config.base_configset import BaseConfigSet
+from abx_plugin_default_binproviders import env
 
 from archivebox.config import CONSTANTS
 from archivebox.config.common import ARCHIVING_CONFIG, SHELL_CONFIG
@@ -81,15 +79,16 @@ class ChromeConfig(BaseConfigSet):
     # Chrome Binary
     CHROME_BINARY: str                      = Field(default='chrome')
     CHROME_DEFAULT_ARGS: List[str]          = Field(default=[
-        '--virtual-time-budget=15000',
-        '--disable-features=DarkMode',
-        "--run-all-compositor-stages-before-draw",
-        "--hide-scrollbars",
-        "--autoplay-policy=no-user-gesture-required",
-        "--no-first-run",
-        "--use-fake-ui-for-media-stream",
-        "--use-fake-device-for-media-stream",
-        "--simulate-outdated-no-au='Tue, 31 Dec 2099 23:59:59 GMT'",
+        "--no-first-run",                                              # dont show any first run ui / setup prompts
+        '--virtual-time-budget=15000',                                 # accellerate any animations on the page by 15s into the future
+        '--disable-features=DarkMode',                                 # disable dark mode for archiving
+        "--run-all-compositor-stages-before-draw",                     # dont draw partially rendered content, wait until everything is ready
+        "--hide-scrollbars",                                           # hide scrollbars to prevent layout shift / scrollbar visible in screenshots
+        "--autoplay-policy=no-user-gesture-required",                  # allow media autoplay without user gesture (e.g. on mobile)
+        "--use-fake-ui-for-media-stream",                              # provide fake camera if site tries to request camera access
+        "--use-fake-device-for-media-stream",                          # provide fake camera if site tries to request camera access
+        "--simulate-outdated-no-au='Tue, 31 Dec 2099 23:59:59 GMT'",   # ignore chrome updates
+        "--force-gpu-mem-available-mb=4096",                           # allows for longer full page screenshots https://github.com/puppeteer/puppeteer/issues/5530
     ])
     CHROME_EXTRA_ARGS: List[str]           = Field(default=[])
     
@@ -196,6 +195,7 @@ class ChromeConfig(BaseConfigSet):
             cmd_args.append('--user-data-dir={}'.format(options.CHROME_USER_DATA_DIR))
             cmd_args.append('--profile-directory={}'.format(options.CHROME_PROFILE_NAME or 'Default'))
         
+            # if CHROME_USER_DATA_DIR is set but folder is empty, create a new profile inside it
             if not os.path.isfile(options.CHROME_USER_DATA_DIR / options.CHROME_PROFILE_NAME / 'Preferences'):
                 STDERR.print(f'[green]        + creating new Chrome profile in: {pretty_path(options.CHROME_USER_DATA_DIR / options.CHROME_PROFILE_NAME)}[/green]')
                 cmd_args.remove('--no-first-run')
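
Since args are merged per flag key, `CHROME_EXTRA_ARGS` can override any entry in `CHROME_DEFAULT_ARGS` rather than only append. A sketch of the merge using `dedupe()` from misc/util.py (assuming defaults are combined before extras, as the field names suggest):

```python
from archivebox.misc.util import dedupe

defaults = ['--virtual-time-budget=15000', '--hide-scrollbars']
extras   = ['--virtual-time-budget=60000']    # e.g. user-set CHROME_EXTRA_ARGS

# later values win per flag key, so the user's budget replaces the default
assert dedupe([*defaults, *extras]) == ['--virtual-time-budget=60000', '--hide-scrollbars']
```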

+ 0 - 0
archivebox/extractors/dom.py → archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/dom.py


+ 0 - 0
archivebox/extractors/pdf.py → archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/pdf.py


+ 0 - 0
archivebox/extractors/screenshot.py → archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/screenshot.py


+ 18 - 0
archivebox/pkgs/abx-plugin-chrome/pyproject.toml

@@ -0,0 +1,18 @@
+[project]
+name = "abx-plugin-chrome"
+version = "2024.10.28"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "abx>=0.1.0",
+    "abx-spec-config>=0.1.0",
+    "abx-spec-pydantic-pkgr>=0.1.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_chrome = "abx_plugin_chrome"

+ 0 - 0
archivebox/pkgs/abx-plugin-curl/README.md


+ 18 - 0
archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/__init__.py

@@ -0,0 +1,18 @@
+import abx
+
+
+@abx.hookimpl
+def get_CONFIG():
+    from .config import CURL_CONFIG
+    
+    return {
+        'curl': CURL_CONFIG
+    }
+
+@abx.hookimpl
+def get_BINARIES():
+    from .binaries import CURL_BINARY
+    
+    return {
+        'curl': CURL_BINARY,
+    }

+ 4 - 4
archivebox/plugins_extractor/curl/binaries.py → archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/binaries.py

@@ -1,17 +1,17 @@
-__package__ = 'plugins_extractor.curl'
+__package__ = 'abx_plugin_curl'
 
 from typing import List
 
 from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinName
+from pydantic_pkgr import BinProvider, BinName, Binary
 
-from abx.archivebox.base_binary import BaseBinary, env, apt, brew
+from abx_plugin_default_binproviders import apt, brew, env
 
 
 from .config import CURL_CONFIG
 
 
-class CurlBinary(BaseBinary):
+class CurlBinary(Binary):
     name: BinName = CURL_CONFIG.CURL_BINARY
     binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
 

+ 2 - 2
archivebox/plugins_extractor/curl/config.py → archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/config.py

@@ -1,11 +1,11 @@
-__package__ = 'plugins_extractor.curl'
+__package__ = 'abx_plugin_curl'
 
 from typing import List, Optional
 from pathlib import Path
 
 from pydantic import Field
 
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
 
 from archivebox.config.common import ARCHIVING_CONFIG
 

+ 0 - 0
archivebox/extractors/headers.py → archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/headers.py


+ 18 - 0
archivebox/pkgs/abx-plugin-curl/pyproject.toml

@@ -0,0 +1,18 @@
+[project]
+name = "abx-plugin-curl"
+version = "2024.10.24"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "abx>=0.1.0",
+    "abx-spec-config>=0.1.0",
+    "abx-spec-pydantic-pkgr>=0.1.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_curl = "abx_plugin_curl"

+ 0 - 0
archivebox/pkgs/abx-plugin-default-binproviders/README.md


+ 23 - 0
archivebox/pkgs/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py

@@ -0,0 +1,23 @@
+
+import abx
+
+from typing import Dict
+
+from pydantic_pkgr import (
+    AptProvider,
+    BrewProvider,
+    EnvProvider,
+    BinProvider,
+)
+apt = APT_BINPROVIDER = AptProvider()
+brew = BREW_BINPROVIDER = BrewProvider()
+env = ENV_BINPROVIDER = EnvProvider()
+
+
+@abx.hookimpl(tryfirst=True)
+def get_BINPROVIDERS() -> Dict[str, BinProvider]:
+    return {
+        'apt': APT_BINPROVIDER,
+        'brew': BREW_BINPROVIDER,
+        'env': ENV_BINPROVIDER,
+    }
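
`tryfirst=True` makes this hookimpl run before other `get_BINPROVIDERS` implementations, so the stock apt/brew/env providers land first in pluggy's results list and plugin-provided providers can override them when the results are merged in order. A sketch of that merge (assuming `abx.pm` exposes this hook like the others above):

```python
import abx

BINPROVIDERS = {}
for result in abx.pm.hook.get_BINPROVIDERS():   # defaults come first thanks to tryfirst=True
    BINPROVIDERS.update(result)                 # same-named providers from plugins override them

apt = BINPROVIDERS['apt']
```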

+ 18 - 0
archivebox/pkgs/abx-plugin-default-binproviders/pyproject.toml

@@ -0,0 +1,18 @@
+[project]
+name = "abx-plugin-default-binproviders"
+version = "2024.10.24"
+description = "Default BinProviders for ABX (apt, brew, env)"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "abx>=0.1.0",
+    "pydantic-pkgr>=0.5.4",
+    "abx-spec-pydantic-pkgr>=0.1.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_default_binproviders = "abx_plugin_default_binproviders"

+ 0 - 0
archivebox/pkgs/abx-plugin-favicon/README.md


+ 29 - 0
archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/__init__.py

@@ -0,0 +1,29 @@
+__label__ = 'Favicon'
+__version__ = '2024.10.24'
+__author__ = 'ArchiveBox'
+__homepage__ = 'https://github.com/ArchiveBox/archivebox'
+__dependencies__ = [
+    'abx>=0.1.0',
+    'abx-spec-config>=0.1.0',
+    'abx-plugin-curl>=2024.10.24',
+]
+
+import abx
+
+
+@abx.hookimpl
+def get_CONFIG():
+    from .config import FAVICON_CONFIG
+    
+    return {
+        'FAVICON_CONFIG': FAVICON_CONFIG
+    }
+
+
+# @abx.hookimpl
+# def get_EXTRACTORS():
+#     from .extractors import FAVICON_EXTRACTOR
+    
+#     return {
+#         'favicon': FAVICON_EXTRACTOR,
+#     }

+ 1 - 4
archivebox/plugins_extractor/favicon/config.py → archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/config.py

@@ -1,7 +1,4 @@
-__package__ = 'plugins_extractor.favicon'
-
-
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
 
 
 class FaviconConfig(BaseConfigSet):

+ 0 - 0
archivebox/extractors/favicon.py → archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/favicon.py


+ 18 - 0
archivebox/pkgs/abx-plugin-favicon/pyproject.toml

@@ -0,0 +1,18 @@
+[project]
+name = "abx-plugin-favicon"
+version = "2024.10.28"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "abx>=0.1.0",
+    "abx-spec-config>=0.1.0",
+    "abx-plugin-curl>=2024.10.28",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_favicon = "abx_plugin_favicon"

+ 0 - 0
archivebox/pkgs/abx-plugin-git/README.md


+ 29 - 0
archivebox/pkgs/abx-plugin-git/abx_plugin_git/__init__.py

@@ -0,0 +1,29 @@
+__package__ = 'abx_plugin_git'
+__label__ = 'Git'
+
+import abx
+
+
+@abx.hookimpl
+def get_CONFIG():
+    from .config import GIT_CONFIG
+    
+    return {
+        'GIT_CONFIG': GIT_CONFIG
+    }
+
+@abx.hookimpl
+def get_BINARIES():
+    from .binaries import GIT_BINARY
+    
+    return {
+        'git': GIT_BINARY,
+    }
+
+@abx.hookimpl
+def get_EXTRACTORS():
+    from .extractors import GIT_EXTRACTOR
+    
+    return {
+        'git': GIT_EXTRACTOR,
+    }

+ 4 - 4
archivebox/plugins_extractor/git/binaries.py → archivebox/pkgs/abx-plugin-git/abx_plugin_git/binaries.py

@@ -1,17 +1,17 @@
-__package__ = 'plugins_extractor.git'
+__package__ = 'abx_plugin_git'
 
 from typing import List
 
 from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinName
+from pydantic_pkgr import BinProvider, BinName, Binary
 
-from abx.archivebox.base_binary import BaseBinary, env, apt, brew
+from abx_plugin_default_binproviders import apt, brew, env
 
 from .config import GIT_CONFIG
 
 
 
-class GitBinary(BaseBinary):
+class GitBinary(Binary):
     name: BinName = GIT_CONFIG.GIT_BINARY
     binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
 

+ 2 - 2
archivebox/plugins_extractor/git/config.py → archivebox/pkgs/abx-plugin-git/abx_plugin_git/config.py

@@ -1,10 +1,10 @@
-__package__ = 'plugins_extractor.git'
+__package__ = 'abx_plugin_git'
 
 from typing import List
 
 from pydantic import Field
 
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
 
 from archivebox.config.common import ARCHIVING_CONFIG
 

+ 15 - 0
archivebox/pkgs/abx-plugin-git/abx_plugin_git/extractors.py

@@ -0,0 +1,15 @@
+__package__ = 'abx_plugin_git'
+
+# from pathlib import Path
+
+# from .binaries import GIT_BINARY
+
+
+# class GitExtractor(BaseExtractor):
+#     name: ExtractorName = 'git'
+#     binary: str = GIT_BINARY.name
+
+#     def get_output_path(self, snapshot) -> Path | None:
+#         return snapshot.as_link() / 'git'
+
+# GIT_EXTRACTOR = GitExtractor()

+ 2 - 2
archivebox/extractors/git.py → archivebox/pkgs/abx-plugin-git/abx_plugin_git/git.py

@@ -16,8 +16,8 @@ from archivebox.misc.util import (
 from ..logging_util import TimedProgress
 from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
 
-from archivebox.plugins_extractor.git.config import GIT_CONFIG
-from archivebox.plugins_extractor.git.binaries import GIT_BINARY
+from abx_plugin_git.config import GIT_CONFIG
+from abx_plugin_git.binaries import GIT_BINARY
 
 
 def get_output_path():

+ 19 - 0
archivebox/pkgs/abx-plugin-git/pyproject.toml

@@ -0,0 +1,19 @@
+[project]
+name = "abx-plugin-git"
+version = "2024.10.28"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "abx>=0.1.0",
+    "abx-spec-config>=0.1.0",
+    "abx-spec-pydantic-pkgr>=0.1.0",
+    "abx-plugin-default-binproviders>=2024.10.24",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_git = "abx_plugin_git"

+ 0 - 0
archivebox/pkgs/abx-plugin-htmltotext/README.md


+ 22 - 0
archivebox/pkgs/abx-plugin-htmltotext/abx_plugin_htmltotext/__init__.py

@@ -0,0 +1,22 @@
+__package__ = 'abx_plugin_htmltotext'
+__label__ = 'HTML-to-Text'
+
+import abx
+
+
+@abx.hookimpl
+def get_CONFIG():
+    from .config import HTMLTOTEXT_CONFIG
+    
+    return {
+        'HTMLTOTEXT_CONFIG': HTMLTOTEXT_CONFIG
+    }
+
+
+# @abx.hookimpl
+# def get_EXTRACTORS():
+#     from .extractors import HTMLTOTEXT_EXTRACTOR
+    
+#     return {
+#         'htmltotext': HTMLTOTEXT_EXTRACTOR,
+#     }

Some files were not shown because too many files changed in this diff