|
@@ -1,1526 +0,0 @@
|
|
|
-__package__ = 'archivebox'
|
|
|
|
|
-
|
|
|
|
|
-import os
|
|
|
|
|
-import sys
|
|
|
|
|
-import shutil
|
|
|
|
|
-import platform
|
|
|
|
|
-
|
|
|
|
|
-from typing import Dict, List, Optional, Iterable, IO, Union
|
|
|
|
|
-from pathlib import Path
|
|
|
|
|
-from datetime import date, datetime
|
|
|
|
|
-
|
|
|
|
|
-from crontab import CronTab, CronSlices
|
|
|
|
|
-
|
|
|
|
|
-from django.db.models import QuerySet
|
|
|
|
|
-from django.utils import timezone
|
|
|
|
|
-
|
|
|
|
|
-from abx_pkg import Binary
|
|
|
|
|
-
|
|
|
|
|
-import abx
|
|
|
|
|
-import archivebox
|
|
|
|
|
-from archivebox.config import CONSTANTS, VERSION, DATA_DIR, ARCHIVE_DIR
|
|
|
|
|
-from archivebox.config.common import SHELL_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG, SERVER_CONFIG, ARCHIVING_CONFIG
|
|
|
|
|
-from archivebox.config.permissions import SudoPermission, IN_DOCKER
|
|
|
|
|
-from archivebox.config.collection import write_config_file, load_all_config, get_real_name
|
|
|
|
|
-from archivebox.misc.checks import check_data_folder
|
|
|
|
|
-from archivebox.misc.util import enforce_types # type: ignore
|
|
|
|
|
-from archivebox.misc.system import get_dir_size, dedupe_cron_jobs, CRON_COMMENT
|
|
|
|
|
-from archivebox.misc.system import run as run_shell
|
|
|
|
|
-from archivebox.misc.logging import stderr, hint
|
|
|
|
|
-from archivebox.misc.logging_util import (
|
|
|
|
|
- TimedProgress,
|
|
|
|
|
- log_importing_started,
|
|
|
|
|
- log_crawl_started,
|
|
|
|
|
- log_removal_started,
|
|
|
|
|
- log_removal_finished,
|
|
|
|
|
- log_list_started,
|
|
|
|
|
- log_list_finished,
|
|
|
|
|
- printable_config,
|
|
|
|
|
- printable_folders,
|
|
|
|
|
- printable_filesize,
|
|
|
|
|
- printable_folder_status,
|
|
|
|
|
-)
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-from .cli import (
|
|
|
|
|
- CLI_SUBCOMMANDS,
|
|
|
|
|
- run_subcommand,
|
|
|
|
|
- display_first,
|
|
|
|
|
- meta_cmds,
|
|
|
|
|
- setup_cmds,
|
|
|
|
|
- archive_cmds,
|
|
|
|
|
-)
|
|
|
|
|
-from .parsers import (
|
|
|
|
|
- save_text_as_source,
|
|
|
|
|
- save_file_as_source,
|
|
|
|
|
- parse_links_memory,
|
|
|
|
|
-)
|
|
|
|
|
-from .index.schema import Link
|
|
|
|
|
-from .index import (
|
|
|
|
|
- load_main_index,
|
|
|
|
|
- parse_links_from_source,
|
|
|
|
|
- dedupe_links,
|
|
|
|
|
- write_main_index,
|
|
|
|
|
- snapshot_filter,
|
|
|
|
|
- get_indexed_folders,
|
|
|
|
|
- get_archived_folders,
|
|
|
|
|
- get_unarchived_folders,
|
|
|
|
|
- get_present_folders,
|
|
|
|
|
- get_valid_folders,
|
|
|
|
|
- get_invalid_folders,
|
|
|
|
|
- get_duplicate_folders,
|
|
|
|
|
- get_orphaned_folders,
|
|
|
|
|
- get_corrupted_folders,
|
|
|
|
|
- get_unrecognized_folders,
|
|
|
|
|
- fix_invalid_folder_locations,
|
|
|
|
|
- write_link_details,
|
|
|
|
|
-)
|
|
|
|
|
-from .index.json import (
|
|
|
|
|
- parse_json_main_index,
|
|
|
|
|
- parse_json_links_details,
|
|
|
|
|
- generate_json_index_from_links,
|
|
|
|
|
-)
|
|
|
|
|
-from .index.sql import (
|
|
|
|
|
- get_admins,
|
|
|
|
|
- apply_migrations,
|
|
|
|
|
- remove_from_sql_main_index,
|
|
|
|
|
-)
|
|
|
|
|
-from .index.html import generate_index_from_links
|
|
|
|
|
-from .index.csv import links_to_csv
|
|
|
|
|
-from .extractors import archive_links, archive_link, ignore_methods
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-@enforce_types
|
|
|
|
|
-def help(out_dir: Path=DATA_DIR) -> None:
|
|
|
|
|
- """Print the ArchiveBox help message and usage"""
|
|
|
|
|
-
|
|
|
|
|
- from rich import print
|
|
|
|
|
- from rich.panel import Panel
|
|
|
|
|
-
|
|
|
|
|
- all_subcommands = CLI_SUBCOMMANDS
|
|
|
|
|
- COMMANDS_HELP_TEXT = '\n '.join(
|
|
|
|
|
- f'[green]{cmd.ljust(20)}[/green] {func.__doc__}'
|
|
|
|
|
- for cmd, func in all_subcommands.items()
|
|
|
|
|
- if cmd in meta_cmds
|
|
|
|
|
- ) + '\n\n ' + '\n '.join(
|
|
|
|
|
- f'[green]{cmd.ljust(20)}[/green] {func.__doc__}'
|
|
|
|
|
- for cmd, func in all_subcommands.items()
|
|
|
|
|
- if cmd in setup_cmds
|
|
|
|
|
- ) + '\n\n ' + '\n '.join(
|
|
|
|
|
- f'[green]{cmd.ljust(20)}[/green] {func.__doc__}'
|
|
|
|
|
- for cmd, func in all_subcommands.items()
|
|
|
|
|
- if cmd in archive_cmds
|
|
|
|
|
- ) + '\n\n ' + '\n '.join(
|
|
|
|
|
- f'[green]{cmd.ljust(20)}[/green] {func.__doc__}'
|
|
|
|
|
- for cmd, func in all_subcommands.items()
|
|
|
|
|
- if cmd not in display_first
|
|
|
|
|
- )
|
|
|
|
|
-
|
|
|
|
|
- DOCKER_USAGE = '''
|
|
|
|
|
-[dodger_blue3]Docker Usage:[/dodger_blue3]
|
|
|
|
|
- [grey53]# using Docker Compose:[/grey53]
|
|
|
|
|
- [blue]docker compose run[/blue] [dark_green]archivebox[/dark_green] [green]\\[command][/green] [green3][...args][/green3] [violet][--help][/violet] [grey53][--version][/grey53]
|
|
|
|
|
-
|
|
|
|
|
- [grey53]# using Docker:[/grey53]
|
|
|
|
|
- [blue]docker run[/blue] -v [light_slate_blue]$PWD:/data[/light_slate_blue] [grey53]-p 8000:8000[/grey53] -it [dark_green]archivebox/archivebox[/dark_green] [green]\\[command][/green] [green3][...args][/green3] [violet][--help][/violet] [grey53][--version][/grey53]
|
|
|
|
|
-''' if IN_DOCKER else ''
|
|
|
|
|
- DOCKER_DOCS = '\n [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#usage]https://github.com/ArchiveBox/ArchiveBox/wiki/Docker[/link]' if IN_DOCKER else ''
|
|
|
|
|
- DOCKER_OUTSIDE_HINT = "\n [grey53]# outside of Docker:[/grey53]" if IN_DOCKER else ''
|
|
|
|
|
- DOCKER_CMD_PREFIX = "[blue]docker ... [/blue]" if IN_DOCKER else ''
|
|
|
|
|
-
|
|
|
|
|
- print(f'''{DOCKER_USAGE}
|
|
|
|
|
-[deep_sky_blue4]Usage:[/deep_sky_blue4]{DOCKER_OUTSIDE_HINT}
|
|
|
|
|
- [dark_green]archivebox[/dark_green] [green]\\[command][/green] [green3][...args][/green3] [violet][--help][/violet] [grey53][--version][/grey53]
|
|
|
|
|
-
|
|
|
|
|
-[deep_sky_blue4]Commands:[/deep_sky_blue4]
|
|
|
|
|
- {COMMANDS_HELP_TEXT}
|
|
|
|
|
-
|
|
|
|
|
-[deep_sky_blue4]Documentation:[/deep_sky_blue4]
|
|
|
|
|
- [link=https://github.com/ArchiveBox/ArchiveBox/wiki]https://github.com/ArchiveBox/ArchiveBox/wiki[/link]{DOCKER_DOCS}
|
|
|
|
|
- [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#cli-usage]https://github.com/ArchiveBox/ArchiveBox/wiki/Usage[/link]
|
|
|
|
|
- [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration]https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration[/link]
|
|
|
|
|
-''')
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
- if os.access(CONSTANTS.ARCHIVE_DIR, os.R_OK) and CONSTANTS.ARCHIVE_DIR.is_dir():
|
|
|
|
|
- pretty_out_dir = str(out_dir).replace(str(Path('~').expanduser()), '~')
|
|
|
|
|
- EXAMPLE_USAGE = f'''
|
|
|
|
|
-[light_slate_blue]DATA DIR[/light_slate_blue]: [yellow]{pretty_out_dir}[/yellow]
|
|
|
|
|
-
|
|
|
|
|
-[violet]Hint:[/violet] [i]Common maintenance tasks:[/i]
|
|
|
|
|
- [dark_green]archivebox[/dark_green] [green]init[/green] [grey53]# make sure database is up-to-date (safe to run multiple times)[/grey53]
|
|
|
|
|
- [dark_green]archivebox[/dark_green] [green]install[/green] [grey53]# make sure plugins are up-to-date (wget, chrome, singlefile, etc.)[/grey53]
|
|
|
|
|
- [dark_green]archivebox[/dark_green] [green]status[/green] [grey53]# get a health checkup report on your collection[/grey53]
|
|
|
|
|
- [dark_green]archivebox[/dark_green] [green]update[/green] [grey53]# retry any previously failed or interrupted archiving tasks[/grey53]
|
|
|
|
|
-
|
|
|
|
|
-[violet]Hint:[/violet] [i]More example usage:[/i]
|
|
|
|
|
- [dark_green]archivebox[/dark_green] [green]add[/green] --depth=1 "https://example.com/some/page"
|
|
|
|
|
- [dark_green]archivebox[/dark_green] [green]list[/green] --sort=timestamp --csv=timestamp,downloaded_at,url,title
|
|
|
|
|
- [dark_green]archivebox[/dark_green] [green]schedule[/green] --every=day --depth=1 "https://example.com/some/feed.rss"
|
|
|
|
|
- [dark_green]archivebox[/dark_green] [green]server[/green] [blue]0.0.0.0:8000[/blue] [grey53]# Start the Web UI / API server[/grey53]
|
|
|
|
|
-'''
|
|
|
|
|
- print(Panel(EXAMPLE_USAGE, expand=False, border_style='grey53', title='[green3]:white_check_mark: A collection [light_slate_blue]DATA DIR[/light_slate_blue] is currently active[/green3]', subtitle='Commands run inside this dir will only apply to this collection.'))
|
|
|
|
|
- else:
|
|
|
|
|
- DATA_SETUP_HELP = '\n'
|
|
|
|
|
- if IN_DOCKER:
|
|
|
|
|
- DATA_SETUP_HELP += '[violet]Hint:[/violet] When using Docker, you need to mount a volume to use as your data dir:\n'
|
|
|
|
|
- DATA_SETUP_HELP += ' docker run [violet]-v /some/path/data:/data[/violet] archivebox/archivebox ...\n\n'
|
|
|
|
|
- DATA_SETUP_HELP += 'To load an [dark_blue]existing[/dark_blue] collection:\n'
|
|
|
|
|
- DATA_SETUP_HELP += ' 1. [green]cd[/green] ~/archivebox/data [grey53]# go into existing [light_slate_blue]DATA DIR[/light_slate_blue] (can be anywhere)[/grey53]\n'
|
|
|
|
|
- DATA_SETUP_HELP += f' 2. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]init[/green] [grey53]# migrate to latest version (safe to run multiple times)[/grey53]\n'
|
|
|
|
|
- DATA_SETUP_HELP += f' 3. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]install[/green] [grey53]# auto-update all plugins (wget, chrome, singlefile, etc.)[/grey53]\n'
|
|
|
|
|
- DATA_SETUP_HELP += f' 4. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]help[/green] [grey53]# ...get help with next steps... [/grey53]\n\n'
|
|
|
|
|
- DATA_SETUP_HELP += 'To start a [sea_green1]new[/sea_green1] collection:\n'
|
|
|
|
|
- DATA_SETUP_HELP += ' 1. [green]mkdir[/green] ~/archivebox/data [grey53]# create a new, empty [light_slate_blue]DATA DIR[/light_slate_blue] (can be anywhere)[/grey53]\n'
|
|
|
|
|
- DATA_SETUP_HELP += ' 2. [green]cd[/green] ~/archivebox/data [grey53]# cd into the new directory[/grey53]\n'
|
|
|
|
|
- DATA_SETUP_HELP += f' 3. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]init[/green] [grey53]# initialize ArchiveBox in the new data dir[/grey53]\n'
|
|
|
|
|
- DATA_SETUP_HELP += f' 4. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]install[/green] [grey53]# auto-install all plugins (wget, chrome, singlefile, etc.)[/grey53]\n'
|
|
|
|
|
- DATA_SETUP_HELP += f' 5. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]help[/green] [grey53]# ... get help with next steps... [/grey53]\n'
|
|
|
|
|
- print(Panel(DATA_SETUP_HELP, expand=False, border_style='grey53', title='[red]:cross_mark: No collection is currently active[/red]', subtitle='All archivebox [green]commands[/green] should be run from inside a collection [light_slate_blue]DATA DIR[/light_slate_blue]'))
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-@enforce_types
|
|
|
|
|
-def version(quiet: bool=False,
|
|
|
|
|
- out_dir: Path=DATA_DIR,
|
|
|
|
|
- binproviders: Optional[List[str]]=None,
|
|
|
|
|
- binaries: Optional[List[str]]=None,
|
|
|
|
|
- ) -> None:
|
|
|
|
|
- """Print the ArchiveBox version and dependency information"""
|
|
|
|
|
-
|
|
|
|
|
- print(VERSION)
|
|
|
|
|
- if quiet or '--version' in sys.argv:
|
|
|
|
|
- return
|
|
|
|
|
-
|
|
|
|
|
- from rich.panel import Panel
|
|
|
|
|
- from rich.console import Console
|
|
|
|
|
- console = Console()
|
|
|
|
|
- prnt = console.print
|
|
|
|
|
-
|
|
|
|
|
- from abx_plugin_default_binproviders import apt, brew, env
|
|
|
|
|
-
|
|
|
|
|
- from archivebox.config.version import get_COMMIT_HASH, get_BUILD_TIME
|
|
|
|
|
- from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, RUNNING_AS_UID, RUNNING_AS_GID
|
|
|
|
|
- from archivebox.config.paths import get_data_locations, get_code_locations
|
|
|
|
|
-
|
|
|
|
|
- LDAP_ENABLED = archivebox.pm.hook.get_SCOPE_CONFIG().LDAP_ENABLED
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
- # 0.7.1
|
|
|
|
|
- # ArchiveBox v0.7.1+editable COMMIT_HASH=951bba5 BUILD_TIME=2023-12-17 16:46:05 1702860365
|
|
|
|
|
- # IN_DOCKER=False IN_QEMU=False ARCH=arm64 OS=Darwin PLATFORM=macOS-14.2-arm64-arm-64bit PYTHON=Cpython
|
|
|
|
|
- # FS_ATOMIC=True FS_REMOTE=False FS_USER=501:20 FS_PERMS=644
|
|
|
|
|
- # DEBUG=False IS_TTY=True TZ=UTC SEARCH_BACKEND=ripgrep LDAP=False
|
|
|
|
|
-
|
|
|
|
|
- p = platform.uname()
|
|
|
|
|
- COMMIT_HASH = get_COMMIT_HASH()
|
|
|
|
|
- prnt(
|
|
|
|
|
- '[dark_green]ArchiveBox[/dark_green] [dark_goldenrod]v{}[/dark_goldenrod]'.format(CONSTANTS.VERSION),
|
|
|
|
|
- f'COMMIT_HASH={COMMIT_HASH[:7] if COMMIT_HASH else "unknown"}',
|
|
|
|
|
- f'BUILD_TIME={get_BUILD_TIME()}',
|
|
|
|
|
- )
|
|
|
|
|
- prnt(
|
|
|
|
|
- f'IN_DOCKER={IN_DOCKER}',
|
|
|
|
|
- f'IN_QEMU={SHELL_CONFIG.IN_QEMU}',
|
|
|
|
|
- f'ARCH={p.machine}',
|
|
|
|
|
- f'OS={p.system}',
|
|
|
|
|
- f'PLATFORM={platform.platform()}',
|
|
|
|
|
- f'PYTHON={sys.implementation.name.title()}' + (' (venv)' if CONSTANTS.IS_INSIDE_VENV else ''),
|
|
|
|
|
- )
|
|
|
|
|
- OUTPUT_IS_REMOTE_FS = get_data_locations().DATA_DIR.is_mount or get_data_locations().ARCHIVE_DIR.is_mount
|
|
|
|
|
- DATA_DIR_STAT = CONSTANTS.DATA_DIR.stat()
|
|
|
|
|
- prnt(
|
|
|
|
|
- f'EUID={os.geteuid()}:{os.getegid()} UID={RUNNING_AS_UID}:{RUNNING_AS_GID} PUID={ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}',
|
|
|
|
|
- f'FS_UID={DATA_DIR_STAT.st_uid}:{DATA_DIR_STAT.st_gid}',
|
|
|
|
|
- f'FS_PERMS={STORAGE_CONFIG.OUTPUT_PERMISSIONS}',
|
|
|
|
|
- f'FS_ATOMIC={STORAGE_CONFIG.ENFORCE_ATOMIC_WRITES}',
|
|
|
|
|
- f'FS_REMOTE={OUTPUT_IS_REMOTE_FS}',
|
|
|
|
|
- )
|
|
|
|
|
- prnt(
|
|
|
|
|
- f'DEBUG={SHELL_CONFIG.DEBUG}',
|
|
|
|
|
- f'IS_TTY={SHELL_CONFIG.IS_TTY}',
|
|
|
|
|
- f'SUDO={CONSTANTS.IS_ROOT}',
|
|
|
|
|
- f'ID={CONSTANTS.MACHINE_ID}:{CONSTANTS.COLLECTION_ID}',
|
|
|
|
|
- f'SEARCH_BACKEND={SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE}',
|
|
|
|
|
- f'LDAP={LDAP_ENABLED}',
|
|
|
|
|
- #f'DB=django.db.backends.sqlite3 (({CONFIG["SQLITE_JOURNAL_MODE"]})', # add this if we have more useful info to show eventually
|
|
|
|
|
- )
|
|
|
|
|
- prnt()
|
|
|
|
|
-
|
|
|
|
|
- if not (os.access(CONSTANTS.ARCHIVE_DIR, os.R_OK) and os.access(CONSTANTS.CONFIG_FILE, os.R_OK)):
|
|
|
|
|
- PANEL_TEXT = '\n'.join((
|
|
|
|
|
- # '',
|
|
|
|
|
- # f'[yellow]CURRENT DIR =[/yellow] [red]{os.getcwd()}[/red]',
|
|
|
|
|
- '',
|
|
|
|
|
- '[violet]Hint:[/violet] [green]cd[/green] into a collection [blue]DATA_DIR[/blue] and run [green]archivebox version[/green] again...',
|
|
|
|
|
- ' [grey53]OR[/grey53] run [green]archivebox init[/green] to create a new collection in the current dir.',
|
|
|
|
|
- '',
|
|
|
|
|
- ' [i][grey53](this is [red]REQUIRED[/red] if you are opening a Github Issue to get help)[/grey53][/i]',
|
|
|
|
|
- '',
|
|
|
|
|
- ))
|
|
|
|
|
- prnt(Panel(PANEL_TEXT, expand=False, border_style='grey53', title='[red]:exclamation: No collection [blue]DATA_DIR[/blue] is currently active[/red]', subtitle='Full version info is only available when inside a collection [light_slate_blue]DATA DIR[/light_slate_blue]'))
|
|
|
|
|
- prnt()
|
|
|
|
|
- return
|
|
|
|
|
-
|
|
|
|
|
- prnt('[pale_green1][i] Binary Dependencies:[/pale_green1]')
|
|
|
|
|
- failures = []
|
|
|
|
|
- BINARIES = abx.as_dict(archivebox.pm.hook.get_BINARIES())
|
|
|
|
|
- for name, binary in list(BINARIES.items()):
|
|
|
|
|
- if binary.name == 'archivebox':
|
|
|
|
|
- continue
|
|
|
|
|
-
|
|
|
|
|
- # skip if the binary is not in the requested list of binaries
|
|
|
|
|
- if binaries and binary.name not in binaries:
|
|
|
|
|
- continue
|
|
|
|
|
-
|
|
|
|
|
- # skip if the binary is not supported by any of the requested binproviders
|
|
|
|
|
- if binproviders and binary.binproviders_supported and not any(provider.name in binproviders for provider in binary.binproviders_supported):
|
|
|
|
|
- continue
|
|
|
|
|
-
|
|
|
|
|
- err = None
|
|
|
|
|
- try:
|
|
|
|
|
- loaded_bin = binary.load()
|
|
|
|
|
- except Exception as e:
|
|
|
|
|
- err = e
|
|
|
|
|
- loaded_bin = binary
|
|
|
|
|
- provider_summary = f'[dark_sea_green3]{loaded_bin.binprovider.name.ljust(10)}[/dark_sea_green3]' if loaded_bin.binprovider else '[grey23]not found[/grey23] '
|
|
|
|
|
- if loaded_bin.abspath:
|
|
|
|
|
- abspath = str(loaded_bin.abspath).replace(str(DATA_DIR), '[light_slate_blue].[/light_slate_blue]').replace(str(Path('~').expanduser()), '~')
|
|
|
|
|
- if ' ' in abspath:
|
|
|
|
|
- abspath = abspath.replace(' ', r'\ ')
|
|
|
|
|
- else:
|
|
|
|
|
- abspath = f'[red]{err}[/red]'
|
|
|
|
|
- prnt('', '[green]√[/green]' if loaded_bin.is_valid else '[red]X[/red]', '', loaded_bin.name.ljust(21), str(loaded_bin.version).ljust(12), provider_summary, abspath, overflow='ignore', crop=False)
|
|
|
|
|
- if not loaded_bin.is_valid:
|
|
|
|
|
- failures.append(loaded_bin.name)
|
|
|
|
|
-
|
|
|
|
|
- prnt()
|
|
|
|
|
- prnt('[gold3][i] Package Managers:[/gold3]')
|
|
|
|
|
- BINPROVIDERS = abx.as_dict(archivebox.pm.hook.get_BINPROVIDERS())
|
|
|
|
|
- for name, binprovider in list(BINPROVIDERS.items()):
|
|
|
|
|
- err = None
|
|
|
|
|
-
|
|
|
|
|
- if binproviders and binprovider.name not in binproviders:
|
|
|
|
|
- continue
|
|
|
|
|
-
|
|
|
|
|
- # TODO: implement a BinProvider.BINARY() method that gets the loaded binary for a binprovider's INSTALLER_BIN
|
|
|
|
|
- loaded_bin = binprovider.INSTALLER_BINARY or Binary(name=binprovider.INSTALLER_BIN, binproviders=[env, apt, brew])
|
|
|
|
|
-
|
|
|
|
|
- abspath = None
|
|
|
|
|
- if loaded_bin.abspath:
|
|
|
|
|
- abspath = str(loaded_bin.abspath).replace(str(DATA_DIR), '.').replace(str(Path('~').expanduser()), '~')
|
|
|
|
|
- if ' ' in abspath:
|
|
|
|
|
- abspath = abspath.replace(' ', r'\ ')
|
|
|
|
|
-
|
|
|
|
|
- PATH = str(binprovider.PATH).replace(str(DATA_DIR), '[light_slate_blue].[/light_slate_blue]').replace(str(Path('~').expanduser()), '~')
|
|
|
|
|
- ownership_summary = f'UID=[blue]{str(binprovider.EUID).ljust(4)}[/blue]'
|
|
|
|
|
- provider_summary = f'[dark_sea_green3]{str(abspath).ljust(52)}[/dark_sea_green3]' if abspath else f'[grey23]{"not available".ljust(52)}[/grey23]'
|
|
|
|
|
- prnt('', '[green]√[/green]' if binprovider.is_valid else '[grey53]-[/grey53]', '', binprovider.name.ljust(11), provider_summary, ownership_summary, f'PATH={PATH}', overflow='ellipsis', soft_wrap=True)
|
|
|
|
|
-
|
|
|
|
|
- if not (binaries or binproviders):
|
|
|
|
|
- # dont show source code / data dir info if we just want to get version info for a binary or binprovider
|
|
|
|
|
-
|
|
|
|
|
- prnt()
|
|
|
|
|
- prnt('[deep_sky_blue3][i] Code locations:[/deep_sky_blue3]')
|
|
|
|
|
- for name, path in get_code_locations().items():
|
|
|
|
|
- prnt(printable_folder_status(name, path), overflow='ignore', crop=False)
|
|
|
|
|
-
|
|
|
|
|
- prnt()
|
|
|
|
|
- if os.access(CONSTANTS.ARCHIVE_DIR, os.R_OK) or os.access(CONSTANTS.CONFIG_FILE, os.R_OK):
|
|
|
|
|
- prnt('[bright_yellow][i] Data locations:[/bright_yellow]')
|
|
|
|
|
- for name, path in get_data_locations().items():
|
|
|
|
|
- prnt(printable_folder_status(name, path), overflow='ignore', crop=False)
|
|
|
|
|
-
|
|
|
|
|
- from archivebox.misc.checks import check_data_dir_permissions
|
|
|
|
|
-
|
|
|
|
|
- check_data_dir_permissions()
|
|
|
|
|
- else:
|
|
|
|
|
- prnt()
|
|
|
|
|
- prnt('[red][i] Data locations:[/red] (not in a data directory)')
|
|
|
|
|
-
|
|
|
|
|
- prnt()
|
|
|
|
|
-
|
|
|
|
|
- if failures:
|
|
|
|
|
- raise SystemExit(1)
|
|
|
|
|
- raise SystemExit(0)
|
|
|
|
|
-
|
|
|
|
|
-@enforce_types
|
|
|
|
|
-def run(subcommand: str,
|
|
|
|
|
- subcommand_args: Optional[List[str]],
|
|
|
|
|
- stdin: Optional[IO]=None,
|
|
|
|
|
- out_dir: Path=DATA_DIR) -> None:
|
|
|
|
|
- """Run a given ArchiveBox subcommand with the given list of args"""
|
|
|
|
|
- run_subcommand(
|
|
|
|
|
- subcommand=subcommand,
|
|
|
|
|
- subcommand_args=subcommand_args,
|
|
|
|
|
- stdin=stdin,
|
|
|
|
|
- pwd=out_dir,
|
|
|
|
|
- )
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-@enforce_types
|
|
|
|
|
-def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Path=DATA_DIR) -> None:
|
|
|
|
|
- """Initialize a new ArchiveBox collection in the current directory"""
|
|
|
|
|
-
|
|
|
|
|
- from core.models import Snapshot
|
|
|
|
|
- from rich import print
|
|
|
|
|
-
|
|
|
|
|
- # if os.access(out_dir / CONSTANTS.JSON_INDEX_FILENAME, os.F_OK):
|
|
|
|
|
- # print("[red]:warning: This folder contains a JSON index. It is deprecated, and will no longer be kept up to date automatically.[/red]", file=sys.stderr)
|
|
|
|
|
- # print("[red] You can run `archivebox list --json --with-headers > static_index.json` to manually generate it.[/red]", file=sys.stderr)
|
|
|
|
|
-
|
|
|
|
|
- is_empty = not len(set(os.listdir(out_dir)) - CONSTANTS.ALLOWED_IN_DATA_DIR)
|
|
|
|
|
- existing_index = os.path.isfile(CONSTANTS.DATABASE_FILE)
|
|
|
|
|
- if is_empty and not existing_index:
|
|
|
|
|
- print(f'[turquoise4][+] Initializing a new ArchiveBox v{VERSION} collection...[/turquoise4]')
|
|
|
|
|
- print('[green]----------------------------------------------------------------------[/green]')
|
|
|
|
|
- elif existing_index:
|
|
|
|
|
- # TODO: properly detect and print the existing version in current index as well
|
|
|
|
|
- print(f'[green][*] Verifying and updating existing ArchiveBox collection to v{VERSION}...[/green]')
|
|
|
|
|
- print('[green]----------------------------------------------------------------------[/green]')
|
|
|
|
|
- else:
|
|
|
|
|
- if force:
|
|
|
|
|
- print('[red][!] This folder appears to already have files in it, but no index.sqlite3 is present.[/red]')
|
|
|
|
|
- print('[red] Because --force was passed, ArchiveBox will initialize anyway (which may overwrite existing files).[/red]')
|
|
|
|
|
- else:
|
|
|
|
|
- print(
|
|
|
|
|
- ("[red][X] This folder appears to already have files in it, but no index.sqlite3 present.[/red]\n\n"
|
|
|
|
|
- " You must run init in a completely empty directory, or an existing data folder.\n\n"
|
|
|
|
|
- " [violet]Hint:[/violet] To import an existing data folder make sure to cd into the folder first, \n"
|
|
|
|
|
- " then run and run 'archivebox init' to pick up where you left off.\n\n"
|
|
|
|
|
- " (Always make sure your data folder is backed up first before updating ArchiveBox)"
|
|
|
|
|
- )
|
|
|
|
|
- )
|
|
|
|
|
- raise SystemExit(2)
|
|
|
|
|
-
|
|
|
|
|
- if existing_index:
|
|
|
|
|
- print('\n[green][*] Verifying archive folder structure...[/green]')
|
|
|
|
|
- else:
|
|
|
|
|
- print('\n[green][+] Building archive folder structure...[/green]')
|
|
|
|
|
-
|
|
|
|
|
- print(f' + ./{CONSTANTS.ARCHIVE_DIR.relative_to(DATA_DIR)}, ./{CONSTANTS.SOURCES_DIR.relative_to(DATA_DIR)}, ./{CONSTANTS.LOGS_DIR.relative_to(DATA_DIR)}...')
|
|
|
|
|
- Path(CONSTANTS.SOURCES_DIR).mkdir(exist_ok=True)
|
|
|
|
|
- Path(CONSTANTS.ARCHIVE_DIR).mkdir(exist_ok=True)
|
|
|
|
|
- Path(CONSTANTS.LOGS_DIR).mkdir(exist_ok=True)
|
|
|
|
|
-
|
|
|
|
|
- print(f' + ./{CONSTANTS.CONFIG_FILE.relative_to(DATA_DIR)}...')
|
|
|
|
|
-
|
|
|
|
|
- # create the .archivebox_id file with a unique ID for this collection
|
|
|
|
|
- from archivebox.config.paths import _get_collection_id
|
|
|
|
|
- _get_collection_id(CONSTANTS.DATA_DIR, force_create=True)
|
|
|
|
|
-
|
|
|
|
|
- # create the ArchiveBox.conf file
|
|
|
|
|
- write_config_file({'SECRET_KEY': SERVER_CONFIG.SECRET_KEY})
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
- if os.access(CONSTANTS.DATABASE_FILE, os.F_OK):
|
|
|
|
|
- print('\n[green][*] Verifying main SQL index and running any migrations needed...[/green]')
|
|
|
|
|
- else:
|
|
|
|
|
- print('\n[green][+] Building main SQL index and running initial migrations...[/green]')
|
|
|
|
|
-
|
|
|
|
|
- for migration_line in apply_migrations(out_dir):
|
|
|
|
|
- sys.stdout.write(f' {migration_line}\n')
|
|
|
|
|
-
|
|
|
|
|
- assert os.path.isfile(CONSTANTS.DATABASE_FILE) and os.access(CONSTANTS.DATABASE_FILE, os.R_OK)
|
|
|
|
|
- print()
|
|
|
|
|
- print(f' √ ./{CONSTANTS.DATABASE_FILE.relative_to(DATA_DIR)}')
|
|
|
|
|
-
|
|
|
|
|
- # from django.contrib.auth.models import User
|
|
|
|
|
- # if SHELL_CONFIG.IS_TTY and not User.objects.filter(is_superuser=True).exclude(username='system').exists():
|
|
|
|
|
- # print('{green}[+] Creating admin user account...{reset}'.format(**SHELL_CONFIG.ANSI))
|
|
|
|
|
- # call_command("createsuperuser", interactive=True)
|
|
|
|
|
-
|
|
|
|
|
- print()
|
|
|
|
|
- print('[dodger_blue3][*] Checking links from indexes and archive folders (safe to Ctrl+C)...[/dodger_blue3]')
|
|
|
|
|
-
|
|
|
|
|
- all_links = Snapshot.objects.none()
|
|
|
|
|
- pending_links: Dict[str, Link] = {}
|
|
|
|
|
-
|
|
|
|
|
- if existing_index:
|
|
|
|
|
- all_links = load_main_index(out_dir=out_dir, warn=False)
|
|
|
|
|
- print(f' √ Loaded {all_links.count()} links from existing main index.')
|
|
|
|
|
-
|
|
|
|
|
- if quick:
|
|
|
|
|
- print(' > Skipping full snapshot directory check (quick mode)')
|
|
|
|
|
- else:
|
|
|
|
|
- try:
|
|
|
|
|
- # Links in data folders that dont match their timestamp
|
|
|
|
|
- fixed, cant_fix = fix_invalid_folder_locations(out_dir=out_dir)
|
|
|
|
|
- if fixed:
|
|
|
|
|
- print(f' [yellow]√ Fixed {len(fixed)} data directory locations that didn\'t match their link timestamps.[/yellow]')
|
|
|
|
|
- if cant_fix:
|
|
|
|
|
- print(f' [red]! Could not fix {len(cant_fix)} data directory locations due to conflicts with existing folders.[/red]')
|
|
|
|
|
-
|
|
|
|
|
- # Links in JSON index but not in main index
|
|
|
|
|
- orphaned_json_links = {
|
|
|
|
|
- link.url: link
|
|
|
|
|
- for link in parse_json_main_index(out_dir)
|
|
|
|
|
- if not all_links.filter(url=link.url).exists()
|
|
|
|
|
- }
|
|
|
|
|
- if orphaned_json_links:
|
|
|
|
|
- pending_links.update(orphaned_json_links)
|
|
|
|
|
- print(f' [yellow]√ Added {len(orphaned_json_links)} orphaned links from existing JSON index...[/yellow]')
|
|
|
|
|
-
|
|
|
|
|
- # Links in data dir indexes but not in main index
|
|
|
|
|
- orphaned_data_dir_links = {
|
|
|
|
|
- link.url: link
|
|
|
|
|
- for link in parse_json_links_details(out_dir)
|
|
|
|
|
- if not all_links.filter(url=link.url).exists()
|
|
|
|
|
- }
|
|
|
|
|
- if orphaned_data_dir_links:
|
|
|
|
|
- pending_links.update(orphaned_data_dir_links)
|
|
|
|
|
- print(f' [yellow]√ Added {len(orphaned_data_dir_links)} orphaned links from existing archive directories.[/yellow]')
|
|
|
|
|
-
|
|
|
|
|
- # Links in invalid/duplicate data dirs
|
|
|
|
|
- invalid_folders = {
|
|
|
|
|
- folder: link
|
|
|
|
|
- for folder, link in get_invalid_folders(all_links, out_dir=out_dir).items()
|
|
|
|
|
- }
|
|
|
|
|
- if invalid_folders:
|
|
|
|
|
- print(f' [red]! Skipped adding {len(invalid_folders)} invalid link data directories.[/red]')
|
|
|
|
|
- print(' X ' + '\n X '.join(f'./{Path(folder).relative_to(DATA_DIR)} {link}' for folder, link in invalid_folders.items()))
|
|
|
|
|
- print()
|
|
|
|
|
- print(' [violet]Hint:[/violet] For more information about the link data directories that were skipped, run:')
|
|
|
|
|
- print(' archivebox status')
|
|
|
|
|
- print(' archivebox list --status=invalid')
|
|
|
|
|
-
|
|
|
|
|
- except (KeyboardInterrupt, SystemExit):
|
|
|
|
|
- print(file=sys.stderr)
|
|
|
|
|
- print('[yellow]:stop_sign: Stopped checking archive directories due to Ctrl-C/SIGTERM[/yellow]', file=sys.stderr)
|
|
|
|
|
- print(' Your archive data is safe, but you should re-run `archivebox init` to finish the process later.', file=sys.stderr)
|
|
|
|
|
- print(file=sys.stderr)
|
|
|
|
|
- print(' [violet]Hint:[/violet] In the future you can run a quick init without checking dirs like so:', file=sys.stderr)
|
|
|
|
|
- print(' archivebox init --quick', file=sys.stderr)
|
|
|
|
|
- raise SystemExit(1)
|
|
|
|
|
-
|
|
|
|
|
- write_main_index(list(pending_links.values()), out_dir=out_dir)
|
|
|
|
|
-
|
|
|
|
|
- print('\n[green]----------------------------------------------------------------------[/green]')
|
|
|
|
|
-
|
|
|
|
|
- from django.contrib.auth.models import User
|
|
|
|
|
-
|
|
|
|
|
- if (SERVER_CONFIG.ADMIN_USERNAME and SERVER_CONFIG.ADMIN_PASSWORD) and not User.objects.filter(username=SERVER_CONFIG.ADMIN_USERNAME).exists():
|
|
|
|
|
- print('[green][+] Found ADMIN_USERNAME and ADMIN_PASSWORD configuration options, creating new admin user.[/green]')
|
|
|
|
|
- User.objects.create_superuser(username=SERVER_CONFIG.ADMIN_USERNAME, password=SERVER_CONFIG.ADMIN_PASSWORD)
|
|
|
|
|
-
|
|
|
|
|
- if existing_index:
|
|
|
|
|
- print('[green][√] Done. Verified and updated the existing ArchiveBox collection.[/green]')
|
|
|
|
|
- else:
|
|
|
|
|
- print(f'[green][√] Done. A new ArchiveBox collection was initialized ({len(all_links) + len(pending_links)} links).[/green]')
|
|
|
|
|
-
|
|
|
|
|
- json_index = out_dir / CONSTANTS.JSON_INDEX_FILENAME
|
|
|
|
|
- html_index = out_dir / CONSTANTS.HTML_INDEX_FILENAME
|
|
|
|
|
- index_name = f"{date.today()}_index_old"
|
|
|
|
|
- if os.access(json_index, os.F_OK):
|
|
|
|
|
- json_index.rename(f"{index_name}.json")
|
|
|
|
|
- if os.access(html_index, os.F_OK):
|
|
|
|
|
- html_index.rename(f"{index_name}.html")
|
|
|
|
|
-
|
|
|
|
|
- CONSTANTS.PERSONAS_DIR.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
- CONSTANTS.DEFAULT_TMP_DIR.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
- CONSTANTS.DEFAULT_LIB_DIR.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
-
|
|
|
|
|
- from archivebox.config.common import STORAGE_CONFIG
|
|
|
|
|
- STORAGE_CONFIG.TMP_DIR.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
- STORAGE_CONFIG.LIB_DIR.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
-
|
|
|
|
|
- if install:
|
|
|
|
|
- run_subcommand('install', pwd=out_dir)
|
|
|
|
|
-
|
|
|
|
|
- if Snapshot.objects.count() < 25: # hide the hints for experienced users
|
|
|
|
|
- print()
|
|
|
|
|
- print(' [violet]Hint:[/violet] To view your archive index, run:')
|
|
|
|
|
- print(' archivebox server # then visit [deep_sky_blue4][link=http://127.0.0.1:8000]http://127.0.0.1:8000[/link][/deep_sky_blue4]')
|
|
|
|
|
- print()
|
|
|
|
|
- print(' To add new links, you can run:')
|
|
|
|
|
- print(" archivebox add < ~/some/path/to/list_of_links.txt")
|
|
|
|
|
- print()
|
|
|
|
|
- print(' For more usage and examples, run:')
|
|
|
|
|
- print(' archivebox help')
|
|
|
|
|
-
|
|
|
|
|
-@enforce_types
|
|
|
|
|
-def status(out_dir: Path=DATA_DIR) -> None:
|
|
|
|
|
- """Print out some info and statistics about the archive collection"""
|
|
|
|
|
-
|
|
|
|
|
- check_data_folder()
|
|
|
|
|
-
|
|
|
|
|
- from core.models import Snapshot
|
|
|
|
|
- from django.contrib.auth import get_user_model
|
|
|
|
|
- User = get_user_model()
|
|
|
|
|
-
|
|
|
|
|
- print('{green}[*] Scanning archive main index...{reset}'.format(**SHELL_CONFIG.ANSI))
|
|
|
|
|
- print(SHELL_CONFIG.ANSI['lightyellow'], f' {out_dir}/*', SHELL_CONFIG.ANSI['reset'])
|
|
|
|
|
- num_bytes, num_dirs, num_files = get_dir_size(out_dir, recursive=False, pattern='index.')
|
|
|
|
|
- size = printable_filesize(num_bytes)
|
|
|
|
|
- print(f' Index size: {size} across {num_files} files')
|
|
|
|
|
- print()
|
|
|
|
|
-
|
|
|
|
|
- links = load_main_index(out_dir=out_dir)
|
|
|
|
|
- num_sql_links = links.count()
|
|
|
|
|
- num_link_details = sum(1 for link in parse_json_links_details(out_dir=out_dir))
|
|
|
|
|
- print(f' > SQL Main Index: {num_sql_links} links'.ljust(36), f'(found in {CONSTANTS.SQL_INDEX_FILENAME})')
|
|
|
|
|
- print(f' > JSON Link Details: {num_link_details} links'.ljust(36), f'(found in {ARCHIVE_DIR.name}/*/index.json)')
|
|
|
|
|
- print()
|
|
|
|
|
- print('{green}[*] Scanning archive data directories...{reset}'.format(**SHELL_CONFIG.ANSI))
|
|
|
|
|
- print(SHELL_CONFIG.ANSI['lightyellow'], f' {ARCHIVE_DIR}/*', SHELL_CONFIG.ANSI['reset'])
|
|
|
|
|
- num_bytes, num_dirs, num_files = get_dir_size(ARCHIVE_DIR)
|
|
|
|
|
- size = printable_filesize(num_bytes)
|
|
|
|
|
- print(f' Size: {size} across {num_files} files in {num_dirs} directories')
|
|
|
|
|
- print(SHELL_CONFIG.ANSI['black'])
|
|
|
|
|
- num_indexed = len(get_indexed_folders(links, out_dir=out_dir))
|
|
|
|
|
- num_archived = len(get_archived_folders(links, out_dir=out_dir))
|
|
|
|
|
- num_unarchived = len(get_unarchived_folders(links, out_dir=out_dir))
|
|
|
|
|
- print(f' > indexed: {num_indexed}'.ljust(36), f'({get_indexed_folders.__doc__})')
|
|
|
|
|
- print(f' > archived: {num_archived}'.ljust(36), f'({get_archived_folders.__doc__})')
|
|
|
|
|
- print(f' > unarchived: {num_unarchived}'.ljust(36), f'({get_unarchived_folders.__doc__})')
|
|
|
|
|
-
|
|
|
|
|
- num_present = len(get_present_folders(links, out_dir=out_dir))
|
|
|
|
|
- num_valid = len(get_valid_folders(links, out_dir=out_dir))
|
|
|
|
|
- print()
|
|
|
|
|
- print(f' > present: {num_present}'.ljust(36), f'({get_present_folders.__doc__})')
|
|
|
|
|
- print(f' > valid: {num_valid}'.ljust(36), f'({get_valid_folders.__doc__})')
|
|
|
|
|
-
|
|
|
|
|
- duplicate = get_duplicate_folders(links, out_dir=out_dir)
|
|
|
|
|
- orphaned = get_orphaned_folders(links, out_dir=out_dir)
|
|
|
|
|
- corrupted = get_corrupted_folders(links, out_dir=out_dir)
|
|
|
|
|
- unrecognized = get_unrecognized_folders(links, out_dir=out_dir)
|
|
|
|
|
- num_invalid = len({**duplicate, **orphaned, **corrupted, **unrecognized})
|
|
|
|
|
- print(f' > invalid: {num_invalid}'.ljust(36), f'({get_invalid_folders.__doc__})')
|
|
|
|
|
- print(f' > duplicate: {len(duplicate)}'.ljust(36), f'({get_duplicate_folders.__doc__})')
|
|
|
|
|
- print(f' > orphaned: {len(orphaned)}'.ljust(36), f'({get_orphaned_folders.__doc__})')
|
|
|
|
|
- print(f' > corrupted: {len(corrupted)}'.ljust(36), f'({get_corrupted_folders.__doc__})')
|
|
|
|
|
- print(f' > unrecognized: {len(unrecognized)}'.ljust(36), f'({get_unrecognized_folders.__doc__})')
|
|
|
|
|
-
|
|
|
|
|
- print(SHELL_CONFIG.ANSI['reset'])
|
|
|
|
|
-
|
|
|
|
|
- if num_indexed:
|
|
|
|
|
- print(' {lightred}Hint:{reset} You can list link data directories by status like so:'.format(**SHELL_CONFIG.ANSI))
|
|
|
|
|
- print(' archivebox list --status=<status> (e.g. indexed, corrupted, archived, etc.)')
|
|
|
|
|
-
|
|
|
|
|
- if orphaned:
|
|
|
|
|
- print(' {lightred}Hint:{reset} To automatically import orphaned data directories into the main index, run:'.format(**SHELL_CONFIG.ANSI))
|
|
|
|
|
- print(' archivebox init')
|
|
|
|
|
-
|
|
|
|
|
- if num_invalid:
|
|
|
|
|
- print(' {lightred}Hint:{reset} You may need to manually remove or fix some invalid data directories, afterwards make sure to run:'.format(**SHELL_CONFIG.ANSI))
|
|
|
|
|
- print(' archivebox init')
|
|
|
|
|
-
|
|
|
|
|
- print()
|
|
|
|
|
- print('{green}[*] Scanning recent archive changes and user logins:{reset}'.format(**SHELL_CONFIG.ANSI))
|
|
|
|
|
- print(SHELL_CONFIG.ANSI['lightyellow'], f' {CONSTANTS.LOGS_DIR}/*', SHELL_CONFIG.ANSI['reset'])
|
|
|
|
|
- users = get_admins().values_list('username', flat=True)
|
|
|
|
|
- print(f' UI users {len(users)}: {", ".join(users)}')
|
|
|
|
|
- last_login = User.objects.order_by('last_login').last()
|
|
|
|
|
- if last_login:
|
|
|
|
|
- print(f' Last UI login: {last_login.username} @ {str(last_login.last_login)[:16]}')
|
|
|
|
|
- last_downloaded = Snapshot.objects.order_by('downloaded_at').last()
|
|
|
|
|
- if last_downloaded:
|
|
|
|
|
- print(f' Last changes: {str(last_downloaded.downloaded_at)[:16]}')
|
|
|
|
|
-
|
|
|
|
|
- if not users:
|
|
|
|
|
- print()
|
|
|
|
|
- print(' {lightred}Hint:{reset} You can create an admin user by running:'.format(**SHELL_CONFIG.ANSI))
|
|
|
|
|
- print(' archivebox manage createsuperuser')
|
|
|
|
|
-
|
|
|
|
|
- print()
|
|
|
|
|
- for snapshot in links.order_by('-downloaded_at')[:10]:
|
|
|
|
|
- if not snapshot.downloaded_at:
|
|
|
|
|
- continue
|
|
|
|
|
- print(
|
|
|
|
|
- SHELL_CONFIG.ANSI['black'],
|
|
|
|
|
- (
|
|
|
|
|
- f' > {str(snapshot.downloaded_at)[:16]} '
|
|
|
|
|
- f'[{snapshot.num_outputs} {("X", "√")[snapshot.is_archived]} {printable_filesize(snapshot.archive_size)}] '
|
|
|
|
|
- f'"{snapshot.title}": {snapshot.url}'
|
|
|
|
|
- )[:SHELL_CONFIG.TERM_WIDTH],
|
|
|
|
|
- SHELL_CONFIG.ANSI['reset'],
|
|
|
|
|
- )
|
|
|
|
|
- print(SHELL_CONFIG.ANSI['black'], ' ...', SHELL_CONFIG.ANSI['reset'])
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-@enforce_types
|
|
|
|
|
-def oneshot(url: str, extractors: str="", out_dir: Path=DATA_DIR, created_by_id: int | None=None) -> List[Link]:
|
|
|
|
|
- """
|
|
|
|
|
- Create a single URL archive folder with an index.json and index.html, and all the archive method outputs.
|
|
|
|
|
- You can run this to archive single pages without needing to create a whole collection with archivebox init.
|
|
|
|
|
- """
|
|
|
|
|
- oneshot_link, _ = parse_links_memory([url])
|
|
|
|
|
- if len(oneshot_link) > 1:
|
|
|
|
|
- stderr(
|
|
|
|
|
- '[X] You should pass a single url to the oneshot command',
|
|
|
|
|
- color='red'
|
|
|
|
|
- )
|
|
|
|
|
- raise SystemExit(2)
|
|
|
|
|
-
|
|
|
|
|
- methods = extractors.split(",") if extractors else ignore_methods(['title'])
|
|
|
|
|
- archive_link(oneshot_link[0], out_dir=out_dir, methods=methods, created_by_id=created_by_id)
|
|
|
|
|
- return oneshot_link
|
|
|
|
|
-
|
|
|
|
|
-@enforce_types
|
|
|
|
|
-def add(urls: Union[str, List[str]],
|
|
|
|
|
- tag: str='',
|
|
|
|
|
- depth: int=0,
|
|
|
|
|
- update: bool=not ARCHIVING_CONFIG.ONLY_NEW,
|
|
|
|
|
- update_all: bool=False,
|
|
|
|
|
- index_only: bool=False,
|
|
|
|
|
- overwrite: bool=False,
|
|
|
|
|
- # duplicate: bool=False, # TODO: reuse the logic from admin.py resnapshot to allow adding multiple snapshots by appending timestamp automatically
|
|
|
|
|
- init: bool=False,
|
|
|
|
|
- extractors: str="",
|
|
|
|
|
- parser: str="auto",
|
|
|
|
|
- created_by_id: int | None=None,
|
|
|
|
|
- out_dir: Path=DATA_DIR) -> List[Link]:
|
|
|
|
|
- """Add a new URL or list of URLs to your archive"""
|
|
|
|
|
-
|
|
|
|
|
- from core.models import Snapshot, Tag
|
|
|
|
|
- # from workers.supervisor_util import start_cli_workers, tail_worker_logs
|
|
|
|
|
- # from workers.tasks import bg_archive_link
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
- assert depth in (0, 1), 'Depth must be 0 or 1 (depth >1 is not supported yet)'
|
|
|
|
|
-
|
|
|
|
|
- extractors = extractors.split(",") if extractors else []
|
|
|
|
|
-
|
|
|
|
|
- if init:
|
|
|
|
|
- run_subcommand('init', stdin=None, pwd=out_dir)
|
|
|
|
|
-
|
|
|
|
|
- # Load list of links from the existing index
|
|
|
|
|
- check_data_folder()
|
|
|
|
|
-
|
|
|
|
|
- # worker = start_cli_workers()
|
|
|
|
|
-
|
|
|
|
|
- new_links: List[Link] = []
|
|
|
|
|
- all_links = load_main_index(out_dir=out_dir)
|
|
|
|
|
-
|
|
|
|
|
- log_importing_started(urls=urls, depth=depth, index_only=index_only)
|
|
|
|
|
- if isinstance(urls, str):
|
|
|
|
|
- # save verbatim stdin to sources
|
|
|
|
|
- write_ahead_log = save_text_as_source(urls, filename='{ts}-import.txt', out_dir=out_dir)
|
|
|
|
|
- elif isinstance(urls, list):
|
|
|
|
|
- # save verbatim args to sources
|
|
|
|
|
- write_ahead_log = save_text_as_source('\n'.join(urls), filename='{ts}-import.txt', out_dir=out_dir)
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
- new_links += parse_links_from_source(write_ahead_log, root_url=None, parser=parser)
|
|
|
|
|
-
|
|
|
|
|
- # If we're going one level deeper, download each link and look for more links
|
|
|
|
|
- new_links_depth = []
|
|
|
|
|
- if new_links and depth == 1:
|
|
|
|
|
- log_crawl_started(new_links)
|
|
|
|
|
- for new_link in new_links:
|
|
|
|
|
- try:
|
|
|
|
|
- downloaded_file = save_file_as_source(new_link.url, filename=f'{new_link.timestamp}-crawl-{new_link.domain}.txt', out_dir=out_dir)
|
|
|
|
|
- new_links_depth += parse_links_from_source(downloaded_file, root_url=new_link.url)
|
|
|
|
|
- except Exception as err:
|
|
|
|
|
- stderr('[!] Failed to get contents of URL {new_link.url}', err, color='red')
|
|
|
|
|
-
|
|
|
|
|
- imported_links = list({link.url: link for link in (new_links + new_links_depth)}.values())
|
|
|
|
|
-
|
|
|
|
|
- new_links = dedupe_links(all_links, imported_links)
|
|
|
|
|
-
|
|
|
|
|
- write_main_index(links=new_links, out_dir=out_dir, created_by_id=created_by_id)
|
|
|
|
|
- all_links = load_main_index(out_dir=out_dir)
|
|
|
|
|
-
|
|
|
|
|
- tags = [
|
|
|
|
|
- Tag.objects.get_or_create(name=name.strip(), defaults={'created_by_id': created_by_id})[0]
|
|
|
|
|
- for name in tag.split(',')
|
|
|
|
|
- if name.strip()
|
|
|
|
|
- ]
|
|
|
|
|
- if tags:
|
|
|
|
|
- for link in imported_links:
|
|
|
|
|
- snapshot = Snapshot.objects.get(url=link.url)
|
|
|
|
|
- snapshot.tags.add(*tags)
|
|
|
|
|
- snapshot.tags_str(nocache=True)
|
|
|
|
|
- snapshot.save()
|
|
|
|
|
- # print(f' √ Tagged {len(imported_links)} Snapshots with {len(tags)} tags {tags_str}')
|
|
|
|
|
-
|
|
|
|
|
- if index_only:
|
|
|
|
|
- # mock archive all the links using the fake index_only extractor method in order to update their state
|
|
|
|
|
- if overwrite:
|
|
|
|
|
- archive_links(imported_links, overwrite=overwrite, methods=['index_only'], out_dir=out_dir, created_by_id=created_by_id)
|
|
|
|
|
- else:
|
|
|
|
|
- archive_links(new_links, overwrite=False, methods=['index_only'], out_dir=out_dir, created_by_id=created_by_id)
|
|
|
|
|
- else:
|
|
|
|
|
- # fully run the archive extractor methods for each link
|
|
|
|
|
- archive_kwargs = {
|
|
|
|
|
- "out_dir": out_dir,
|
|
|
|
|
- "created_by_id": created_by_id,
|
|
|
|
|
- }
|
|
|
|
|
- if extractors:
|
|
|
|
|
- archive_kwargs["methods"] = extractors
|
|
|
|
|
-
|
|
|
|
|
- stderr()
|
|
|
|
|
-
|
|
|
|
|
- ts = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
|
|
|
|
|
-
|
|
|
|
|
- if update:
|
|
|
|
|
- stderr(f'[*] [{ts}] Archiving + updating {len(imported_links)}/{len(all_links)}', len(imported_links), 'URLs from added set...', color='green')
|
|
|
|
|
- archive_links(imported_links, overwrite=overwrite, **archive_kwargs)
|
|
|
|
|
- elif update_all:
|
|
|
|
|
- stderr(f'[*] [{ts}] Archiving + updating {len(all_links)}/{len(all_links)}', len(all_links), 'URLs from entire library...', color='green')
|
|
|
|
|
- archive_links(all_links, overwrite=overwrite, **archive_kwargs)
|
|
|
|
|
- elif overwrite:
|
|
|
|
|
- stderr(f'[*] [{ts}] Archiving + overwriting {len(imported_links)}/{len(all_links)}', len(imported_links), 'URLs from added set...', color='green')
|
|
|
|
|
- archive_links(imported_links, overwrite=True, **archive_kwargs)
|
|
|
|
|
- elif new_links:
|
|
|
|
|
- stderr(f'[*] [{ts}] Archiving {len(new_links)}/{len(all_links)} URLs from added set...', color='green')
|
|
|
|
|
- archive_links(new_links, overwrite=False, **archive_kwargs)
|
|
|
|
|
-
|
|
|
|
|
- # tail_worker_logs(worker['stdout_logfile'])
|
|
|
|
|
-
|
|
|
|
|
- # if CAN_UPGRADE:
|
|
|
|
|
- # hint(f"There's a new version of ArchiveBox available! Your current version is {VERSION}. You can upgrade to {VERSIONS_AVAILABLE['recommended_version']['tag_name']} ({VERSIONS_AVAILABLE['recommended_version']['html_url']}). For more on how to upgrade: https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives\n")
|
|
|
|
|
-
|
|
|
|
|
- return new_links
|
|
|
|
|
-
|
|
|
|
|
-@enforce_types
|
|
|
|
|
-def remove(filter_str: Optional[str]=None,
|
|
|
|
|
- filter_patterns: Optional[List[str]]=None,
|
|
|
|
|
- filter_type: str='exact',
|
|
|
|
|
- snapshots: Optional[QuerySet]=None,
|
|
|
|
|
- after: Optional[float]=None,
|
|
|
|
|
- before: Optional[float]=None,
|
|
|
|
|
- yes: bool=False,
|
|
|
|
|
- delete: bool=False,
|
|
|
|
|
- out_dir: Path=DATA_DIR) -> List[Link]:
|
|
|
|
|
- """Remove the specified URLs from the archive"""
|
|
|
|
|
-
|
|
|
|
|
- check_data_folder()
|
|
|
|
|
-
|
|
|
|
|
- if snapshots is None:
|
|
|
|
|
- if filter_str and filter_patterns:
|
|
|
|
|
- stderr(
|
|
|
|
|
- '[X] You should pass either a pattern as an argument, '
|
|
|
|
|
- 'or pass a list of patterns via stdin, but not both.\n',
|
|
|
|
|
- color='red',
|
|
|
|
|
- )
|
|
|
|
|
- raise SystemExit(2)
|
|
|
|
|
- elif not (filter_str or filter_patterns):
|
|
|
|
|
- stderr(
|
|
|
|
|
- '[X] You should pass either a pattern as an argument, '
|
|
|
|
|
- 'or pass a list of patterns via stdin.',
|
|
|
|
|
- color='red',
|
|
|
|
|
- )
|
|
|
|
|
- stderr()
|
|
|
|
|
- hint(('To remove all urls you can run:',
|
|
|
|
|
- 'archivebox remove --filter-type=regex ".*"'))
|
|
|
|
|
- stderr()
|
|
|
|
|
- raise SystemExit(2)
|
|
|
|
|
- elif filter_str:
|
|
|
|
|
- filter_patterns = [ptn.strip() for ptn in filter_str.split('\n')]
|
|
|
|
|
-
|
|
|
|
|
- list_kwargs = {
|
|
|
|
|
- "filter_patterns": filter_patterns,
|
|
|
|
|
- "filter_type": filter_type,
|
|
|
|
|
- "after": after,
|
|
|
|
|
- "before": before,
|
|
|
|
|
- }
|
|
|
|
|
- if snapshots:
|
|
|
|
|
- list_kwargs["snapshots"] = snapshots
|
|
|
|
|
-
|
|
|
|
|
- log_list_started(filter_patterns, filter_type)
|
|
|
|
|
- timer = TimedProgress(360, prefix=' ')
|
|
|
|
|
- try:
|
|
|
|
|
- snapshots = list_links(**list_kwargs)
|
|
|
|
|
- finally:
|
|
|
|
|
- timer.end()
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
- if not snapshots.exists():
|
|
|
|
|
- log_removal_finished(0, 0)
|
|
|
|
|
- raise SystemExit(1)
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
- log_links = [link.as_link() for link in snapshots]
|
|
|
|
|
- log_list_finished(log_links)
|
|
|
|
|
- log_removal_started(log_links, yes=yes, delete=delete)
|
|
|
|
|
-
|
|
|
|
|
- timer = TimedProgress(360, prefix=' ')
|
|
|
|
|
- try:
|
|
|
|
|
- for snapshot in snapshots:
|
|
|
|
|
- if delete:
|
|
|
|
|
- shutil.rmtree(snapshot.as_link().link_dir, ignore_errors=True)
|
|
|
|
|
- finally:
|
|
|
|
|
- timer.end()
|
|
|
|
|
-
|
|
|
|
|
- to_remove = snapshots.count()
|
|
|
|
|
-
|
|
|
|
|
- from .search import flush_search_index
|
|
|
|
|
-
|
|
|
|
|
- flush_search_index(snapshots=snapshots)
|
|
|
|
|
- remove_from_sql_main_index(snapshots=snapshots, out_dir=out_dir)
|
|
|
|
|
- all_snapshots = load_main_index(out_dir=out_dir)
|
|
|
|
|
- log_removal_finished(all_snapshots.count(), to_remove)
|
|
|
|
|
-
|
|
|
|
|
- return all_snapshots
|
|
|
|
|
-
|
|
|
|
|
-@enforce_types
|
|
|
|
|
-def update(resume: Optional[float]=None,
|
|
|
|
|
- only_new: bool=ARCHIVING_CONFIG.ONLY_NEW,
|
|
|
|
|
- index_only: bool=False,
|
|
|
|
|
- overwrite: bool=False,
|
|
|
|
|
- filter_patterns_str: Optional[str]=None,
|
|
|
|
|
- filter_patterns: Optional[List[str]]=None,
|
|
|
|
|
- filter_type: Optional[str]=None,
|
|
|
|
|
- status: Optional[str]=None,
|
|
|
|
|
- after: Optional[str]=None,
|
|
|
|
|
- before: Optional[str]=None,
|
|
|
|
|
- extractors: str="",
|
|
|
|
|
- out_dir: Path=DATA_DIR) -> List[Link]:
|
|
|
|
|
- """Import any new links from subscriptions and retry any previously failed/skipped links"""
|
|
|
|
|
-
|
|
|
|
|
- from core.models import ArchiveResult
|
|
|
|
|
- from .search import index_links
|
|
|
|
|
- # from workers.supervisor_util import start_cli_workers
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
- check_data_folder()
|
|
|
|
|
- # start_cli_workers()
|
|
|
|
|
- new_links: List[Link] = [] # TODO: Remove input argument: only_new
|
|
|
|
|
-
|
|
|
|
|
- extractors = extractors.split(",") if extractors else []
|
|
|
|
|
-
|
|
|
|
|
- # Step 1: Filter for selected_links
|
|
|
|
|
- print('[*] Finding matching Snapshots to update...')
|
|
|
|
|
- print(f' - Filtering by {" ".join(filter_patterns)} ({filter_type}) {before=} {after=} {status=}...')
|
|
|
|
|
- matching_snapshots = list_links(
|
|
|
|
|
- filter_patterns=filter_patterns,
|
|
|
|
|
- filter_type=filter_type,
|
|
|
|
|
- before=before,
|
|
|
|
|
- after=after,
|
|
|
|
|
- )
|
|
|
|
|
- print(f' - Checking {matching_snapshots.count()} snapshot folders for existing data with {status=}...')
|
|
|
|
|
- matching_folders = list_folders(
|
|
|
|
|
- links=matching_snapshots,
|
|
|
|
|
- status=status,
|
|
|
|
|
- out_dir=out_dir,
|
|
|
|
|
- )
|
|
|
|
|
- all_links = (link for link in matching_folders.values() if link)
|
|
|
|
|
- print(' - Sorting by most unfinished -> least unfinished + date archived...')
|
|
|
|
|
- all_links = sorted(all_links, key=lambda link: (ArchiveResult.objects.filter(snapshot__url=link.url).count(), link.timestamp))
|
|
|
|
|
-
|
|
|
|
|
- if index_only:
|
|
|
|
|
- for link in all_links:
|
|
|
|
|
- write_link_details(link, out_dir=out_dir, skip_sql_index=True)
|
|
|
|
|
- index_links(all_links, out_dir=out_dir)
|
|
|
|
|
- return all_links
|
|
|
|
|
-
|
|
|
|
|
- # Step 2: Run the archive methods for each link
|
|
|
|
|
- to_archive = new_links if only_new else all_links
|
|
|
|
|
- if resume:
|
|
|
|
|
- to_archive = [
|
|
|
|
|
- link for link in to_archive
|
|
|
|
|
- if link.timestamp >= str(resume)
|
|
|
|
|
- ]
|
|
|
|
|
- if not to_archive:
|
|
|
|
|
- stderr('')
|
|
|
|
|
- stderr(f'[√] Nothing found to resume after {resume}', color='green')
|
|
|
|
|
- return all_links
|
|
|
|
|
-
|
|
|
|
|
- archive_kwargs = {
|
|
|
|
|
- "out_dir": out_dir,
|
|
|
|
|
- }
|
|
|
|
|
- if extractors:
|
|
|
|
|
- archive_kwargs["methods"] = extractors
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
- archive_links(to_archive, overwrite=overwrite, **archive_kwargs)
|
|
|
|
|
-
|
|
|
|
|
- # Step 4: Re-write links index with updated titles, icons, and resources
|
|
|
|
|
- all_links = load_main_index(out_dir=out_dir)
|
|
|
|
|
- return all_links
|
|
|
|
|
-
|
|
|
|
|
-@enforce_types
|
|
|
|
|
-def list_all(filter_patterns_str: Optional[str]=None,
|
|
|
|
|
- filter_patterns: Optional[List[str]]=None,
|
|
|
|
|
- filter_type: str='exact',
|
|
|
|
|
- status: Optional[str]=None,
|
|
|
|
|
- after: Optional[float]=None,
|
|
|
|
|
- before: Optional[float]=None,
|
|
|
|
|
- sort: Optional[str]=None,
|
|
|
|
|
- csv: Optional[str]=None,
|
|
|
|
|
- json: bool=False,
|
|
|
|
|
- html: bool=False,
|
|
|
|
|
- with_headers: bool=False,
|
|
|
|
|
- out_dir: Path=DATA_DIR):
|
|
|
|
|
- """List, filter, and export information about archive entries"""
|
|
|
|
|
-
|
|
|
|
|
- check_data_folder()
|
|
|
|
|
-
|
|
|
|
|
- if filter_patterns and filter_patterns_str:
|
|
|
|
|
- stderr(
|
|
|
|
|
- '[X] You should either pass filter patterns as an arguments '
|
|
|
|
|
- 'or via stdin, but not both.\n',
|
|
|
|
|
- color='red',
|
|
|
|
|
- )
|
|
|
|
|
- raise SystemExit(2)
|
|
|
|
|
- elif filter_patterns_str:
|
|
|
|
|
- filter_patterns = filter_patterns_str.split('\n')
|
|
|
|
|
-
|
|
|
|
|
- snapshots = list_links(
|
|
|
|
|
- filter_patterns=filter_patterns,
|
|
|
|
|
- filter_type=filter_type,
|
|
|
|
|
- before=before,
|
|
|
|
|
- after=after,
|
|
|
|
|
- )
|
|
|
|
|
-
|
|
|
|
|
- if sort:
|
|
|
|
|
- snapshots = snapshots.order_by(sort)
|
|
|
|
|
-
|
|
|
|
|
- folders = list_folders(
|
|
|
|
|
- links=snapshots,
|
|
|
|
|
- status=status,
|
|
|
|
|
- out_dir=out_dir,
|
|
|
|
|
- )
|
|
|
|
|
-
|
|
|
|
|
- if json:
|
|
|
|
|
- output = generate_json_index_from_links(folders.values(), with_headers=with_headers)
|
|
|
|
|
- elif html:
|
|
|
|
|
- output = generate_index_from_links(folders.values(), with_headers=with_headers)
|
|
|
|
|
- elif csv:
|
|
|
|
|
- output = links_to_csv(folders.values(), cols=csv.split(','), header=with_headers)
|
|
|
|
|
- else:
|
|
|
|
|
- output = printable_folders(folders, with_headers=with_headers)
|
|
|
|
|
- print(output)
|
|
|
|
|
- return output
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-@enforce_types
|
|
|
|
|
-def list_links(snapshots: Optional[QuerySet]=None,
|
|
|
|
|
- filter_patterns: Optional[List[str]]=None,
|
|
|
|
|
- filter_type: str='exact',
|
|
|
|
|
- after: Optional[float]=None,
|
|
|
|
|
- before: Optional[float]=None,
|
|
|
|
|
- out_dir: Path=DATA_DIR) -> Iterable[Link]:
|
|
|
|
|
-
|
|
|
|
|
- check_data_folder()
|
|
|
|
|
-
|
|
|
|
|
- if snapshots:
|
|
|
|
|
- all_snapshots = snapshots
|
|
|
|
|
- else:
|
|
|
|
|
- all_snapshots = load_main_index(out_dir=out_dir)
|
|
|
|
|
-
|
|
|
|
|
- if after is not None:
|
|
|
|
|
- all_snapshots = all_snapshots.filter(timestamp__gte=after)
|
|
|
|
|
- if before is not None:
|
|
|
|
|
- all_snapshots = all_snapshots.filter(timestamp__lt=before)
|
|
|
|
|
- if filter_patterns:
|
|
|
|
|
- all_snapshots = snapshot_filter(all_snapshots, filter_patterns, filter_type)
|
|
|
|
|
-
|
|
|
|
|
- if not all_snapshots:
|
|
|
|
|
- stderr('[!] No Snapshots matched your filters:', filter_patterns, f'({filter_type})', color='lightyellow')
|
|
|
|
|
-
|
|
|
|
|
- return all_snapshots
|
|
|
|
|
-
|
|
|
|
|
-@enforce_types
|
|
|
|
|
-def list_folders(links: List[Link],
|
|
|
|
|
- status: str,
|
|
|
|
|
- out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]:
|
|
|
|
|
-
|
|
|
|
|
- check_data_folder()
|
|
|
|
|
-
|
|
|
|
|
- STATUS_FUNCTIONS = {
|
|
|
|
|
- "indexed": get_indexed_folders,
|
|
|
|
|
- "archived": get_archived_folders,
|
|
|
|
|
- "unarchived": get_unarchived_folders,
|
|
|
|
|
- "present": get_present_folders,
|
|
|
|
|
- "valid": get_valid_folders,
|
|
|
|
|
- "invalid": get_invalid_folders,
|
|
|
|
|
- "duplicate": get_duplicate_folders,
|
|
|
|
|
- "orphaned": get_orphaned_folders,
|
|
|
|
|
- "corrupted": get_corrupted_folders,
|
|
|
|
|
- "unrecognized": get_unrecognized_folders,
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- try:
|
|
|
|
|
- return STATUS_FUNCTIONS[status](links, out_dir=out_dir)
|
|
|
|
|
- except KeyError:
|
|
|
|
|
- raise ValueError('Status not recognized.')
|
|
|
|
|
-
|
|
|
|
|
-@enforce_types
|
|
|
|
|
-def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, binaries: Optional[List[str]]=None, dry_run: bool=False) -> None:
|
|
|
|
|
- """Automatically install all ArchiveBox dependencies and extras"""
|
|
|
|
|
-
|
|
|
|
|
- # if running as root:
|
|
|
|
|
- # - run init to create index + lib dir
|
|
|
|
|
- # - chown -R 911 DATA_DIR
|
|
|
|
|
- # - install all binaries as root
|
|
|
|
|
- # - chown -R 911 LIB_DIR
|
|
|
|
|
- # else:
|
|
|
|
|
- # - run init to create index + lib dir as current user
|
|
|
|
|
- # - install all binaries as current user
|
|
|
|
|
- # - recommend user re-run with sudo if any deps need to be installed as root
|
|
|
|
|
-
|
|
|
|
|
- from rich import print
|
|
|
|
|
-
|
|
|
|
|
- from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
|
|
|
|
|
- from archivebox.config.paths import get_or_create_working_lib_dir
|
|
|
|
|
-
|
|
|
|
|
- if not (os.access(ARCHIVE_DIR, os.R_OK) and ARCHIVE_DIR.is_dir()):
|
|
|
|
|
- run_subcommand('init', stdin=None, pwd=out_dir) # must init full index because we need a db to store InstalledBinary entries in
|
|
|
|
|
-
|
|
|
|
|
- print('\n[green][+] Installing ArchiveBox dependencies automatically...[/green]')
|
|
|
|
|
-
|
|
|
|
|
- # we never want the data dir to be owned by root, detect owner of existing owner of DATA_DIR to try and guess desired non-root UID
|
|
|
|
|
- if IS_ROOT:
|
|
|
|
|
- EUID = os.geteuid()
|
|
|
|
|
-
|
|
|
|
|
- # if we have sudo/root permissions, take advantage of them just while installing dependencies
|
|
|
|
|
- print()
|
|
|
|
|
- print(f'[yellow]:warning: Running as UID=[blue]{EUID}[/blue] with [red]sudo[/red] only for dependencies that need it.[/yellow]')
|
|
|
|
|
- print(f' DATA_DIR, LIB_DIR, and TMP_DIR will be owned by [blue]{ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/blue].')
|
|
|
|
|
- print()
|
|
|
|
|
-
|
|
|
|
|
- LIB_DIR = get_or_create_working_lib_dir()
|
|
|
|
|
-
|
|
|
|
|
- package_manager_names = ', '.join(
|
|
|
|
|
- f'[yellow]{binprovider.name}[/yellow]'
|
|
|
|
|
- for binprovider in reversed(list(abx.as_dict(abx.pm.hook.get_BINPROVIDERS()).values()))
|
|
|
|
|
- if not binproviders or (binproviders and binprovider.name in binproviders)
|
|
|
|
|
- )
|
|
|
|
|
- print(f'[+] Setting up package managers {package_manager_names}...')
|
|
|
|
|
- for binprovider in reversed(list(abx.as_dict(abx.pm.hook.get_BINPROVIDERS()).values())):
|
|
|
|
|
- if binproviders and binprovider.name not in binproviders:
|
|
|
|
|
- continue
|
|
|
|
|
- try:
|
|
|
|
|
- binprovider.setup()
|
|
|
|
|
- except Exception:
|
|
|
|
|
- # it's ok, installing binaries below will automatically set up package managers as needed
|
|
|
|
|
- # e.g. if user does not have npm available we cannot set it up here yet, but once npm Binary is installed
|
|
|
|
|
- # the next package that depends on npm will automatically call binprovider.setup() during its own install
|
|
|
|
|
- pass
|
|
|
|
|
-
|
|
|
|
|
- print()
|
|
|
|
|
-
-    for binary in reversed(list(abx.as_dict(abx.pm.hook.get_BINARIES()).values())):
-        if binary.name in ('archivebox', 'django', 'sqlite', 'python'):
-            # obviously must already be installed if we are running
-            continue
-
-        if binaries and binary.name not in binaries:
-            continue
-
-        providers = ' [grey53]or[/grey53] '.join(
-            provider.name for provider in binary.binproviders_supported
-            if not binproviders or (binproviders and provider.name in binproviders)
-        )
-        if not providers:
-            continue
-        print(f'[+] Detecting / Installing [yellow]{binary.name.ljust(22)}[/yellow] using [red]{providers}[/red]...')
-        try:
-            with SudoPermission(uid=0, fallback=True):
-                # print(binary.load_or_install(fresh=True).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'}))
-                if binproviders:
-                    providers_supported_by_binary = [provider.name for provider in binary.binproviders_supported]
-                    for binprovider_name in binproviders:
-                        if binprovider_name not in providers_supported_by_binary:
-                            continue
-                        try:
-                            if dry_run:
-                                # always show install commands when doing a dry run
-                                sys.stderr.write("\033[2;49;90m")  # grey53
-                                result = binary.install(binproviders=[binprovider_name], dry_run=dry_run).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
-                                sys.stderr.write("\033[00m\n")  # reset
-                            else:
-                                loaded_binary = archivebox.pm.hook.binary_load_or_install(binary=binary, binproviders=[binprovider_name], fresh=True, dry_run=dry_run, quiet=False)
-                                result = loaded_binary.model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
-                            if result and result['loaded_version']:
-                                break
-                        except Exception as e:
-                            print(f'[red]:cross_mark: Failed to install {binary.name} using {binprovider_name} as user {ARCHIVEBOX_USER}: {e}[/red]')
-                else:
-                    if dry_run:
-                        sys.stderr.write("\033[2;49;90m")  # grey53
-                        binary.install(dry_run=dry_run).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
-                        sys.stderr.write("\033[00m\n")  # reset
-                    else:
-                        loaded_binary = archivebox.pm.hook.binary_load_or_install(binary=binary, fresh=True, dry_run=dry_run)
-                        result = loaded_binary.model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
-            if IS_ROOT and LIB_DIR:
-                with SudoPermission(uid=0):
-                    if ARCHIVEBOX_USER == 0:
-                        os.system(f'chmod -R 777 "{LIB_DIR.resolve()}"')
-                    else:
-                        os.system(f'chown -R {ARCHIVEBOX_USER} "{LIB_DIR.resolve()}"')
-        except Exception as e:
-            print(f'[red]:cross_mark: Failed to install {binary.name} as user {ARCHIVEBOX_USER}: {e}[/red]')
-            if binaries and len(binaries) == 1:
-                # if we are only installing a single binary, raise the exception so the user can see what went wrong
-                raise
-
-
-    from django.contrib.auth import get_user_model
-    User = get_user_model()
-
-    if not User.objects.filter(is_superuser=True).exclude(username='system').exists():
-        stderr('\n[+] Don\'t forget to create a new admin user for the Web UI...', color='green')
-        stderr('    archivebox manage createsuperuser')
-        # run_subcommand('manage', subcommand_args=['createsuperuser'], pwd=out_dir)
-
-    print('\n[green][√] Set up ArchiveBox and its dependencies successfully.[/green]\n', file=sys.stderr)
-
-    from abx_plugin_pip.binaries import ARCHIVEBOX_BINARY
-
-    extra_args = []
-    if binproviders:
-        extra_args.append(f'--binproviders={",".join(binproviders)}')
-    if binaries:
-        extra_args.append(f'--binaries={",".join(binaries)}')
-
-    proc = run_shell([ARCHIVEBOX_BINARY.load().abspath, 'version', *extra_args], capture_output=False, cwd=out_dir)
-    raise SystemExit(proc.returncode)
-
-
-# backwards-compatibility:
-setup = install
-
-
-@enforce_types
-def config(config_options_str: Optional[str]=None,
-           config_options: Optional[List[str]]=None,
-           get: bool=False,
-           set: bool=False,
-           search: bool=False,
-           reset: bool=False,
-           out_dir: Path=DATA_DIR) -> None:
-    """Get and set your ArchiveBox project configuration values"""
-
-    from rich import print
-
-    check_data_folder()
-    if config_options and config_options_str:
-        stderr(
-            '[X] You should either pass config values as arguments '
-            'or via stdin, but not both.\n',
-            color='red',
-        )
-        raise SystemExit(2)
-    elif config_options_str:
-        config_options = config_options_str.split('\n')
-
-    FLAT_CONFIG = archivebox.pm.hook.get_FLAT_CONFIG()
-    CONFIGS = archivebox.pm.hook.get_CONFIGS()
-
-    config_options = config_options or []
-
-    no_args = not (get or set or reset or config_options)
-
-    matching_config = {}
-    if search:
-        if config_options:
-            config_options = [get_real_name(key) for key in config_options]
-            matching_config = {key: FLAT_CONFIG[key] for key in config_options if key in FLAT_CONFIG}
-            for config_section in CONFIGS.values():
-                aliases = config_section.aliases
-
-                for search_key in config_options:
-                    # search all aliases in the section
-                    for alias_key, key in aliases.items():
-                        if search_key.lower() in alias_key.lower():
-                            matching_config[key] = config_section.model_dump()[key]
-
-                    # search all keys and values in the section
-                    for existing_key, value in config_section.model_dump().items():
-                        if search_key.lower() in existing_key.lower() or search_key.lower() in str(value).lower():
-                            matching_config[existing_key] = value
-
-        print(printable_config(matching_config))
-        raise SystemExit(not matching_config)
-    elif get or no_args:
-        if config_options:
-            config_options = [get_real_name(key) for key in config_options]
-            matching_config = {key: FLAT_CONFIG[key] for key in config_options if key in FLAT_CONFIG}
-            failed_config = [key for key in config_options if key not in FLAT_CONFIG]
-            if failed_config:
-                stderr()
-                stderr('[X] These options failed to get', color='red')
-                stderr('    {}'.format('\n    '.join(failed_config)))
-                raise SystemExit(1)
-        else:
-            matching_config = FLAT_CONFIG
-
-        print(printable_config(matching_config))
-        raise SystemExit(not matching_config)
-    elif set:
-        new_config = {}
-        failed_options = []
-        for line in config_options:
-            if line.startswith('#') or not line.strip():
-                continue
-            if '=' not in line:
-                stderr('[X] Config KEY=VALUE must have an = sign in it', color='red')
-                stderr(f'    {line}')
-                raise SystemExit(2)
-
-            raw_key, val = line.split('=', 1)
-            raw_key = raw_key.upper().strip()
-            key = get_real_name(raw_key)
-            if key != raw_key:
-                stderr(f'[i] Note: The config option {raw_key} has been renamed to {key}, please use the new name going forwards.', color='lightyellow')
-
-            if key in FLAT_CONFIG:
-                new_config[key] = val.strip()
-            else:
-                failed_options.append(line)
-
-        if new_config:
-            before = FLAT_CONFIG
-            matching_config = write_config_file(new_config)
-            after = {**load_all_config(), **archivebox.pm.hook.get_FLAT_CONFIG()}
-            print(printable_config(matching_config))
-
-            side_effect_changes = {}
-            for key, val in after.items():
-                if key in FLAT_CONFIG and (str(before[key]) != str(after[key])) and (key not in matching_config):
-                    side_effect_changes[key] = after[key]
-                    # import ipdb; ipdb.set_trace()
-
-            if side_effect_changes:
-                stderr()
-                stderr('[i] Note: This change also affected these other options that depended on it:', color='lightyellow')
-                print('    {}'.format(printable_config(side_effect_changes, prefix='    ')))
-        if failed_options:
-            stderr()
-            stderr('[X] These options failed to set (check for typos):', color='red')
-            stderr('    {}'.format('\n    '.join(failed_options)))
-            raise SystemExit(1)
-    elif reset:
-        stderr('[X] This command is not implemented yet.', color='red')
-        stderr('    Please manually remove the relevant lines from your config file:')
-        raise SystemExit(2)
-    else:
-        stderr('[X] You must pass either --get or --set, or no arguments to get the whole config.', color='red')
-        stderr('        archivebox config')
-        stderr('        archivebox config --get SOME_KEY')
-        stderr('        archivebox config --set SOME_KEY=SOME_VALUE')
-        raise SystemExit(2)
-
-
-@enforce_types
-def schedule(add: bool=False,
-             show: bool=False,
-             clear: bool=False,
-             foreground: bool=False,
-             run_all: bool=False,
-             quiet: bool=False,
-             every: Optional[str]=None,
-             tag: str='',
-             depth: int=0,
-             overwrite: bool=False,
-             update: bool=not ARCHIVING_CONFIG.ONLY_NEW,
-             import_path: Optional[str]=None,
-             out_dir: Path=DATA_DIR):
-    """Set ArchiveBox to regularly import URLs at specific times using cron"""
-
-    check_data_folder()
-    from abx_plugin_pip.binaries import ARCHIVEBOX_BINARY
-    from archivebox.config.permissions import USER
-
-    Path(CONSTANTS.LOGS_DIR).mkdir(exist_ok=True)
-
-    cron = CronTab(user=True)
-    cron = dedupe_cron_jobs(cron)
-
-    if clear:
-        print(cron.remove_all(comment=CRON_COMMENT))
-        cron.write()
-        raise SystemExit(0)
-
-    existing_jobs = list(cron.find_comment(CRON_COMMENT))
-
-    if every or add:
-        every = every or 'day'
-        quoted = lambda s: f'"{s}"' if (s and ' ' in str(s)) else str(s)
-        cmd = [
-            'cd',
-            quoted(out_dir),
-            '&&',
-            quoted(ARCHIVEBOX_BINARY.load().abspath),
-            *([
-                'add',
-                *(['--overwrite'] if overwrite else []),
-                *(['--update'] if update else []),
-                *([f'--tag={tag}'] if tag else []),
-                f'--depth={depth}',
-                f'"{import_path}"',
-            ] if import_path else ['update']),
-            '>>',
-            quoted(Path(CONSTANTS.LOGS_DIR) / 'schedule.log'),
-            '2>&1',
-
-        ]
-        new_job = cron.new(command=' '.join(cmd), comment=CRON_COMMENT)
-
-        if every in ('minute', 'hour', 'day', 'month', 'year'):
-            set_every = getattr(new_job.every(), every)
-            set_every()
-        elif CronSlices.is_valid(every):
-            new_job.setall(every)
-        else:
-            stderr('{red}[X] Got invalid timeperiod for cron task.{reset}'.format(**SHELL_CONFIG.ANSI))
-            stderr('    It must be one of minute/hour/day/month/year')
-            stderr('    or a quoted cron-format schedule like:')
-            stderr('        archivebox schedule --every=day --depth=1 https://example.com/some/rss/feed.xml')
-            stderr('        archivebox schedule --every="0/5 * * * *" --depth=1 https://example.com/some/rss/feed.xml')
-            raise SystemExit(1)
-
-        cron = dedupe_cron_jobs(cron)
-        cron.write()
-
-        total_runs = sum(j.frequency_per_year() for j in cron)
-        existing_jobs = list(cron.find_comment(CRON_COMMENT))
-
-        print()
-        print('{green}[√] Scheduled new ArchiveBox cron job for user: {} ({} jobs are active).{reset}'.format(USER, len(existing_jobs), **SHELL_CONFIG.ANSI))
-        print('\n'.join(f'  > {cmd}' if str(cmd) == str(new_job) else f'    {cmd}' for cmd in existing_jobs))
-        if total_runs > 60 and not quiet:
-            stderr()
-            stderr('{lightyellow}[!] With the current cron config, ArchiveBox is estimated to run >{} times per year.{reset}'.format(total_runs, **SHELL_CONFIG.ANSI))
-            stderr('    Congrats on being an enthusiastic internet archiver! 👌')
-            stderr()
-            stderr('    Make sure you have enough storage space available to hold all the data.')
-            stderr('    Using a compressed/deduped filesystem like ZFS is recommended if you plan on archiving a lot.')
-            stderr('')
-    elif show:
-        if existing_jobs:
-            print('\n'.join(str(cmd) for cmd in existing_jobs))
-        else:
-            stderr('{red}[X] There are no ArchiveBox cron jobs scheduled for your user ({}).{reset}'.format(USER, **SHELL_CONFIG.ANSI))
-            stderr('    To schedule a new job, run:')
-            stderr('        archivebox schedule --every=[timeperiod] --depth=1 https://example.com/some/rss/feed.xml')
-        raise SystemExit(0)
-
-    cron = CronTab(user=True)
-    cron = dedupe_cron_jobs(cron)
-    existing_jobs = list(cron.find_comment(CRON_COMMENT))
-
-    if foreground or run_all:
-        if not existing_jobs:
-            stderr('{red}[X] You must schedule some jobs first before running in foreground mode.{reset}'.format(**SHELL_CONFIG.ANSI))
-            stderr('    archivebox schedule --every=hour --depth=1 https://example.com/some/rss/feed.xml')
-            raise SystemExit(1)
-
-        print('{green}[*] Running {} ArchiveBox jobs in foreground task scheduler...{reset}'.format(len(existing_jobs), **SHELL_CONFIG.ANSI))
-        if run_all:
-            try:
-                for job in existing_jobs:
-                    sys.stdout.write(f'  > {job.command.split("/archivebox ")[0].split(" && ")[0]}\n')
-                    sys.stdout.write(f'    > {job.command.split("/archivebox ")[-1].split(" >> ")[0]}')
-                    sys.stdout.flush()
-                    job.run()
-                    sys.stdout.write(f'\r    √ {job.command.split("/archivebox ")[-1]}\n')
-            except KeyboardInterrupt:
-                print('\n{green}[√] Stopped.{reset}'.format(**SHELL_CONFIG.ANSI))
-                raise SystemExit(1)
-
-        if foreground:
-            try:
-                for job in existing_jobs:
-                    print(f'    > {job.command.split("/archivebox ")[-1].split(" >> ")[0]}')
-                for result in cron.run_scheduler():
-                    print(result)
-            except KeyboardInterrupt:
-                print('\n{green}[√] Stopped.{reset}'.format(**SHELL_CONFIG.ANSI))
-                raise SystemExit(1)
-
-    # if CAN_UPGRADE:
-    #     hint(f"There's a new version of ArchiveBox available! Your current version is {VERSION}. You can upgrade to {VERSIONS_AVAILABLE['recommended_version']['tag_name']} ({VERSIONS_AVAILABLE['recommended_version']['html_url']}). For more on how to upgrade: https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives\n")
-
-
-@enforce_types
-def server(runserver_args: Optional[List[str]]=None,
-           reload: bool=False,
-           debug: bool=False,
-           init: bool=False,
-           quick_init: bool=False,
-           createsuperuser: bool=False,
-           daemonize: bool=False,
-           out_dir: Path=DATA_DIR) -> None:
-    """Run the ArchiveBox HTTP server"""
-
-    from rich import print
-
-    runserver_args = runserver_args or []
-
-    if init:
-        run_subcommand('init', stdin=None, pwd=out_dir)
-        print()
-    elif quick_init:
-        run_subcommand('init', subcommand_args=['--quick'], stdin=None, pwd=out_dir)
-        print()
-
-    if createsuperuser:
-        run_subcommand('manage', subcommand_args=['createsuperuser'], pwd=out_dir)
-        print()
-
-
-    check_data_folder()
-
-    from django.core.management import call_command
-    from django.contrib.auth.models import User
-
-    if not User.objects.filter(is_superuser=True).exclude(username='system').exists():
-        print()
-        # print('[yellow][!] No admin accounts exist, you must create one to be able to log in to the Admin UI![/yellow]')
-        print('[violet]Hint:[/violet] To create an [bold]admin username & password[/bold] for the [deep_sky_blue3][underline][link=http://{host}:{port}/admin]Admin UI[/link][/underline][/deep_sky_blue3], run:')
-        print('        [green]archivebox manage createsuperuser[/green]')
-        print()
-
-
-    host = '127.0.0.1'
-    port = '8000'
-
-    try:
-        host_and_port = [arg for arg in runserver_args if arg.replace('.', '').replace(':', '').isdigit()][0]
-        if ':' in host_and_port:
-            host, port = host_and_port.split(':')
-        else:
-            if '.' in host_and_port:
-                host = host_and_port
-            else:
-                port = host_and_port
-    except IndexError:
-        pass
-
-    print('[green][+] Starting ArchiveBox webserver...[/green]')
-    print(f'    [blink][green]>[/green][/blink] Starting ArchiveBox webserver on [deep_sky_blue4][link=http://{host}:{port}]http://{host}:{port}[/link][/deep_sky_blue4]')
-    print(f'    [green]>[/green] Log in to ArchiveBox Admin UI on [deep_sky_blue3][link=http://{host}:{port}/admin]http://{host}:{port}/admin[/link][/deep_sky_blue3]')
-    print('    > Writing ArchiveBox error log to ./logs/errors.log')
-
-    if SHELL_CONFIG.DEBUG:
-        if not reload:
-            runserver_args.append('--noreload')  # '--insecure'
-        call_command("runserver", *runserver_args)
-    else:
-        from workers.supervisor_util import start_server_workers
-
-        print()
-        start_server_workers(host=host, port=port, daemonize=False)
-        print("\n[i][green][🟩] ArchiveBox server shut down gracefully.[/green][/i]")
-
-
-@enforce_types
-def manage(args: Optional[List[str]]=None, out_dir: Path=DATA_DIR) -> None:
-    """Run an ArchiveBox Django management command"""
-
-    check_data_folder()
-    from django.core.management import execute_from_command_line
-
-    if (args and "createsuperuser" in args) and (IN_DOCKER and not SHELL_CONFIG.IS_TTY):
-        stderr('[!] Warning: you need to pass -it to use interactive commands in docker', color='lightyellow')
-        stderr('    docker run -it archivebox manage {}'.format(' '.join(args or ['...'])), color='lightyellow')
-        stderr('')
-
-    # import ipdb; ipdb.set_trace()
-
-    execute_from_command_line(['manage.py', *(args or ['help'])])
-
-
-@enforce_types
-def shell(out_dir: Path=DATA_DIR) -> None:
-    """Enter an interactive ArchiveBox Django shell"""
-
-    check_data_folder()
-
-    from django.core.management import call_command
-    call_command("shell_plus")
-