Browse Source

logging and admin ui improvements

Nick Sweeting 2 months ago
parent
commit
866f993f26
60 changed files with 2943 additions and 508 deletions
  1. 3 0
      archivebox/ArchiveBox.conf
  2. 38 1
      archivebox/api/admin.py
  3. 3 5
      archivebox/cli/archivebox_add.py
  4. 4 4
      archivebox/cli/archivebox_extract.py
  5. 28 3
      archivebox/cli/archivebox_install.py
  6. 40 7
      archivebox/cli/archivebox_server.py
  7. 3 2
      archivebox/cli/archivebox_version.py
  8. 94 125
      archivebox/config/django.py
  9. 174 4
      archivebox/core/admin_archiveresults.py
  10. 43 4
      archivebox/core/admin_snapshots.py
  11. 15 1
      archivebox/core/admin_tags.py
  12. 0 40
      archivebox/core/apps.py
  13. 22 0
      archivebox/core/migrations/0025_allow_duplicate_urls_per_crawl.py
  14. 62 6
      archivebox/core/models.py
  15. 25 34
      archivebox/core/statemachines.py
  16. 48 15
      archivebox/core/views.py
  17. 220 28
      archivebox/crawls/admin.py
  18. 55 1
      archivebox/crawls/models.py
  19. 114 12
      archivebox/crawls/statemachines.py
  20. 8 0
      archivebox/hooks.py
  21. 2 0
      archivebox/logs/errors.log
      ^ NOTE(review): this is a runtime log file — it should not be committed. Add logs/ to .gitignore and drop this file from the commit.
  22. 92 4
      archivebox/machine/admin.py
  23. 16 8
      archivebox/misc/logging_util.py
  24. 3 6
      archivebox/plugins/accessibility/on_Snapshot__39_accessibility.js
  25. 1 1
      archivebox/plugins/archive_org/on_Snapshot__13_archive_org.py
  26. 1 1
      archivebox/plugins/chrome_cleanup/on_Snapshot__45_chrome_cleanup.py
  27. 1 1
      archivebox/plugins/chrome_navigate/on_Snapshot__30_chrome_navigate.js
  28. 3 6
      archivebox/plugins/consolelog/on_Snapshot__21_consolelog.js
  29. 4 7
      archivebox/plugins/dom/on_Snapshot__36_dom.js
  30. 1 1
      archivebox/plugins/favicon/on_Snapshot__11_favicon.py
  31. 1 1
      archivebox/plugins/git/on_Snapshot__12_git.py
  32. 3 6
      archivebox/plugins/headers/on_Snapshot__33_headers.js
  33. 2 3
      archivebox/plugins/htmltotext/on_Snapshot__54_htmltotext.py
  34. 3 4
      archivebox/plugins/media/on_Snapshot__51_media.py
  35. 2 3
      archivebox/plugins/mercury/on_Snapshot__53_mercury.py
  36. 3 6
      archivebox/plugins/parse_dom_outlinks/on_Snapshot__40_parse_dom_outlinks.js
  37. 4 7
      archivebox/plugins/pdf/on_Snapshot__35_pdf.js
  38. 2 3
      archivebox/plugins/readability/on_Snapshot__52_readability.py
  39. 3 6
      archivebox/plugins/redirects/on_Snapshot__22_redirects.js
  40. 4 6
      archivebox/plugins/responses/on_Snapshot__24_responses.js
  41. 4 7
      archivebox/plugins/screenshot/on_Snapshot__34_screenshot.js
  42. 131 0
      archivebox/plugins/search_backend_ripgrep/on_Crawl__00_validate_ripgrep.py
  43. 0 0
      archivebox/plugins/search_backend_ripgrep/tests/__init__.py
  44. 306 0
      archivebox/plugins/search_backend_ripgrep/tests/test_ripgrep_detection.py
  45. 1 1
      archivebox/plugins/search_backend_sonic/on_Snapshot__91_index_sonic.py
  46. 1 1
      archivebox/plugins/search_backend_sqlite/on_Snapshot__90_index_sqlite.py
  47. 3 6
      archivebox/plugins/seo/on_Snapshot__38_seo.js
  48. 3 5
      archivebox/plugins/singlefile/on_Snapshot__04_singlefile.js
  49. 5 6
      archivebox/plugins/singlefile/on_Snapshot__37_singlefile.py
  50. 3 6
      archivebox/plugins/ssl/on_Snapshot__23_ssl.js
  51. 3 4
      archivebox/plugins/staticfile/on_Snapshot__31_staticfile.py
  52. 3 6
      archivebox/plugins/title/on_Snapshot__32_title.js
  53. 2 2
      archivebox/plugins/wget/on_Snapshot__50_wget.py
  54. 1025 0
      archivebox/templates/admin/base.html
  55. 187 50
      archivebox/templates/admin/progress_monitor.html
  56. 8 3
      archivebox/workers/management/commands/orchestrator.py
  57. 22 13
      archivebox/workers/orchestrator.py
  58. 81 4
      archivebox/workers/supervisord_util.py
  59. 3 20
      archivebox/workers/tasks.py
  60. 2 2
      archivebox/workers/worker.py

+ 3 - 0
archivebox/ArchiveBox.conf

@@ -0,0 +1,3 @@
+[SERVER_CONFIG]
+SECRET_KEY = amuxg7v5e2l_6jrktp_f3kszlpx4ieqk4rtwda5q6nfiavits4
+
NOTE(review): this hunk checks a live Django SECRET_KEY into version control. Once pushed, the key must be considered compromised: remove it from the repo, rotate it immediately, and generate it per-installation at first run (or load it from an untracked local config / environment variable) instead of committing it.

+ 38 - 1
archivebox/api/admin.py

@@ -13,7 +13,21 @@ class APITokenAdmin(BaseModelAdmin):
     sort_fields = ('id', 'created_at', 'created_by', 'expires')
     readonly_fields = ('created_at', 'modified_at')
     search_fields = ('id', 'created_by__username', 'token')
-    fields = ('created_by', 'token', 'expires', *readonly_fields)
+
+    fieldsets = (
+        ('Token', {
+            'fields': ('token', 'expires'),
+            'classes': ('card',),
+        }),
+        ('Owner', {
+            'fields': ('created_by',),
+            'classes': ('card',),
+        }),
+        ('Timestamps', {
+            'fields': ('created_at', 'modified_at'),
+            'classes': ('card',),
+        }),
+    )
 
     list_filter = ('created_by',)
     ordering = ['-created_at']
@@ -25,6 +39,29 @@ class CustomWebhookAdmin(WebhookAdmin, BaseModelAdmin):
     sort_fields = ('created_at', 'created_by', 'id', 'referenced_model', 'endpoint', 'last_success', 'last_error')
     readonly_fields = ('created_at', 'modified_at', *WebhookAdmin.readonly_fields)
 
+    fieldsets = (
+        ('Webhook', {
+            'fields': ('name', 'signal', 'referenced_model', 'endpoint'),
+            'classes': ('card', 'wide'),
+        }),
+        ('Authentication', {
+            'fields': ('auth_token',),
+            'classes': ('card',),
+        }),
+        ('Status', {
+            'fields': ('enabled', 'last_success', 'last_error'),
+            'classes': ('card',),
+        }),
+        ('Owner', {
+            'fields': ('created_by',),
+            'classes': ('card',),
+        }),
+        ('Timestamps', {
+            'fields': ('created_at', 'modified_at'),
+            'classes': ('card',),
+        }),
+    )
+
 
 def register_admin(admin_site):
     admin_site.register(APIToken, APITokenAdmin)

+ 3 - 5
archivebox/cli/archivebox_add.py

@@ -115,12 +115,10 @@ def add(urls: str | list[str],
     #    - Repeat until max_depth reached
 
     if bg:
-        # Background mode: start orchestrator and return immediately
-        print('[yellow]\\[*] Running in background mode - starting orchestrator...[/yellow]')
-        orchestrator = Orchestrator(exit_on_idle=True)
-        orchestrator.start()  # Fork to background
+        # Background mode: just queue work and return (orchestrator via server will pick it up)
+        print('[yellow]\\[*] URLs queued. Orchestrator will process them (run `archivebox server` if not already running).[/yellow]')
     else:
-        # Foreground mode: run orchestrator until all work is done
+        # Foreground mode: run orchestrator inline until all work is done
         print(f'[green]\\[*] Starting orchestrator to process crawl...[/green]')
         orchestrator = Orchestrator(exit_on_idle=True)
         orchestrator.runloop()  # Block until complete

+ 4 - 4
archivebox/cli/archivebox_extract.py

@@ -117,11 +117,11 @@ def run_plugins(
             if snapshot_id:
                 snapshot_ids.add(snapshot_id)
             elif record.get('url'):
-                # Look up by URL
-                try:
-                    snap = Snapshot.objects.get(url=record['url'])
+                # Look up by URL (get most recent if multiple exist)
+                snap = Snapshot.objects.filter(url=record['url']).order_by('-created_at').first()
+                if snap:
                     snapshot_ids.add(str(snap.id))
-                except Snapshot.DoesNotExist:
+                else:
                     rprint(f'[yellow]Snapshot not found for URL: {record["url"]}[/yellow]', file=sys.stderr)
 
         elif record_type == TYPE_ARCHIVERESULT:

+ 28 - 3
archivebox/cli/archivebox_install.py

@@ -49,20 +49,45 @@ def install(dry_run: bool=False) -> None:
     # Using a minimal crawl that will trigger on_Crawl hooks
     created_by_id = get_or_create_system_user_pk()
 
-    seed = Seed.objects.create(
+    seed, _created = Seed.objects.get_or_create(
         uri='archivebox://install',
         label='Dependency detection',
         created_by_id=created_by_id,
+        defaults={
+            'extractor': 'auto',
+        }
     )
 
-    crawl = Crawl.objects.create(
+    crawl, created = Crawl.objects.get_or_create(
         seed=seed,
         max_depth=0,
         created_by_id=created_by_id,
-        status='queued',
+        defaults={
+            'status': 'queued',
+        }
     )
 
+    # If crawl already existed, reset it to queued state so it can be processed again
+    if not created:
+        crawl.status = 'queued'
+        crawl.retry_at = timezone.now()
+        crawl.save()
+
     print(f'[+] Created dependency detection crawl: {crawl.id}')
+    print(f'[+] Crawl status: {crawl.status}, retry_at: {crawl.retry_at}')
+
+    # Verify the crawl is in the queue
+    from crawls.models import Crawl as CrawlModel
+    queued_crawls = CrawlModel.objects.filter(
+        retry_at__lte=timezone.now()
+    ).exclude(
+        status__in=CrawlModel.FINAL_STATES
+    )
+    print(f'[+] Crawls in queue: {queued_crawls.count()}')
+    if queued_crawls.exists():
+        for c in queued_crawls:
+            print(f'    - Crawl {c.id}: status={c.status}, retry_at={c.retry_at}')
+
     print('[+] Running crawl to detect binaries via on_Crawl hooks...')
     print()
 

+ 40 - 7
archivebox/cli/archivebox_server.py

@@ -56,20 +56,53 @@ def server(runserver_args: Iterable[str]=(SERVER_CONFIG.BIND_ADDR,),
     except IndexError:
         pass
 
-    print('[green][+] Starting ArchiveBox webserver...[/green]')
-    print(f'    [blink][green]>[/green][/blink] Starting ArchiveBox webserver on [deep_sky_blue4][link=http://{host}:{port}]http://{host}:{port}[/link][/deep_sky_blue4]')
-    print(f'    [green]>[/green] Log in to ArchiveBox Admin UI on [deep_sky_blue3][link=http://{host}:{port}/admin]http://{host}:{port}/admin[/link][/deep_sky_blue3]')
-    print('    > Writing ArchiveBox error log to ./logs/errors.log')
-
     if SHELL_CONFIG.DEBUG:
+        print('[green][+] Starting ArchiveBox webserver in DEBUG mode...[/green]')
+        print(f'    [blink][green]>[/green][/blink] Starting ArchiveBox webserver on [deep_sky_blue4][link=http://{host}:{port}]http://{host}:{port}[/link][/deep_sky_blue4]')
+        print(f'    [green]>[/green] Log in to ArchiveBox Admin UI on [deep_sky_blue3][link=http://{host}:{port}/admin]http://{host}:{port}/admin[/link][/deep_sky_blue3]')
+        print('    > Writing ArchiveBox error log to ./logs/errors.log')
         if not reload:
             runserver_args.append('--noreload')  # '--insecure'
         if nothreading:
             runserver_args.append('--nothreading')
         call_command("runserver", *runserver_args)
     else:
-        from workers.supervisord_util import start_server_workers
-
+        from workers.supervisord_util import (
+            get_existing_supervisord_process,
+            get_worker,
+            start_server_workers,
+            tail_multiple_worker_logs,
+        )
+
+        # Check if supervisord is already running
+        supervisor = get_existing_supervisord_process()
+        if supervisor:
+            daphne_proc = get_worker(supervisor, 'worker_daphne')
+
+            # If daphne is already running, just tail logs
+            if daphne_proc and daphne_proc.get('statename') == 'RUNNING':
+                orchestrator_proc = get_worker(supervisor, 'worker_orchestrator')
+                print('[yellow][!] ArchiveBox server is already running[/yellow]')
+                print(f'    [green]√[/green] Web server (worker_daphne) is RUNNING on [deep_sky_blue4][link=http://{host}:{port}]http://{host}:{port}[/link][/deep_sky_blue4]')
+                if orchestrator_proc and orchestrator_proc.get('statename') == 'RUNNING':
+                    print(f'    [green]√[/green] Background worker (worker_orchestrator) is RUNNING')
+                print()
+                print('[blue][i] Tailing worker logs (Ctrl+C to stop watching)...[/i][/blue]')
+                print()
+
+                # Tail logs for both workers
+                tail_multiple_worker_logs(
+                    log_files=['logs/worker_daphne.log', 'logs/worker_orchestrator.log'],
+                    follow=True,
+                )
+                return
+            # Otherwise, daphne is not running - fall through to start it
+
+        # No existing workers found - start new ones
+        print('[green][+] Starting ArchiveBox webserver...[/green]')
+        print(f'    [blink][green]>[/green][/blink] Starting ArchiveBox webserver on [deep_sky_blue4][link=http://{host}:{port}]http://{host}:{port}[/link][/deep_sky_blue4]')
+        print(f'    [green]>[/green] Log in to ArchiveBox Admin UI on [deep_sky_blue3][link=http://{host}:{port}/admin]http://{host}:{port}/admin[/link][/deep_sky_blue3]')
+        print('    > Writing ArchiveBox error log to ./logs/errors.log')
         print()
         start_server_workers(host=host, port=port, daemonize=daemonize)
         print("\n[i][green][🟩] ArchiveBox server shut down gracefully.[/green][/i]")

+ 3 - 2
archivebox/cli/archivebox_version.py

@@ -119,12 +119,13 @@ def version(quiet: bool=False,
     else:
         for key in sorted(set(binary_config_keys)):
             # Get the actual binary name/path from config value
-            bin_value = config.get(key, '').strip()
+            # Prioritize Machine.config overrides over base config
+            bin_value = machine.config.get(key) or config.get(key, '').strip()
             if not bin_value:
                 continue
 
             # Check if it's a path (has slashes) or just a name
-            is_path = '/' in bin_value
+            is_path = '/' in str(bin_value)
 
             if is_path:
                 # It's a full path - match against abspath

+ 94 - 125
archivebox/config/django.py

@@ -5,7 +5,6 @@ import sys
 
 from datetime import datetime, timezone
 
-from rich.progress import Progress
 from rich.console import Console
 
 import django
@@ -27,16 +26,6 @@ STDERR = Console(stderr=True)
 logging.CONSOLE = CONSOLE
 
 
-INITIAL_STARTUP_PROGRESS = None
-INITIAL_STARTUP_PROGRESS_TASK = 0
-
-def bump_startup_progress_bar(advance=1):
-    global INITIAL_STARTUP_PROGRESS
-    global INITIAL_STARTUP_PROGRESS_TASK
-    if INITIAL_STARTUP_PROGRESS:
-        INITIAL_STARTUP_PROGRESS.update(INITIAL_STARTUP_PROGRESS_TASK, advance=advance)   # type: ignore
-
-
 def setup_django_minimal():
     # sys.path.append(str(CONSTANTS.PACKAGE_DIR))
     # os.environ.setdefault('ARCHIVEBOX_DATA_DIR', str(CONSTANTS.DATA_DIR))
@@ -49,9 +38,7 @@ DJANGO_SET_UP = False
 
 def setup_django(check_db=False, in_memory_db=False) -> None:
     from rich.panel import Panel
-    
-    global INITIAL_STARTUP_PROGRESS
-    global INITIAL_STARTUP_PROGRESS_TASK
+
     global DJANGO_SET_UP
 
     if DJANGO_SET_UP:
@@ -59,118 +46,100 @@ def setup_django(check_db=False, in_memory_db=False) -> None:
         # TODO: figure out why CLI entrypoints with init_pending are running this twice sometimes
         return
 
-    with Progress(transient=True, expand=True, console=STDERR) as INITIAL_STARTUP_PROGRESS:
-        INITIAL_STARTUP_PROGRESS_TASK = INITIAL_STARTUP_PROGRESS.add_task("[green]Loading modules...", total=25, visible=True)
-        
-        from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, SudoPermission
-    
-        # if running as root, chown the data dir to the archivebox user to make sure it's accessible to the archivebox user
-        if IS_ROOT and ARCHIVEBOX_USER != 0:
-            with SudoPermission(uid=0):
-                # running as root is a special case where it's ok to be a bit slower
-                # make sure data dir is always owned by the correct user
-                os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{CONSTANTS.DATA_DIR}" 2>/dev/null')
-                os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{CONSTANTS.DATA_DIR}"/* 2>/dev/null')
-
-        bump_startup_progress_bar()
-        try:
-            from django.core.management import call_command
-                
-            bump_startup_progress_bar()
-
-            if in_memory_db:
-                raise Exception('dont use this anymore')
-            
-                # some commands (e.g. oneshot) dont store a long-lived sqlite3 db file on disk.
-                # in those cases we create a temporary in-memory db and run the migrations
-                # immediately to get a usable in-memory-database at startup
-                os.environ.setdefault("ARCHIVEBOX_DATABASE_NAME", ":memory:")
+    from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, SudoPermission
+
+    # if running as root, chown the data dir to the archivebox user to make sure it's accessible to the archivebox user
+    if IS_ROOT and ARCHIVEBOX_USER != 0:
+        with SudoPermission(uid=0):
+            # running as root is a special case where it's ok to be a bit slower
+            # make sure data dir is always owned by the correct user
+            os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{CONSTANTS.DATA_DIR}" 2>/dev/null')
+            os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{CONSTANTS.DATA_DIR}"/* 2>/dev/null')
+
+    try:
+        from django.core.management import call_command
+
+        if in_memory_db:
+            raise Exception('dont use this anymore')
+
+            # some commands (e.g. oneshot) dont store a long-lived sqlite3 db file on disk.
+            # in those cases we create a temporary in-memory db and run the migrations
+            # immediately to get a usable in-memory-database at startup
+            os.environ.setdefault("ARCHIVEBOX_DATABASE_NAME", ":memory:")
+            django.setup()
+
+            call_command("migrate", interactive=False, verbosity=0)
+        else:
+            # Otherwise use default sqlite3 file-based database and initialize django
+            # without running migrations automatically (user runs them manually by calling init)
+            try:
                 django.setup()
-                
-                bump_startup_progress_bar()
-                call_command("migrate", interactive=False, verbosity=0)
-            else:
-                # Otherwise use default sqlite3 file-based database and initialize django
-                # without running migrations automatically (user runs them manually by calling init)
-                try:
-                    django.setup()
-                except Exception as e:
-                    bump_startup_progress_bar(advance=1000)
-                    
-                    is_using_meta_cmd = any(ignored_subcommand in sys.argv for ignored_subcommand in ('help', 'version', '--help', '--version'))
-                    if not is_using_meta_cmd:
-                        # show error message to user only if they're not running a meta command / just trying to get help
-                        STDERR.print()
-                        STDERR.print(Panel(
-                            f'\n[red]{e.__class__.__name__}[/red]: [yellow]{e}[/yellow]\nPlease check your config and [blue]DATA_DIR[/blue] permissions.\n',
-                            title='\n\n[red][X] Error while trying to load database![/red]',
-                            subtitle='[grey53]NO WRITES CAN BE PERFORMED[/grey53]',
-                            expand=False,
-                            style='bold red',
-                        ))
-                        STDERR.print()
-                        STDERR.print_exception(show_locals=False)
-                    return
-            
-            bump_startup_progress_bar()
-
-            from django.conf import settings
-            
-            # log startup message to the error log
-            with open(settings.ERROR_LOG, "a", encoding='utf-8') as f:
-                command = ' '.join(sys.argv)
-                ts = datetime.now(timezone.utc).strftime('%Y-%m-%d__%H:%M:%S')
-                f.write(f"\n> {command}; TS={ts} VERSION={CONSTANTS.VERSION} IN_DOCKER={SHELL_CONFIG.IN_DOCKER} IS_TTY={SHELL_CONFIG.IS_TTY}\n")
-
-            if check_db:
-                # make sure the data dir is owned by a non-root user
-                if CONSTANTS.DATA_DIR.stat().st_uid == 0:
-                    STDERR.print('[red][X] Error: ArchiveBox DATA_DIR cannot be owned by root![/red]')
-                    STDERR.print(f'    {CONSTANTS.DATA_DIR}')
+            except Exception as e:
+                is_using_meta_cmd = any(ignored_subcommand in sys.argv for ignored_subcommand in ('help', 'version', '--help', '--version'))
+                if not is_using_meta_cmd:
+                    # show error message to user only if they're not running a meta command / just trying to get help
                     STDERR.print()
-                    STDERR.print('[violet]Hint:[/violet] Are you running archivebox in the right folder? (and as a non-root user?)')
-                    STDERR.print('    cd path/to/your/archive/data')
-                    STDERR.print('    archivebox [command]')
+                    STDERR.print(Panel(
+                        f'\n[red]{e.__class__.__name__}[/red]: [yellow]{e}[/yellow]\nPlease check your config and [blue]DATA_DIR[/blue] permissions.\n',
+                        title='\n\n[red][X] Error while trying to load database![/red]',
+                        subtitle='[grey53]NO WRITES CAN BE PERFORMED[/grey53]',
+                        expand=False,
+                        style='bold red',
+                    ))
                     STDERR.print()
-                    raise SystemExit(9)
-                
-                # Create cache table in DB if needed
-                try:
-                    from django.core.cache import cache
-                    cache.get('test', None)
-                except django.db.utils.OperationalError:
-                    call_command("createcachetable", verbosity=0)
-
-                bump_startup_progress_bar()
-
-                # if archivebox gets imported multiple times, we have to close
-                # the sqlite3 whenever we init from scratch to avoid multiple threads
-                # sharing the same connection by accident
-                from django.db import connections
-                for conn in connections.all():
-                    conn.close_if_unusable_or_obsolete()
-
-                sql_index_path = CONSTANTS.DATABASE_FILE
-                assert os.access(sql_index_path, os.F_OK), (
-                    f'No database file {sql_index_path} found in: {CONSTANTS.DATA_DIR} (Are you in an ArchiveBox collection directory?)')
-
-                bump_startup_progress_bar()
-
-                # https://docs.pydantic.dev/logfire/integrations/django/ Logfire Debugging
-                # if settings.DEBUG_LOGFIRE:
-                #     from opentelemetry.instrumentation.sqlite3 import SQLite3Instrumentor
-                #     SQLite3Instrumentor().instrument()
-
-                #     import logfire
-
-                #     logfire.configure()
-                #     logfire.instrument_django(is_sql_commentor_enabled=True)
-                #     logfire.info(f'Started ArchiveBox v{CONSTANTS.VERSION}', argv=sys.argv)
-
-        except KeyboardInterrupt:
-            raise SystemExit(2)
-        
-    DJANGO_SET_UP = True
+                    STDERR.print_exception(show_locals=False)
+                return
+
+        from django.conf import settings
+
+        # log startup message to the error log
+        with open(settings.ERROR_LOG, "a", encoding='utf-8') as f:
+            command = ' '.join(sys.argv)
+            ts = datetime.now(timezone.utc).strftime('%Y-%m-%d__%H:%M:%S')
+            f.write(f"\n> {command}; TS={ts} VERSION={CONSTANTS.VERSION} IN_DOCKER={SHELL_CONFIG.IN_DOCKER} IS_TTY={SHELL_CONFIG.IS_TTY}\n")
+
+        if check_db:
+            # make sure the data dir is owned by a non-root user
+            if CONSTANTS.DATA_DIR.stat().st_uid == 0:
+                STDERR.print('[red][X] Error: ArchiveBox DATA_DIR cannot be owned by root![/red]')
+                STDERR.print(f'    {CONSTANTS.DATA_DIR}')
+                STDERR.print()
+                STDERR.print('[violet]Hint:[/violet] Are you running archivebox in the right folder? (and as a non-root user?)')
+                STDERR.print('    cd path/to/your/archive/data')
+                STDERR.print('    archivebox [command]')
+                STDERR.print()
+                raise SystemExit(9)
+
+            # Create cache table in DB if needed
+            try:
+                from django.core.cache import cache
+                cache.get('test', None)
+            except django.db.utils.OperationalError:
+                call_command("createcachetable", verbosity=0)
+
+            # if archivebox gets imported multiple times, we have to close
+            # the sqlite3 whenever we init from scratch to avoid multiple threads
+            # sharing the same connection by accident
+            from django.db import connections
+            for conn in connections.all():
+                conn.close_if_unusable_or_obsolete()
+
+            sql_index_path = CONSTANTS.DATABASE_FILE
+            assert os.access(sql_index_path, os.F_OK), (
+                f'No database file {sql_index_path} found in: {CONSTANTS.DATA_DIR} (Are you in an ArchiveBox collection directory?)')
+
+            # https://docs.pydantic.dev/logfire/integrations/django/ Logfire Debugging
+            # if settings.DEBUG_LOGFIRE:
+            #     from opentelemetry.instrumentation.sqlite3 import SQLite3Instrumentor
+            #     SQLite3Instrumentor().instrument()
+
+            #     import logfire
+
+            #     logfire.configure()
+            #     logfire.instrument_django(is_sql_commentor_enabled=True)
+            #     logfire.info(f'Started ArchiveBox v{CONSTANTS.VERSION}', argv=sys.argv)
+
+    except KeyboardInterrupt:
+        raise SystemExit(2)
 
-    INITIAL_STARTUP_PROGRESS = None
-    INITIAL_STARTUP_PROGRESS_TASK = None
+    DJANGO_SET_UP = True

+ 174 - 4
archivebox/core/admin_archiveresults.py

@@ -19,6 +19,150 @@ from archivebox.hooks import get_extractor_icon
 from core.models import ArchiveResult, Snapshot
 
 
+def render_archiveresults_list(archiveresults_qs, limit=50):
+    """Render a nice inline list view of archive results with status, extractor, output, and actions."""
+
+    results = list(archiveresults_qs.order_by('-end_ts').select_related('snapshot')[:limit])
+
+    if not results:
+        return mark_safe('<div style="color: #64748b; font-style: italic; padding: 16px 0;">No Archive Results yet...</div>')
+
+    # Status colors
+    status_colors = {
+        'succeeded': ('#166534', '#dcfce7'),   # green
+        'failed': ('#991b1b', '#fee2e2'),       # red
+        'queued': ('#6b7280', '#f3f4f6'),       # gray
+        'started': ('#92400e', '#fef3c7'),      # amber
+    }
+
+    rows = []
+    for idx, result in enumerate(results):
+        status = result.status or 'queued'
+        color, bg = status_colors.get(status, ('#6b7280', '#f3f4f6'))
+
+        # Get extractor icon
+        icon = get_extractor_icon(result.extractor)
+
+        # Format timestamp
+        end_time = result.end_ts.strftime('%Y-%m-%d %H:%M:%S') if result.end_ts else '-'
+
+        # Truncate output for display
+        full_output = result.output or '-'
+        output_display = full_output[:60]
+        if len(full_output) > 60:
+            output_display += '...'
+
+        # Get full command as tooltip
+        cmd_str = ' '.join(result.cmd) if isinstance(result.cmd, list) else str(result.cmd or '-')
+
+        # Build output link
+        output_link = f'/archive/{result.snapshot.timestamp}/{result.output}' if result.output and result.status == 'succeeded' else f'/archive/{result.snapshot.timestamp}/'
+
+        # Get version - try cmd_version field
+        version = result.cmd_version if result.cmd_version else '-'
+
+        # Unique ID for this row's expandable output
+        row_id = f'output_{idx}_{str(result.id)[:8]}'
+
+        rows.append(f'''
+            <tr style="border-bottom: 1px solid #f1f5f9; transition: background 0.15s;" onmouseover="this.style.background='#f8fafc'" onmouseout="this.style.background='transparent'">
+                <td style="padding: 10px 12px; white-space: nowrap;">
+                    <span style="display: inline-block; padding: 3px 10px; border-radius: 12px;
+                                 font-size: 11px; font-weight: 600; text-transform: uppercase;
+                                 color: {color}; background: {bg};">{status}</span>
+                </td>
+                <td style="padding: 10px 12px; white-space: nowrap; font-size: 20px;" title="{result.extractor}">
+                    {icon}
+                </td>
+                <td style="padding: 10px 12px; font-weight: 500; color: #334155;">
+                    {result.extractor}
+                </td>
+                <td style="padding: 10px 12px; max-width: 280px;">
+                    <span onclick="document.getElementById('{row_id}').open = !document.getElementById('{row_id}').open"
+                          style="color: #2563eb; text-decoration: none; font-family: ui-monospace, monospace; font-size: 12px; cursor: pointer;"
+                          title="Click to expand full output">
+                        {output_display}
+                    </span>
+                </td>
+                <td style="padding: 10px 12px; white-space: nowrap; color: #64748b; font-size: 12px;">
+                    {end_time}
+                </td>
+                <td style="padding: 10px 12px; white-space: nowrap; font-family: ui-monospace, monospace; font-size: 11px; color: #64748b;">
+                    {version}
+                </td>
+                <td style="padding: 10px 8px; white-space: nowrap;">
+                    <div style="display: flex; gap: 4px;">
+                        <a href="{output_link}" target="_blank"
+                           style="padding: 4px 8px; background: #f1f5f9; border-radius: 4px; color: #475569; text-decoration: none; font-size: 11px;"
+                           title="View output">📄</a>
+                        <a href="{reverse('admin:core_archiveresult_change', args=[result.id])}"
+                           style="padding: 4px 8px; background: #f1f5f9; border-radius: 4px; color: #475569; text-decoration: none; font-size: 11px;"
+                           title="Edit">✏️</a>
+                    </div>
+                </td>
+            </tr>
+            <tr style="border-bottom: 1px solid #e2e8f0;">
+                <td colspan="7" style="padding: 0 12px 10px 12px;">
+                    <details id="{row_id}" style="margin: 0;">
+                        <summary style="cursor: pointer; font-size: 11px; color: #94a3b8; user-select: none;">
+                            Details &amp; Output
+                        </summary>
+                        <div style="margin-top: 8px; padding: 10px; background: #f8fafc; border: 1px solid #e2e8f0; border-radius: 6px; max-height: 200px; overflow: auto;">
+                            <div style="font-size: 11px; color: #64748b; margin-bottom: 8px;">
+                                <span style="margin-right: 16px;"><b>ID:</b> <code>{str(result.id)[:8]}...</code></span>
+                                <span style="margin-right: 16px;"><b>Version:</b> <code>{version}</code></span>
+                                <span style="margin-right: 16px;"><b>PWD:</b> <code>{result.pwd or '-'}</code></span>
+                            </div>
+                            <div style="font-size: 11px; color: #64748b; margin-bottom: 8px;">
+                                <b>Output:</b>
+                            </div>
+                            <pre style="margin: 0; padding: 8px; background: #1e293b; border-radius: 4px; color: #e2e8f0; font-size: 12px; white-space: pre-wrap; word-break: break-all; max-height: 120px; overflow: auto;">{full_output}</pre>
+                            <div style="font-size: 11px; color: #64748b; margin-top: 8px;">
+                                <b>Command:</b>
+                            </div>
+                            <pre style="margin: 0; padding: 8px; background: #1e293b; border-radius: 4px; color: #e2e8f0; font-size: 11px; white-space: pre-wrap; word-break: break-all;">{cmd_str}</pre>
+                        </div>
+                    </details>
+                </td>
+            </tr>
+        ''')
+
+    total_count = archiveresults_qs.count()
+    footer = ''
+    if total_count > limit:
+        footer = f'''
+            <tr>
+                <td colspan="7" style="padding: 12px; text-align: center; color: #64748b; font-size: 13px; background: #f8fafc;">
+                    Showing {limit} of {total_count} results &nbsp;
+                    <a href="/admin/core/archiveresult/?snapshot__id__exact={results[0].snapshot_id if results else ''}"
+                       style="color: #2563eb;">View all →</a>
+                </td>
+            </tr>
+        '''
+
+    return mark_safe(f'''
+        <div style="border: 1px solid #e2e8f0; border-radius: 8px; overflow: hidden; background: #fff; width: 100%;">
+            <table style="width: 100%; border-collapse: collapse; font-size: 14px;">
+                <thead>
+                    <tr style="background: #f8fafc; border-bottom: 2px solid #e2e8f0;">
+                        <th style="padding: 10px 12px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em;">Status</th>
+                        <th style="padding: 10px 12px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; width: 32px;"></th>
+                        <th style="padding: 10px 12px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em;">Extractor</th>
+                        <th style="padding: 10px 12px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em;">Output</th>
+                        <th style="padding: 10px 12px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em;">Completed</th>
+                        <th style="padding: 10px 12px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em;">Version</th>
+                        <th style="padding: 10px 8px; text-align: left; font-weight: 600; color: #475569; font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em;">Actions</th>
+                    </tr>
+                </thead>
+                <tbody>
+                    {''.join(rows)}
+                    {footer}
+                </tbody>
+            </table>
+        </div>
+    ''')
+
+
 
 class ArchiveResultInline(admin.TabularInline):
     name = 'Archive Results Log'
@@ -97,18 +241,44 @@ class ArchiveResultAdmin(BaseModelAdmin):
     sort_fields = ('id', 'created_by', 'created_at', 'extractor', 'status')
     readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'output_summary', 'extractor_with_icon')
     search_fields = ('id', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
-    fields = ('snapshot', 'extractor', 'status', 'retry_at', 'start_ts', 'end_ts', 'created_by', 'pwd', 'cmd_version', 'cmd', 'output', *readonly_fields)
     autocomplete_fields = ['snapshot']
 
+    fieldsets = (
+        ('Snapshot', {
+            'fields': ('snapshot', 'snapshot_info', 'tags_str'),
+            'classes': ('card', 'wide'),
+        }),
+        ('Extractor', {
+            'fields': ('extractor', 'extractor_with_icon', 'status', 'retry_at'),
+            'classes': ('card',),
+        }),
+        ('Timing', {
+            'fields': ('start_ts', 'end_ts', 'created_at', 'modified_at'),
+            'classes': ('card',),
+        }),
+        ('Command', {
+            'fields': ('cmd', 'cmd_str', 'cmd_version', 'pwd'),
+            'classes': ('card',),
+        }),
+        ('Output', {
+            'fields': ('output', 'output_summary'),
+            'classes': ('card', 'wide'),
+        }),
+        ('Metadata', {
+            'fields': ('created_by',),
+            'classes': ('card',),
+        }),
+    )
+
     list_filter = ('status', 'extractor', 'start_ts', 'cmd_version')
     ordering = ['-start_ts']
     list_per_page = SERVER_CONFIG.SNAPSHOTS_PER_PAGE
-    
+
     paginator = AccelleratedPaginator
     save_on_top = True
-    
+
     actions = ['delete_selected']
-    
+
     class Meta:
         verbose_name = 'Archive Result'
         verbose_name_plural = 'Archive Results'

+ 43 - 4
archivebox/core/admin_snapshots.py

@@ -25,7 +25,7 @@ from archivebox.workers.tasks import bg_archive_snapshots, bg_add
 
 from core.models import Tag
 from core.admin_tags import TagInline
-from core.admin_archiveresults import ArchiveResultInline
+from core.admin_archiveresults import ArchiveResultInline, render_archiveresults_list
 
 
 # GLOBAL_CONTEXT = {'VERSION': VERSION, 'VERSIONS_AVAILABLE': [], 'CAN_UPGRADE': False}
@@ -54,13 +54,48 @@ class SnapshotActionForm(ActionForm):
 class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
     list_display = ('created_at', 'title_str', 'status', 'files', 'size', 'url_str')
     sort_fields = ('title_str', 'url_str', 'created_at', 'status', 'crawl')
-    readonly_fields = ('admin_actions', 'status_info', 'tags_str', 'imported_timestamp', 'created_at', 'modified_at', 'downloaded_at', 'output_dir')
+    readonly_fields = ('admin_actions', 'status_info', 'tags_str', 'imported_timestamp', 'created_at', 'modified_at', 'downloaded_at', 'output_dir', 'archiveresults_list')
     search_fields = ('id', 'url', 'timestamp', 'title', 'tags__name')
     list_filter = ('created_at', 'downloaded_at', 'archiveresult__status', 'created_by', 'tags__name')
-    fields = ('url', 'title', 'created_by', 'bookmarked_at', 'status', 'retry_at', 'crawl', 'config', *readonly_fields)
+
+    fieldsets = (
+        ('URL', {
+            'fields': ('url', 'title'),
+            'classes': ('card', 'wide'),
+        }),
+        ('Status', {
+            'fields': ('status', 'retry_at', 'status_info'),
+            'classes': ('card',),
+        }),
+        ('Timestamps', {
+            'fields': ('bookmarked_at', 'created_at', 'modified_at', 'downloaded_at'),
+            'classes': ('card',),
+        }),
+        ('Relations', {
+            'fields': ('crawl', 'created_by', 'tags_str'),
+            'classes': ('card',),
+        }),
+        ('Config', {
+            'fields': ('config',),
+            'classes': ('card',),
+        }),
+        ('Files', {
+            'fields': ('output_dir',),
+            'classes': ('card',),
+        }),
+        ('Actions', {
+            'fields': ('admin_actions',),
+            'classes': ('card', 'wide'),
+        }),
+        ('Archive Results', {
+            'fields': ('archiveresults_list',),
+            'classes': ('card', 'wide'),
+        }),
+    )
+
     ordering = ['-created_at']
     actions = ['add_tags', 'remove_tags', 'update_titles', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots']
-    inlines = [TagInline, ArchiveResultInline]
+    inlines = [TagInline]  # Removed ArchiveResultInline, using custom renderer instead
     list_per_page = min(max(5, SERVER_CONFIG.SNAPSHOTS_PER_PAGE), 5000)
 
     action_form = SnapshotActionForm
@@ -155,6 +190,10 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
             obj.extension or '-',
         )
 
+    @admin.display(description='Archive Results')
+    def archiveresults_list(self, obj):
+        return render_archiveresults_list(obj.archiveresult_set.all())
+
     @admin.display(
         description='Title',
         ordering='title',

+ 15 - 1
archivebox/core/admin_tags.py

@@ -51,11 +51,25 @@ class TagAdmin(BaseModelAdmin):
     sort_fields = ('name', 'slug', 'id', 'created_by', 'created_at')
     readonly_fields = ('slug', 'id', 'created_at', 'modified_at', 'snapshots')
     search_fields = ('id', 'name', 'slug')
-    fields = ('name', 'created_by', *readonly_fields)
     actions = ['delete_selected', 'merge_tags']
     ordering = ['-created_at']
     # inlines = [TaggedItemInline]
 
+    fieldsets = (
+        ('Tag Info', {
+            'fields': ('name', 'slug'),
+            'classes': ('card',),
+        }),
+        ('Metadata', {
+            'fields': ('id', 'created_by', 'created_at', 'modified_at'),
+            'classes': ('card',),
+        }),
+        ('Snapshots', {
+            'fields': ('snapshots',),
+            'classes': ('card', 'wide'),
+        }),
+    )
+
     paginator = AccelleratedPaginator
 
 

+ 0 - 40
archivebox/core/apps.py

@@ -1,7 +1,5 @@
 __package__ = 'archivebox.core'
 
-import sys
-
 from django.apps import AppConfig
 
 
@@ -12,41 +10,3 @@ class CoreConfig(AppConfig):
         """Register the archivebox.core.admin_site as the main django admin site"""
         from core.admin_site import register_admin_site
         register_admin_site()
-
-        # Auto-start the orchestrator when running the web server
-        self._maybe_start_orchestrator()
-
-    def _maybe_start_orchestrator(self):
-        """Start the orchestrator if we're running a web server."""
-        import os
-
-        # Don't start orchestrator during migrations, shell, tests, etc.
-        # Only start when running: runserver, daphne, gunicorn, uwsgi
-        if not self._is_web_server():
-            return
-
-        # Don't start if RUN_ORCHESTRATOR env var is explicitly set to false
-        if os.environ.get('RUN_ORCHESTRATOR', '').lower() in ('false', '0', 'no'):
-            return
-
-        # Don't start in autoreload child process (avoid double-start)
-        if os.environ.get('RUN_MAIN') != 'true' and 'runserver' in sys.argv:
-            return
-
-        try:
-            from workers.orchestrator import Orchestrator
-
-            if not Orchestrator.is_running():
-                # Start orchestrator as daemon (won't exit on idle when started by server)
-                orchestrator = Orchestrator(exit_on_idle=False)
-                orchestrator.start()
-        except Exception as e:
-            # Don't crash the server if orchestrator fails to start
-            import logging
-            logging.getLogger('archivebox').warning(f'Failed to auto-start orchestrator: {e}')
-
-    def _is_web_server(self) -> bool:
-        """Check if we're running a web server command."""
-        # Check for common web server indicators
-        server_commands = ('runserver', 'daphne', 'gunicorn', 'uwsgi', 'server')
-        return any(cmd in ' '.join(sys.argv).lower() for cmd in server_commands)

+ 22 - 0
archivebox/core/migrations/0025_allow_duplicate_urls_per_crawl.py

@@ -0,0 +1,22 @@
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('core', '0024_snapshot_crawl'),
+    ]
+
+    operations = [
+        # Remove the unique constraint on url
+        migrations.AlterField(
+            model_name='snapshot',
+            name='url',
+            field=models.URLField(db_index=True, unique=False),
+        ),
+        # Add unique constraint on (url, crawl) combination
+        migrations.AddConstraint(
+            model_name='snapshot',
+            constraint=models.UniqueConstraint(fields=['url', 'crawl'], name='unique_url_per_crawl'),
+        ),
+    ]

+ 62 - 6
archivebox/core/models.py

@@ -60,7 +60,8 @@ class Tag(ModelWithSerializers):
         return self.name
 
     def save(self, *args, **kwargs):
-        if self._state.adding:
+        is_new = self._state.adding
+        if is_new:
             self.slug = slugify(self.name)
             existing = set(Tag.objects.filter(slug__startswith=self.slug).values_list("slug", flat=True))
             i = None
@@ -72,6 +73,19 @@ class Tag(ModelWithSerializers):
                 i = (i or 0) + 1
         super().save(*args, **kwargs)
 
+        if is_new:
+            from archivebox.misc.logging_util import log_worker_event
+            log_worker_event(
+                worker_type='DB',
+                event='Created Tag',
+                indent_level=0,
+                metadata={
+                    'id': self.id,
+                    'name': self.name,
+                    'slug': self.slug,
+                },
+            )
+
     @property
     def api_url(self) -> str:
         return reverse_lazy('api-1:get_tag', args=[self.id])
@@ -241,12 +255,13 @@ class SnapshotManager(models.Manager.from_queryset(SnapshotQuerySet)):
                 if tag.strip()
             ))
 
-        try:
-            snapshot = self.get(url=url)
+        # Get most recent snapshot with this URL (URLs can exist in multiple crawls)
+        snapshot = self.filter(url=url).order_by('-created_at').first()
+        if snapshot:
             if title and (not snapshot.title or len(title) > len(snapshot.title or '')):
                 snapshot.title = title
                 snapshot.save(update_fields=['title', 'modified_at'])
-        except self.model.DoesNotExist:
+        else:
             if timestamp:
                 while self.filter(timestamp=timestamp).exists():
                     timestamp = str(float(timestamp) + 1.0)
@@ -284,7 +299,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
     created_at = models.DateTimeField(default=timezone.now, db_index=True)
     modified_at = models.DateTimeField(auto_now=True)
 
-    url = models.URLField(unique=True, db_index=True)
+    url = models.URLField(unique=False, db_index=True)  # URLs can appear in multiple crawls
     timestamp = models.CharField(max_length=32, unique=True, db_index=True, editable=False)
     bookmarked_at = models.DateTimeField(default=timezone.now, db_index=True)
     crawl: Crawl = models.ForeignKey(Crawl, on_delete=models.CASCADE, default=None, null=True, blank=True, related_name='snapshot_set', db_index=True)  # type: ignore
@@ -313,11 +328,16 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
     class Meta(TypedModelMeta):
         verbose_name = "Snapshot"
         verbose_name_plural = "Snapshots"
+        constraints = [
+            # Allow same URL in different crawls, but not duplicates within same crawl
+            models.UniqueConstraint(fields=['url', 'crawl'], name='unique_url_per_crawl'),
+        ]
 
     def __str__(self):
         return f'[{self.id}] {self.url[:64]}'
 
     def save(self, *args, **kwargs):
+        is_new = self._state.adding
         if not self.bookmarked_at:
             self.bookmarked_at = self.created_at or timezone.now()
         if not self.timestamp:
@@ -327,6 +347,21 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
             self.crawl.urls += f'\n{self.url}'
             self.crawl.save()
 
+        if is_new:
+            from archivebox.misc.logging_util import log_worker_event
+            log_worker_event(
+                worker_type='DB',
+                event='Created Snapshot',
+                indent_level=2,
+                url=self.url,
+                metadata={
+                    'id': str(self.id),
+                    'crawl_id': str(self.crawl_id) if self.crawl_id else None,
+                    'depth': self.depth,
+                    'status': self.status,
+                },
+            )
+
     def output_dir_parent(self) -> str:
         return 'archive'
 
@@ -807,6 +842,24 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
     def __str__(self):
         return f'[{self.id}] {self.snapshot.url[:64]} -> {self.extractor}'
 
+    def save(self, *args, **kwargs):
+        is_new = self._state.adding
+        super().save(*args, **kwargs)
+        if is_new:
+            from archivebox.misc.logging_util import log_worker_event
+            log_worker_event(
+                worker_type='DB',
+                event='Created ArchiveResult',
+                indent_level=3,
+                extractor=self.extractor,
+                metadata={
+                    'id': str(self.id),
+                    'snapshot_id': str(self.snapshot_id),
+                    'snapshot_url': str(self.snapshot.url)[:64],
+                    'status': self.status,
+                },
+            )
+
     @cached_property
     def snapshot_dir(self):
         return Path(self.snapshot.output_dir)
@@ -879,7 +932,6 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
         from django.utils import timezone
         from archivebox.hooks import BUILTIN_PLUGINS_DIR, USER_PLUGINS_DIR, run_hook
 
-        extractor_dir = Path(self.snapshot.output_dir) / self.extractor
         config_objects = [self.snapshot.crawl, self.snapshot] if self.snapshot.crawl else [self.snapshot]
 
         # Find hook for this extractor
@@ -899,6 +951,10 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
             self.save()
             return
 
+        # Use plugin directory name instead of extractor name (removes numeric prefix)
+        plugin_name = hook.parent.name
+        extractor_dir = Path(self.snapshot.output_dir) / plugin_name
+
         # Run the hook
         start_ts = timezone.now()
         result = run_hook(

+ 25 - 34
archivebox/core/statemachines.py

@@ -45,15 +45,14 @@ class SnapshotMachine(StateMachine, strict_states=True):
         super().__init__(snapshot, *args, **kwargs)
         
     def __repr__(self) -> str:
-        return f'[grey53]Snapshot\\[{self.snapshot.id}] 🏃‍♂️ Worker\\[pid={os.getpid()}].tick()[/grey53] [blue]{self.snapshot.status.upper()}[/blue] ⚙️ [grey37]Machine[/grey37]'
-    
+        return f'Snapshot[{self.snapshot.id}]'
+
     def __str__(self) -> str:
         return self.__repr__()
-        
+
     def can_start(self) -> bool:
         can_start = bool(self.snapshot.url)
-        if not can_start:
-            print(f'{self}.can_start() [blue]QUEUED[/blue] ➡️❌ [blue]STARTED[/blue] cant start yet +{timezone.now() - self.snapshot.retry_at}s')
+        # Suppressed: queue waiting logs
         return can_start
         
     def is_finished(self) -> bool:
@@ -73,15 +72,15 @@ class SnapshotMachine(StateMachine, strict_states=True):
         
     @queued.enter
     def enter_queued(self):
-        print(f'{self}.on_queued() ↳ snapshot.retry_at = now()')
+        # Suppressed: state transition logs
         self.snapshot.update_for_workers(
             retry_at=timezone.now(),
             status=Snapshot.StatusChoices.QUEUED,
         )
-        
+
     @started.enter
     def enter_started(self):
-        print(f'{self}.on_started() ↳ snapshot.run()')
+        # Suppressed: state transition logs
         # lock the snapshot while we create the pending archiveresults
         self.snapshot.update_for_workers(
             retry_at=timezone.now() + timedelta(seconds=30),  # if failed, wait 30s before retrying
@@ -95,10 +94,10 @@ class SnapshotMachine(StateMachine, strict_states=True):
             retry_at=timezone.now() + timedelta(seconds=5),  # wait 5s before checking it again
             status=Snapshot.StatusChoices.STARTED,
         )
-        
+
     @sealed.enter
     def enter_sealed(self):
-        print(f'{self}.on_sealed() ↳ snapshot.retry_at=None')
+        # Suppressed: state transition logs
         self.snapshot.update_for_workers(
             retry_at=None,
             status=Snapshot.StatusChoices.SEALED,
@@ -161,15 +160,14 @@ class ArchiveResultMachine(StateMachine, strict_states=True):
         super().__init__(archiveresult, *args, **kwargs)
     
     def __repr__(self) -> str:
-        return f'[grey53]ArchiveResult\\[{self.archiveresult.id}] 🏃‍♂️ Worker\\[pid={os.getpid()}].tick()[/grey53] [blue]{self.archiveresult.status.upper()}[/blue] ⚙️ [grey37]Machine[/grey37]'
-    
+        return f'ArchiveResult[{self.archiveresult.id}]'
+
     def __str__(self) -> str:
         return self.__repr__()
-        
+
     def can_start(self) -> bool:
         can_start = bool(self.archiveresult.snapshot.url)
-        if not can_start:
-            print(f'{self}.can_start() [blue]QUEUED[/blue] ➡️❌ [blue]STARTED[/blue]: cant start yet +{timezone.now() - self.archiveresult.retry_at}s')
+        # Suppressed: queue waiting logs
         return can_start
     
     def is_succeeded(self) -> bool:
@@ -202,41 +200,34 @@ class ArchiveResultMachine(StateMachine, strict_states=True):
 
     @queued.enter
     def enter_queued(self):
-        print(f'{self}.on_queued() ↳ archiveresult.retry_at = now()')
+        # Suppressed: state transition logs
         self.archiveresult.update_for_workers(
             retry_at=timezone.now(),
             status=ArchiveResult.StatusChoices.QUEUED,
             start_ts=None,
         )  # bump the snapshot's retry_at so they pickup any new changes
-        
+
     @started.enter
     def enter_started(self):
-        print(f'{self}.on_started() ↳ archiveresult.start_ts + run_extractor()')
-        
+        # Suppressed: state transition logs
         # Lock the object and mark start time
         self.archiveresult.update_for_workers(
             retry_at=timezone.now() + timedelta(seconds=120),  # 2 min timeout for extractor
             status=ArchiveResult.StatusChoices.STARTED,
             start_ts=timezone.now(),
         )
-        
+
         # Run the extractor - this updates status, output, timestamps, etc.
         self.archiveresult.run()
-        
+
         # Save the updated result
         self.archiveresult.save()
-        
-        # Log the result
-        if self.archiveresult.status == ArchiveResult.StatusChoices.SUCCEEDED:
-            print(f'{self} ✅ extractor succeeded: {self.archiveresult.output[:50] if self.archiveresult.output else ""}...')
-        elif self.archiveresult.status == ArchiveResult.StatusChoices.FAILED:
-            print(f'{self} ❌ extractor failed: {self.archiveresult.output[:100] if self.archiveresult.output else ""}...')
-        elif self.archiveresult.status == ArchiveResult.StatusChoices.SKIPPED:
-            print(f'{self} ⏭️ extractor skipped: {self.archiveresult.output[:50] if self.archiveresult.output else ""}')
+
+        # Suppressed: extractor result logs (already logged by worker)
 
     @backoff.enter
     def enter_backoff(self):
-        print(f'{self}.on_backoff() ↳ archiveresult.retries += 1, archiveresult.bump_retry_at(+60s), archiveresult.end_ts = None')
+        # Suppressed: state transition logs
         self.archiveresult.update_for_workers(
             retry_at=timezone.now() + timedelta(seconds=60),
             status=ArchiveResult.StatusChoices.BACKOFF,
@@ -244,10 +235,10 @@ class ArchiveResultMachine(StateMachine, strict_states=True):
             # retries=F('retries') + 1,               # F() equivalent to getattr(self.archiveresult, 'retries', 0) + 1,
         )
         self.archiveresult.save(write_indexes=True)
-        
+
     @succeeded.enter
     def enter_succeeded(self):
-        print(f'{self}.on_succeeded() ↳ archiveresult.retry_at = None, archiveresult.end_ts = now()')
+        # Suppressed: state transition logs
         self.archiveresult.update_for_workers(
             retry_at=None,
             status=ArchiveResult.StatusChoices.SUCCEEDED,
@@ -270,7 +261,7 @@ class ArchiveResultMachine(StateMachine, strict_states=True):
 
     @failed.enter
     def enter_failed(self):
-        print(f'{self}.on_failed() ↳ archiveresult.retry_at = None, archiveresult.end_ts = now()')
+        # Suppressed: state transition logs
         self.archiveresult.update_for_workers(
             retry_at=None,
             status=ArchiveResult.StatusChoices.FAILED,
@@ -291,7 +282,7 @@ class ArchiveResultMachine(StateMachine, strict_states=True):
 
     @skipped.enter
     def enter_skipped(self):
-        print(f'{self}.on_skipped() ↳ archiveresult.retry_at = None, archiveresult.end_ts = now()')
+        # Suppressed: state transition logs
         self.archiveresult.update_for_workers(
             retry_at=None,
             status=ArchiveResult.StatusChoices.SKIPPED,

+ 48 - 15
archivebox/core/views.py

@@ -503,15 +503,7 @@ class AddView(UserPassesTestMixin, FormView):
             mark_safe(f"Adding {rough_url_count} URLs in the background. (refresh in a minute start seeing results) {crawl.admin_change_url}"),
         )
 
-        # Start orchestrator in background to process the queued crawl
-        try:
-            from archivebox.workers.tasks import ensure_orchestrator_running
-            ensure_orchestrator_running()
-        except Exception as e:
-            # Orchestrator may already be running via supervisord, or fail to start
-            # This is not fatal - the crawl will be processed when orchestrator runs
-            print(f'[!] Failed to start orchestrator: {e}')
-
+        # Orchestrator (managed by supervisord) will pick up the queued crawl
         return redirect(crawl.admin_change_url)
 
 
@@ -539,6 +531,7 @@ def live_progress_view(request):
         from workers.orchestrator import Orchestrator
         from crawls.models import Crawl
         from core.models import Snapshot, ArchiveResult
+        from django.db.models import Case, When, Value, IntegerField
 
         # Get orchestrator status
         orchestrator_running = Orchestrator.is_running()
@@ -570,8 +563,26 @@ def live_progress_view(request):
             crawl_snapshots = Snapshot.objects.filter(crawl=crawl)
             total_snapshots = crawl_snapshots.count()
             completed_snapshots = crawl_snapshots.filter(status=Snapshot.StatusChoices.SEALED).count()
+            started_snapshots = crawl_snapshots.filter(status=Snapshot.StatusChoices.STARTED).count()
             pending_snapshots = crawl_snapshots.filter(status=Snapshot.StatusChoices.QUEUED).count()
 
+            # Count URLs in the crawl (for when snapshots haven't been created yet)
+            urls_count = 0
+            if crawl.urls:
+                urls_count = len([u for u in crawl.urls.split('\n') if u.strip()])
+            elif crawl.seed and crawl.seed.uri:
+                # Try to get URL count from seed
+                if crawl.seed.uri.startswith('file:///'):
+                    try:
+                        from pathlib import Path
+                        seed_file = Path(crawl.seed.uri.replace('file://', ''))
+                        if seed_file.exists():
+                            urls_count = len([line for line in seed_file.read_text().split('\n') if line.strip() and not line.startswith('#')])
+                    except OSError:
+                        pass
+                else:
+                    urls_count = 1  # Single URL seed
+
             # Calculate crawl progress
             crawl_progress = int((completed_snapshots / total_snapshots) * 100) if total_snapshots > 0 else 0
 
@@ -590,16 +601,24 @@ def live_progress_view(request):
                 # Calculate snapshot progress
                 snapshot_progress = int(((completed_extractors + failed_extractors) / total_extractors) * 100) if total_extractors > 0 else 0
 
-                # Get active extractors for this snapshot
-                active_extractors = [
+                # Get all extractors for this snapshot
+                # Order: started first, then queued, then completed
+                all_extractors = [
                     {
                         'id': str(ar.id),
                         'extractor': ar.extractor,
                         'status': ar.status,
-                        'started': ar.start_ts.isoformat() if ar.start_ts else None,
-                        'progress': 50,
                     }
-                    for ar in snapshot_results.filter(status=ArchiveResult.StatusChoices.STARTED).order_by('-start_ts')[:5]
+                    for ar in snapshot_results.annotate(
+                        status_order=Case(
+                            When(status=ArchiveResult.StatusChoices.STARTED, then=Value(0)),
+                            When(status=ArchiveResult.StatusChoices.QUEUED, then=Value(1)),
+                            When(status=ArchiveResult.StatusChoices.SUCCEEDED, then=Value(2)),
+                            When(status=ArchiveResult.StatusChoices.FAILED, then=Value(3)),
+                            default=Value(4),
+                            output_field=IntegerField(),
+                        )
+                    ).order_by('status_order', 'extractor')
                 ]
 
                 active_snapshots_for_crawl.append({
@@ -612,9 +631,17 @@ def live_progress_view(request):
                     'completed_extractors': completed_extractors,
                     'failed_extractors': failed_extractors,
                     'pending_extractors': pending_extractors,
-                    'active_extractors': active_extractors,
+                    'all_extractors': all_extractors,
                 })
 
+            # Check if crawl can start (for debugging stuck crawls)
+            can_start = bool(crawl.seed and crawl.seed.uri)
+            seed_uri = crawl.seed.uri[:60] if crawl.seed and crawl.seed.uri else None
+
+            # Check if retry_at is in the future (would prevent worker from claiming)
+            retry_at_future = crawl.retry_at > timezone.now() if crawl.retry_at else False
+            seconds_until_retry = int((crawl.retry_at - timezone.now()).total_seconds()) if crawl.retry_at and retry_at_future else 0
+
             active_crawls.append({
                 'id': str(crawl.id),
                 'label': str(crawl)[:60],
@@ -622,11 +649,17 @@ def live_progress_view(request):
                 'started': crawl.modified_at.isoformat() if crawl.modified_at else None,
                 'progress': crawl_progress,
                 'max_depth': crawl.max_depth,
+                'urls_count': urls_count,
                 'total_snapshots': total_snapshots,
                 'completed_snapshots': completed_snapshots,
+                'started_snapshots': started_snapshots,
                 'failed_snapshots': 0,
                 'pending_snapshots': pending_snapshots,
                 'active_snapshots': active_snapshots_for_crawl,
+                'can_start': can_start,
+                'seed_uri': seed_uri,
+                'retry_at_future': retry_at_future,
+                'seconds_until_retry': seconds_until_retry,
             })
 
         return JsonResponse({

+ 220 - 28
archivebox/crawls/admin.py

@@ -8,6 +8,7 @@ from django.contrib import admin, messages
 from django.urls import path
 from django.http import JsonResponse
 from django.views.decorators.http import require_POST
+from django.db.models import Count, Q
 
 from archivebox import DATA_DIR
 
@@ -19,13 +20,155 @@ from core.models import Snapshot
 from crawls.models import Seed, Crawl, CrawlSchedule
 
 
+def render_snapshots_list(snapshots_qs, limit=20):
+    """Render a nice inline list view of snapshots with status, title, URL, and progress."""
+
+    snapshots = snapshots_qs.order_by('-created_at')[:limit].annotate(
+        total_results=Count('archiveresult'),
+        succeeded_results=Count('archiveresult', filter=Q(archiveresult__status='succeeded')),
+        failed_results=Count('archiveresult', filter=Q(archiveresult__status='failed')),
+    )
+
+    if not snapshots:
+        return mark_safe('<div style="color: #666; font-style: italic; padding: 8px 0;">No Snapshots yet...</div>')
+
+    # Status colors matching Django admin and progress monitor
+    status_colors = {
+        'queued': ('#6c757d', '#f8f9fa'),      # gray
+        'started': ('#856404', '#fff3cd'),     # amber
+        'sealed': ('#155724', '#d4edda'),      # green
+        'failed': ('#721c24', '#f8d7da'),      # red
+    }
+
+    rows = []
+    for snapshot in snapshots:
+        status = snapshot.status or 'queued'
+        color, bg = status_colors.get(status, ('#6c757d', '#f8f9fa'))
+
+        # Calculate progress
+        total = snapshot.total_results
+        done = snapshot.succeeded_results + snapshot.failed_results
+        progress_pct = int((done / total) * 100) if total > 0 else 0
+        progress_text = f'{done}/{total}' if total > 0 else '-'
+
+        # Truncate title and URL
+        title = (snapshot.title or 'Untitled')[:60]
+        if len(snapshot.title or '') > 60:
+            title += '...'
+        url_display = snapshot.url[:50]
+        if len(snapshot.url) > 50:
+            url_display += '...'
+
+        # Format date
+        date_str = snapshot.created_at.strftime('%Y-%m-%d %H:%M') if snapshot.created_at else '-'
+
+        rows.append(f'''
+            <tr style="border-bottom: 1px solid #eee;">
+                <td style="padding: 6px 8px; white-space: nowrap;">
+                    <span style="display: inline-block; padding: 2px 8px; border-radius: 10px;
+                                 font-size: 11px; font-weight: 500; text-transform: uppercase;
+                                 color: {color}; background: {bg};">{status}</span>
+                </td>
+                <td style="padding: 6px 8px; white-space: nowrap;">
+                    <a href="/archive/{snapshot.timestamp}/" style="text-decoration: none;">
+                        <img src="/archive/{snapshot.timestamp}/favicon.ico"
+                             style="width: 16px; height: 16px; vertical-align: middle; margin-right: 4px;"
+                             onerror="this.style.display='none'"/>
+                    </a>
+                </td>
+                <td style="padding: 6px 8px; max-width: 300px;">
+                    <a href="{snapshot.admin_change_url}" style="color: #417690; text-decoration: none; font-weight: 500;"
+                       title="{snapshot.title or 'Untitled'}">{title}</a>
+                </td>
+                <td style="padding: 6px 8px; max-width: 250px;">
+                    <a href="{snapshot.url}" target="_blank"
+                       style="color: #666; text-decoration: none; font-family: monospace; font-size: 11px;"
+                       title="{snapshot.url}">{url_display}</a>
+                </td>
+                <td style="padding: 6px 8px; white-space: nowrap; text-align: center;">
+                    <div style="display: inline-flex; align-items: center; gap: 6px;">
+                        <div style="width: 60px; height: 6px; background: #eee; border-radius: 3px; overflow: hidden;">
+                            <div style="width: {progress_pct}%; height: 100%;
+                                        background: {'#28a745' if snapshot.failed_results == 0 else '#ffc107' if snapshot.succeeded_results > 0 else '#dc3545'};
+                                        transition: width 0.3s;"></div>
+                        </div>
+                        <a href="/admin/core/archiveresult/?snapshot__id__exact={snapshot.id}"
+                           style="font-size: 11px; color: #417690; min-width: 35px; text-decoration: none;"
+                           title="View archive results">{progress_text}</a>
+                    </div>
+                </td>
+                <td style="padding: 6px 8px; white-space: nowrap; color: #888; font-size: 11px;">
+                    {date_str}
+                </td>
+            </tr>
+        ''')
+
+    total_count = snapshots_qs.count()
+    footer = ''
+    if total_count > limit:
+        footer = f'''
+            <tr>
+                <td colspan="6" style="padding: 8px; text-align: center; color: #666; font-size: 12px; background: #f8f9fa;">
+                    Showing {limit} of {total_count} snapshots
+                </td>
+            </tr>
+        '''
+
+    return mark_safe(f'''
+        <div style="border: 1px solid #ddd; border-radius: 6px; overflow: hidden; max-width: 100%;">
+            <table style="width: 100%; border-collapse: collapse; font-size: 13px;">
+                <thead>
+                    <tr style="background: #f5f5f5; border-bottom: 2px solid #ddd;">
+                        <th style="padding: 8px; text-align: left; font-weight: 600; color: #333;">Status</th>
+                        <th style="padding: 8px; text-align: left; font-weight: 600; color: #333; width: 24px;"></th>
+                        <th style="padding: 8px; text-align: left; font-weight: 600; color: #333;">Title</th>
+                        <th style="padding: 8px; text-align: left; font-weight: 600; color: #333;">URL</th>
+                        <th style="padding: 8px; text-align: center; font-weight: 600; color: #333;">Progress</th>
+                        <th style="padding: 8px; text-align: left; font-weight: 600; color: #333;">Created</th>
+                    </tr>
+                </thead>
+                <tbody>
+                    {''.join(rows)}
+                    {footer}
+                </tbody>
+            </table>
+        </div>
+    ''')
+
+
 class SeedAdmin(ConfigEditorMixin, BaseModelAdmin):
     list_display = ('id', 'created_at', 'created_by', 'label', 'notes', 'uri', 'extractor', 'tags_str', 'crawls', 'num_crawls', 'num_snapshots')
     sort_fields = ('id', 'created_at', 'created_by', 'label', 'notes', 'uri', 'extractor', 'tags_str')
     search_fields = ('id', 'created_by__username', 'label', 'notes', 'uri', 'extractor', 'tags_str')
 
     readonly_fields = ('created_at', 'modified_at', 'scheduled_crawls', 'crawls', 'snapshots', 'contents')
-    fields = ('label', 'notes', 'uri', 'extractor', 'tags_str', 'config', 'created_by', *readonly_fields)
+
+    fieldsets = (
+        ('Source', {
+            'fields': ('uri', 'contents'),
+            'classes': ('card', 'wide'),
+        }),
+        ('Info', {
+            'fields': ('label', 'notes', 'tags_str'),
+            'classes': ('card',),
+        }),
+        ('Settings', {
+            'fields': ('extractor', 'config'),
+            'classes': ('card',),
+        }),
+        ('Metadata', {
+            'fields': ('created_by', 'created_at', 'modified_at'),
+            'classes': ('card',),
+        }),
+        ('Crawls', {
+            'fields': ('scheduled_crawls', 'crawls'),
+            'classes': ('card',),
+        }),
+        ('Snapshots', {
+            'fields': ('snapshots',),
+            'classes': ('card',),
+        }),
+    )
 
     list_filter = ('extractor', 'created_by')
     ordering = ['-created_at']
@@ -51,22 +194,19 @@ class SeedAdmin(ConfigEditorMixin, BaseModelAdmin):
         )) or mark_safe('<i>No Crawls yet...</i>')
 
     def snapshots(self, obj):
-        return format_html_join('<br/>', ' - <a href="{}">{}</a>', (
-            (snapshot.admin_change_url, snapshot)
-            for snapshot in obj.snapshot_set.all().order_by('-created_at')[:20]
-        )) or mark_safe('<i>No Snapshots yet...</i>')
+        return render_snapshots_list(obj.snapshot_set.all())
 
     def contents(self, obj):
-        if obj.uri.startswith('file:///data/'):
-            source_file = DATA_DIR / obj.uri.replace('file:///data/', '', 1)
+        source_file = obj.get_file_path()
+        if source_file:
             contents = ""
             try:
                 contents = source_file.read_text().strip()[:14_000]
             except Exception as e:
                 contents = f'Error reading {source_file}: {e}'
-                
+
             return format_html('<b><code>{}</code>:</b><br/><pre>{}</pre>', source_file, contents)
-        
+
         return format_html('See URLs here: <a href="{}">{}</a>', obj.uri, obj.uri)
 
 
@@ -78,7 +218,37 @@ class CrawlAdmin(ConfigEditorMixin, BaseModelAdmin):
     search_fields = ('id', 'created_by__username', 'max_depth', 'label', 'notes', 'seed_id', 'schedule_id', 'status', 'seed__uri')
 
     readonly_fields = ('created_at', 'modified_at', 'snapshots', 'seed_urls_editor')
-    fields = ('label', 'notes', 'seed_urls_editor', 'config', 'status', 'retry_at', 'max_depth', 'seed', 'schedule', 'created_by', 'created_at', 'modified_at', 'snapshots')
+
+    fieldsets = (
+        ('URLs', {
+            'fields': ('seed_urls_editor',),
+            'classes': ('card', 'wide'),
+        }),
+        ('Info', {
+            'fields': ('label', 'notes'),
+            'classes': ('card',),
+        }),
+        ('Settings', {
+            'fields': ('max_depth', 'config'),
+            'classes': ('card',),
+        }),
+        ('Status', {
+            'fields': ('status', 'retry_at'),
+            'classes': ('card',),
+        }),
+        ('Relations', {
+            'fields': ('seed', 'schedule', 'created_by'),
+            'classes': ('card',),
+        }),
+        ('Timestamps', {
+            'fields': ('created_at', 'modified_at'),
+            'classes': ('card',),
+        }),
+        ('Snapshots', {
+            'fields': ('snapshots',),
+            'classes': ('card', 'wide'),
+        }),
+    )
 
     list_filter = ('max_depth', 'seed', 'schedule', 'created_by', 'status', 'retry_at')
     ordering = ['-created_at', '-retry_at']
@@ -90,6 +260,16 @@ class CrawlAdmin(ConfigEditorMixin, BaseModelAdmin):
     def recrawl(self, request, obj):
         """Duplicate this crawl as a new crawl with the same seed and settings."""
         from django.utils import timezone
+        from django.shortcuts import redirect
+
+        # Validate seed has a URI (required for crawl to start)
+        if not obj.seed:
+            messages.error(request, 'Cannot recrawl: original crawl has no seed.')
+            return redirect('admin:crawls_crawl_change', obj.id)
+
+        if not obj.seed.uri:
+            messages.error(request, 'Cannot recrawl: seed has no URI.')
+            return redirect('admin:crawls_crawl_change', obj.id)
 
         new_crawl = Crawl.objects.create(
             seed=obj.seed,
@@ -110,8 +290,6 @@ class CrawlAdmin(ConfigEditorMixin, BaseModelAdmin):
             f'It will start processing shortly.'
         )
 
-        # Redirect to the new crawl's change page
-        from django.shortcuts import redirect
         return redirect('admin:crawls_crawl_change', new_crawl.id)
 
     def get_urls(self):
@@ -133,7 +311,8 @@ class CrawlAdmin(ConfigEditorMixin, BaseModelAdmin):
         except Crawl.DoesNotExist:
             return JsonResponse({'success': False, 'error': 'Crawl not found'}, status=404)
 
-        if not (crawl.seed and crawl.seed.uri and crawl.seed.uri.startswith('file:///data/')):
+        source_file = crawl.seed.get_file_path() if crawl.seed else None
+        if not source_file:
             return JsonResponse({'success': False, 'error': 'Seed is not a local file'}, status=400)
 
         try:
@@ -142,8 +321,6 @@ class CrawlAdmin(ConfigEditorMixin, BaseModelAdmin):
         except json.JSONDecodeError:
             return JsonResponse({'success': False, 'error': 'Invalid JSON'}, status=400)
 
-        source_file = DATA_DIR / crawl.seed.uri.replace('file:///data/', '', 1)
-
         try:
             # Ensure parent directory exists
             source_file.parent.mkdir(parents=True, exist_ok=True)
@@ -156,10 +333,7 @@ class CrawlAdmin(ConfigEditorMixin, BaseModelAdmin):
         return obj.snapshot_set.count()
 
     def snapshots(self, obj):
-        return format_html_join('<br/>', '<a href="{}">{}</a>', (
-            (snapshot.admin_change_url, snapshot)
-            for snapshot in obj.snapshot_set.all().order_by('-created_at')[:20]
-        )) or mark_safe('<i>No Snapshots yet...</i>')
+        return render_snapshots_list(obj.snapshot_set.all())
 
     @admin.display(description='Schedule', ordering='schedule')
     def schedule_str(self, obj):
@@ -186,13 +360,12 @@ class CrawlAdmin(ConfigEditorMixin, BaseModelAdmin):
             seed_uri = obj.urls
 
         # Check if it's a local file we can edit
-        is_file = seed_uri.startswith('file:///data/')
+        source_file = obj.seed.get_file_path() if obj.seed else None
+        is_file = source_file is not None
         contents = ""
         error = None
-        source_file = None
 
-        if is_file:
-            source_file = DATA_DIR / seed_uri.replace('file:///data/', '', 1)
+        if is_file and source_file:
             try:
                 contents = source_file.read_text().strip()
             except Exception as e:
@@ -337,7 +510,29 @@ class CrawlScheduleAdmin(BaseModelAdmin):
     search_fields = ('id', 'created_by__username', 'label', 'notes', 'schedule_id', 'template_id', 'template__seed__uri')
 
     readonly_fields = ('created_at', 'modified_at', 'crawls', 'snapshots')
-    fields = ('label', 'notes', 'schedule', 'template', 'created_by', *readonly_fields)
+
+    fieldsets = (
+        ('Schedule Info', {
+            'fields': ('label', 'notes'),
+            'classes': ('card',),
+        }),
+        ('Configuration', {
+            'fields': ('schedule', 'template'),
+            'classes': ('card',),
+        }),
+        ('Metadata', {
+            'fields': ('created_by', 'created_at', 'modified_at'),
+            'classes': ('card',),
+        }),
+        ('Crawls', {
+            'fields': ('crawls',),
+            'classes': ('card', 'wide'),
+        }),
+        ('Snapshots', {
+            'fields': ('snapshots',),
+            'classes': ('card', 'wide'),
+        }),
+    )
 
     list_filter = ('created_by',)
     ordering = ['-created_at']
@@ -362,10 +557,7 @@ class CrawlScheduleAdmin(BaseModelAdmin):
     
     def snapshots(self, obj):
         crawl_ids = obj.crawl_set.values_list('pk', flat=True)
-        return format_html_join('<br/>', ' - <a href="{}">{}</a>', (
-            (snapshot.admin_change_url, snapshot)
-            for snapshot in Snapshot.objects.filter(crawl_id__in=crawl_ids).order_by('-created_at')[:20]
-        )) or mark_safe('<i>No Snapshots yet...</i>')
+        return render_snapshots_list(Snapshot.objects.filter(crawl_id__in=crawl_ids))
 
 
 def register_admin(admin_site):

+ 55 - 1
archivebox/crawls/models.py

@@ -44,9 +44,27 @@ class Seed(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHealthS
     def __str__(self):
         return f'[{self.id}] {self.uri[:64]}'
 
+    def save(self, *args, **kwargs):
+        is_new = self._state.adding
+        super().save(*args, **kwargs)
+        if is_new:
+            from archivebox.misc.logging_util import log_worker_event
+            log_worker_event(
+                worker_type='DB',
+                event='Created Seed',
+                indent_level=0,
+                metadata={
+                    'id': str(self.id),
+                    'uri': str(self.uri)[:64],
+                    'extractor': self.extractor,
+                    'label': self.label or None,
+                },
+            )
+
     @classmethod
     def from_file(cls, source_file: Path, label: str = '', parser: str = 'auto', tag: str = '', created_by=None, config=None):
-        source_path = str(source_file.resolve()).replace(str(CONSTANTS.DATA_DIR), '/data')
+        # Use absolute path for file:// URLs so extractors can find the files
+        source_path = str(source_file.resolve())
         seed, _ = cls.objects.get_or_create(
             label=label or source_file.name, uri=f'file://{source_path}',
             created_by_id=getattr(created_by, 'pk', created_by) or get_or_create_system_user_pk(),
@@ -62,6 +80,25 @@ class Seed(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHealthS
     def api_url(self) -> str:
         return reverse_lazy('api-1:get_seed', args=[self.id])
 
+    def get_file_path(self) -> Path | None:
+        """
+        Get the filesystem path for file:// URIs.
+        Handles both old format (file:///data/...) and new format (file:///absolute/path).
+        Returns None if URI is not a file:// URI.
+        """
+        if not self.uri.startswith('file://'):
+            return None
+
+        # Remove file:// prefix
+        path_str = self.uri.replace('file://', '', 1)
+
+        # Handle old format: file:///data/... -> DATA_DIR/...
+        if path_str.startswith('/data/'):
+            return CONSTANTS.DATA_DIR / path_str.replace('/data/', '', 1)
+
+        # Handle new format: file:///absolute/path
+        return Path(path_str)
+
     @property
     def snapshot_set(self) -> QuerySet['Snapshot']:
         from core.models import Snapshot
@@ -136,6 +173,23 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
     def __str__(self):
         return f'[{self.id}] {self.seed.uri[:64] if self.seed else ""}'
 
+    def save(self, *args, **kwargs):
+        is_new = self._state.adding
+        super().save(*args, **kwargs)
+        if is_new:
+            from archivebox.misc.logging_util import log_worker_event
+            log_worker_event(
+                worker_type='DB',
+                event='Created Crawl',
+                indent_level=1,
+                metadata={
+                    'id': str(self.id),
+                    'seed_uri': str(self.seed.uri)[:64] if self.seed else None,
+                    'max_depth': self.max_depth,
+                    'status': self.status,
+                },
+            )
+
     @classmethod
     def from_seed(cls, seed: Seed, max_depth: int = 0, persona: str = 'Default', tags_str: str = '', config=None, created_by=None):
         crawl, _ = cls.objects.get_or_create(

+ 114 - 12
archivebox/crawls/statemachines.py

@@ -36,13 +36,19 @@ class CrawlMachine(StateMachine, strict_states=True):
         super().__init__(crawl, *args, **kwargs)
     
     def __repr__(self) -> str:
-        return f'[grey53]Crawl\\[{self.crawl.id}] 🏃‍♂️ Worker\\[pid={os.getpid()}].tick()[/grey53] [blue]{self.crawl.status.upper()}[/blue] ⚙️ [grey37]Machine[/grey37]'
-    
+        return f'Crawl[{self.crawl.id}]'
+
     def __str__(self) -> str:
         return self.__repr__()
         
     def can_start(self) -> bool:
-        return bool(self.crawl.seed and self.crawl.seed.uri)
+        if not self.crawl.seed:
+            print(f'[red]⚠️ Crawl {self.crawl.id} cannot start: no seed[/red]')
+            return False
+        if not self.crawl.seed.uri:
+            print(f'[red]⚠️ Crawl {self.crawl.id} cannot start: seed has no URI[/red]')
+            return False
+        return True
         
     def is_finished(self) -> bool:
         from core.models import Snapshot, ArchiveResult
@@ -73,25 +79,121 @@ class CrawlMachine(StateMachine, strict_states=True):
 
     @started.enter
     def enter_started(self):
-        print(f'{self}.on_started(): [blue]↳ STARTED[/blue] crawl.run()')
+        # Suppressed: state transition logs
         # lock the crawl object while we create snapshots
         self.crawl.update_for_workers(
             retry_at=timezone.now() + timedelta(seconds=5),
             status=Crawl.StatusChoices.QUEUED,
         )
 
-        # Run the crawl - creates root snapshot and processes queued URLs
-        self.crawl.run()
+        try:
+            # Run on_Crawl hooks to validate/install dependencies
+            self._run_crawl_hooks()
 
-        # only update status to STARTED once snapshots are created
-        self.crawl.update_for_workers(
-            retry_at=timezone.now() + timedelta(seconds=5),
-            status=Crawl.StatusChoices.STARTED,
+            # Run the crawl - creates root snapshot and processes queued URLs
+            self.crawl.run()
+
+            # only update status to STARTED once snapshots are created
+            self.crawl.update_for_workers(
+                retry_at=timezone.now() + timedelta(seconds=5),
+                status=Crawl.StatusChoices.STARTED,
+            )
+        except Exception as e:
+            print(f'[red]⚠️ Crawl {self.crawl.id} failed to start: {e}[/red]')
+            import traceback
+            traceback.print_exc()
+            # Re-raise so the worker knows it failed
+            raise
+
+    def _run_crawl_hooks(self):
+        """Run on_Crawl hooks to validate/install dependencies."""
+        from pathlib import Path
+        from archivebox.hooks import run_hooks, discover_hooks
+        from archivebox.config import CONSTANTS
+
+        # Discover and run all on_Crawl hooks
+        hooks = discover_hooks('Crawl')
+        if not hooks:
+            return
+
+        # Create a temporary output directory for hook results
+        output_dir = Path(CONSTANTS.DATA_DIR) / 'tmp' / f'crawl_{self.crawl.id}'
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        # Run all on_Crawl hooks
+        results = run_hooks(
+            event_name='Crawl',
+            output_dir=output_dir,
+            timeout=60,
+            config_objects=[self.crawl, self.crawl.seed] if self.crawl.seed else [self.crawl],
+            crawl_id=str(self.crawl.id),
+            seed_uri=self.crawl.seed.uri if self.crawl.seed else '',
         )
 
-    @sealed.enter        
+        # Process hook results - parse JSONL output and create DB objects
+        self._process_hook_results(results)
+
+    def _process_hook_results(self, results: list):
+        """Process JSONL output from hooks to create InstalledBinary and update Machine config."""
+        import json
+        from machine.models import Machine, InstalledBinary
+
+        machine = Machine.current()
+
+        for result in results:
+            if result['returncode'] != 0:
+                # Hook failed - might indicate missing dependency
+                continue
+
+            # Parse JSONL output
+            for line in result['stdout'].strip().split('\n'):
+                if not line.strip():
+                    continue
+
+                try:
+                    obj = json.loads(line)
+                    obj_type = obj.get('type')
+
+                    if obj_type == 'InstalledBinary':
+                        # Create or update InstalledBinary record
+                        # Skip if essential fields are missing
+                        if not obj.get('name') or not obj.get('abspath') or not obj.get('version'):
+                            continue
+
+                        InstalledBinary.objects.update_or_create(
+                            machine=machine,
+                            name=obj['name'],
+                            defaults={
+                                'abspath': obj['abspath'],
+                                'version': obj['version'],
+                                'sha256': obj.get('sha256') or '',
+                                'binprovider': obj.get('binprovider') or 'env',
+                            }
+                        )
+
+                    elif obj_type == 'Machine':
+                        # Update Machine config
+                        method = obj.get('_method', 'update')
+                        if method == 'update':
+                            key = obj.get('key', '')
+                            value = obj.get('value')
+                            if key.startswith('config/'):
+                                config_key = key[7:]  # Remove 'config/' prefix
+                                machine.config[config_key] = value
+                                machine.save(update_fields=['config'])
+
+                    elif obj_type == 'Dependency':
+                        # Dependency request - could trigger installation
+                        # For now just log it (installation hooks would be separate)
+                        print(f'[yellow]Dependency requested: {obj.get("bin_name")}[/yellow]')
+
+                except json.JSONDecodeError:
+                    # Not JSON, skip
+                    continue
+
+    @sealed.enter
     def enter_sealed(self):
-        print(f'{self}.on_sealed(): [blue]↳ SEALED[/blue] crawl.retry_at=None')
+        # Suppressed: state transition logs
         self.crawl.update_for_workers(
             retry_at=None,
             status=Crawl.StatusChoices.SEALED,

+ 8 - 0
archivebox/hooks.py

@@ -245,6 +245,14 @@ def run_hook(
     env.setdefault('USER_AGENT', str(getattr(config, 'USER_AGENT', '')))
     env.setdefault('RESOLUTION', str(getattr(config, 'RESOLUTION', '')))
 
+    # Pass SEARCH_BACKEND_ENGINE from new-style config
+    try:
+        from archivebox.config.configset import get_config
+        search_config = get_config()
+        env.setdefault('SEARCH_BACKEND_ENGINE', str(search_config.get('SEARCH_BACKEND_ENGINE', 'ripgrep')))
+    except Exception:
+        env.setdefault('SEARCH_BACKEND_ENGINE', 'ripgrep')
+
     # Create output directory if needed
     output_dir.mkdir(parents=True, exist_ok=True)
 

+ 2 - 0
archivebox/logs/errors.log

@@ -0,0 +1,2 @@
+
+> /Users/squash/Local/Code/archiveboxes/archivebox-nue/archivebox/cli/archivebox_init.py --force; TS=2025-12-25__08:03:12 VERSION=0.9.0rc1 IN_DOCKER=False IS_TTY=False

+ 92 - 4
archivebox/machine/admin.py

@@ -12,7 +12,33 @@ class MachineAdmin(ConfigEditorMixin, BaseModelAdmin):
     sort_fields = ('id', 'created_at', 'hostname', 'ips', 'os_platform', 'hw_in_docker', 'hw_in_vm', 'hw_manufacturer', 'hw_product', 'os_arch', 'os_family', 'os_release', 'hw_uuid')
 
     readonly_fields = ('guid', 'created_at', 'modified_at', 'ips')
-    fields = (*readonly_fields, 'hostname', 'hw_in_docker', 'hw_in_vm', 'hw_manufacturer', 'hw_product', 'hw_uuid', 'os_arch', 'os_family', 'os_platform', 'os_kernel', 'os_release', 'stats', 'config', 'num_uses_succeeded', 'num_uses_failed')
+
+    fieldsets = (
+        ('Identity', {
+            'fields': ('hostname', 'guid', 'ips'),
+            'classes': ('card',),
+        }),
+        ('Hardware', {
+            'fields': ('hw_manufacturer', 'hw_product', 'hw_uuid', 'hw_in_docker', 'hw_in_vm'),
+            'classes': ('card',),
+        }),
+        ('Operating System', {
+            'fields': ('os_platform', 'os_family', 'os_arch', 'os_kernel', 'os_release'),
+            'classes': ('card',),
+        }),
+        ('Statistics', {
+            'fields': ('stats', 'num_uses_succeeded', 'num_uses_failed'),
+            'classes': ('card',),
+        }),
+        ('Configuration', {
+            'fields': ('config',),
+            'classes': ('card', 'wide'),
+        }),
+        ('Timestamps', {
+            'fields': ('created_at', 'modified_at'),
+            'classes': ('card',),
+        }),
+    )
 
     list_filter = ('hw_in_docker', 'hw_in_vm', 'os_arch', 'os_family', 'os_platform')
     ordering = ['-created_at']
@@ -33,7 +59,29 @@ class NetworkInterfaceAdmin(BaseModelAdmin):
     search_fields = ('id', 'machine__id', 'iface', 'ip_public', 'ip_local', 'mac_address', 'dns_server', 'hostname', 'isp', 'city', 'region', 'country')
 
     readonly_fields = ('machine', 'created_at', 'modified_at', 'mac_address', 'ip_public', 'ip_local', 'dns_server')
-    fields = (*readonly_fields, 'iface', 'hostname', 'isp', 'city', 'region', 'country', 'num_uses_succeeded', 'num_uses_failed')
+
+    fieldsets = (
+        ('Machine', {
+            'fields': ('machine',),
+            'classes': ('card',),
+        }),
+        ('Network', {
+            'fields': ('iface', 'ip_public', 'ip_local', 'mac_address', 'dns_server'),
+            'classes': ('card',),
+        }),
+        ('Location', {
+            'fields': ('hostname', 'isp', 'city', 'region', 'country'),
+            'classes': ('card',),
+        }),
+        ('Usage', {
+            'fields': ('num_uses_succeeded', 'num_uses_failed'),
+            'classes': ('card',),
+        }),
+        ('Timestamps', {
+            'fields': ('created_at', 'modified_at'),
+            'classes': ('card',),
+        }),
+    )
 
     list_filter = ('isp', 'country', 'region')
     ordering = ['-created_at']
@@ -54,7 +102,25 @@ class DependencyAdmin(ConfigEditorMixin, BaseModelAdmin):
     search_fields = ('id', 'bin_name', 'bin_providers')
 
     readonly_fields = ('id', 'created_at', 'modified_at', 'is_installed', 'installed_count')
-    fields = ('bin_name', 'bin_providers', 'custom_cmds', 'config', *readonly_fields)
+
+    fieldsets = (
+        ('Binary', {
+            'fields': ('bin_name', 'bin_providers', 'is_installed', 'installed_count'),
+            'classes': ('card',),
+        }),
+        ('Commands', {
+            'fields': ('custom_cmds',),
+            'classes': ('card',),
+        }),
+        ('Configuration', {
+            'fields': ('config',),
+            'classes': ('card', 'wide'),
+        }),
+        ('Timestamps', {
+            'fields': ('id', 'created_at', 'modified_at'),
+            'classes': ('card',),
+        }),
+    )
 
     list_filter = ('bin_providers', 'created_at')
     ordering = ['-created_at']
@@ -82,7 +148,29 @@ class InstalledBinaryAdmin(BaseModelAdmin):
     search_fields = ('id', 'machine__id', 'name', 'binprovider', 'version', 'abspath', 'sha256', 'dependency__bin_name')
 
     readonly_fields = ('created_at', 'modified_at')
-    fields = ('machine', 'dependency', 'name', 'binprovider', 'abspath', 'version', 'sha256', *readonly_fields, 'num_uses_succeeded', 'num_uses_failed')
+
+    fieldsets = (
+        ('Binary Info', {
+            'fields': ('name', 'dependency', 'binprovider'),
+            'classes': ('card',),
+        }),
+        ('Location', {
+            'fields': ('machine', 'abspath'),
+            'classes': ('card',),
+        }),
+        ('Version', {
+            'fields': ('version', 'sha256'),
+            'classes': ('card',),
+        }),
+        ('Usage', {
+            'fields': ('num_uses_succeeded', 'num_uses_failed'),
+            'classes': ('card',),
+        }),
+        ('Timestamps', {
+            'fields': ('created_at', 'modified_at'),
+            'classes': ('card',),
+        }),
+    )
 
     list_filter = ('name', 'binprovider', 'machine_id', 'dependency')
     ordering = ['-created_at']

+ 16 - 8
archivebox/misc/logging_util.py

@@ -544,16 +544,21 @@ def log_worker_event(
 
     # Build worker identifier
     worker_parts = [worker_type]
-    if pid:
+    # Don't add pid/worker_id for DB operations (they happen in whatever process is running)
+    if pid and worker_type != 'DB':
         worker_parts.append(f'pid={pid}')
-    if worker_id and worker_type in ('CrawlWorker', 'Orchestrator'):
+    if worker_id and worker_type in ('CrawlWorker', 'Orchestrator') and worker_type != 'DB':
         worker_parts.append(f'id={worker_id}')
-    if url and worker_type == 'SnapshotWorker':
+    if url and worker_type in ('SnapshotWorker', 'DB'):
         worker_parts.append(f'url={truncate_url(url)}')
-    if extractor and worker_type == 'ArchiveResultWorker':
+    if extractor and worker_type in ('ArchiveResultWorker', 'DB'):
         worker_parts.append(f'extractor={extractor}')
 
-    worker_label = f'{worker_parts[0]}[{", ".join(worker_parts[1:])}]'
+    # Format worker label - only add brackets if there are additional identifiers
+    if len(worker_parts) > 1:
+        worker_label = f'{worker_parts[0]}[{", ".join(worker_parts[1:])}]'
+    else:
+        worker_label = worker_parts[0]
 
     # Build metadata string
     metadata_str = ''
@@ -579,12 +584,14 @@ def log_worker_event(
                 meta_parts.append(f'{k}: {len(v)}')
             else:
                 meta_parts.append(f'{k}: {v}')
-        metadata_str = ' {' + ', '.join(meta_parts) + '}'
+        metadata_str = ' | '.join(meta_parts)
 
     # Determine color based on event
     color = 'white'
     if event in ('Starting...', 'Started', 'STARTED', 'Started in background'):
         color = 'green'
+    elif event.startswith('Created'):
+        color = 'cyan'  # DB creation events
     elif event in ('Processing...', 'PROCESSING'):
         color = 'blue'
     elif event in ('Completed', 'COMPLETED', 'All work complete'):
@@ -606,8 +613,9 @@ def log_worker_event(
     text.append(indent)  # Indentation
     # Append worker label and event with color
     text.append(f'{worker_label} {event}{error_str}', style=color)
-    # Append metadata without color
-    text.append(metadata_str)
+    # Append metadata without color (add separator if metadata exists)
+    if metadata_str:
+        text.append(f' | {metadata_str}')
 
     CONSOLE.print(text)
 

+ 3 - 6
archivebox/plugins/accessibility/on_Snapshot__39_accessibility.js

@@ -21,9 +21,9 @@ const puppeteer = require('puppeteer-core');
 
 // Extractor metadata
 const EXTRACTOR_NAME = 'accessibility';
-const OUTPUT_DIR = 'accessibility';
+const OUTPUT_DIR = '.';
 const OUTPUT_FILE = 'accessibility.json';
-const CHROME_SESSION_DIR = 'chrome_session';
+const CHROME_SESSION_DIR = '../chrome_session';
 
 // Parse command line arguments
 function parseArgs() {
@@ -60,10 +60,7 @@ function getCdpUrl() {
 
 // Extract accessibility info
 async function extractAccessibility(url) {
-    // Create output directory
-    if (!fs.existsSync(OUTPUT_DIR)) {
-        fs.mkdirSync(OUTPUT_DIR, { recursive: true });
-    }
+    // Output directory is current directory (hook already runs in output dir)
     const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
 
     let browser = null;

+ 1 - 1
archivebox/plugins/archive_org/on_Snapshot__13_archive_org.py

@@ -24,7 +24,7 @@ import rich_click as click
 
 # Extractor metadata
 EXTRACTOR_NAME = 'archive_org'
-OUTPUT_DIR = 'archive_org'
+OUTPUT_DIR = '.'
 OUTPUT_FILE = 'archive.org.txt'
 
 

+ 1 - 1
archivebox/plugins/chrome_cleanup/on_Snapshot__45_chrome_cleanup.py

@@ -26,7 +26,7 @@ import rich_click as click
 
 # Extractor metadata
 EXTRACTOR_NAME = 'chrome_cleanup'
-CHROME_SESSION_DIR = 'chrome_session'
+CHROME_SESSION_DIR = '../chrome_session'
 
 
 def get_env(name: str, default: str = '') -> str:

+ 1 - 1
archivebox/plugins/chrome_navigate/on_Snapshot__30_chrome_navigate.js

@@ -31,7 +31,7 @@ const puppeteer = require('puppeteer-core');
 
 // Extractor metadata
 const EXTRACTOR_NAME = 'chrome_navigate';
-const CHROME_SESSION_DIR = 'chrome_session';
+const CHROME_SESSION_DIR = '../chrome_session';
 
 // Parse command line arguments
 function parseArgs() {

+ 3 - 6
archivebox/plugins/consolelog/on_Snapshot__21_consolelog.js

@@ -21,9 +21,9 @@ const puppeteer = require('puppeteer-core');
 
 // Extractor metadata
 const EXTRACTOR_NAME = 'consolelog';
-const OUTPUT_DIR = 'consolelog';
+const OUTPUT_DIR = '.';
 const OUTPUT_FILE = 'console.jsonl';
-const CHROME_SESSION_DIR = 'chrome_session';
+const CHROME_SESSION_DIR = '../chrome_session';
 
 // Parse command line arguments
 function parseArgs() {
@@ -86,10 +86,7 @@ async function serializeArgs(args) {
 async function captureConsoleLogs(url) {
     const captureTimeout = (getEnvInt('CONSOLELOG_TIMEOUT') || 5) * 1000;
 
-    // Create output directory
-    if (!fs.existsSync(OUTPUT_DIR)) {
-        fs.mkdirSync(OUTPUT_DIR, { recursive: true });
-    }
+    // Output directory is current directory (hook already runs in output dir)
     const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
 
     // Clear existing file

+ 4 - 7
archivebox/plugins/dom/on_Snapshot__36_dom.js

@@ -24,9 +24,9 @@ const puppeteer = require('puppeteer-core');
 
 // Extractor metadata
 const EXTRACTOR_NAME = 'dom';
-const OUTPUT_DIR = 'dom';
+const OUTPUT_DIR = '.';
 const OUTPUT_FILE = 'output.html';
-const CHROME_SESSION_DIR = 'chrome_session';
+const CHROME_SESSION_DIR = '../chrome_session';
 
 // Parse command line arguments
 function parseArgs() {
@@ -58,7 +58,7 @@ function getEnvInt(name, defaultValue = 0) {
 }
 
 // Check if staticfile extractor already downloaded this URL
-const STATICFILE_DIR = 'staticfile';
+const STATICFILE_DIR = '../staticfile';
 function hasStaticFileOutput() {
     return fs.existsSync(STATICFILE_DIR) && fs.readdirSync(STATICFILE_DIR).length > 0;
 }
@@ -114,10 +114,7 @@ async function dumpDom(url) {
 
     const { width, height } = parseResolution(resolution);
 
-    // Create output directory
-    if (!fs.existsSync(OUTPUT_DIR)) {
-        fs.mkdirSync(OUTPUT_DIR, { recursive: true });
-    }
+    // Output directory is current directory (hook already runs in output dir)
     const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
 
     let browser = null;

+ 1 - 1
archivebox/plugins/favicon/on_Snapshot__11_favicon.py

@@ -26,7 +26,7 @@ import rich_click as click
 
 # Extractor metadata
 EXTRACTOR_NAME = 'favicon'
-OUTPUT_DIR = 'favicon'
+OUTPUT_DIR = '.'
 OUTPUT_FILE = 'favicon.ico'
 
 

+ 1 - 1
archivebox/plugins/git/on_Snapshot__12_git.py

@@ -26,7 +26,7 @@ import rich_click as click
 EXTRACTOR_NAME = 'git'
 BIN_NAME = 'git'
 BIN_PROVIDERS = 'apt,brew,env'
-OUTPUT_DIR = 'repo'
+OUTPUT_DIR = '.'
 
 
 def get_env(name: str, default: str = '') -> str:

+ 3 - 6
archivebox/plugins/headers/on_Snapshot__33_headers.js

@@ -22,9 +22,9 @@ const http = require('http');
 
 // Extractor metadata
 const EXTRACTOR_NAME = 'headers';
-const OUTPUT_DIR = 'headers';
+const OUTPUT_DIR = '.';
 const OUTPUT_FILE = 'headers.json';
-const CHROME_SESSION_DIR = 'chrome_session';
+const CHROME_SESSION_DIR = '../chrome_session';
 const CHROME_HEADERS_FILE = 'response_headers.json';
 
 // Parse command line arguments
@@ -110,10 +110,7 @@ function fetchHeaders(url) {
 }
 
 async function extractHeaders(url) {
-    // Create output directory
-    if (!fs.existsSync(OUTPUT_DIR)) {
-        fs.mkdirSync(OUTPUT_DIR, { recursive: true });
-    }
+    // Output directory is current directory (hook already runs in output dir)
     const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
 
     // Try Chrome session first

+ 2 - 3
archivebox/plugins/htmltotext/on_Snapshot__54_htmltotext.py

@@ -28,7 +28,7 @@ import rich_click as click
 
 # Extractor metadata
 EXTRACTOR_NAME = 'htmltotext'
-OUTPUT_DIR = 'htmltotext'
+OUTPUT_DIR = '.'
 OUTPUT_FILE = 'htmltotext.txt'
 
 
@@ -114,9 +114,8 @@ def extract_htmltotext(url: str) -> tuple[bool, str | None, str]:
     if not text or len(text) < 10:
         return False, None, 'No meaningful text extracted from HTML'
 
-    # Create output directory and write output
+    # Output directory is current directory (hook already runs in output dir)
     output_dir = Path(OUTPUT_DIR)
-    output_dir.mkdir(exist_ok=True)
     output_path = output_dir / OUTPUT_FILE
     output_path.write_text(text, encoding='utf-8')
 

+ 3 - 4
archivebox/plugins/media/on_Snapshot__51_media.py

@@ -39,7 +39,7 @@ import rich_click as click
 EXTRACTOR_NAME = 'media'
 BIN_NAME = 'yt-dlp'
 BIN_PROVIDERS = 'pip,apt,brew,env'
-OUTPUT_DIR = 'media'
+OUTPUT_DIR = '.'
 
 
 def get_env(name: str, default: str = '') -> str:
@@ -62,7 +62,7 @@ def get_env_int(name: str, default: int = 0) -> int:
         return default
 
 
-STATICFILE_DIR = 'staticfile'
+STATICFILE_DIR = '../staticfile'
 
 def has_staticfile_output() -> bool:
     """Check if staticfile extractor already downloaded this URL."""
@@ -129,9 +129,8 @@ def save_media(url: str, binary: str) -> tuple[bool, str | None, str]:
     extra_args = get_env('YTDLP_EXTRA_ARGS') or get_env('YOUTUBEDL_EXTRA_ARGS', '')
     media_max_size = get_env('MEDIA_MAX_SIZE', '750m')
 
-    # Create output directory
+    # Output directory is current directory (hook already runs in output dir)
     output_dir = Path(OUTPUT_DIR)
-    output_dir.mkdir(exist_ok=True)
 
     # Build command (later options take precedence)
     cmd = [

+ 2 - 3
archivebox/plugins/mercury/on_Snapshot__53_mercury.py

@@ -27,7 +27,7 @@ import rich_click as click
 EXTRACTOR_NAME = 'mercury'
 BIN_NAME = 'postlight-parser'
 BIN_PROVIDERS = 'npm,env'
-OUTPUT_DIR = 'mercury'
+OUTPUT_DIR = '.'
 
 
 def get_env(name: str, default: str = '') -> str:
@@ -72,9 +72,8 @@ def extract_mercury(url: str, binary: str) -> tuple[bool, str | None, str]:
     """
     timeout = get_env_int('TIMEOUT', 60)
 
-    # Create output directory
+    # Output directory is current directory (hook already runs in output dir)
     output_dir = Path(OUTPUT_DIR)
-    output_dir.mkdir(exist_ok=True)
 
     try:
         # Get text version

+ 3 - 6
archivebox/plugins/parse_dom_outlinks/on_Snapshot__40_parse_dom_outlinks.js

@@ -24,10 +24,10 @@ const puppeteer = require('puppeteer-core');
 
 // Extractor metadata
 const EXTRACTOR_NAME = 'parse_dom_outlinks';
-const OUTPUT_DIR = 'parse_dom_outlinks';
+const OUTPUT_DIR = '.';
 const OUTPUT_FILE = 'outlinks.json';
 const URLS_FILE = 'urls.jsonl';  // For crawl system
-const CHROME_SESSION_DIR = 'chrome_session';
+const CHROME_SESSION_DIR = '../chrome_session';
 
 // Parse command line arguments
 function parseArgs() {
@@ -64,10 +64,7 @@ function getCdpUrl() {
 
 // Extract outlinks
 async function extractOutlinks(url) {
-    // Create output directory
-    if (!fs.existsSync(OUTPUT_DIR)) {
-        fs.mkdirSync(OUTPUT_DIR, { recursive: true });
-    }
+    // Output directory is current directory (hook already runs in output dir)
     const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
 
     let browser = null;

+ 4 - 7
archivebox/plugins/pdf/on_Snapshot__35_pdf.js

@@ -23,9 +23,9 @@ const puppeteer = require('puppeteer-core');
 
 // Extractor metadata
 const EXTRACTOR_NAME = 'pdf';
-const OUTPUT_DIR = 'pdf';
+const OUTPUT_DIR = '.';
 const OUTPUT_FILE = 'output.pdf';
-const CHROME_SESSION_DIR = 'chrome_session';
+const CHROME_SESSION_DIR = '../chrome_session';
 
 // Parse command line arguments
 function parseArgs() {
@@ -57,7 +57,7 @@ function getEnvInt(name, defaultValue = 0) {
 }
 
 // Check if staticfile extractor already downloaded this URL
-const STATICFILE_DIR = 'staticfile';
+const STATICFILE_DIR = '../staticfile';
 function hasStaticFileOutput() {
     return fs.existsSync(STATICFILE_DIR) && fs.readdirSync(STATICFILE_DIR).length > 0;
 }
@@ -113,10 +113,7 @@ async function printToPdf(url) {
 
     const { width, height } = parseResolution(resolution);
 
-    // Create output directory
-    if (!fs.existsSync(OUTPUT_DIR)) {
-        fs.mkdirSync(OUTPUT_DIR, { recursive: true });
-    }
+    // Output directory is current directory (hook already runs in output dir)
     const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
 
     let browser = null;

+ 2 - 3
archivebox/plugins/readability/on_Snapshot__52_readability.py

@@ -29,7 +29,7 @@ import rich_click as click
 EXTRACTOR_NAME = 'readability'
 BIN_NAME = 'readability-extractor'
 BIN_PROVIDERS = 'npm,env'
-OUTPUT_DIR = 'readability'
+OUTPUT_DIR = '.'
 
 
 def get_env(name: str, default: str = '') -> str:
@@ -101,9 +101,8 @@ def extract_readability(url: str, binary: str) -> tuple[bool, str | None, str]:
     if not html_source:
         return False, None, 'No HTML source found (run singlefile, dom, or wget first)'
 
-    # Create output directory
+    # Output directory is current directory (hook already runs in output dir)
     output_dir = Path(OUTPUT_DIR)
-    output_dir.mkdir(exist_ok=True)
 
     try:
         # Run readability-extractor (outputs JSON by default)

+ 3 - 6
archivebox/plugins/redirects/on_Snapshot__22_redirects.js

@@ -21,9 +21,9 @@ const puppeteer = require('puppeteer-core');
 
 // Extractor metadata
 const EXTRACTOR_NAME = 'redirects';
-const OUTPUT_DIR = 'redirects';
+const OUTPUT_DIR = '.';
 const OUTPUT_FILE = 'redirects.json';
-const CHROME_SESSION_DIR = 'chrome_session';
+const CHROME_SESSION_DIR = '../chrome_session';
 
 // Parse command line arguments
 function parseArgs() {
@@ -60,10 +60,7 @@ function getCdpUrl() {
 
 // Track redirect chain
 async function trackRedirects(url) {
-    // Create output directory
-    if (!fs.existsSync(OUTPUT_DIR)) {
-        fs.mkdirSync(OUTPUT_DIR, { recursive: true });
-    }
+    // Output directory is current directory (hook already runs in output dir)
     const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
 
     let browser = null;

+ 4 - 6
archivebox/plugins/responses/on_Snapshot__24_responses.js

@@ -26,8 +26,8 @@ const puppeteer = require('puppeteer-core');
 
 // Extractor metadata
 const EXTRACTOR_NAME = 'responses';
-const OUTPUT_DIR = 'responses';
-const CHROME_SESSION_DIR = 'chrome_session';
+const OUTPUT_DIR = '.';
+const CHROME_SESSION_DIR = '../chrome_session';
 
 // Resource types to capture (by default, capture everything)
 const DEFAULT_TYPES = ['script', 'stylesheet', 'font', 'image', 'media', 'xhr', 'websocket'];
@@ -149,10 +149,8 @@ async function archiveResponses(originalUrl) {
     const typesStr = getEnv('RESPONSES_TYPES', DEFAULT_TYPES.join(','));
     const typesToSave = typesStr.split(',').map(t => t.trim().toLowerCase());
 
-    // Create output directories
-    if (!fs.existsSync(OUTPUT_DIR)) {
-        fs.mkdirSync(OUTPUT_DIR, { recursive: true });
-    }
+    // Output directory is current directory (hook already runs in output dir)
+    // Create subdirectories for organizing responses
     const allDir = path.join(OUTPUT_DIR, 'all');
     if (!fs.existsSync(allDir)) {
         fs.mkdirSync(allDir, { recursive: true });

+ 4 - 7
archivebox/plugins/screenshot/on_Snapshot__34_screenshot.js

@@ -23,9 +23,9 @@ const puppeteer = require('puppeteer-core');
 
 // Extractor metadata
 const EXTRACTOR_NAME = 'screenshot';
-const OUTPUT_DIR = 'screenshot';
+const OUTPUT_DIR = '.';
 const OUTPUT_FILE = 'screenshot.png';
-const CHROME_SESSION_DIR = 'chrome_session';
+const CHROME_SESSION_DIR = '../chrome_session';
 
 // Parse command line arguments
 function parseArgs() {
@@ -57,7 +57,7 @@ function getEnvInt(name, defaultValue = 0) {
 }
 
 // Check if staticfile extractor already downloaded this URL
-const STATICFILE_DIR = 'staticfile';
+const STATICFILE_DIR = '../staticfile';
 function hasStaticFileOutput() {
     return fs.existsSync(STATICFILE_DIR) && fs.readdirSync(STATICFILE_DIR).length > 0;
 }
@@ -116,10 +116,7 @@ async function takeScreenshot(url) {
 
     const { width, height } = parseResolution(resolution);
 
-    // Create output directory
-    if (!fs.existsSync(OUTPUT_DIR)) {
-        fs.mkdirSync(OUTPUT_DIR, { recursive: true });
-    }
+    // Output directory is current directory (hook already runs in output dir)
     const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
 
     let browser = null;

+ 131 - 0
archivebox/plugins/search_backend_ripgrep/on_Crawl__00_validate_ripgrep.py

@@ -0,0 +1,131 @@
+#!/usr/bin/env python3
+"""
+Validation hook for ripgrep binary.
+
+Only runs if SEARCH_BACKEND_ENGINE is set to 'ripgrep'.
+Outputs JSONL for InstalledBinary and Machine config updates.
+"""
+
+import os
+import sys
+import json
+import shutil
+import hashlib
+import subprocess
+from pathlib import Path
+
+
+def get_binary_version(abspath: str) -> str | None:
+    """Get version string from ripgrep binary."""
+    try:
+        result = subprocess.run(
+            [abspath, '--version'],
+            capture_output=True,
+            text=True,
+            timeout=5,
+        )
+        if result.returncode == 0 and result.stdout:
+            # ripgrep version string: "ripgrep 14.1.0"
+            first_line = result.stdout.strip().split('\n')[0]
+            parts = first_line.split()
+            for i, part in enumerate(parts):
+                if part.lower() == 'ripgrep' and i + 1 < len(parts):
+                    return parts[i + 1]
+            # Try to find version number pattern
+            for part in parts:
+                if part[0].isdigit() and '.' in part:
+                    return part
+            return first_line[:32]
+    except Exception:
+        pass
+    return None
+
+
+def get_binary_hash(abspath: str) -> str | None:
+    """Get SHA256 hash of binary."""
+    try:
+        with open(abspath, 'rb') as f:
+            return hashlib.sha256(f.read()).hexdigest()
+    except Exception:
+        return None
+
+
def find_ripgrep() -> dict | None:
    """Locate the ripgrep binary and describe it.

    Resolution order:
      1. $RIPGREP_BINARY interpreted as a path (contains '/' and is a file)
      2. $RIPGREP_BINARY interpreted as a bare name, resolved via shutil.which()
      3. plain 'rg' on $PATH

    Returns a dict with name/abspath/version/sha256/binprovider keys,
    or None when no usable binary is found.
    """
    configured = os.environ.get('RIPGREP_BINARY', '')

    abspath: str | None
    if configured and '/' in configured and Path(configured).is_file():
        # An explicit path was provided via the environment - use it directly.
        abspath = configured
    else:
        # Treat the env value (if any) as a binary name, then fall back to 'rg'.
        abspath = (shutil.which(configured) if configured else None) or shutil.which('rg')

    if not (abspath and Path(abspath).is_file()):
        return None

    return {
        'name': 'rg',
        'abspath': abspath,
        'version': get_binary_version(abspath),
        'sha256': get_binary_hash(abspath),
        'binprovider': 'env',
    }
+
+
def main():
    """Validate the ripgrep binary and emit JSONL records for the crawl system.

    No-op (exit 0) unless SEARCH_BACKEND_ENGINE=ripgrep. When the binary is
    found, prints an InstalledBinary record plus Machine config updates and
    exits 0; otherwise prints a Dependency request and exits 1.
    """

    # Only validate ripgrep when it is the active search backend.
    search_backend = os.environ.get('SEARCH_BACKEND_ENGINE', '').lower()

    if search_backend != 'ripgrep':
        # No-op: ripgrep is not the active search backend
        sys.exit(0)

    result = find_ripgrep()

    if result and result.get('abspath'):
        # Output InstalledBinary record describing the resolved binary
        print(json.dumps({
            'type': 'InstalledBinary',
            'name': result['name'],
            'abspath': result['abspath'],
            'version': result['version'],
            'sha256': result['sha256'],
            'binprovider': result['binprovider'],
        }))

        # Persist the resolved absolute path so later reads use the full path
        print(json.dumps({
            'type': 'Machine',
            '_method': 'update',
            'key': 'config/RIPGREP_BINARY',
            'value': result['abspath'],
        }))

        # Version may be None if `rg --version` could not be parsed; only store it when known
        if result['version']:
            print(json.dumps({
                'type': 'Machine',
                '_method': 'update',
                'key': 'config/RIPGREP_VERSION',
                'value': result['version'],
            }))

        sys.exit(0)
    else:
        # Output Dependency request so the install system can provide ripgrep
        print(json.dumps({
            'type': 'Dependency',
            'bin_name': 'rg',
            'bin_providers': 'apt,brew,cargo,env',
        }))

        # Exit non-zero to indicate binary not found
        # (fix: was an f-string with no placeholders, ruff F541)
        print("ripgrep binary not found", file=sys.stderr)
        sys.exit(1)

+ 0 - 0
archivebox/plugins/search_backend_ripgrep/tests/__init__.py


+ 306 - 0
archivebox/plugins/search_backend_ripgrep/tests/test_ripgrep_detection.py

@@ -0,0 +1,306 @@
+#!/usr/bin/env python3
+"""
+Tests for ripgrep binary detection and archivebox install functionality.
+
+Guards against regressions in:
+1. Machine.config overrides not being used in version command
+2. Ripgrep hook not resolving binary names via shutil.which()
+3. SEARCH_BACKEND_ENGINE not being passed to hook environment
+"""
+
+import os
+import sys
+import json
+import shutil
+import tempfile
+import subprocess
+from pathlib import Path
+from unittest.mock import patch, MagicMock
+
+import pytest
+
+
def test_ripgrep_hook_detects_binary_from_path():
    """Test that ripgrep hook finds binary using shutil.which() when env var is just a name."""
    # Hook under test lives two directories above this tests/ package.
    hook_path = Path(__file__).parent.parent / 'on_Crawl__00_validate_ripgrep.py'

    # Skip if rg is not installed
    if not shutil.which('rg'):
        pytest.skip("ripgrep (rg) not installed")

    # Set SEARCH_BACKEND_ENGINE to enable the hook
    env = os.environ.copy()
    env['SEARCH_BACKEND_ENGINE'] = 'ripgrep'
    env['RIPGREP_BINARY'] = 'rg'  # Just the name, not the full path (this was the bug)

    # Run the hook as a subprocess, exactly as the crawl system would.
    result = subprocess.run(
        [sys.executable, str(hook_path)],
        capture_output=True,
        text=True,
        env=env,
        timeout=10,
    )

    assert result.returncode == 0, f"Hook failed: {result.stderr}"

    # Parse JSONL output
    lines = [line for line in result.stdout.strip().split('\n') if line.strip()]
    assert len(lines) >= 2, "Expected at least 2 JSONL lines (InstalledBinary + Machine config)"

    # First record: the InstalledBinary row describing the resolved binary.
    installed_binary = json.loads(lines[0])
    assert installed_binary['type'] == 'InstalledBinary'
    assert installed_binary['name'] == 'rg'
    assert '/' in installed_binary['abspath'], "Expected full path, not just binary name"
    assert Path(installed_binary['abspath']).is_file(), "Binary path should exist"
    assert installed_binary['version'], "Version should be detected"

    # Second record: the Machine config update storing the resolved path.
    machine_config = json.loads(lines[1])
    assert machine_config['type'] == 'Machine'
    assert machine_config['key'] == 'config/RIPGREP_BINARY'
    assert '/' in machine_config['value'], "Machine config should store full path"
+
+
def test_ripgrep_hook_skips_when_backend_not_ripgrep():
    """Test that ripgrep hook exits silently when search backend is not ripgrep."""
    hook_script = Path(__file__).parent.parent / 'on_Crawl__00_validate_ripgrep.py'

    # Pretend a different search backend is configured.
    hook_env = dict(os.environ, SEARCH_BACKEND_ENGINE='sqlite')

    proc = subprocess.run(
        [sys.executable, str(hook_script)],
        env=hook_env,
        capture_output=True,
        text=True,
        timeout=10,
    )

    # The hook must be a silent no-op: success exit code, no JSONL emitted.
    assert proc.returncode == 0, "Hook should exit successfully when backend is not ripgrep"
    assert proc.stdout.strip() == '', "Hook should produce no output when backend is not ripgrep"
+
+
def test_ripgrep_hook_handles_absolute_path():
    """Test that ripgrep hook works when RIPGREP_BINARY is an absolute path."""
    # Hook under test lives two directories above this tests/ package.
    hook_path = Path(__file__).parent.parent / 'on_Crawl__00_validate_ripgrep.py'

    rg_path = shutil.which('rg')
    if not rg_path:
        pytest.skip("ripgrep (rg) not installed")

    # Enable the hook and hand it the already-resolved absolute path.
    env = os.environ.copy()
    env['SEARCH_BACKEND_ENGINE'] = 'ripgrep'
    env['RIPGREP_BINARY'] = rg_path  # Full absolute path

    result = subprocess.run(
        [sys.executable, str(hook_path)],
        capture_output=True,
        text=True,
        env=env,
        timeout=10,
    )

    assert result.returncode == 0, f"Hook failed: {result.stderr}"
    assert result.stdout.strip(), "Hook should produce output"

    # The InstalledBinary record must echo back the exact path we supplied.
    installed_binary = json.loads(result.stdout.strip().split('\n')[0])
    assert installed_binary['abspath'] == rg_path
+
+
+@pytest.mark.django_db
def test_machine_config_overrides_base_config():
    """
    Test that Machine.config overrides take precedence over base config.

    Guards against regression where archivebox version was showing binaries
    as "not installed" even though they were detected and stored in Machine.config.
    """
    # Imported lazily so Django app loading happens inside the test.
    from machine.models import Machine, InstalledBinary

    machine = Machine.current()

    # Simulate a hook detecting chrome and storing it with a different path than base config
    detected_chrome_path = '/custom/path/to/chrome'
    machine.config['CHROME_BINARY'] = detected_chrome_path
    machine.config['CHROME_VERSION'] = '143.0.7499.170'
    machine.save()

    # Create InstalledBinary record
    InstalledBinary.objects.create(
        machine=machine,
        name='chrome',
        abspath=detected_chrome_path,
        version='143.0.7499.170',
        binprovider='env',
    )

    # Verify Machine.config takes precedence
    from archivebox.config.configset import get_config
    config = get_config()

    # Machine.config should override the base config value
    assert machine.config.get('CHROME_BINARY') == detected_chrome_path

    # The version command should use Machine.config, not base config
    # (Base config might have 'chromium' while Machine.config has the full path)
    bin_value = machine.config.get('CHROME_BINARY') or config.get('CHROME_BINARY', '')
    assert bin_value == detected_chrome_path, \
        "Machine.config override should take precedence over base config"
+
+
+@pytest.mark.django_db
def test_search_backend_engine_passed_to_hooks():
    """
    Test that SEARCH_BACKEND_ENGINE is passed to hook environment.

    Guards against regression where hooks couldn't determine which search backend was active.
    """
    # (fix: removed unused local `from pathlib import Path` import)
    from archivebox.hooks import build_hook_environment
    from archivebox.config.configset import get_config

    # Whatever backend the current config reports must be what hooks see.
    config = get_config()
    search_backend = config.get('SEARCH_BACKEND_ENGINE', 'ripgrep')

    env = build_hook_environment(overrides=None)

    assert 'SEARCH_BACKEND_ENGINE' in env, \
        "SEARCH_BACKEND_ENGINE must be in hook environment"
    assert env['SEARCH_BACKEND_ENGINE'] == search_backend, \
        f"Expected SEARCH_BACKEND_ENGINE={search_backend}, got {env.get('SEARCH_BACKEND_ENGINE')}"
+
+
+@pytest.mark.django_db
def test_install_creates_installedbinary_records():
    """
    Test that archivebox install creates InstalledBinary records for detected binaries.

    This is an integration test that verifies the full install flow.
    """
    # Imported lazily so Django app loading happens inside the test.
    from machine.models import Machine, InstalledBinary
    from crawls.models import Seed, Crawl
    from crawls.statemachines import CrawlMachine
    from archivebox.base_models.models import get_or_create_system_user_pk

    machine = Machine.current()
    # Baseline count so pre-existing records don't skew the assertion below.
    initial_binary_count = InstalledBinary.objects.filter(machine=machine).count()

    # Create an install crawl (like archivebox install does)
    created_by_id = get_or_create_system_user_pk()
    seed, _ = Seed.objects.get_or_create(
        uri='archivebox://test-install',
        label='Test dependency detection',
        created_by_id=created_by_id,
        defaults={'extractor': 'auto'},
    )

    crawl = Crawl.objects.create(
        seed=seed,
        max_depth=0,
        created_by_id=created_by_id,
        status='queued',
    )

    # Run the crawl state machine (this triggers hooks)
    sm = CrawlMachine(crawl)
    sm.send('tick')  # queued -> started (runs hooks)

    # Verify InstalledBinary records were created
    final_binary_count = InstalledBinary.objects.filter(machine=machine).count()
    assert final_binary_count > initial_binary_count, \
        "archivebox install should create InstalledBinary records"

    # Verify at least some common binaries were detected
    common_binaries = ['git', 'wget', 'node']
    detected = []
    for bin_name in common_binaries:
        if InstalledBinary.objects.filter(machine=machine, name=bin_name).exists():
            detected.append(bin_name)

    assert detected, f"At least one of {common_binaries} should be detected"

    # Verify detected binaries have valid paths and versions
    for binary in InstalledBinary.objects.filter(machine=machine):
        if binary.abspath:  # Only check non-empty paths
            assert '/' in binary.abspath, \
                f"{binary.name} should have full path, not just name: {binary.abspath}"
            # Version might be empty for some binaries, that's ok
+
+
+@pytest.mark.django_db
def test_ripgrep_only_detected_when_backend_enabled():
    """
    Test that ripgrep is only detected when SEARCH_BACKEND_ENGINE='ripgrep'.

    Guards against ripgrep being installed/detected when not needed.
    """
    # Imported lazily so Django app loading happens inside the test.
    from machine.models import Machine, InstalledBinary
    from crawls.models import Seed, Crawl
    from crawls.statemachines import CrawlMachine
    from archivebox.base_models.models import get_or_create_system_user_pk
    from django.conf import settings

    if not shutil.which('rg'):
        pytest.skip("ripgrep (rg) not installed")

    machine = Machine.current()

    # Clear any existing ripgrep records
    InstalledBinary.objects.filter(machine=machine, name='rg').delete()

    # Test 1: With ripgrep backend - should be detected
    # NOTE(review): patching get_config only affects in-process readers; if the
    # hook runs as a subprocess it sees the real environment - confirm the
    # state machine propagates this config to hook env vars.
    with patch('archivebox.config.configset.get_config') as mock_config:
        mock_config.return_value = {'SEARCH_BACKEND_ENGINE': 'ripgrep', 'RIPGREP_BINARY': 'rg'}

        created_by_id = get_or_create_system_user_pk()
        seed = Seed.objects.create(
            uri='archivebox://test-rg-enabled',
            label='Test ripgrep detection enabled',
            created_by_id=created_by_id,
            extractor='auto',
        )

        crawl = Crawl.objects.create(
            seed=seed,
            max_depth=0,
            created_by_id=created_by_id,
            status='queued',
        )

        # Running the crawl state machine triggers the on_Crawl hooks.
        sm = CrawlMachine(crawl)
        sm.send('tick')

        # Ripgrep should be detected
        rg_detected = InstalledBinary.objects.filter(machine=machine, name='rg').exists()
        assert rg_detected, "Ripgrep should be detected when SEARCH_BACKEND_ENGINE='ripgrep'"

    # Clear records again
    InstalledBinary.objects.filter(machine=machine, name='rg').delete()

    # Test 2: With different backend - should NOT be detected
    with patch('archivebox.config.configset.get_config') as mock_config:
        mock_config.return_value = {'SEARCH_BACKEND_ENGINE': 'sqlite', 'RIPGREP_BINARY': 'rg'}

        seed2 = Seed.objects.create(
            uri='archivebox://test-rg-disabled',
            label='Test ripgrep detection disabled',
            created_by_id=created_by_id,
            extractor='auto',
        )

        crawl2 = Crawl.objects.create(
            seed=seed2,
            max_depth=0,
            created_by_id=created_by_id,
            status='queued',
        )

        sm2 = CrawlMachine(crawl2)
        sm2.send('tick')

        # Ripgrep should NOT be detected
        rg_detected = InstalledBinary.objects.filter(machine=machine, name='rg').exists()
        assert not rg_detected, "Ripgrep should NOT be detected when SEARCH_BACKEND_ENGINE!='ripgrep'"
+
+
# Allow running this test module directly without invoking pytest from the CLI.
if __name__ == '__main__':
    pytest.main([__file__, '-v'])

+ 1 - 1
archivebox/plugins/search_backend_sonic/on_Snapshot__91_index_sonic.py

@@ -29,7 +29,7 @@ import rich_click as click
 
 # Extractor metadata
 EXTRACTOR_NAME = 'index_sonic'
-OUTPUT_DIR = 'search_index'
+OUTPUT_DIR = '.'
 
 # Text file patterns to index
 INDEXABLE_FILES = [

+ 1 - 1
archivebox/plugins/search_backend_sqlite/on_Snapshot__90_index_sqlite.py

@@ -27,7 +27,7 @@ import rich_click as click
 
 # Extractor metadata
 EXTRACTOR_NAME = 'index_sqlite'
-OUTPUT_DIR = 'search_index'
+OUTPUT_DIR = '.'
 
 # Text file patterns to index, in priority order
 INDEXABLE_FILES = [

+ 3 - 6
archivebox/plugins/seo/on_Snapshot__38_seo.js

@@ -21,9 +21,9 @@ const puppeteer = require('puppeteer-core');
 
 // Extractor metadata
 const EXTRACTOR_NAME = 'seo';
-const OUTPUT_DIR = 'seo';
+const OUTPUT_DIR = '.';
 const OUTPUT_FILE = 'seo.json';
-const CHROME_SESSION_DIR = 'chrome_session';
+const CHROME_SESSION_DIR = '../chrome_session';
 
 // Parse command line arguments
 function parseArgs() {
@@ -60,10 +60,7 @@ function getCdpUrl() {
 
 // Extract SEO metadata
 async function extractSeo(url) {
-    // Create output directory
-    if (!fs.existsSync(OUTPUT_DIR)) {
-        fs.mkdirSync(OUTPUT_DIR, { recursive: true });
-    }
+    // Output directory is current directory (hook already runs in output dir)
     const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
 
     let browser = null;

+ 3 - 5
archivebox/plugins/singlefile/on_Snapshot__04_singlefile.js

@@ -40,7 +40,7 @@ const EXTENSIONS_DIR = process.env.CHROME_EXTENSIONS_DIR ||
 const CHROME_DOWNLOADS_DIR = process.env.CHROME_DOWNLOADS_DIR ||
     path.join(process.env.DATA_DIR || './data', 'personas', process.env.ACTIVE_PERSONA || 'Default', 'chrome_downloads');
 
-const OUTPUT_DIR = 'singlefile';
+const OUTPUT_DIR = '.';
 const OUTPUT_FILE = 'singlefile.html';
 
 /**
@@ -102,8 +102,7 @@ async function saveSinglefileWithExtension(page, extension, options = {}) {
             .filter(fn => fn.endsWith('.html'))
     );
 
-    // Ensure output directory exists
-    await fs.promises.mkdir(OUTPUT_DIR, { recursive: true });
+    // Output directory is current directory (hook already runs in output dir)
     const out_path = path.join(OUTPUT_DIR, OUTPUT_FILE);
 
     console.log(`[🛠️] Saving SingleFile HTML using extension (${extension.id})...`);
@@ -170,8 +169,7 @@ async function saveSinglefileWithCLI(url, options = {}) {
         return null;
     }
 
-    // Ensure output directory exists
-    await fs.promises.mkdir(OUTPUT_DIR, { recursive: true });
+    // Output directory is current directory (hook already runs in output dir)
     const out_path = path.join(OUTPUT_DIR, OUTPUT_FILE);
 
     // Build command

+ 5 - 6
archivebox/plugins/singlefile/on_Snapshot__37_singlefile.py

@@ -41,7 +41,7 @@ import rich_click as click
 EXTRACTOR_NAME = 'singlefile'
 BIN_NAME = 'single-file'
 BIN_PROVIDERS = 'npm,env'
-OUTPUT_DIR = 'singlefile'
+OUTPUT_DIR = '.'
 OUTPUT_FILE = 'singlefile.html'
 
 
@@ -65,7 +65,7 @@ def get_env_int(name: str, default: int = 0) -> int:
         return default
 
 
-STATICFILE_DIR = 'staticfile'
+STATICFILE_DIR = '../staticfile'
 
 def has_staticfile_output() -> bool:
     """Check if staticfile extractor already downloaded this URL."""
@@ -135,7 +135,7 @@ def get_version(binary: str) -> str:
         return ''
 
 
-CHROME_SESSION_DIR = 'chrome_session'
+CHROME_SESSION_DIR = '../chrome_session'
 
 
 def get_cdp_url() -> str | None:
@@ -203,9 +203,8 @@ def save_singlefile(url: str, binary: str) -> tuple[bool, str | None, str]:
     if extra_args:
         cmd.extend(extra_args.split())
 
-    # Create output directory
+    # Output directory is current directory (hook already runs in output dir)
     output_dir = Path(OUTPUT_DIR)
-    output_dir.mkdir(exist_ok=True)
     output_path = output_dir / OUTPUT_FILE
 
     cmd.extend([url, str(output_path)])
@@ -274,7 +273,7 @@ def main(url: str, snapshot_id: str):
             sys.exit(1)
 
         version = get_version(binary)
-        cmd_str = f'{binary} {url} {OUTPUT_DIR}/{OUTPUT_FILE}'
+        cmd_str = f'{binary} {url} {OUTPUT_FILE}'
 
         # Run extraction
         success, output, error = save_singlefile(url, binary)

+ 3 - 6
archivebox/plugins/ssl/on_Snapshot__23_ssl.js

@@ -21,9 +21,9 @@ const puppeteer = require('puppeteer-core');
 
 // Extractor metadata
 const EXTRACTOR_NAME = 'ssl';
-const OUTPUT_DIR = 'ssl';
+const OUTPUT_DIR = '.';
 const OUTPUT_FILE = 'ssl.json';
-const CHROME_SESSION_DIR = 'chrome_session';
+const CHROME_SESSION_DIR = '../chrome_session';
 
 // Parse command line arguments
 function parseArgs() {
@@ -60,10 +60,7 @@ function getCdpUrl() {
 
 // Extract SSL details
 async function extractSsl(url) {
-    // Create output directory
-    if (!fs.existsSync(OUTPUT_DIR)) {
-        fs.mkdirSync(OUTPUT_DIR, { recursive: true });
-    }
+    // Output directory is current directory (hook already runs in output dir)
     const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
 
     // Only extract SSL for HTTPS URLs

+ 3 - 4
archivebox/plugins/staticfile/on_Snapshot__31_staticfile.py

@@ -31,8 +31,8 @@ import rich_click as click
 
 # Extractor metadata
 EXTRACTOR_NAME = 'staticfile'
-OUTPUT_DIR = 'staticfile'
-CHROME_SESSION_DIR = 'chrome_session'
+OUTPUT_DIR = '.'
+CHROME_SESSION_DIR = '../chrome_session'
 
 # Content-Types that indicate static files
 # These can't be meaningfully processed by Chrome-based extractors
@@ -214,9 +214,8 @@ def download_file(url: str) -> tuple[bool, str | None, str]:
         if content_length and int(content_length) > max_size:
             return False, None, f'File too large: {int(content_length)} bytes > {max_size} max'
 
-        # Create output directory
+        # Output directory is current directory (hook already runs in output dir)
         output_dir = Path(OUTPUT_DIR)
-        output_dir.mkdir(exist_ok=True)
 
         # Determine filename
         filename = get_filename_from_url(url)

+ 3 - 6
archivebox/plugins/title/on_Snapshot__32_title.js

@@ -21,9 +21,9 @@ const http = require('http');
 
 // Extractor metadata
 const EXTRACTOR_NAME = 'title';
-const OUTPUT_DIR = 'title';
+const OUTPUT_DIR = '.';
 const OUTPUT_FILE = 'title.txt';
-const CHROME_SESSION_DIR = 'chrome_session';
+const CHROME_SESSION_DIR = '../chrome_session';
 
 // Parse command line arguments
 function parseArgs() {
@@ -162,10 +162,7 @@ async function getTitleFromCdp(cdpUrl) {
 }
 
 async function extractTitle(url) {
-    // Create output directory
-    if (!fs.existsSync(OUTPUT_DIR)) {
-        fs.mkdirSync(OUTPUT_DIR, { recursive: true });
-    }
+    // Output directory is current directory (hook already runs in output dir)
     const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
 
     // Try Chrome session first

+ 2 - 2
archivebox/plugins/wget/on_Snapshot__50_wget.py

@@ -43,7 +43,7 @@ import rich_click as click
 EXTRACTOR_NAME = 'wget'
 BIN_NAME = 'wget'
 BIN_PROVIDERS = 'apt,brew,env'
-OUTPUT_DIR = 'wget'
+OUTPUT_DIR = '.'
 
 
 def get_env(name: str, default: str = '') -> str:
@@ -66,7 +66,7 @@ def get_env_int(name: str, default: int = 0) -> int:
         return default
 
 
-STATICFILE_DIR = 'staticfile'
+STATICFILE_DIR = '../staticfile'
 
 def has_staticfile_output() -> bool:
     """Check if staticfile extractor already downloaded this URL."""

+ 1025 - 0
archivebox/templates/admin/base.html

@@ -30,6 +30,1031 @@
                 color: white;
                 cursor: pointer;
             }
+
+            /* ============================================
+               Modern card-based admin UI (shadcn-inspired)
+               ============================================ */
+
+            /* Base font improvements */
+            body, html {
+                font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
+                -webkit-font-smoothing: antialiased;
+                -moz-osx-font-smoothing: grayscale;
+                font-size: 15px;
+                line-height: 1.6;
+                color: #0f172a;
+                background: #f8fafc;
+            }
+
+            #container {
+                background: #f8fafc;
+            }
+
+            #content {
+                padding: 24px;
+            }
+
+            /* Main form container - flexbox grid */
+            #content-main form > div,
+            #content form > div {
+                display: flex;
+                flex-wrap: wrap;
+                gap: 20px;
+                align-items: stretch;
+            }
+
+            /* Each fieldset becomes a card */
+            #content-main form fieldset,
+            #content form fieldset,
+            #content-main form .module:not(.inline-group),
+            #content form .module:not(.inline-group) {
+                background: #fff !important;
+                border: 1px solid #e2e8f0 !important;
+                border-top: 1px solid #e2e8f0 !important;
+                border-left: 1px solid #e2e8f0 !important;
+                border-right: 1px solid #e2e8f0 !important;
+                border-bottom: 1px solid #e2e8f0 !important;
+                border-radius: 12px !important;
+                padding: 0 !important;
+                margin: 0 !important;
+                box-shadow: 0 1px 3px rgba(0,0,0,0.04), 0 1px 2px rgba(0,0,0,0.06);
+                flex: 1 1 340px;
+                min-width: 320px;
+                max-width: calc(33.33% - 14px);
+                box-sizing: border-box;
+                display: flex;
+                flex-direction: column;
+                transition: box-shadow 0.2s ease, border-color 0.2s ease;
+                overflow: hidden;
+            }
+
+            /* Wide fieldsets MUST override card max-width - placed after card rules for specificity */
+            #content-main form fieldset.wide,
+            #content form fieldset.wide,
+            #content-main form fieldset:has(.field-archiveresults_list),
+            #content form fieldset:has(.field-archiveresults_list),
+            #content-main form fieldset:has(.field-snapshots),
+            #content form fieldset:has(.field-snapshots) {
+                flex: 1 1 100% !important;
+                max-width: 100% !important;
+                min-width: 100% !important;
+                width: 100% !important;
+                flex-basis: 100% !important;
+            }
+
+            /* Inline groups should NOT have card constraints */
+            #content-main form .inline-group,
+            #content form .inline-group,
+            .inline-group fieldset,
+            .inline-group .module {
+                flex: 1 1 100% !important;
+                max-width: 100% !important;
+                min-width: 100% !important;
+                width: 100% !important;
+            }
+
+            #content-main form fieldset:hover,
+            #content form fieldset:hover {
+                box-shadow: 0 4px 6px rgba(0,0,0,0.05), 0 2px 4px rgba(0,0,0,0.06);
+                border-color: #cbd5e1;
+            }
+
+            /* Archive results list content should take full width */
+            .field-archiveresults_list,
+            .field-archiveresults_list .readonly,
+            .field-snapshots,
+            .field-snapshots .readonly {
+                width: 100% !important;
+                max-width: 100% !important;
+                background: transparent !important;
+                border: none !important;
+                padding: 0 !important;
+            }
+
+            /* Card headers - no borders, just background */
+            #content-main form fieldset h2,
+            #content form fieldset h2,
+            #content-main form .module h2,
+            #content form .module h2 {
+                margin: 0 !important;
+                padding: 8px 16px !important;
+                background: #f1f5f9 !important;
+                color: #334155 !important;
+                font-size: 12px !important;
+                font-weight: 600 !important;
+                font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif !important;
+                border: none !important;
+                border-top: none !important;
+                border-left: none !important;
+                border-right: none !important;
+                border-bottom: none !important;
+                border-radius: 0 !important;
+                text-transform: uppercase;
+                letter-spacing: 0.5px;
+                flex-shrink: 0;
+                -webkit-font-smoothing: antialiased;
+                box-shadow: none !important;
+                outline: none !important;
+            }
+
+            /* Collapse toggle styling */
+            #content-main form fieldset h2 a.collapse-toggle,
+            #content form fieldset h2 a.collapse-toggle {
+                color: #64748b;
+            }
+
+            /* Card content area */
+            #content-main form fieldset > div,
+            #content form fieldset > div {
+                padding: 20px;
+                flex: 1;
+                overflow-x: hidden;
+                overflow-y: visible;
+                min-width: 0;
+            }
+
+            /* Form rows inside cards */
+            #content-main form fieldset .form-row,
+            #content form fieldset .form-row {
+                padding: 8px 0;
+                border-bottom: 1px solid #f1f5f9;
+                min-width: 0;
+                min-height: auto;
+            }
+
+            #content-main form fieldset .form-row:first-child,
+            #content form fieldset .form-row:first-child {
+                padding-top: 0;
+            }
+
+            #content-main form fieldset .form-row:last-child,
+            #content form fieldset .form-row:last-child {
+                border-bottom: none;
+                padding-bottom: 0;
+            }
+
+            /* Remove borders from nested fieldsets and flex-containers inside cards */
+            #content-main form fieldset fieldset,
+            #content form fieldset fieldset,
+            #content-main form fieldset .flex-container,
+            #content form fieldset .flex-container,
+            #content-main form .module fieldset,
+            #content form .module fieldset {
+                background: transparent !important;
+                border: none !important;
+                border-radius: 0 !important;
+                box-shadow: none !important;
+                padding: 0 !important;
+                margin: 0 !important;
+                min-width: 0 !important;
+                max-width: 94% !important;
+                flex: none !important;
+                display: block !important;
+            }
+
+            /* Nested fieldset headers should be invisible */
+            #content-main form fieldset fieldset h2,
+            #content form fieldset fieldset h2,
+            #content-main form fieldset .flex-container legend,
+            #content form fieldset .flex-container legend {
+                background: transparent !important;
+                padding: 0 0 4px 0 !important;
+                font-size: 13px !important;
+                color: #374151 !important;
+                text-transform: none !important;
+                letter-spacing: normal !important;
+            }
+
+            /* Ensure form elements inside cards don't overflow */
+            #content-main form fieldset input,
+            #content-main form fieldset select,
+            #content-main form fieldset textarea,
+            #content form fieldset input,
+            #content form fieldset select,
+            #content form fieldset textarea {
+                max-width: 100%;
+                box-sizing: border-box;
+            }
+
+            /* Related widget wrapper should fit within card */
+            #content-main form fieldset .related-widget-wrapper,
+            #content form fieldset .related-widget-wrapper {
+                max-width: 100%;
+            }
+
+            #content-main form fieldset .related-widget-wrapper select,
+            #content form fieldset .related-widget-wrapper select {
+                min-width: 0;
+                flex: 1;
+            }
+
+            /* Labels inside cards */
+            #content-main form fieldset .form-row > label,
+            #content form fieldset .form-row > label,
+            #content-main form fieldset .form-row > .flex-container > label,
+            #content form fieldset .form-row > .flex-container > label,
+            #content-main form label,
+            #content form label,
+            .aligned label,
+            legend {
+                font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
+                font-weight: 500;
+                color: #374151;
+                display: block;
+                margin-bottom: 8px;
+                float: none !important;
+                width: auto !important;
+                padding: 0 !important;
+                font-size: 13px;
+                letter-spacing: -0.01em;
+                -webkit-font-smoothing: antialiased;
+                -moz-osx-font-smoothing: grayscale;
+            }
+
+            /* Readonly fields styling */
+            #content-main form fieldset .readonly,
+            #content form fieldset .readonly {
+                background: #f8fafc;
+                padding: 12px 14px;
+                border-radius: 8px;
+                font-family: ui-monospace, SFMono-Regular, "SF Mono", Menlo, Monaco, Consolas, monospace;
+                font-size: 13px;
+                word-break: break-word;
+                line-height: 1.6;
+                border: 1px solid #e2e8f0;
+                color: #475569;
+            }
+
+            /* Long content in readonly */
+            #content-main form fieldset .readonly pre,
+            #content form fieldset .readonly pre {
+                margin: 0;
+                white-space: pre-wrap;
+                word-break: break-word;
+                font-family: inherit;
+            }
+
+            /* Input styling */
+            #content-main form input[type="text"],
+            #content-main form input[type="number"],
+            #content-main form input[type="url"],
+            #content-main form input[type="email"],
+            #content-main form input[type="password"],
+            #content form input[type="text"],
+            #content form input[type="number"],
+            #content form input[type="url"],
+            #content form input[type="email"],
+            #content form input[type="password"] {
+                width: 100%;
+                padding: 10px 14px;
+                border: 1px solid #d1d5db;
+                border-radius: 8px;
+                font-size: 14px;
+                font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
+                box-sizing: border-box;
+                background: #fff;
+                color: #1e293b;
+                transition: border-color 0.15s ease, box-shadow 0.15s ease;
+                -webkit-font-smoothing: antialiased;
+            }
+
+            #content-main form select,
+            #content form select {
+                width: 100%;
+                border: 1px solid #d1d5db;
+                border-radius: 8px;
+                font-size: 14px;
+                font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
+                box-sizing: border-box;
+                background: #fff;
+                color: #1e293b;
+                transition: border-color 0.15s ease, box-shadow 0.15s ease;
+                -webkit-font-smoothing: antialiased;
+            }
+
+            #content-main form input::placeholder,
+            #content form input::placeholder {
+                color: #94a3b8;
+            }
+
+            /* Focus states */
+            #content-main form input:focus,
+            #content-main form select:focus,
+            #content-main form textarea:focus,
+            #content form input:focus,
+            #content form select:focus,
+            #content form textarea:focus {
+                border-color: #3b82f6;
+                outline: none;
+                box-shadow: 0 0 0 3px rgba(59, 130, 246, 0.15);
+            }
+
+            /* Textarea styling */
+            #content-main form textarea,
+            #content form textarea {
+                width: 100%;
+                box-sizing: border-box;
+                border: 1px solid #d1d5db;
+                border-radius: 8px;
+                padding: 12px 14px;
+                font-size: 14px;
+                font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
+                line-height: 1.6;
+                resize: vertical;
+                min-height: 80px;
+                color: #1e293b;
+                transition: border-color 0.15s ease, box-shadow 0.15s ease;
+                -webkit-font-smoothing: antialiased;
+            }
+
+            /* Fix vTextField width */
+            .vTextField {
+                width: 100% !important;
+            }
+
+            /* ============================================
+               Button styling (shadcn-inspired)
+               ============================================ */
+
+            /* Base button styles */
+            input[type="submit"],
+            button,
+            .button,
+            .btn,
+            a.button,
+            .submit-row input,
+            .submit-row a.button {
+                display: inline-flex;
+                align-items: center;
+                justify-content: center;
+                gap: 8px;
+                padding: 10px 18px;
+                font-size: 14px;
+                font-weight: 500;
+                font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
+                line-height: 1.4;
+                border-radius: 8px;
+                border: 1px solid transparent;
+                cursor: pointer;
+                transition: all 0.15s ease;
+                text-decoration: none;
+                white-space: nowrap;
+                -webkit-font-smoothing: antialiased;
+            }
+
+            /* Primary button (default) */
+            input[type="submit"],
+            button[type="submit"],
+            .button.default,
+            .submit-row input[type="submit"] {
+                background: #0f172a;
+                color: #fff;
+                border-color: #0f172a;
+            }
+
+            input[type="submit"]:hover,
+            button[type="submit"]:hover,
+            .button.default:hover,
+            .submit-row input[type="submit"]:hover {
+                background: #1e293b;
+                border-color: #1e293b;
+            }
+
+            input[type="submit"]:active,
+            button[type="submit"]:active {
+                background: #334155;
+                transform: translateY(1px);
+            }
+
+            /* Secondary/outline buttons */
+            button:not([type="submit"]),
+            .button:not(.default),
+            a.button {
+                background: #fff;
+                color: #374151;
+                border-color: #d1d5db;
+            }
+
+            button:not([type="submit"]):hover,
+            .button:not(.default):hover,
+            a.button:hover {
+                background: #f9fafb;
+                border-color: #9ca3af;
+                color: #1f2937;
+            }
+
+            /* Danger button */
+            .deletelink,
+            a.deletelink,
+            button.deletelink,
+            input[name="delete"],
+            .button.delete {
+                background: #fff;
+                color: #dc2626;
+                border-color: #fecaca;
+            }
+
+            .deletelink:hover,
+            a.deletelink:hover,
+            button.deletelink:hover,
+            input[name="delete"]:hover,
+            .button.delete:hover {
+                background: #fef2f2;
+                border-color: #f87171;
+                color: #b91c1c;
+            }
+
+            /* Small buttons */
+            .btn-sm,
+            .object-tools a,
+            .datetimeshortcuts a {
+                padding: 6px 12px;
+                font-size: 13px;
+                border-radius: 6px;
+            }
+
+            /* Object tools (top action buttons) */
+            .object-tools {
+                margin-bottom: 20px;
+            }
+
+            .object-tools li {
+                margin-left: 10px;
+            }
+
+            .object-tools a {
+                background: #fff;
+                color: #374151;
+                border: 1px solid #d1d5db;
+                text-decoration: none;
+                display: inline-flex;
+                align-items: center;
+            }
+
+            .object-tools a:hover {
+                background: #f9fafb;
+                border-color: #9ca3af;
+            }
+
+            /* Submit row styling */
+            .submit-row {
+                margin-top: 24px;
+                padding: 20px;
+                background: #fff;
+                border-radius: 12px;
+                border: 1px solid #e2e8f0;
+                box-shadow: 0 1px 3px rgba(0,0,0,0.04);
+                clear: both;
+                flex: 1 1 100%;
+                display: flex;
+                gap: 12px;
+                flex-wrap: wrap;
+                align-items: center;
+            }
+
+            .submit-row p {
+                margin: 0;
+            }
+
+            .submit-row .deletelink-box {
+                margin-left: auto;
+            }
+
+            /* Responsive: 2 columns on medium screens */
+            @media (max-width: 1400px) {
+                #content-main form fieldset,
+                #content form fieldset {
+                    max-width: calc(50% - 10px);
+                    flex: 1 1 320px;
+                }
+            }
+
+            /* Responsive: stack on smaller screens */
+            @media (max-width: 900px) {
+                #content-main form fieldset,
+                #content form fieldset {
+                    flex: 1 1 100%;
+                    max-width: 100%;
+                    min-width: auto;
+                }
+
+                #content {
+                    padding: 16px;
+                }
+            }
+
+            /* Module content padding */
+            #content-main form .module > div,
+            #content form .module > div {
+                padding: 12px;
+            }
+
+            /* Fix for JSON/config editor */
+            .field-config .readonly,
+            .field-config textarea {
+                width: 100%;
+                min-height: 120px;
+                max-height: none;
+            }
+
+            /* Related widget styling */
+            .related-widget-wrapper {
+                display: flex;
+                align-items: center;
+                gap: 8px;
+                flex-wrap: wrap;
+            }
+
+            .related-widget-wrapper select {
+                flex: 1;
+                min-width: 150px;
+            }
+
+            .related-widget-wrapper a {
+                flex-shrink: 0;
+                padding: 8px;
+                border-radius: 6px;
+                color: #64748b;
+                transition: color 0.15s ease, background 0.15s ease;
+            }
+
+            .related-widget-wrapper a:hover {
+                color: #1e293b;
+                background: #f1f5f9;
+            }
+
+            /* Help text styling */
+            .help {
+                font-size: 13px;
+                color: #64748b;
+                margin-top: 6px;
+                line-height: 1.5;
+            }
+
+            /* Error styling */
+            .errorlist {
+                color: #dc2626;
+                font-size: 13px;
+                margin: 6px 0;
+                padding: 0;
+                list-style: none;
+            }
+
+            .errorlist li {
+                background: #fef2f2;
+                padding: 8px 12px;
+                border-radius: 6px;
+                border: 1px solid #fecaca;
+            }
+
+            /* Inline related objects - force full width */
+            .inline-group,
+            #archiveresult_set-group,
+            #content-main form .inline-group,
+            #content-main form > div > .inline-group,
+            #content form > div > .inline-group,
+            .change-form .inline-group,
+            div.inline-group {
+                flex: 1 1 100% !important;
+                max-width: 100% !important;
+                min-width: 100% !important;
+                width: 100% !important;
+                margin-top: 20px;
+                flex-basis: 100% !important;
+            }
+
+            /* Ensure inline-group breaks out of card grid */
+            #content-main form > div,
+            #content form > div {
+                flex-wrap: wrap;
+            }
+
+            /* TabularInline table full width */
+            .inline-group .tabular,
+            .inline-group table {
+                width: 100% !important;
+            }
+
+            .inline-related {
+                margin: 12px 0;
+                padding: 16px;
+                background: #fff;
+                border-radius: 10px;
+                border: 1px solid #e2e8f0;
+            }
+
+            .inline-related h3 {
+                margin: -16px -16px 16px -16px;
+                padding: 12px 16px;
+                background: #f8fafc;
+                border-radius: 9px 9px 0 0;
+                border-bottom: 1px solid #e2e8f0;
+                font-size: 13px;
+                font-weight: 600;
+                color: #374151;
+            }
+
+            /* Tabular inline styling */
+            .tabular {
+                border-radius: 8px;
+                overflow: hidden;
+                border: 1px solid #e2e8f0;
+            }
+
+            .tabular td, .tabular th {
+                padding: 12px 14px;
+                font-size: 13px;
+                border-bottom: 1px solid #f1f5f9;
+            }
+
+            .tabular th {
+                background: #f8fafc;
+                font-weight: 600;
+                color: #374151;
+                text-align: left;
+            }
+
+            .tabular tr:last-child td {
+                border-bottom: none;
+            }
+
+            /* Delete checkbox */
+            .inline-deletelink {
+                color: #dc2626;
+                font-size: 13px;
+            }
+
+            /* Datetime widgets */
+            .datetimeshortcuts {
+                margin-left: 10px;
+            }
+
+            .datetimeshortcuts a {
+                background: #f1f5f9;
+                color: #475569;
+                border: none;
+                padding: 4px 10px;
+            }
+
+            .datetimeshortcuts a:hover {
+                background: #e2e8f0;
+                color: #1e293b;
+            }
+
+            /* Aligned forms - fix label positioning */
+            .aligned .form-row > div {
+                margin-left: 0 !important;
+            }
+
+            /* Checkbox styling */
+            input[type="checkbox"] {
+                width: 18px;
+                height: 18px;
+                border-radius: 4px;
+                border: 1px solid #d1d5db;
+                cursor: pointer;
+                accent-color: #3b82f6;
+            }
+
+            /* Links styling */
+            a {
+                color: #2563eb;
+                text-decoration: none;
+                transition: color 0.15s ease;
+            }
+
+            a:hover {
+                color: #1d4ed8;
+            }
+
+            /* Messages/alerts */
+            .messagelist {
+                padding: 0;
+                margin: 0 0 20px 0;
+            }
+
+            .messagelist li {
+                padding: 14px 18px;
+                border-radius: 10px;
+                font-size: 14px;
+                margin-bottom: 10px;
+                display: flex;
+                align-items: center;
+                gap: 10px;
+            }
+
+            ul.messagelist li.success {
+                background: #f0fdf4 !important;
+                background-image: none !important;
+                border: 1px solid #bbf7d0;
+                color: #166534;
+            }
+
+            .messagelist li.warning {
+                background: #fffbeb;
+                border: 1px solid #fde68a;
+                color: #92400e;
+            }
+
+            .messagelist li.error {
+                background: #fef2f2;
+                border: 1px solid #fecaca;
+                color: #991b1b;
+            }
+
+            /* Breadcrumbs */
+            .breadcrumbs {
+                background: transparent;
+                padding: 12px 24px;
+                font-size: 13px;
+                color: #64748b;
+            }
+
+            .breadcrumbs a {
+                color: #64748b;
+            }
+
+            .breadcrumbs a:hover {
+                color: #1e293b;
+            }
+
+            /* Action buttons in cards */
+            .card .btn,
+            .card button {
+                margin-top: 10px;
+            }
+
+            /* Select2 overrides */
+            .select2-container--default .select2-selection--single,
+            .select2-container--default .select2-selection--multiple {
+                border: 1px solid #d1d5db;
+                border-radius: 8px;
+                min-height: 42px;
+            }
+
+            .select2-container--default .select2-selection--single:focus,
+            .select2-container--default .select2-selection--multiple:focus {
+                border-color: #3b82f6;
+                box-shadow: 0 0 0 3px rgba(59, 130, 246, 0.15);
+            }
+
+            /* ============================================
+               Admin List/Changelist Page Styling
+               ============================================ */
+
+            /* Results table container */
+            #changelist {
+                background: #fff;
+                border-radius: 12px;
+                border: 1px solid #e2e8f0;
+                box-shadow: 0 1px 3px rgba(0,0,0,0.04);
+                overflow: hidden;
+            }
+
+            /* Table styling */
+            #result_list {
+                width: 100%;
+                border-collapse: collapse;
+                font-size: 14px;
+            }
+
+            #result_list thead th {
+                background: #f8fafc;
+                border-bottom: 2px solid #e2e8f0;
+                padding: 12px 16px;
+                font-weight: 600;
+                font-size: 13px;
+                color: #475569;
+                text-align: left;
+                text-transform: uppercase;
+                letter-spacing: 0.025em;
+                white-space: nowrap;
+            }
+
+            #result_list thead th a {
+                color: #475569;
+                text-decoration: none;
+            }
+
+            #result_list thead th a:hover {
+                color: #1e293b;
+            }
+
+            #result_list thead th.sorted {
+                background: #f1f5f9;
+            }
+
+            #result_list thead th .text span {
+                padding-right: 5px;
+            }
+
+            #result_list tbody tr {
+                border-bottom: 1px solid #f1f5f9;
+                transition: background-color 0.15s ease;
+            }
+
+            #result_list tbody tr:hover {
+                background-color: #f8fafc;
+            }
+
+            #result_list tbody tr.selected {
+                background-color: #eff6ff;
+            }
+
+            #result_list tbody td {
+                padding: 12px 16px;
+                color: #334155;
+                vertical-align: middle;
+            }
+
+            #result_list tbody td a {
+                color: #2563eb;
+                font-weight: 500;
+            }
+
+            #result_list tbody td a:hover {
+                color: #1d4ed8;
+                text-decoration: underline;
+            }
+
+            /* Checkbox column */
+            #result_list .action-checkbox,
+            #result_list th.action-checkbox-column {
+                width: 40px;
+                text-align: center;
+                padding: 12px 8px;
+            }
+
+            /* Pagination */
+            .paginator {
+                background: #f8fafc;
+                padding: 12px 16px;
+                border-top: 1px solid #e2e8f0;
+                font-size: 14px;
+                color: #64748b;
+            }
+
+            .paginator a {
+                color: #2563eb;
+                padding: 6px 12px;
+                border-radius: 6px;
+                margin: 0 2px;
+                text-decoration: none;
+            }
+
+            .paginator a:hover {
+                background: #e2e8f0;
+            }
+
+            /* Toolbar / search bar */
+            #toolbar {
+                padding: 16px;
+                background: #fff;
+                border-bottom: 1px solid #e2e8f0;
+                display: flex;
+                align-items: center;
+                gap: 12px;
+            }
+
+            #toolbar form {
+                display: flex;
+                align-items: center;
+                gap: 8px;
+                flex: 1;
+            }
+
+            #searchbar {
+                flex: 1;
+                max-width: 400px;
+                padding: 10px 14px;
+                border: 1px solid #d1d5db;
+                border-radius: 8px;
+                font-size: 14px;
+            }
+
+            #searchbar:focus {
+                border-color: #3b82f6;
+                outline: none;
+                box-shadow: 0 0 0 3px rgba(59, 130, 246, 0.15);
+            }
+
+            /* Filter sidebar */
+            #changelist-filter {
+                background: #fff;
+                border: 1px solid #e2e8f0;
+                border-radius: 12px;
+                box-shadow: 0 1px 3px rgba(0,0,0,0.04);
+                overflow: hidden;
+            }
+
+            #changelist-filter h2 {
+                background: #f8fafc;
+                padding: 12px 16px;
+                font-size: 13px;
+                font-weight: 600;
+                color: #475569;
+                text-transform: uppercase;
+                letter-spacing: 0.025em;
+                margin: 0;
+                border-bottom: 1px solid #e2e8f0;
+            }
+
+            #changelist-filter h3 {
+                padding: 12px 16px 8px;
+                font-size: 12px;
+                font-weight: 600;
+                color: #64748b;
+                text-transform: uppercase;
+                letter-spacing: 0.05em;
+                margin: 0;
+            }
+
+            #changelist-filter ul {
+                padding: 0 8px 12px;
+                margin: 0;
+                list-style: none;
+            }
+
+            #changelist-filter li {
+                margin: 0;
+            }
+
+            #changelist-filter li a {
+                display: block;
+                padding: 8px 12px;
+                color: #475569;
+                text-decoration: none;
+                border-radius: 6px;
+                font-size: 14px;
+                transition: background-color 0.15s ease;
+            }
+
+            #changelist-filter li a:hover {
+                background: #f1f5f9;
+                color: #1e293b;
+            }
+
+            #changelist-filter li.selected a {
+                background: #eff6ff;
+                color: #2563eb;
+                font-weight: 500;
+            }
+
+            /* Actions bar */
+            .actions {
+                padding: 12px 16px;
+                background: #f8fafc;
+                border-bottom: 1px solid #e2e8f0;
+                display: flex;
+                align-items: center;
+                gap: 12px;
+                flex-wrap: wrap;
+            }
+
+            .actions label {
+                font-size: 14px;
+                color: #475569;
+            }
+
+            .actions select {
+                padding: 8px 12px;
+                border: 1px solid #d1d5db;
+                border-radius: 6px;
+                font-size: 14px;
+                background: #fff;
+            }
+
+            .actions .button {
+                padding: 8px 16px;
+                font-size: 14px;
+            }
+
+            /* Object count */
+            .actions .action-counter {
+                color: #64748b;
+                font-size: 14px;
+            }
+
+            /* Empty results */
+            #changelist-form .results + p,
+            .paginator + p {
+                padding: 40px;
+                text-align: center;
+                color: #64748b;
+                font-size: 15px;
+            }
+
+            /* Date hierarchy */
+            .xfull {
+                padding: 12px 16px;
+                background: #f8fafc;
+                border-bottom: 1px solid #e2e8f0;
+            }
+
+            .xfull a {
+                color: #2563eb;
+                margin-right: 8px;
+            }
         </style>
         {% endblock %}
         

+ 187 - 50
archivebox/templates/admin/progress_monitor.html

@@ -57,13 +57,24 @@
         box-shadow: 0 0 8px #3fb950;
         animation: pulse 2s infinite;
     }
+    #progress-monitor .status-dot.idle {
+        background: #d29922;
+        box-shadow: 0 0 4px #d29922;
+    }
     #progress-monitor .status-dot.stopped {
-        background: #f85149;
+        background: #6e7681;
+    }
+    #progress-monitor .status-dot.flash {
+        animation: flash 0.3s ease-out;
     }
     @keyframes pulse {
         0%, 100% { opacity: 1; box-shadow: 0 0 8px #3fb950; }
         50% { opacity: 0.6; box-shadow: 0 0 4px #3fb950; }
     }
+    @keyframes flash {
+        0% { transform: scale(1.5); }
+        100% { transform: scale(1); }
+    }
 
     /* Stats */
     #progress-monitor .stats {
@@ -89,6 +100,19 @@
     #progress-monitor .stat-value.error { color: #f85149; }
     #progress-monitor .stat-value.warning { color: #d29922; }
     #progress-monitor .stat-value.info { color: #58a6ff; }
+    #progress-monitor .stat.clickable {
+        cursor: pointer;
+        padding: 2px 6px;
+        margin: -2px -6px;
+        border-radius: 4px;
+        transition: background 0.2s;
+    }
+    #progress-monitor .stat.clickable:hover {
+        background: rgba(255,255,255,0.1);
+    }
+    #progress-monitor .stat.clickable:active {
+        background: rgba(255,255,255,0.2);
+    }
 
     /* Toggle Button */
     #progress-monitor .toggle-btn {
@@ -259,48 +283,86 @@
         padding: 0 12px 8px;
     }
 
-    /* Extractor List */
+    /* Extractor List - Compact Badge Layout */
     #progress-monitor .extractor-list {
         padding: 8px 12px;
         background: rgba(0,0,0,0.2);
         border-top: 1px solid #21262d;
+        display: flex;
+        flex-wrap: wrap;
+        gap: 4px;
+    }
+    #progress-monitor .extractor-badge {
+        position: relative;
+        display: inline-flex;
+        align-items: center;
+        gap: 4px;
+        padding: 3px 8px;
+        border-radius: 4px;
+        font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
+        font-size: 10px;
+        background: #21262d;
+        overflow: hidden;
+        white-space: nowrap;
     }
-    #progress-monitor .extractor-item {
+    #progress-monitor .extractor-badge .progress-fill {
+        position: absolute;
+        top: 0;
+        left: 0;
+        bottom: 0;
+        z-index: 0;
+        transition: width 0.3s ease-out;
+    }
+    #progress-monitor .extractor-badge .badge-content {
+        position: relative;
+        z-index: 1;
         display: flex;
         align-items: center;
-        gap: 8px;
-        padding: 4px 0;
+        gap: 4px;
     }
-    #progress-monitor .extractor-icon {
-        font-size: 12px;
-        width: 16px;
-        text-align: center;
+    #progress-monitor .extractor-badge.queued {
+        color: #8b949e;
     }
-    #progress-monitor .extractor-icon.running {
+    #progress-monitor .extractor-badge.queued .progress-fill {
+        background: rgba(110, 118, 129, 0.2);
+        width: 0%;
+    }
+    #progress-monitor .extractor-badge.started {
         color: #d29922;
-        animation: spin 1s linear infinite;
     }
-    #progress-monitor .extractor-icon.success {
+    #progress-monitor .extractor-badge.started .progress-fill {
+        background: rgba(210, 153, 34, 0.3);
+        width: 50%;
+        animation: progress-pulse 1.5s ease-in-out infinite;
+    }
+    @keyframes progress-pulse {
+        0%, 100% { opacity: 0.5; }
+        50% { opacity: 1; }
+    }
+    #progress-monitor .extractor-badge.succeeded {
         color: #3fb950;
     }
-    #progress-monitor .extractor-icon.failed {
+    #progress-monitor .extractor-badge.succeeded .progress-fill {
+        background: rgba(63, 185, 80, 0.25);
+        width: 100%;
+    }
+    #progress-monitor .extractor-badge.failed {
         color: #f85149;
     }
-    #progress-monitor .extractor-icon.pending {
-        color: #8b949e;
+    #progress-monitor .extractor-badge.failed .progress-fill {
+        background: rgba(248, 81, 73, 0.25);
+        width: 100%;
+    }
+    #progress-monitor .extractor-badge .badge-icon {
+        font-size: 10px;
+    }
+    #progress-monitor .extractor-badge.started .badge-icon {
+        animation: spin 1s linear infinite;
     }
     @keyframes spin {
         from { transform: rotate(0deg); }
         to { transform: rotate(360deg); }
     }
-    #progress-monitor .extractor-name {
-        flex: 1;
-        font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
-        font-size: 11px;
-    }
-    #progress-monitor .extractor-progress {
-        width: 60px;
-    }
 
     /* Status Badge */
     #progress-monitor .status-badge {
@@ -356,11 +418,11 @@
                     <span class="stat-label">Queued</span>
                     <span class="stat-value warning" id="total-queued">0</span>
                 </div>
-                <div class="stat">
+                <div class="stat clickable" id="stat-succeeded" title="Click to reset counter">
                     <span class="stat-label">Done</span>
                     <span class="stat-value success" id="total-succeeded">0</span>
                 </div>
-                <div class="stat">
+                <div class="stat clickable" id="stat-failed" title="Click to reset counter">
                     <span class="stat-label">Failed</span>
                     <span class="stat-value error" id="total-failed">0</span>
                 </div>
@@ -390,6 +452,24 @@
     let expandedCrawls = new Set(JSON.parse(localStorage.getItem('progress-monitor-expanded-crawls') || '[]'));
     let expandedSnapshots = new Set(JSON.parse(localStorage.getItem('progress-monitor-expanded-snapshots') || '[]'));
 
+    // Baselines for resettable counters
+    let succeededBaseline = parseInt(localStorage.getItem('progress-succeeded-baseline') || '0');
+    let failedBaseline = parseInt(localStorage.getItem('progress-failed-baseline') || '0');
+    let lastSucceeded = 0;
+    let lastFailed = 0;
+
+    // Click handlers for resetting counters
+    document.getElementById('stat-succeeded').addEventListener('click', function() {
+        succeededBaseline = lastSucceeded;
+        localStorage.setItem('progress-succeeded-baseline', succeededBaseline);
+        document.getElementById('total-succeeded').textContent = '0';
+    });
+    document.getElementById('stat-failed').addEventListener('click', function() {
+        failedBaseline = lastFailed;
+        localStorage.setItem('progress-failed-baseline', failedBaseline);
+        document.getElementById('total-failed').textContent = '0';
+    });
+
     function formatUrl(url) {
         try {
             const u = new URL(url);
@@ -400,24 +480,18 @@
     }
 
     function renderExtractor(extractor) {
-        const iconClass = extractor.status === 'started' ? 'running' :
-                         extractor.status === 'succeeded' ? 'success' :
-                         extractor.status === 'failed' ? 'failed' : 'pending';
         const icon = extractor.status === 'started' ? '&#8635;' :
                     extractor.status === 'succeeded' ? '&#10003;' :
                     extractor.status === 'failed' ? '&#10007;' : '&#9675;';
 
         return `
-            <div class="extractor-item">
-                <span class="extractor-icon ${iconClass}">${icon}</span>
-                <span class="extractor-name">${extractor.extractor}</span>
-                <div class="extractor-progress">
-                    <div class="progress-bar-container">
-                        <div class="progress-bar extractor ${extractor.status === 'started' ? 'indeterminate' : ''}"
-                             style="width: ${extractor.status === 'succeeded' ? '100' : extractor.status === 'failed' ? '100' : extractor.progress}%"></div>
-                    </div>
-                </div>
-            </div>
+            <span class="extractor-badge ${extractor.status}">
+                <span class="progress-fill"></span>
+                <span class="badge-content">
+                    <span class="badge-icon">${icon}</span>
+                    <span>${extractor.extractor}</span>
+                </span>
+            </span>
         `;
     }
 
@@ -427,10 +501,14 @@
         const statusIcon = snapshot.status === 'started' ? '&#8635;' : '&#128196;';
 
         let extractorHtml = '';
-        if (snapshot.active_extractors && snapshot.active_extractors.length > 0) {
+        if (snapshot.all_extractors && snapshot.all_extractors.length > 0) {
+            // Sort extractors alphabetically by name to prevent reordering on updates
+            const sortedExtractors = [...snapshot.all_extractors].sort((a, b) =>
+                a.extractor.localeCompare(b.extractor)
+            );
             extractorHtml = `
                 <div class="extractor-list" style="${isExpanded ? '' : 'display:none'}">
-                    ${snapshot.active_extractors.map(e => renderExtractor(e)).join('')}
+                    ${sortedExtractors.map(e => renderExtractor(e)).join('')}
                 </div>
             `;
         }
@@ -438,7 +516,7 @@
         return `
             <div class="snapshot-item" data-snapshot-key="${snapshotKey}">
                 <div class="snapshot-header" onclick="window.toggleSnapshot('${snapshotKey}')">
-                    <span class="expand-icon ${isExpanded ? 'expanded' : ''}">${snapshot.active_extractors?.length ? '&#9654;' : ''}</span>
+                    <span class="expand-icon ${isExpanded ? 'expanded' : ''}">${snapshot.all_extractors?.length ? '&#9654;' : ''}</span>
                     <span class="snapshot-icon">${statusIcon}</span>
                     <div class="snapshot-info">
                         <div class="snapshot-url">${formatUrl(snapshot.url)}</div>
@@ -469,6 +547,40 @@
             snapshotsHtml = crawl.active_snapshots.map(s => renderSnapshot(s, crawl.id)).join('');
         }
 
+        // Show warning if crawl is stuck (queued but can't start)
+        let warningHtml = '';
+        if (crawl.status === 'queued' && !crawl.can_start) {
+            warningHtml = `
+                <div style="padding: 8px 14px; background: rgba(248, 81, 73, 0.1); border-top: 1px solid #f85149; color: #f85149; font-size: 11px;">
+                    ⚠️ Crawl cannot start: ${crawl.seed_uri ? 'unknown error' : 'no seed URI'}
+                </div>
+            `;
+        } else if (crawl.status === 'queued' && crawl.retry_at_future) {
+            // Queued but retry_at is in future (was claimed by worker, will retry)
+            warningHtml = `
+                <div style="padding: 8px 14px; background: rgba(88, 166, 255, 0.1); border-top: 1px solid #58a6ff; color: #58a6ff; font-size: 11px;">
+                    🔄 Retrying in ${crawl.seconds_until_retry}s...${crawl.seed_uri ? ` (${crawl.seed_uri})` : ''}
+                </div>
+            `;
+        } else if (crawl.status === 'queued' && crawl.total_snapshots === 0) {
+            // Queued and waiting to be picked up by worker
+            warningHtml = `
+                <div style="padding: 8px 14px; background: rgba(210, 153, 34, 0.1); border-top: 1px solid #d29922; color: #d29922; font-size: 11px;">
+                    ⏳ Waiting for worker to pick up...${crawl.seed_uri ? ` (${crawl.seed_uri})` : ''}
+                </div>
+            `;
+        }
+
+        // Show snapshot info or URL count if no snapshots yet
+        let metaText = `depth: ${crawl.max_depth}`;
+        if (crawl.total_snapshots > 0) {
+            metaText += ` | ${crawl.total_snapshots} snapshots`;
+        } else if (crawl.urls_count > 0) {
+            metaText += ` | ${crawl.urls_count} URLs`;
+        } else if (crawl.seed_uri) {
+            metaText += ` | ${crawl.seed_uri.substring(0, 40)}${crawl.seed_uri.length > 40 ? '...' : ''}`;
+        }
+
         return `
             <div class="crawl-item" data-crawl-id="${crawl.id}">
                 <div class="crawl-header" onclick="window.toggleCrawl('${crawl.id}')">
@@ -476,10 +588,11 @@
                     <span class="crawl-icon">${statusIcon}</span>
                     <div class="crawl-info">
                         <div class="crawl-label">${crawl.label}</div>
-                        <div class="crawl-meta">depth: ${crawl.max_depth} | ${crawl.total_snapshots} snapshots</div>
+                        <div class="crawl-meta">${metaText}</div>
                     </div>
                     <div class="crawl-stats">
                         <span style="color:#3fb950">${crawl.completed_snapshots} done</span>
+                        <span style="color:#d29922">${crawl.started_snapshots || 0} active</span>
                         <span style="color:#8b949e">${crawl.pending_snapshots} pending</span>
                     </div>
                     <span class="status-badge ${crawl.status}">${crawl.status}</span>
@@ -490,6 +603,7 @@
                              style="width: ${crawl.progress}%"></div>
                     </div>
                 </div>
+                ${warningHtml}
                 <div class="crawl-body" style="${isExpanded ? '' : 'display:none'}">
                     <div class="snapshot-list">
                         ${snapshotsHtml}
@@ -542,25 +656,48 @@
                            data.snapshots_pending > 0 || data.snapshots_started > 0 ||
                            data.archiveresults_pending > 0 || data.archiveresults_started > 0;
 
-        // Update orchestrator status
+        // Update orchestrator status - show "Running" only when there's actual activity
+        // Don't distinguish between "Stopped" and "Idle" since orchestrator starts/stops frequently
         const dot = document.getElementById('orchestrator-dot');
         const text = document.getElementById('orchestrator-text');
-        if (data.orchestrator_running) {
-            dot.classList.remove('stopped');
+        const hasWorkers = data.total_workers > 0;
+
+        if (hasWorkers || hasActivity) {
+            dot.classList.remove('stopped', 'idle');
             dot.classList.add('running');
             text.textContent = 'Running';
         } else {
-            dot.classList.remove('running');
-            dot.classList.add('stopped');
-            text.textContent = 'Stopped';
+            // No activity - show as idle (whether orchestrator process exists or not)
+            dot.classList.remove('stopped', 'running');
+            dot.classList.add('idle');
+            text.textContent = 'Idle';
         }
 
+        // Briefly flash the dot (via the .flash animation) to show we got fresh data
+        dot.classList.add('flash');
+        setTimeout(() => dot.classList.remove('flash'), 300);
+
         // Update stats
         document.getElementById('worker-count').textContent = data.total_workers;
         document.getElementById('total-queued').textContent =
             data.crawls_pending + data.snapshots_pending + data.archiveresults_pending;
-        document.getElementById('total-succeeded').textContent = data.archiveresults_succeeded;
-        document.getElementById('total-failed').textContent = data.archiveresults_failed;
+
+        // Store raw values and display relative to baseline
+        lastSucceeded = data.archiveresults_succeeded;
+        lastFailed = data.archiveresults_failed;
+
+        // If baseline is higher than current (e.g. after DB reset), reset baseline
+        if (succeededBaseline > lastSucceeded) {
+            succeededBaseline = 0;
+            localStorage.setItem('progress-succeeded-baseline', '0');
+        }
+        if (failedBaseline > lastFailed) {
+            failedBaseline = 0;
+            localStorage.setItem('progress-failed-baseline', '0');
+        }
+
+        document.getElementById('total-succeeded').textContent = lastSucceeded - succeededBaseline;
+        document.getElementById('total-failed').textContent = lastFailed - failedBaseline;
 
         // Render crawl tree
         if (data.active_crawls.length > 0) {

+ 8 - 3
archivebox/workers/management/commands/orchestrator.py

@@ -7,9 +7,14 @@ class Command(BaseCommand):
     help = 'Run the archivebox orchestrator'
 
     def add_arguments(self, parser):
-        parser.add_argument('--daemon', '-d', action='store_true', help="Run forever (don't exit on idle)")
+        parser.add_argument(
+            '--exit-on-idle',
+            action='store_true',
+            default=False,
+            help="Exit when all work is complete (default: run forever)"
+        )
 
     def handle(self, *args, **kwargs):
-        daemon = kwargs.get('daemon', False)
-        orchestrator = Orchestrator(exit_on_idle=not daemon)
+        exit_on_idle = kwargs.get('exit_on_idle', False)
+        orchestrator = Orchestrator(exit_on_idle=exit_on_idle)
         orchestrator.runloop()

+ 22 - 13
archivebox/workers/orchestrator.py

@@ -12,16 +12,17 @@ Architecture:
         └── Each worker spawns task subprocesses via CLI
 
 Usage:
-    # Embedded in other commands (exits when done)
+    # Default: runs forever (for use as subprocess of server)
+    orchestrator = Orchestrator(exit_on_idle=False)
+    orchestrator.runloop()
+
+    # Exit when done (for embedded use in other commands)
     orchestrator = Orchestrator(exit_on_idle=True)
     orchestrator.runloop()
-    
-    # Daemon mode (runs forever)
-    orchestrator = Orchestrator(exit_on_idle=False)
-    orchestrator.start()  # fork and return
-    
+
     # Or run via CLI
-    archivebox orchestrator [--daemon]
+    archivebox manage orchestrator              # runs forever
+    archivebox manage orchestrator --exit-on-idle  # exits when done
 """
 
 __package__ = 'archivebox.workers'
@@ -45,6 +46,14 @@ from .pid_utils import (
 )
 
 
+def _run_orchestrator_process(exit_on_idle: bool) -> None:
+    """Top-level function for multiprocessing (must be picklable)."""
+    from archivebox.config.django import setup_django
+    setup_django()
+    orchestrator = Orchestrator(exit_on_idle=exit_on_idle)
+    orchestrator.runloop()
+
+
 class Orchestrator:
     """
     Manages worker processes by polling queues and spawning workers as needed.
@@ -277,12 +286,12 @@ class Orchestrator:
         Fork orchestrator as a background process.
         Returns the PID of the new process.
         """
-        def run_orchestrator():
-            from archivebox.config.django import setup_django
-            setup_django()
-            self.runloop()
-        
-        proc = Process(target=run_orchestrator, name='orchestrator')
+        # Use module-level function to avoid pickle errors with local functions
+        proc = Process(
+            target=_run_orchestrator_process,
+            args=(self.exit_on_idle,),
+            name='orchestrator'
+        )
         proc.start()
 
         assert proc.pid is not None

+ 81 - 4
archivebox/workers/supervisord_util.py

@@ -28,7 +28,7 @@ WORKERS_DIR_NAME = "workers"
 
 ORCHESTRATOR_WORKER = {
     "name": "worker_orchestrator",
-    "command": "archivebox manage orchestrator",
+    "command": "archivebox manage orchestrator",  # runs forever by default
     "autostart": "true",
     "autorestart": "true",
     "stdout_logfile": "logs/worker_orchestrator.log",
@@ -332,14 +332,14 @@ def stop_worker(supervisor, daemon_name):
 
 def tail_worker_logs(log_path: str):
     get_or_create_supervisord_process(daemonize=False)
-    
+
     from rich.live import Live
     from rich.table import Table
-    
+
     table = Table()
     table.add_column("TS")
     table.add_column("URL")
-    
+
     try:
         with Live(table, refresh_per_second=1) as live:  # refresh once per second to keep the table readable
             with open(log_path, 'r') as f:
@@ -352,6 +352,83 @@ def tail_worker_logs(log_path: str):
     except SystemExit:
         pass
 
+
+def tail_multiple_worker_logs(log_files: list[str], follow=True):
+    """Tail multiple log files simultaneously, interleaving their output."""
+    import select
+    from pathlib import Path
+
+    # Convert relative paths to absolute paths
+    log_paths = []
+    for log_file in log_files:
+        log_path = Path(log_file)
+        if not log_path.is_absolute():
+            log_path = CONSTANTS.DATA_DIR / log_path
+
+        # Create log file if it doesn't exist
+        if not log_path.exists():
+            log_path.parent.mkdir(parents=True, exist_ok=True)
+            log_path.touch()
+
+        log_paths.append(log_path)
+
+    # Open all log files
+    file_handles = []
+    for log_path in log_paths:
+        try:
+            f = open(log_path, 'r')
+            # Seek to end of file if following
+            if follow:
+                f.seek(0, 2)  # Seek to end
+            file_handles.append((log_path.name, f))
+        except Exception as e:
+            print(f"[yellow]Warning: Could not open {log_path}: {e}[/yellow]")
+
+    if not file_handles:
+        print("[red]No log files could be opened[/red]")
+        return
+
+    # Print which logs we're tailing
+    log_names = [name for name, _ in file_handles]
+    print(f"[dim]Tailing: {', '.join(log_names)}[/dim]")
+    print()
+
+    try:
+        while follow:
+            # Read available lines from all files
+            for log_name, f in file_handles:
+                line = f.readline()
+                if line:
+                    # Colorize based on log source
+                    if 'orchestrator' in log_name.lower():
+                        color = 'cyan'
+                    elif 'daphne' in log_name.lower():
+                        color = 'green'
+                    else:
+                        color = 'white'
+
+                    # Strip ANSI codes if present (supervisord does this but just in case)
+                    import re
+                    line_clean = re.sub(r'\x1b\[[0-9;]*m', '', line.rstrip())
+
+                    if line_clean:
+                        print(f'[{color}][{log_name}][/{color}] {line_clean}')
+
+            # Small sleep to avoid busy-waiting
+            time.sleep(0.1)
+
+    except (KeyboardInterrupt, BrokenPipeError, IOError):
+        print("\n[yellow][i] Stopped tailing logs[/i][/yellow]")
+    except SystemExit:
+        pass
+    finally:
+        # Close all file handles
+        for _, f in file_handles:
+            try:
+                f.close()
+            except Exception:
+                pass
+
 def watch_worker(supervisor, daemon_name, interval=5):
     """loop continuously and monitor worker's health"""
     while True:

+ 3 - 20
archivebox/workers/tasks.py

@@ -3,6 +3,9 @@ Background task functions for queuing work to the orchestrator.
 
 These functions queue Snapshots/Crawls for processing by setting their status
 to QUEUED, which the orchestrator workers will pick up and process.
+
+NOTE: These functions do NOT start the orchestrator - they assume it's already
+running via `archivebox server` (supervisord) or will be run inline by the CLI.
 """
 
 __package__ = 'archivebox.workers'
@@ -10,16 +13,6 @@ __package__ = 'archivebox.workers'
 from django.utils import timezone
 
 
-def ensure_orchestrator_running():
-    """Ensure the orchestrator is running to process queued items."""
-    from .orchestrator import Orchestrator
-
-    if not Orchestrator.is_running():
-        # Start orchestrator in background
-        orchestrator = Orchestrator(exit_on_idle=True)
-        orchestrator.start()
-
-
 def bg_add(add_kwargs: dict) -> int:
     """
     Add URLs and queue them for archiving.
@@ -36,9 +29,6 @@ def bg_add(add_kwargs: dict) -> int:
 
     result = add(**add_kwargs)
 
-    # Ensure orchestrator is running to process the new snapshots
-    ensure_orchestrator_running()
-
     return len(result) if result else 0
 
 
@@ -66,10 +56,6 @@ def bg_archive_snapshots(snapshots, kwargs: dict | None = None) -> int:
             )
             queued_count += 1
 
-    # Ensure orchestrator is running to process the queued snapshots
-    if queued_count > 0:
-        ensure_orchestrator_running()
-
     return queued_count
 
 
@@ -90,9 +76,6 @@ def bg_archive_snapshot(snapshot, overwrite: bool = False, methods: list | None
             status=Snapshot.StatusChoices.QUEUED,
             retry_at=timezone.now(),
         )
-
-        # Ensure orchestrator is running to process the queued snapshot
-        ensure_orchestrator_running()
         return 1
 
     return 0

+ 2 - 2
archivebox/workers/worker.py

@@ -67,8 +67,8 @@ class Worker:
     # Configuration (can be overridden by subclasses)
     MAX_TICK_TIME: ClassVar[int] = 60
     MAX_CONCURRENT_TASKS: ClassVar[int] = 1
-    POLL_INTERVAL: ClassVar[float] = 0.5
-    IDLE_TIMEOUT: ClassVar[int] = 3  # Exit after N idle iterations (set to 0 to never exit)
+    POLL_INTERVAL: ClassVar[float] = 1.0
+    IDLE_TIMEOUT: ClassVar[int] = 10  # Exit after N idle iterations (10 sec at 1.0 poll interval)
 
     def __init__(self, worker_id: int = 0, daemon: bool = False, **kwargs: Any):
         self.worker_id = worker_id