1 month ago · 575a595f26
--- a/TODO_archivebox_jsonl_cli.md
+++ b/TODO_archivebox_jsonl_cli.md
@@ -687,30 +687,30 @@ def create_test_snapshot_json(url: str = None, **kwargs) -> Dict[str, Any]:
 
				 ## Task Checklist
			
 
				 
			
 
				 ### Phase 1: Model Prerequisites
			
 
				-- [ ] Implement `ArchiveResult.from_json()` in `archivebox/core/models.py`
			
 
				-- [ ] Implement `ArchiveResult.from_jsonl()` in `archivebox/core/models.py`
			
 
				-- [ ] Fix `Snapshot.to_json()` to use `tags_str` instead of `tags`
			
 
				+- [x] Implement `ArchiveResult.from_json()` in `archivebox/core/models.py`
			
 
				+- [x] Implement `ArchiveResult.from_jsonl()` in `archivebox/core/models.py`
			
 
				+- [x] Fix `Snapshot.to_json()` to use `tags_str` instead of `tags`
			
 
				 
			
 
				 ### Phase 2: Shared Utilities
			
 
				-- [ ] Create `archivebox/cli/cli_utils.py` with shared `apply_filters()`
			
 
				-- [ ] Update 7 CLI files to import from `cli_utils.py`
			
 
				+- [x] Create `archivebox/cli/cli_utils.py` with shared `apply_filters()`
			
 
				+- [x] Update 7 CLI files to import from `cli_utils.py`
			
 
				 
			
 
				 ### Phase 3: Pass-Through Behavior
			
 
				-- [ ] Add pass-through to `archivebox_crawl.py` create
			
 
				-- [ ] Add pass-through to `archivebox_snapshot.py` create
			
 
				-- [ ] Add pass-through to `archivebox_archiveresult.py` create
			
 
				-- [ ] Add create-or-update to `archivebox_run.py`
			
 
				-- [ ] Add pass-through output to `archivebox_run.py`
			
 
				+- [x] Add pass-through to `archivebox_crawl.py` create
			
 
				+- [x] Add pass-through to `archivebox_snapshot.py` create
			
 
				+- [x] Add pass-through to `archivebox_archiveresult.py` create
			
 
				+- [x] Add create-or-update to `archivebox_run.py`
			
 
				+- [x] Add pass-through output to `archivebox_run.py`
			
 
				 
			
 
				 ### Phase 4: Test Infrastructure
			
 
				-- [ ] Create `archivebox/tests/conftest.py` with pytest-django fixtures
			
 
				+- [x] Create `archivebox/tests/conftest.py` with pytest-django fixtures
			
 
				 
			
 
				 ### Phase 5: Unit Tests
			
 
				-- [ ] Create `archivebox/tests/test_cli_crawl.py`
			
 
				-- [ ] Create `archivebox/tests/test_cli_snapshot.py`
			
 
				-- [ ] Create `archivebox/tests/test_cli_archiveresult.py`
			
 
				-- [ ] Create `archivebox/tests/test_cli_run.py`
			
 
				+- [x] Create `archivebox/tests/test_cli_crawl.py`
			
 
				+- [x] Create `archivebox/tests/test_cli_snapshot.py`
			
 
				+- [x] Create `archivebox/tests/test_cli_archiveresult.py`
			
 
				+- [x] Create `archivebox/tests/test_cli_run.py`
			
 
				 
			
 
				 ### Phase 6: Integration & Config
			
 
				-- [ ] Extend `archivebox/cli/tests_piping.py` with pass-through tests
			
 
				-- [ ] Update `archivebox/workers/supervisord_util.py`: orchestrator→run
			
 
				+- [x] Extend `archivebox/cli/tests_piping.py` with pass-through tests
			
 
				+- [x] Update `archivebox/workers/supervisord_util.py`: orchestrator→run
			
--- a/archivebox/cli/archivebox_archiveresult.py
+++ b/archivebox/cli/archivebox_archiveresult.py
@@ -39,21 +39,7 @@ from typing import Optional
 
				 import rich_click as click
			
 
				 from rich import print as rprint
			
 
				 
			
 
				-
			
 
				-def apply_filters(queryset, filter_kwargs: dict, limit: Optional[int] = None):
			
 
				-    """Apply Django-style filters from CLI kwargs to a QuerySet."""
			
 
				-    filters = {}
			
 
				-    for key, value in filter_kwargs.items():
			
 
				-        if value is not None and key not in ('limit', 'offset'):
			
 
				-            filters[key] = value
			
 
				-
			
 
				-    if filters:
			
 
				-        queryset = queryset.filter(**filters)
			
 
				-
			
 
				-    if limit:
			
 
				-        queryset = queryset[:limit]
			
 
				-
			
 
				-    return queryset
			
 
				+from archivebox.cli.cli_utils import apply_filters
			
 
				 
			
 
				 
			
 
				 # =============================================================================
			
@@ -69,6 +55,7 @@ def create_archiveresults(
 
				     Create ArchiveResults for Snapshots.
			
 
				 
			
 
				     Reads Snapshot records from stdin and creates ArchiveResult entries.
			
 
				+    Pass-through: Non-Snapshot/ArchiveResult records are output unchanged.
			
 
				     If --plugin is specified, only creates results for that plugin.
			
 
				     Otherwise, creates results for all pending plugins.
			
 
				 
			
@@ -78,7 +65,7 @@ def create_archiveresults(
 
				     """
			
 
				     from django.utils import timezone
			
 
				 
			
 
				-    from archivebox.misc.jsonl import read_stdin, write_record, TYPE_SNAPSHOT
			
 
				+    from archivebox.misc.jsonl import read_stdin, write_record, TYPE_SNAPSHOT, TYPE_ARCHIVERESULT
			
 
				     from archivebox.core.models import Snapshot, ArchiveResult
			
 
				 
			
 
				     is_tty = sys.stdout.isatty()
			
@@ -87,6 +74,7 @@ def create_archiveresults(
 
				     if snapshot_id:
			
 
				         try:
			
 
				             snapshots = [Snapshot.objects.get(id=snapshot_id)]
			
 
				+            pass_through_records = []
			
 
				         except Snapshot.DoesNotExist:
			
 
				             rprint(f'[red]Snapshot not found: {snapshot_id}[/red]', file=sys.stderr)
			
 
				             return 1
			
@@ -97,17 +85,44 @@ def create_archiveresults(
 
				             rprint('[yellow]No Snapshot records provided via stdin[/yellow]', file=sys.stderr)
			
 
				             return 1
			
 
				 
			
 
				-        # Filter to only Snapshot records
			
 
				+        # Separate snapshot records from pass-through records
			
 
				         snapshot_ids = []
			
 
				+        pass_through_records = []
			
 
				+
			
 
				         for record in records:
			
 
				-            if record.get('type') == TYPE_SNAPSHOT:
			
 
				+            record_type = record.get('type', '')
			
 
				+
			
 
				+            if record_type == TYPE_SNAPSHOT:
			
 
				+                # Pass through the Snapshot record itself
			
 
				+                pass_through_records.append(record)
			
 
				                 if record.get('id'):
			
 
				                     snapshot_ids.append(record['id'])
			
 
				+
			
 
				+            elif record_type == TYPE_ARCHIVERESULT:
			
 
				+                # ArchiveResult records: pass through if they have an id
			
 
				+                if record.get('id'):
			
 
				+                    pass_through_records.append(record)
			
 
				+                # If no id, we could create it, but for now just pass through
			
 
				+                else:
			
 
				+                    pass_through_records.append(record)
			
 
				+
			
 
				+            elif record_type:
			
 
				+                # Other typed records (Crawl, Tag, etc): pass through
			
 
				+                pass_through_records.append(record)
			
 
				+
			
 
				             elif record.get('id'):
			
 
				-                # Assume it's a snapshot ID if no type specified
			
 
				+                # Untyped record with id - assume it's a snapshot ID
			
 
				                 snapshot_ids.append(record['id'])
			
 
				 
			
 
				+        # Output pass-through records first
			
 
				+        if not is_tty:
			
 
				+            for record in pass_through_records:
			
 
				+                write_record(record)
			
 
				+
			
 
				         if not snapshot_ids:
			
 
				+            if pass_through_records:
			
 
				+                rprint(f'[dim]Passed through {len(pass_through_records)} records, no new snapshots to process[/dim]', file=sys.stderr)
			
 
				+                return 0
			
 
				             rprint('[yellow]No valid Snapshot IDs in input[/yellow]', file=sys.stderr)
			
 
				             return 1
			
 
				 
			
@@ -115,7 +130,7 @@ def create_archiveresults(
 
				 
			
 
				     if not snapshots:
			
 
				         rprint('[yellow]No matching snapshots found[/yellow]', file=sys.stderr)
			
 
				-        return 1
			
 
				+        return 0 if pass_through_records else 1
			
 
				 
			
 
				     created_count = 0
			
 
				     for snapshot in snapshots:
			
--- a/archivebox/cli/archivebox_binary.py
+++ b/archivebox/cli/archivebox_binary.py
@@ -34,21 +34,7 @@ from typing import Optional
 
				 import rich_click as click
			
 
				 from rich import print as rprint
			
 
				 
			
 
				-
			
 
				-def apply_filters(queryset, filter_kwargs: dict, limit: Optional[int] = None):
			
 
				-    """Apply Django-style filters from CLI kwargs to a QuerySet."""
			
 
				-    filters = {}
			
 
				-    for key, value in filter_kwargs.items():
			
 
				-        if value is not None and key not in ('limit', 'offset'):
			
 
				-            filters[key] = value
			
 
				-
			
 
				-    if filters:
			
 
				-        queryset = queryset.filter(**filters)
			
 
				-
			
 
				-    if limit:
			
 
				-        queryset = queryset[:limit]
			
 
				-
			
 
				-    return queryset
			
 
				+from archivebox.cli.cli_utils import apply_filters
			
 
				 
			
 
				 
			
 
				 # =============================================================================
			
--- a/archivebox/cli/archivebox_crawl.py
+++ b/archivebox/cli/archivebox_crawl.py
@@ -39,21 +39,7 @@ from typing import Optional, Iterable
 
				 import rich_click as click
			
 
				 from rich import print as rprint
			
 
				 
			
 
				-
			
 
				-def apply_filters(queryset, filter_kwargs: dict, limit: Optional[int] = None):
			
 
				-    """Apply Django-style filters from CLI kwargs to a QuerySet."""
			
 
				-    filters = {}
			
 
				-    for key, value in filter_kwargs.items():
			
 
				-        if value is not None and key not in ('limit', 'offset'):
			
 
				-            filters[key] = value
			
 
				-
			
 
				-    if filters:
			
 
				-        queryset = queryset.filter(**filters)
			
 
				-
			
 
				-    if limit:
			
 
				-        queryset = queryset[:limit]
			
 
				-
			
 
				-    return queryset
			
 
				+from archivebox.cli.cli_utils import apply_filters
			
 
				 
			
 
				 
			
 
				 # =============================================================================
			
@@ -71,12 +57,13 @@ def create_crawl(
 
				     Create a Crawl job from URLs.
			
 
				 
			
 
				     Takes URLs as args or stdin, creates one Crawl with all URLs, outputs JSONL.
			
 
				+    Pass-through: Records that are not URLs are output unchanged (for piping).
			
 
				 
			
 
				     Exit codes:
			
 
				         0: Success
			
 
				         1: Failure
			
 
				     """
			
 
				-    from archivebox.misc.jsonl import read_args_or_stdin, write_record
			
 
				+    from archivebox.misc.jsonl import read_args_or_stdin, write_record, TYPE_CRAWL
			
 
				     from archivebox.base_models.models import get_or_create_system_user_pk
			
 
				     from archivebox.crawls.models import Crawl
			
 
				 
			
@@ -90,14 +77,46 @@ def create_crawl(
 
				         rprint('[yellow]No URLs provided. Pass URLs as arguments or via stdin.[/yellow]', file=sys.stderr)
			
 
				         return 1
			
 
				 
			
 
				-    # Collect all URLs into a single newline-separated string
			
 
				+    # Separate pass-through records from URL records
			
 
				     url_list = []
			
 
				+    pass_through_records = []
			
 
				+
			
 
				     for record in records:
			
 
				+        record_type = record.get('type', '')
			
 
				+
			
 
				+        # Pass-through: output records that aren't URL/Crawl types
			
 
				+        if record_type and record_type != TYPE_CRAWL and not record.get('url') and not record.get('urls'):
			
 
				+            pass_through_records.append(record)
			
 
				+            continue
			
 
				+
			
 
				+        # Handle existing Crawl records (just pass through with id)
			
 
				+        if record_type == TYPE_CRAWL and record.get('id'):
			
 
				+            pass_through_records.append(record)
			
 
				+            continue
			
 
				+
			
 
				+        # Collect URLs
			
 
				         url = record.get('url')
			
 
				         if url:
			
 
				             url_list.append(url)
			
 
				 
			
 
				+        # Handle 'urls' field (newline-separated)
			
 
				+        urls_field = record.get('urls')
			
 
				+        if urls_field:
			
 
				+            for line in urls_field.split('\n'):
			
 
				+                line = line.strip()
			
 
				+                if line and not line.startswith('#'):
			
 
				+                    url_list.append(line)
			
 
				+
			
 
				+    # Output pass-through records first
			
 
				+    if not is_tty:
			
 
				+        for record in pass_through_records:
			
 
				+            write_record(record)
			
 
				+
			
 
				     if not url_list:
			
 
				+        if pass_through_records:
			
 
				+            # If we had pass-through records but no URLs, that's OK
			
 
				+            rprint(f'[dim]Passed through {len(pass_through_records)} records, no new URLs[/dim]', file=sys.stderr)
			
 
				+            return 0
			
 
				         rprint('[red]No valid URLs found[/red]', file=sys.stderr)
			
 
				         return 1
			
 
				 
			
--- a/archivebox/cli/archivebox_machine.py
+++ b/archivebox/cli/archivebox_machine.py
@@ -28,21 +28,7 @@ from typing import Optional
 
				 import rich_click as click
			
 
				 from rich import print as rprint
			
 
				 
			
 
				-
			
 
				-def apply_filters(queryset, filter_kwargs: dict, limit: Optional[int] = None):
			
 
				-    """Apply Django-style filters from CLI kwargs to a QuerySet."""
			
 
				-    filters = {}
			
 
				-    for key, value in filter_kwargs.items():
			
 
				-        if value is not None and key not in ('limit', 'offset'):
			
 
				-            filters[key] = value
			
 
				-
			
 
				-    if filters:
			
 
				-        queryset = queryset.filter(**filters)
			
 
				-
			
 
				-    if limit:
			
 
				-        queryset = queryset[:limit]
			
 
				-
			
 
				-    return queryset
			
 
				+from archivebox.cli.cli_utils import apply_filters
			
 
				 
			
 
				 
			
 
				 # =============================================================================
			
--- a/archivebox/cli/archivebox_process.py
+++ b/archivebox/cli/archivebox_process.py
@@ -31,21 +31,7 @@ from typing import Optional
 
				 import rich_click as click
			
 
				 from rich import print as rprint
			
 
				 
			
 
				-
			
 
				-def apply_filters(queryset, filter_kwargs: dict, limit: Optional[int] = None):
			
 
				-    """Apply Django-style filters from CLI kwargs to a QuerySet."""
			
 
				-    filters = {}
			
 
				-    for key, value in filter_kwargs.items():
			
 
				-        if value is not None and key not in ('limit', 'offset'):
			
 
				-            filters[key] = value
			
 
				-
			
 
				-    if filters:
			
 
				-        queryset = queryset.filter(**filters)
			
 
				-
			
 
				-    if limit:
			
 
				-        queryset = queryset[:limit]
			
 
				-
			
 
				-    return queryset
			
 
				+from archivebox.cli.cli_utils import apply_filters
			
 
				 
			
 
				 
			
 
				 # =============================================================================
			
--- a/archivebox/cli/archivebox_run.py
+++ b/archivebox/cli/archivebox_run.py
@@ -38,58 +38,110 @@ def process_stdin_records() -> int:
 
				     """
			
 
				     Process JSONL records from stdin.
			
 
				 
			
 
				-    Reads records, queues them for processing, then runs orchestrator until complete.
			
 
				-    Handles any record type: Crawl, Snapshot, ArchiveResult, etc.
			
 
				+    Create-or-update behavior:
			
 
				+    - Records WITHOUT id: Create via Model.from_json(), then queue
			
 
				+    - Records WITH id: Lookup existing, re-queue for processing
			
 
				+
			
 
				+    Outputs JSONL of all processed records (for chaining).
			
 
				+
			
 
				+    Handles any record type: Crawl, Snapshot, ArchiveResult.
			
 
				+    Auto-cascades: Crawl → Snapshots → ArchiveResults.
			
 
				 
			
 
				     Returns exit code (0 = success, 1 = error).
			
 
				     """
			
 
				     from django.utils import timezone
			
 
				 
			
 
				-    from archivebox.misc.jsonl import read_stdin, TYPE_CRAWL, TYPE_SNAPSHOT, TYPE_ARCHIVERESULT
			
 
				+    from archivebox.misc.jsonl import read_stdin, write_record, TYPE_CRAWL, TYPE_SNAPSHOT, TYPE_ARCHIVERESULT
			
 
				+    from archivebox.base_models.models import get_or_create_system_user_pk
			
 
				     from archivebox.core.models import Snapshot, ArchiveResult
			
 
				     from archivebox.crawls.models import Crawl
			
 
				     from archivebox.workers.orchestrator import Orchestrator
			
 
				 
			
 
				     records = list(read_stdin())
			
 
				+    is_tty = sys.stdout.isatty()
			
 
				 
			
 
				     if not records:
			
 
				         return 0  # Nothing to process
			
 
				 
			
 
				+    created_by_id = get_or_create_system_user_pk()
			
 
				     queued_count = 0
			
 
				+    output_records = []
			
 
				 
			
 
				     for record in records:
			
 
				-        record_type = record.get('type')
			
 
				+        record_type = record.get('type', '')
			
 
				         record_id = record.get('id')
			
 
				 
			
 
				-        if not record_id:
			
 
				-            continue
			
 
				-
			
 
				         try:
			
 
				             if record_type == TYPE_CRAWL:
			
 
				-                crawl = Crawl.objects.get(id=record_id)
			
 
				-                if crawl.status in [Crawl.StatusChoices.QUEUED, Crawl.StatusChoices.STARTED]:
			
 
				+                if record_id:
			
 
				+                    # Existing crawl - re-queue
			
 
				+                    try:
			
 
				+                        crawl = Crawl.objects.get(id=record_id)
			
 
				+                    except Crawl.DoesNotExist:
			
 
				+                        crawl = Crawl.from_json(record, overrides={'created_by_id': created_by_id})
			
 
				+                else:
			
 
				+                    # New crawl - create it
			
 
				+                    crawl = Crawl.from_json(record, overrides={'created_by_id': created_by_id})
			
 
				+
			
 
				+                if crawl:
			
 
				                     crawl.retry_at = timezone.now()
			
 
				+                    if crawl.status not in [Crawl.StatusChoices.SEALED]:
			
 
				+                        crawl.status = Crawl.StatusChoices.QUEUED
			
 
				                     crawl.save()
			
 
				+                    output_records.append(crawl.to_json())
			
 
				                     queued_count += 1
			
 
				 
			
 
				-            elif record_type == TYPE_SNAPSHOT:
			
 
				-                snapshot = Snapshot.objects.get(id=record_id)
			
 
				-                if snapshot.status in [Snapshot.StatusChoices.QUEUED, Snapshot.StatusChoices.STARTED]:
			
 
				+            elif record_type == TYPE_SNAPSHOT or (record.get('url') and not record_type):
			
 
				+                if record_id:
			
 
				+                    # Existing snapshot - re-queue
			
 
				+                    try:
			
 
				+                        snapshot = Snapshot.objects.get(id=record_id)
			
 
				+                    except Snapshot.DoesNotExist:
			
 
				+                        snapshot = Snapshot.from_json(record, overrides={'created_by_id': created_by_id})
			
 
				+                else:
			
 
				+                    # New snapshot - create it
			
 
				+                    snapshot = Snapshot.from_json(record, overrides={'created_by_id': created_by_id})
			
 
				+
			
 
				+                if snapshot:
			
 
				                     snapshot.retry_at = timezone.now()
			
 
				+                    if snapshot.status not in [Snapshot.StatusChoices.SEALED]:
			
 
				+                        snapshot.status = Snapshot.StatusChoices.QUEUED
			
 
				                     snapshot.save()
			
 
				+                    output_records.append(snapshot.to_json())
			
 
				                     queued_count += 1
			
 
				 
			
 
				             elif record_type == TYPE_ARCHIVERESULT:
			
 
				-                archiveresult = ArchiveResult.objects.get(id=record_id)
			
 
				-                if archiveresult.status in [ArchiveResult.StatusChoices.QUEUED, ArchiveResult.StatusChoices.STARTED, ArchiveResult.StatusChoices.BACKOFF]:
			
 
				+                if record_id:
			
 
				+                    # Existing archiveresult - re-queue
			
 
				+                    try:
			
 
				+                        archiveresult = ArchiveResult.objects.get(id=record_id)
			
 
				+                    except ArchiveResult.DoesNotExist:
			
 
				+                        archiveresult = ArchiveResult.from_json(record)
			
 
				+                else:
			
 
				+                    # New archiveresult - create it
			
 
				+                    archiveresult = ArchiveResult.from_json(record)
			
 
				+
			
 
				+                if archiveresult:
			
 
				                     archiveresult.retry_at = timezone.now()
			
 
				+                    if archiveresult.status in [ArchiveResult.StatusChoices.FAILED, ArchiveResult.StatusChoices.SKIPPED, ArchiveResult.StatusChoices.BACKOFF]:
			
 
				+                        archiveresult.status = ArchiveResult.StatusChoices.QUEUED
			
 
				                     archiveresult.save()
			
 
				+                    output_records.append(archiveresult.to_json())
			
 
				                     queued_count += 1
			
 
				 
			
 
				-        except (Crawl.DoesNotExist, Snapshot.DoesNotExist, ArchiveResult.DoesNotExist):
			
 
				-            rprint(f'[yellow]Record not found: {record_type} {record_id}[/yellow]', file=sys.stderr)
			
 
				+            else:
			
 
				+                # Unknown type - pass through
			
 
				+                output_records.append(record)
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            rprint(f'[yellow]Error processing record: {e}[/yellow]', file=sys.stderr)
			
 
				             continue
			
 
				 
			
 
				+    # Output all processed records (for chaining)
			
 
				+    if not is_tty:
			
 
				+        for rec in output_records:
			
 
				+            write_record(rec)
			
 
				+
			
 
				     if queued_count == 0:
			
 
				         rprint('[yellow]No records to process[/yellow]', file=sys.stderr)
			
 
				         return 0
			
--- a/archivebox/cli/archivebox_snapshot.py
+++ b/archivebox/cli/archivebox_snapshot.py
@@ -36,21 +36,7 @@ from typing import Optional, Iterable
 
				 import rich_click as click
			
 
				 from rich import print as rprint
			
 
				 
			
 
				-
			
 
				-def apply_filters(queryset, filter_kwargs: dict, limit: Optional[int] = None):
			
 
				-    """Apply Django-style filters from CLI kwargs to a QuerySet."""
			
 
				-    filters = {}
			
 
				-    for key, value in filter_kwargs.items():
			
 
				-        if value is not None and key not in ('limit', 'offset'):
			
 
				-            filters[key] = value
			
 
				-
			
 
				-    if filters:
			
 
				-        queryset = queryset.filter(**filters)
			
 
				-
			
 
				-    if limit:
			
 
				-        queryset = queryset[:limit]
			
 
				-
			
 
				-    return queryset
			
 
				+from archivebox.cli.cli_utils import apply_filters
			
 
				 
			
 
				 
			
 
				 # =============================================================================
			
@@ -66,13 +52,12 @@ def create_snapshots(
 
				 ) -> int:
			
 
				     """
			
 
				     Create Snapshots from URLs or stdin JSONL (Crawl or Snapshot records).
			
 
				+    Pass-through: Records that are not Crawl/Snapshot/URL are output unchanged.
			
 
				 
			
 
				     Exit codes:
			
 
				         0: Success
			
 
				         1: Failure
			
 
				     """
			
 
				-    from django.utils import timezone
			
 
				-
			
 
				     from archivebox.misc.jsonl import (
			
 
				         read_args_or_stdin, write_record,
			
 
				         TYPE_SNAPSHOT, TYPE_CRAWL
			
@@ -93,11 +78,17 @@ def create_snapshots(
 
				 
			
 
				     # Process each record - handle Crawls and plain URLs/Snapshots
			
 
				     created_snapshots = []
			
 
				+    pass_through_count = 0
			
 
				+
			
 
				     for record in records:
			
 
				-        record_type = record.get('type')
			
 
				+        record_type = record.get('type', '')
			
 
				 
			
 
				         try:
			
 
				             if record_type == TYPE_CRAWL:
			
 
				+                # Pass through the Crawl record itself first
			
 
				+                if not is_tty:
			
 
				+                    write_record(record)
			
 
				+
			
 
				                 # Input is a Crawl - get or create it, then create Snapshots for its URLs
			
 
				                 crawl = None
			
 
				                 crawl_id = record.get('id')
			
@@ -144,11 +135,20 @@ def create_snapshots(
 
				                     if not is_tty:
			
 
				                         write_record(snapshot.to_json())
			
 
				 
			
 
				+            else:
			
 
				+                # Pass-through: output records we don't handle
			
 
				+                if not is_tty:
			
 
				+                    write_record(record)
			
 
				+                pass_through_count += 1
			
 
				+
			
 
				         except Exception as e:
			
 
				             rprint(f'[red]Error creating snapshot: {e}[/red]', file=sys.stderr)
			
 
				             continue
			
 
				 
			
 
				     if not created_snapshots:
			
 
				+        if pass_through_count > 0:
			
 
				+            rprint(f'[dim]Passed through {pass_through_count} records, no new snapshots[/dim]', file=sys.stderr)
			
 
				+            return 0
			
 
				         rprint('[red]No snapshots created[/red]', file=sys.stderr)
			
 
				         return 1
			
 
				 
			
--- a/archivebox/cli/archivebox_tag.py
+++ b/archivebox/cli/archivebox_tag.py
@@ -36,21 +36,7 @@ from typing import Optional, Iterable
 
				 import rich_click as click
			
 
				 from rich import print as rprint
			
 
				 
			
 
				-
			
 
				-def apply_filters(queryset, filter_kwargs: dict, limit: Optional[int] = None):
			
 
				-    """Apply Django-style filters from CLI kwargs to a QuerySet."""
			
 
				-    filters = {}
			
 
				-    for key, value in filter_kwargs.items():
			
 
				-        if value is not None and key not in ('limit', 'offset'):
			
 
				-            filters[key] = value
			
 
				-
			
 
				-    if filters:
			
 
				-        queryset = queryset.filter(**filters)
			
 
				-
			
 
				-    if limit:
			
 
				-        queryset = queryset[:limit]
			
 
				-
			
 
				-    return queryset
			
 
				+from archivebox.cli.cli_utils import apply_filters
			
 
				 
			
 
				 
			
 
				 # =============================================================================
			
--- a/archivebox/cli/cli_utils.py
+++ b/archivebox/cli/cli_utils.py
@@ -0,0 +1,46 @@
 
				+"""
			
 
				+Shared CLI utilities for ArchiveBox commands.
			
 
				+
			
 
				+This module contains common utilities used across multiple CLI commands,
			
 
				+extracted to avoid code duplication.
			
 
				+"""
			
 
				+
			
 
				+__package__ = 'archivebox.cli'
			
 
				+
			
 
				+from typing import Optional
			
 
				+
			
 
				+
			
 
				+def apply_filters(queryset, filter_kwargs: dict, limit: Optional[int] = None):
			
 
				+    """
			
 
				+    Apply Django-style filters from CLI kwargs to a QuerySet.
			
 
				+
			
 
				+    Supports: --status=queued, --url__icontains=example, --id__in=uuid1,uuid2
			
 
				+
			
 
				+    Args:
			
 
				+        queryset: Django QuerySet to filter
			
 
				+        filter_kwargs: Dict of filter key-value pairs from CLI
			
 
				+        limit: Optional limit on results
			
 
				+
			
 
				+    Returns:
			
 
				+        Filtered QuerySet
			
 
				+
			
 
				+    Example:
			
 
				+        queryset = Snapshot.objects.all()
			
 
				+        filter_kwargs = {'status': 'queued', 'url__icontains': 'example.com'}
			
 
				+        filtered = apply_filters(queryset, filter_kwargs, limit=10)
			
 
				+    """
			
 
				+    filters = {}
			
 
				+    for key, value in filter_kwargs.items():
			
 
				+        if value is None or key in ('limit', 'offset'):
			
 
				+            continue
			
 
				+        # Handle CSV lists for __in filters
			
 
				+        if key.endswith('__in') and isinstance(value, str):
			
 
				+            value = [v.strip() for v in value.split(',')]
			
 
				+        filters[key] = value
			
 
				+
			
 
				+    if filters:
			
 
				+        queryset = queryset.filter(**filters)
			
 
				+    if limit:
			
 
				+        queryset = queryset[:limit]
			
 
				+
			
 
				+    return queryset
			
--- a/archivebox/cli/tests_piping.py
+++ b/archivebox/cli/tests_piping.py
@@ -957,5 +957,129 @@ class TestEdgeCases(unittest.TestCase):
 
				         self.assertEqual(urls[2], 'https://url3.com')
			
 
				 
			
 
				 
			
 
				+# =============================================================================
			
 
				+# Pass-Through Behavior Tests
			
 
				+# =============================================================================
			
 
				+
			
 
				+class TestPassThroughBehavior(unittest.TestCase):
			
 
				+    """Test pass-through behavior in CLI commands."""
			
 
				+
			
 
				+    def test_crawl_passes_through_other_types(self):
			
 
				+        """crawl create should pass through records with other types."""
			
 
				+        from archivebox.misc.jsonl import TYPE_CRAWL
			
 
				+
			
 
				+        # Input: a Tag record (not a Crawl or URL)
			
 
				+        tag_record = {'type': 'Tag', 'id': 'test-tag', 'name': 'example'}
			
 
				+        url_record = {'url': 'https://example.com'}
			
 
				+
			
 
				+        # Mock stdin with both records
			
 
				+        stdin = StringIO(
			
 
				+            json.dumps(tag_record) + '\n' +
			
 
				+            json.dumps(url_record)
			
 
				+        )
			
 
				+        stdin.isatty = lambda: False
			
 
				+
			
 
				+        # The Tag should be passed through, the URL should create a Crawl
			
 
				+        # (This is a unit test of the pass-through logic)
			
 
				+        from archivebox.misc.jsonl import read_args_or_stdin
			
 
				+        records = list(read_args_or_stdin((), stream=stdin))
			
 
				+
			
 
				+        self.assertEqual(len(records), 2)
			
 
				+        # First record is a Tag (other type)
			
 
				+        self.assertEqual(records[0]['type'], 'Tag')
			
 
				+        # Second record has a URL
			
 
				+        self.assertIn('url', records[1])
			
 
				+
			
 
				+    def test_snapshot_passes_through_crawl(self):
			
 
				+        """snapshot create should pass through Crawl records."""
			
 
				+        from archivebox.misc.jsonl import TYPE_CRAWL, TYPE_SNAPSHOT
			
 
				+
			
 
				+        crawl_record = {
			
 
				+            'type': TYPE_CRAWL,
			
 
				+            'id': 'test-crawl',
			
 
				+            'urls': 'https://example.com',
			
 
				+        }
			
 
				+
			
 
				+        # Crawl records should be passed through AND create snapshots
			
 
				+        # This tests the accumulation behavior
			
 
				+        self.assertEqual(crawl_record['type'], TYPE_CRAWL)
			
 
				+        self.assertIn('urls', crawl_record)
			
 
				+
			
 
				+    def test_archiveresult_passes_through_snapshot(self):
			
 
				+        """archiveresult create should pass through Snapshot records."""
			
 
				+        from archivebox.misc.jsonl import TYPE_SNAPSHOT
			
 
				+
			
 
				+        snapshot_record = {
			
 
				+            'type': TYPE_SNAPSHOT,
			
 
				+            'id': 'test-snapshot',
			
 
				+            'url': 'https://example.com',
			
 
				+        }
			
 
				+
			
 
				+        # Snapshot records should be passed through
			
 
				+        self.assertEqual(snapshot_record['type'], TYPE_SNAPSHOT)
			
 
				+        self.assertIn('url', snapshot_record)
			
 
				+
			
 
				+    def test_run_passes_through_unknown_types(self):
			
 
				+        """run should pass through records with unknown types."""
			
 
				+        unknown_record = {'type': 'Unknown', 'id': 'test', 'data': 'value'}
			
 
				+
			
 
				+        # Unknown types should be passed through unchanged
			
 
				+        self.assertEqual(unknown_record['type'], 'Unknown')
			
 
				+        self.assertIn('data', unknown_record)
			
 
				+
			
 
				+
			
 
				+class TestPipelineAccumulation(unittest.TestCase):
			
 
				+    """Test that pipelines accumulate records correctly."""
			
 
				+
			
 
				+    def test_full_pipeline_output_types(self):
			
 
				+        """Full pipeline should output all record types."""
			
 
				+        from archivebox.misc.jsonl import TYPE_CRAWL, TYPE_SNAPSHOT, TYPE_ARCHIVERESULT
			
 
				+
			
 
				+        # Simulated pipeline output after: crawl | snapshot | archiveresult | run
			
 
				+        # Should contain Crawl, Snapshot, and ArchiveResult records
			
 
				+        pipeline_output = [
			
 
				+            {'type': TYPE_CRAWL, 'id': 'c1', 'urls': 'https://example.com'},
			
 
				+            {'type': TYPE_SNAPSHOT, 'id': 's1', 'url': 'https://example.com'},
			
 
				+            {'type': TYPE_ARCHIVERESULT, 'id': 'ar1', 'plugin': 'title'},
			
 
				+        ]
			
 
				+
			
 
				+        types = {r['type'] for r in pipeline_output}
			
 
				+        self.assertIn(TYPE_CRAWL, types)
			
 
				+        self.assertIn(TYPE_SNAPSHOT, types)
			
 
				+        self.assertIn(TYPE_ARCHIVERESULT, types)
			
 
				+
			
 
				+    def test_pipeline_preserves_ids(self):
			
 
				+        """Pipeline should preserve record IDs through all stages."""
			
 
				+        records = [
			
 
				+            {'type': 'Crawl', 'id': 'c1', 'urls': 'https://example.com'},
			
 
				+            {'type': 'Snapshot', 'id': 's1', 'url': 'https://example.com'},
			
 
				+        ]
			
 
				+
			
 
				+        # All records should have IDs
			
 
				+        for record in records:
			
 
				+            self.assertIn('id', record)
			
 
				+            self.assertTrue(record['id'])
			
 
				+
			
 
				+    def test_jq_transform_pattern(self):
			
 
				+        """Test pattern for jq transforms in pipeline."""
			
 
				+        # Simulated: archiveresult list --status=failed | jq 'del(.id) | .status = "queued"'
			
 
				+        failed_record = {
			
 
				+            'type': 'ArchiveResult',
			
 
				+            'id': 'ar1',
			
 
				+            'status': 'failed',
			
 
				+            'plugin': 'wget',
			
 
				+        }
			
 
				+
			
 
				+        # Transform: delete id, set status to queued
			
 
				+        transformed = {
			
 
				+            'type': failed_record['type'],
			
 
				+            'status': 'queued',
			
 
				+            'plugin': failed_record['plugin'],
			
 
				+        }
			
 
				+
			
 
				+        self.assertNotIn('id', transformed)
			
 
				+        self.assertEqual(transformed['status'], 'queued')
			
 
				+
			
 
				+
			
 
				 if __name__ == '__main__':
			
 
				     unittest.main()
			
--- a/archivebox/core/models.py
+++ b/archivebox/core/models.py
@@ -1460,7 +1460,7 @@ class Snapshot(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHea
 
				             'crawl_id': str(self.crawl_id),
			
 
				             'url': self.url,
			
 
				             'title': self.title,
			
 
				-            'tags': self.tags_str(),
			
 
				+            'tags_str': self.tags_str(),
			
 
				             'bookmarked_at': self.bookmarked_at.isoformat() if self.bookmarked_at else None,
			
 
				             'created_at': self.created_at.isoformat() if self.created_at else None,
			
 
				             'timestamp': self.timestamp,
			
@@ -2418,6 +2418,96 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
 
				         if process and self.process:
			
 
				             yield from self.process.to_jsonl(seen=seen, **kwargs)
			
 
				 
			
 
				+    @classmethod
			
 
				+    def from_jsonl(cls, records, overrides: Dict[str, Any] = None) -> list['ArchiveResult']:
			
 
				+        """
			
 
				+        Create/update ArchiveResults from an iterable of JSONL records.
			
 
				+        Filters to only records with type='ArchiveResult'.
			
 
				+
			
 
				+        Args:
			
 
				+            records: Iterable of dicts (JSONL records)
			
 
				+            overrides: Dict of field overrides
			
 
				+
			
 
				+        Returns:
			
 
				+            List of ArchiveResult instances (skips None results)
			
 
				+        """
			
 
				+        results = []
			
 
				+        for record in records:
			
 
				+            record_type = record.get('type', cls.JSONL_TYPE)
			
 
				+            if record_type == cls.JSONL_TYPE:
			
 
				+                instance = cls.from_json(record, overrides=overrides)
			
 
				+                if instance:
			
 
				+                    results.append(instance)
			
 
				+        return results
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def from_json(record: Dict[str, Any], overrides: Dict[str, Any] = None) -> 'ArchiveResult | None':
			
 
				+        """
			
 
				+        Create or update a single ArchiveResult from a JSON record dict.
			
 
				+
			
 
				+        Args:
			
 
				+            record: Dict with 'snapshot_id' and 'plugin' (required for create),
			
 
				+                    or 'id' (for update)
			
 
				+            overrides: Dict of field overrides (e.g., config overrides)
			
 
				+
			
 
				+        Returns:
			
 
				+            ArchiveResult instance or None if invalid
			
 
				+        """
			
 
				+        from django.utils import timezone
			
 
				+
			
 
				+        overrides = overrides or {}
			
 
				+
			
 
				+        # If 'id' is provided, lookup and update existing
			
 
				+        result_id = record.get('id')
			
 
				+        if result_id:
			
 
				+            try:
			
 
				+                result = ArchiveResult.objects.get(id=result_id)
			
 
				+                # Update fields from record
			
 
				+                if record.get('status'):
			
 
				+                    result.status = record['status']
			
 
				+                    result.retry_at = timezone.now()
			
 
				+                result.save()
			
 
				+                return result
			
 
				+            except ArchiveResult.DoesNotExist:
			
 
				+                pass  # Fall through to create
			
 
				+
			
 
				+        # Required fields for creation
			
 
				+        snapshot_id = record.get('snapshot_id')
			
 
				+        plugin = record.get('plugin')
			
 
				+
			
 
				+        if not snapshot_id or not plugin:
			
 
				+            return None
			
 
				+
			
 
				+        try:
			
 
				+            snapshot = Snapshot.objects.get(id=snapshot_id)
			
 
				+        except Snapshot.DoesNotExist:
			
 
				+            return None
			
 
				+
			
 
				+        # Check if result already exists for this snapshot+plugin
			
 
				+        existing = ArchiveResult.objects.filter(
			
 
				+            snapshot=snapshot,
			
 
				+            plugin=plugin,
			
 
				+        ).first()
			
 
				+
			
 
				+        if existing:
			
 
				+            # Update existing result if status provided
			
 
				+            if record.get('status'):
			
 
				+                existing.status = record['status']
			
 
				+                existing.retry_at = timezone.now()
			
 
				+                existing.save()
			
 
				+            return existing
			
 
				+
			
 
				+        # Create new ArchiveResult
			
 
				+        result = ArchiveResult(
			
 
				+            snapshot=snapshot,
			
 
				+            plugin=plugin,
			
 
				+            status=record.get('status', ArchiveResult.StatusChoices.QUEUED),
			
 
				+            retry_at=timezone.now(),
			
 
				+            hook_name=record.get('hook_name', ''),
			
 
				+        )
			
 
				+        result.save()
			
 
				+        return result
			
 
				+
			
 
				     def save(self, *args, **kwargs):
			
 
				         is_new = self._state.adding
			
 
				 
			
--- a/archivebox/tests/conftest.py
+++ b/archivebox/tests/conftest.py
@@ -0,0 +1,218 @@
 
				+"""archivebox/tests/conftest.py - Pytest fixtures for CLI tests."""
			
 
				+
			
 
				+import os
			
 
				+import sys
			
 
				+import json
			
 
				+import subprocess
			
 
				+from pathlib import Path
			
 
				+from typing import List, Dict, Any, Optional, Tuple
			
 
				+
			
 
				+import pytest
			
 
				+
			
 
				+
			
 
				+# =============================================================================
			
 
				+# Fixtures
			
 
				+# =============================================================================
			
 
				+
			
 
				[email protected]
			
 
				+def isolated_data_dir(tmp_path, settings):
			
 
				+    """
			
 
				+    Create isolated DATA_DIR for each test.
			
 
				+
			
 
				+    Uses tmp_path for isolation, configures Django settings.
			
 
				+    """
			
 
				+    data_dir = tmp_path / 'archivebox_data'
			
 
				+    data_dir.mkdir()
			
 
				+
			
 
				+    # Set environment for subprocess calls
			
 
				+    os.environ['DATA_DIR'] = str(data_dir)
			
 
				+
			
 
				+    # Update Django settings
			
 
				+    settings.DATA_DIR = data_dir
			
 
				+
			
 
				+    yield data_dir
			
 
				+
			
 
				+    # Cleanup handled by tmp_path fixture
			
 
				+
			
 
				+
			
 
				[email protected]
			
 
				+def initialized_archive(isolated_data_dir):
			
 
				+    """
			
 
				+    Initialize ArchiveBox archive in isolated directory.
			
 
				+
			
 
				+    Runs `archivebox init` to set up database and directories.
			
 
				+    """
			
 
				+    from archivebox.cli.archivebox_init import init
			
 
				+    init(setup=True, quick=True)
			
 
				+    return isolated_data_dir
			
 
				+
			
 
				+
			
 
				[email protected]
			
 
				+def cli_env(initialized_archive):
			
 
				+    """
			
 
				+    Environment dict for CLI subprocess calls.
			
 
				+
			
 
				+    Includes DATA_DIR and disables slow extractors.
			
 
				+    """
			
 
				+    return {
			
 
				+        **os.environ,
			
 
				+        'DATA_DIR': str(initialized_archive),
			
 
				+        'USE_COLOR': 'False',
			
 
				+        'SHOW_PROGRESS': 'False',
			
 
				+        'SAVE_TITLE': 'True',
			
 
				+        'SAVE_FAVICON': 'False',
			
 
				+        'SAVE_WGET': 'False',
			
 
				+        'SAVE_WARC': 'False',
			
 
				+        'SAVE_PDF': 'False',
			
 
				+        'SAVE_SCREENSHOT': 'False',
			
 
				+        'SAVE_DOM': 'False',
			
 
				+        'SAVE_SINGLEFILE': 'False',
			
 
				+        'SAVE_READABILITY': 'False',
			
 
				+        'SAVE_MERCURY': 'False',
			
 
				+        'SAVE_GIT': 'False',
			
 
				+        'SAVE_YTDLP': 'False',
			
 
				+        'SAVE_HEADERS': 'False',
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+# =============================================================================
			
 
				+# CLI Helpers
			
 
				+# =============================================================================
			
 
				+
			
 
				+def run_archivebox_cmd(
			
 
				+    args: List[str],
			
 
				+    stdin: Optional[str] = None,
			
 
				+    cwd: Optional[Path] = None,
			
 
				+    env: Optional[Dict[str, str]] = None,
			
 
				+    timeout: int = 60,
			
 
				+) -> Tuple[str, str, int]:
			
 
				+    """
			
 
				+    Run archivebox command, return (stdout, stderr, returncode).
			
 
				+
			
 
				+    Args:
			
 
				+        args: Command arguments (e.g., ['crawl', 'create', 'https://example.com'])
			
 
				+        stdin: Optional string to pipe to stdin
			
 
				+        cwd: Working directory (defaults to DATA_DIR from env)
			
 
				+        env: Environment variables (defaults to os.environ with DATA_DIR)
			
 
				+        timeout: Command timeout in seconds
			
 
				+
			
 
				+    Returns:
			
 
				+        Tuple of (stdout, stderr, returncode)
			
 
				+    """
			
 
				+    cmd = [sys.executable, '-m', 'archivebox'] + args
			
 
				+
			
 
				+    env = env or {**os.environ}
			
 
				+    cwd = cwd or Path(env.get('DATA_DIR', '.'))
			
 
				+
			
 
				+    result = subprocess.run(
			
 
				+        cmd,
			
 
				+        input=stdin,
			
 
				+        capture_output=True,
			
 
				+        text=True,
			
 
				+        cwd=cwd,
			
 
				+        env=env,
			
 
				+        timeout=timeout,
			
 
				+    )
			
 
				+
			
 
				+    return result.stdout, result.stderr, result.returncode
			
 
				+
			
 
				+
			
 
				+# =============================================================================
			
 
				+# Output Assertions
			
 
				+# =============================================================================
			
 
				+
			
 
				+def parse_jsonl_output(stdout: str) -> List[Dict[str, Any]]:
			
 
				+    """Parse JSONL output into list of dicts."""
			
 
				+    records = []
			
 
				+    for line in stdout.strip().split('\n'):
			
 
				+        line = line.strip()
			
 
				+        if line and line.startswith('{'):
			
 
				+            try:
			
 
				+                records.append(json.loads(line))
			
 
				+            except json.JSONDecodeError:
			
 
				+                pass
			
 
				+    return records
			
 
				+
			
 
				+
			
 
				+def assert_jsonl_contains_type(stdout: str, record_type: str, min_count: int = 1):
			
 
				+    """Assert output contains at least min_count records of type."""
			
 
				+    records = parse_jsonl_output(stdout)
			
 
				+    matching = [r for r in records if r.get('type') == record_type]
			
 
				+    assert len(matching) >= min_count, \
			
 
				+        f"Expected >= {min_count} {record_type}, got {len(matching)}"
			
 
				+    return matching
			
 
				+
			
 
				+
			
 
				+def assert_jsonl_pass_through(stdout: str, input_records: List[Dict[str, Any]]):
			
 
				+    """Assert that input records appear in output (pass-through behavior)."""
			
 
				+    output_records = parse_jsonl_output(stdout)
			
 
				+    output_ids = {r.get('id') for r in output_records if r.get('id')}
			
 
				+
			
 
				+    for input_rec in input_records:
			
 
				+        input_id = input_rec.get('id')
			
 
				+        if input_id:
			
 
				+            assert input_id in output_ids, \
			
 
				+                f"Input record {input_id} not found in output (pass-through failed)"
			
 
				+
			
 
				+
			
 
				+def assert_record_has_fields(record: Dict[str, Any], required_fields: List[str]):
			
 
				+    """Assert record has all required fields with non-None values."""
			
 
				+    for field in required_fields:
			
 
				+        assert field in record, f"Record missing field: {field}"
			
 
				+        assert record[field] is not None, f"Record field is None: {field}"
			
 
				+
			
 
				+
			
 
				+# =============================================================================
			
 
				+# Database Assertions
			
 
				+# =============================================================================
			
 
				+
			
 
				+def assert_db_count(model_class, filters: Dict[str, Any], expected: int):
			
 
				+    """Assert database count matches expected."""
			
 
				+    actual = model_class.objects.filter(**filters).count()
			
 
				+    assert actual == expected, \
			
 
				+        f"Expected {expected} {model_class.__name__}, got {actual}"
			
 
				+
			
 
				+
			
 
				+def assert_db_exists(model_class, **filters):
			
 
				+    """Assert at least one record exists matching filters."""
			
 
				+    assert model_class.objects.filter(**filters).exists(), \
			
 
				+        f"No {model_class.__name__} found matching {filters}"
			
 
				+
			
 
				+
			
 
				+# =============================================================================
			
 
				+# Test Data Factories
			
 
				+# =============================================================================
			
 
				+
			
 
				+def create_test_url(domain: str = 'example.com', path: str = None) -> str:
			
 
				+    """Generate unique test URL."""
			
 
				+    import uuid
			
 
				+    path = path or uuid.uuid4().hex[:8]
			
 
				+    return f'https://{domain}/{path}'
			
 
				+
			
 
				+
			
 
				+def create_test_crawl_json(urls: List[str] = None, **kwargs) -> Dict[str, Any]:
			
 
				+    """Create Crawl JSONL record for testing."""
			
 
				+    from archivebox.misc.jsonl import TYPE_CRAWL
			
 
				+
			
 
				+    urls = urls or [create_test_url()]
			
 
				+    return {
			
 
				+        'type': TYPE_CRAWL,
			
 
				+        'urls': '\n'.join(urls),
			
 
				+        'max_depth': kwargs.get('max_depth', 0),
			
 
				+        'tags_str': kwargs.get('tags_str', ''),
			
 
				+        'status': kwargs.get('status', 'queued'),
			
 
				+        **{k: v for k, v in kwargs.items() if k not in ('max_depth', 'tags_str', 'status')},
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+def create_test_snapshot_json(url: str = None, **kwargs) -> Dict[str, Any]:
			
 
				+    """Create Snapshot JSONL record for testing."""
			
 
				+    from archivebox.misc.jsonl import TYPE_SNAPSHOT
			
 
				+
			
 
				+    return {
			
 
				+        'type': TYPE_SNAPSHOT,
			
 
				+        'url': url or create_test_url(),
			
 
				+        'tags_str': kwargs.get('tags_str', ''),
			
 
				+        'status': kwargs.get('status', 'queued'),
			
 
				+        **{k: v for k, v in kwargs.items() if k not in ('tags_str', 'status')},
			
 
				+    }
			
--- a/archivebox/tests/test_cli_archiveresult.py
+++ b/archivebox/tests/test_cli_archiveresult.py
@@ -0,0 +1,264 @@
 
				+"""
			
 
				+Tests for archivebox archiveresult CLI command.
			
 
				+
			
 
				+Tests cover:
			
 
				+- archiveresult create (from Snapshot JSONL, with --plugin, pass-through)
			
 
				+- archiveresult list (with filters)
			
 
				+- archiveresult update
			
 
				+- archiveresult delete
			
 
				+"""
			
 
				+
			
 
				+import json
			
 
				+import pytest
			
 
				+
			
 
				+from archivebox.tests.conftest import (
			
 
				+    run_archivebox_cmd,
			
 
				+    parse_jsonl_output,
			
 
				+    create_test_url,
			
 
				+)
			
 
				+
			
 
				+
			
 
				+class TestArchiveResultCreate:
			
 
				+    """Tests for `archivebox archiveresult create`."""
			
 
				+
			
 
				+    def test_create_from_snapshot_jsonl(self, cli_env, initialized_archive):
			
 
				+        """Create archive results from Snapshot JSONL input."""
			
 
				+        url = create_test_url()
			
 
				+
			
 
				+        # Create a snapshot first
			
 
				+        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
			
 
				+        snapshot = parse_jsonl_output(stdout1)[0]
			
 
				+
			
 
				+        # Pipe snapshot to archiveresult create
			
 
				+        stdout2, stderr, code = run_archivebox_cmd(
			
 
				+            ['archiveresult', 'create', '--plugin=title'],
			
 
				+            stdin=json.dumps(snapshot),
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0, f"Command failed: {stderr}"
			
 
				+
			
 
				+        records = parse_jsonl_output(stdout2)
			
 
				+        # Should have the Snapshot passed through and ArchiveResult created
			
 
				+        types = [r.get('type') for r in records]
			
 
				+        assert 'Snapshot' in types
			
 
				+        assert 'ArchiveResult' in types
			
 
				+
			
 
				+        ar = next(r for r in records if r['type'] == 'ArchiveResult')
			
 
				+        assert ar['plugin'] == 'title'
			
 
				+
			
 
				+    def test_create_with_specific_plugin(self, cli_env, initialized_archive):
			
 
				+        """Create archive result for specific plugin."""
			
 
				+        url = create_test_url()
			
 
				+        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
			
 
				+        snapshot = parse_jsonl_output(stdout1)[0]
			
 
				+
			
 
				+        stdout2, stderr, code = run_archivebox_cmd(
			
 
				+            ['archiveresult', 'create', '--plugin=screenshot'],
			
 
				+            stdin=json.dumps(snapshot),
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        records = parse_jsonl_output(stdout2)
			
 
				+        ar_records = [r for r in records if r.get('type') == 'ArchiveResult']
			
 
				+        assert len(ar_records) >= 1
			
 
				+        assert ar_records[0]['plugin'] == 'screenshot'
			
 
				+
			
 
				+    def test_create_pass_through_crawl(self, cli_env, initialized_archive):
			
 
				+        """Pass-through Crawl records unchanged."""
			
 
				+        url = create_test_url()
			
 
				+
			
 
				+        # Create crawl and snapshot
			
 
				+        stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], env=cli_env)
			
 
				+        crawl = parse_jsonl_output(stdout1)[0]
			
 
				+
			
 
				+        stdout2, _, _ = run_archivebox_cmd(
			
 
				+            ['snapshot', 'create'],
			
 
				+            stdin=json.dumps(crawl),
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        # Now pipe all to archiveresult create
			
 
				+        stdout3, stderr, code = run_archivebox_cmd(
			
 
				+            ['archiveresult', 'create', '--plugin=title'],
			
 
				+            stdin=stdout2,
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        records = parse_jsonl_output(stdout3)
			
 
				+
			
 
				+        types = [r.get('type') for r in records]
			
 
				+        assert 'Crawl' in types
			
 
				+        assert 'Snapshot' in types
			
 
				+        assert 'ArchiveResult' in types
			
 
				+
			
 
				+    def test_create_pass_through_only_when_no_snapshots(self, cli_env, initialized_archive):
			
 
				+        """Only pass-through records but no new snapshots returns success."""
			
 
				+        crawl_record = {'type': 'Crawl', 'id': 'fake-id', 'urls': 'https://example.com'}
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['archiveresult', 'create'],
			
 
				+            stdin=json.dumps(crawl_record),
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        assert 'Passed through' in stderr
			
 
				+
			
 
				+
			
 
				+class TestArchiveResultList:
			
 
				+    """Tests for `archivebox archiveresult list`."""
			
 
				+
			
 
				+    def test_list_empty(self, cli_env, initialized_archive):
			
 
				+        """List with no archive results returns empty."""
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['archiveresult', 'list'],
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        assert 'Listed 0 archive results' in stderr
			
 
				+
			
 
				+    def test_list_filter_by_status(self, cli_env, initialized_archive):
			
 
				+        """Filter archive results by status."""
			
 
				+        # Create snapshot and archive result
			
 
				+        url = create_test_url()
			
 
				+        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
			
 
				+        snapshot = parse_jsonl_output(stdout1)[0]
			
 
				+        run_archivebox_cmd(
			
 
				+            ['archiveresult', 'create', '--plugin=title'],
			
 
				+            stdin=json.dumps(snapshot),
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['archiveresult', 'list', '--status=queued'],
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        records = parse_jsonl_output(stdout)
			
 
				+        for r in records:
			
 
				+            assert r['status'] == 'queued'
			
 
				+
			
 
				+    def test_list_filter_by_plugin(self, cli_env, initialized_archive):
			
 
				+        """Filter archive results by plugin."""
			
 
				+        url = create_test_url()
			
 
				+        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
			
 
				+        snapshot = parse_jsonl_output(stdout1)[0]
			
 
				+        run_archivebox_cmd(
			
 
				+            ['archiveresult', 'create', '--plugin=title'],
			
 
				+            stdin=json.dumps(snapshot),
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['archiveresult', 'list', '--plugin=title'],
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        records = parse_jsonl_output(stdout)
			
 
				+        for r in records:
			
 
				+            assert r['plugin'] == 'title'
			
 
				+
			
 
				+    def test_list_with_limit(self, cli_env, initialized_archive):
			
 
				+        """Limit number of results."""
			
 
				+        # Create multiple archive results
			
 
				+        for _ in range(3):
			
 
				+            url = create_test_url()
			
 
				+            stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
			
 
				+            snapshot = parse_jsonl_output(stdout1)[0]
			
 
				+            run_archivebox_cmd(
			
 
				+                ['archiveresult', 'create', '--plugin=title'],
			
 
				+                stdin=json.dumps(snapshot),
			
 
				+                env=cli_env,
			
 
				+            )
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['archiveresult', 'list', '--limit=2'],
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        records = parse_jsonl_output(stdout)
			
 
				+        assert len(records) == 2
			
 
				+
			
 
				+
			
 
				+class TestArchiveResultUpdate:
			
 
				+    """Tests for `archivebox archiveresult update`."""
			
 
				+
			
 
				+    def test_update_status(self, cli_env, initialized_archive):
			
 
				+        """Update archive result status."""
			
 
				+        url = create_test_url()
			
 
				+        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
			
 
				+        snapshot = parse_jsonl_output(stdout1)[0]
			
 
				+
			
 
				+        stdout2, _, _ = run_archivebox_cmd(
			
 
				+            ['archiveresult', 'create', '--plugin=title'],
			
 
				+            stdin=json.dumps(snapshot),
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+        ar = next(r for r in parse_jsonl_output(stdout2) if r.get('type') == 'ArchiveResult')
			
 
				+
			
 
				+        stdout3, stderr, code = run_archivebox_cmd(
			
 
				+            ['archiveresult', 'update', '--status=failed'],
			
 
				+            stdin=json.dumps(ar),
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        assert 'Updated 1 archive results' in stderr
			
 
				+
			
 
				+        records = parse_jsonl_output(stdout3)
			
 
				+        assert records[0]['status'] == 'failed'
			
 
				+
			
 
				+
			
 
				+class TestArchiveResultDelete:
			
 
				+    """Tests for `archivebox archiveresult delete`."""
			
 
				+
			
 
				+    def test_delete_requires_yes(self, cli_env, initialized_archive):
			
 
				+        """Delete requires --yes flag."""
			
 
				+        url = create_test_url()
			
 
				+        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
			
 
				+        snapshot = parse_jsonl_output(stdout1)[0]
			
 
				+
			
 
				+        stdout2, _, _ = run_archivebox_cmd(
			
 
				+            ['archiveresult', 'create', '--plugin=title'],
			
 
				+            stdin=json.dumps(snapshot),
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+        ar = next(r for r in parse_jsonl_output(stdout2) if r.get('type') == 'ArchiveResult')
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['archiveresult', 'delete'],
			
 
				+            stdin=json.dumps(ar),
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 1
			
 
				+        assert '--yes' in stderr
			
 
				+
			
 
				+    def test_delete_with_yes(self, cli_env, initialized_archive):
			
 
				+        """Delete with --yes flag works."""
			
 
				+        url = create_test_url()
			
 
				+        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
			
 
				+        snapshot = parse_jsonl_output(stdout1)[0]
			
 
				+
			
 
				+        stdout2, _, _ = run_archivebox_cmd(
			
 
				+            ['archiveresult', 'create', '--plugin=title'],
			
 
				+            stdin=json.dumps(snapshot),
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+        ar = next(r for r in parse_jsonl_output(stdout2) if r.get('type') == 'ArchiveResult')
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['archiveresult', 'delete', '--yes'],
			
 
				+            stdin=json.dumps(ar),
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        assert 'Deleted 1 archive results' in stderr
			
--- a/archivebox/tests/test_cli_crawl.py
+++ b/archivebox/tests/test_cli_crawl.py
@@ -0,0 +1,261 @@
 
				+"""
			
 
				+Tests for archivebox crawl CLI command.
			
 
				+
			
 
				+Tests cover:
			
 
				+- crawl create (with URLs, from stdin, pass-through)
			
 
				+- crawl list (with filters)
			
 
				+- crawl update
			
 
				+- crawl delete
			
 
				+"""
			
 
				+
			
 
				+import json
			
 
				+import pytest
			
 
				+
			
 
				+from archivebox.tests.conftest import (
			
 
				+    run_archivebox_cmd,
			
 
				+    parse_jsonl_output,
			
 
				+    assert_jsonl_contains_type,
			
 
				+    create_test_url,
			
 
				+    create_test_crawl_json,
			
 
				+)
			
 
				+
			
 
				+
			
 
				+class TestCrawlCreate:
			
 
				+    """Tests for `archivebox crawl create`."""
			
 
				+
			
 
				+    def test_create_from_url_args(self, cli_env, initialized_archive):
			
 
				+        """Create crawl from URL arguments."""
			
 
				+        url = create_test_url()
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['crawl', 'create', url],
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0, f"Command failed: {stderr}"
			
 
				+        assert 'Created crawl' in stderr
			
 
				+
			
 
				+        # Check JSONL output
			
 
				+        records = parse_jsonl_output(stdout)
			
 
				+        assert len(records) == 1
			
 
				+        assert records[0]['type'] == 'Crawl'
			
 
				+        assert url in records[0]['urls']
			
 
				+
			
 
				+    def test_create_from_stdin_urls(self, cli_env, initialized_archive):
			
 
				+        """Create crawl from stdin URLs (one per line)."""
			
 
				+        urls = [create_test_url() for _ in range(3)]
			
 
				+        stdin = '\n'.join(urls)
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['crawl', 'create'],
			
 
				+            stdin=stdin,
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0, f"Command failed: {stderr}"
			
 
				+
			
 
				+        records = parse_jsonl_output(stdout)
			
 
				+        assert len(records) == 1
			
 
				+        crawl = records[0]
			
 
				+        assert crawl['type'] == 'Crawl'
			
 
				+        # All URLs should be in the crawl
			
 
				+        for url in urls:
			
 
				+            assert url in crawl['urls']
			
 
				+
			
 
				+    def test_create_with_depth(self, cli_env, initialized_archive):
			
 
				+        """Create crawl with --depth flag."""
			
 
				+        url = create_test_url()
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['crawl', 'create', '--depth=2', url],
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        records = parse_jsonl_output(stdout)
			
 
				+        assert records[0]['max_depth'] == 2
			
 
				+
			
 
				+    def test_create_with_tag(self, cli_env, initialized_archive):
			
 
				+        """Create crawl with --tag flag."""
			
 
				+        url = create_test_url()
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['crawl', 'create', '--tag=test-tag', url],
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        records = parse_jsonl_output(stdout)
			
 
				+        assert 'test-tag' in records[0].get('tags_str', '')
			
 
				+
			
 
				+    def test_create_pass_through_other_types(self, cli_env, initialized_archive):
			
 
				+        """Pass-through records of other types unchanged."""
			
 
				+        tag_record = {'type': 'Tag', 'id': 'fake-tag-id', 'name': 'test'}
			
 
				+        url = create_test_url()
			
 
				+        stdin = json.dumps(tag_record) + '\n' + json.dumps({'url': url})
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['crawl', 'create'],
			
 
				+            stdin=stdin,
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        records = parse_jsonl_output(stdout)
			
 
				+
			
 
				+        # Should have both the passed-through Tag and the new Crawl
			
 
				+        types = [r.get('type') for r in records]
			
 
				+        assert 'Tag' in types
			
 
				+        assert 'Crawl' in types
			
 
				+
			
 
				+    def test_create_pass_through_existing_crawl(self, cli_env, initialized_archive):
			
 
				+        """Existing Crawl records (with id) are passed through."""
			
 
				+        # First create a crawl
			
 
				+        url = create_test_url()
			
 
				+        stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], env=cli_env)
			
 
				+        crawl = parse_jsonl_output(stdout1)[0]
			
 
				+
			
 
				+        # Now pipe it back - should pass through
			
 
				+        stdout2, stderr, code = run_archivebox_cmd(
			
 
				+            ['crawl', 'create'],
			
 
				+            stdin=json.dumps(crawl),
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        records = parse_jsonl_output(stdout2)
			
 
				+        assert len(records) == 1
			
 
				+        assert records[0]['id'] == crawl['id']
			
 
				+
			
 
				+
			
 
				+class TestCrawlList:
			
 
				+    """Tests for `archivebox crawl list`."""
			
 
				+
			
 
				+    def test_list_empty(self, cli_env, initialized_archive):
			
 
				+        """List with no crawls returns empty."""
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['crawl', 'list'],
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        assert 'Listed 0 crawls' in stderr
			
 
				+
			
 
				+    def test_list_returns_created(self, cli_env, initialized_archive):
			
 
				+        """List returns previously created crawls."""
			
 
				+        url = create_test_url()
			
 
				+        run_archivebox_cmd(['crawl', 'create', url], env=cli_env)
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['crawl', 'list'],
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        records = parse_jsonl_output(stdout)
			
 
				+        assert len(records) >= 1
			
 
				+        assert any(url in r.get('urls', '') for r in records)
			
 
				+
			
 
				+    def test_list_filter_by_status(self, cli_env, initialized_archive):
			
 
				+        """Filter crawls by status."""
			
 
				+        url = create_test_url()
			
 
				+        run_archivebox_cmd(['crawl', 'create', url], env=cli_env)
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['crawl', 'list', '--status=queued'],
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        records = parse_jsonl_output(stdout)
			
 
				+        for r in records:
			
 
				+            assert r['status'] == 'queued'
			
 
				+
			
 
				+    def test_list_with_limit(self, cli_env, initialized_archive):
			
 
				+        """Limit number of results."""
			
 
				+        # Create multiple crawls
			
 
				+        for _ in range(3):
			
 
				+            run_archivebox_cmd(['crawl', 'create', create_test_url()], env=cli_env)
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['crawl', 'list', '--limit=2'],
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        records = parse_jsonl_output(stdout)
			
 
				+        assert len(records) == 2
			
 
				+
			
 
				+
			
 
				+class TestCrawlUpdate:
			
 
				+    """Tests for `archivebox crawl update`."""
			
 
				+
			
 
				+    def test_update_status(self, cli_env, initialized_archive):
			
 
				+        """Update crawl status."""
			
 
				+        # Create a crawl
			
 
				+        url = create_test_url()
			
 
				+        stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], env=cli_env)
			
 
				+        crawl = parse_jsonl_output(stdout1)[0]
			
 
				+
			
 
				+        # Update it
			
 
				+        stdout2, stderr, code = run_archivebox_cmd(
			
 
				+            ['crawl', 'update', '--status=started'],
			
 
				+            stdin=json.dumps(crawl),
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        assert 'Updated 1 crawls' in stderr
			
 
				+
			
 
				+        records = parse_jsonl_output(stdout2)
			
 
				+        assert records[0]['status'] == 'started'
			
 
				+
			
 
				+
			
 
				+class TestCrawlDelete:
			
 
				+    """Tests for `archivebox crawl delete`."""
			
 
				+
			
 
				+    def test_delete_requires_yes(self, cli_env, initialized_archive):
			
 
				+        """Delete requires --yes flag."""
			
 
				+        url = create_test_url()
			
 
				+        stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], env=cli_env)
			
 
				+        crawl = parse_jsonl_output(stdout1)[0]
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['crawl', 'delete'],
			
 
				+            stdin=json.dumps(crawl),
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 1
			
 
				+        assert '--yes' in stderr
			
 
				+
			
 
				+    def test_delete_with_yes(self, cli_env, initialized_archive):
			
 
				+        """Delete with --yes flag works."""
			
 
				+        url = create_test_url()
			
 
				+        stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], env=cli_env)
			
 
				+        crawl = parse_jsonl_output(stdout1)[0]
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['crawl', 'delete', '--yes'],
			
 
				+            stdin=json.dumps(crawl),
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        assert 'Deleted 1 crawls' in stderr
			
 
				+
			
 
				+    def test_delete_dry_run(self, cli_env, initialized_archive):
			
 
				+        """Dry run shows what would be deleted."""
			
 
				+        url = create_test_url()
			
 
				+        stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], env=cli_env)
			
 
				+        crawl = parse_jsonl_output(stdout1)[0]
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['crawl', 'delete', '--dry-run'],
			
 
				+            stdin=json.dumps(crawl),
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        assert 'Would delete' in stderr
			
 
				+        assert 'dry run' in stderr.lower()
			
--- a/archivebox/tests/test_cli_run.py
+++ b/archivebox/tests/test_cli_run.py
@@ -0,0 +1,254 @@
 
				+"""
			
 
				+Tests for archivebox run CLI command.
			
 
				+
			
 
				+Tests cover:
			
 
				+- run with stdin JSONL (Crawl, Snapshot, ArchiveResult)
			
 
				+- create-or-update behavior (records with/without id)
			
 
				+- pass-through output (for chaining)
			
 
				+"""
			
 
				+
			
 
				+import json
			
 
				+import pytest
			
 
				+
			
 
				+from archivebox.tests.conftest import (
			
 
				+    run_archivebox_cmd,
			
 
				+    parse_jsonl_output,
			
 
				+    create_test_url,
			
 
				+    create_test_crawl_json,
			
 
				+    create_test_snapshot_json,
			
 
				+)
			
 
				+
			
 
				+
			
 
				+class TestRunWithCrawl:
			
 
				+    """Tests for `archivebox run` with Crawl input."""
			
 
				+
			
 
				+    def test_run_with_new_crawl(self, cli_env, initialized_archive):
			
 
				+        """Run creates and processes a new Crawl (no id)."""
			
 
				+        crawl_record = create_test_crawl_json()
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['run'],
			
 
				+            stdin=json.dumps(crawl_record),
			
 
				+            env=cli_env,
			
 
				+            timeout=120,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0, f"Command failed: {stderr}"
			
 
				+
			
 
				+        # Should output the created Crawl
			
 
				+        records = parse_jsonl_output(stdout)
			
 
				+        crawl_records = [r for r in records if r.get('type') == 'Crawl']
			
 
				+        assert len(crawl_records) >= 1
			
 
				+        assert crawl_records[0].get('id')  # Should have an id now
			
 
				+
			
 
				+    def test_run_with_existing_crawl(self, cli_env, initialized_archive):
			
 
				+        """Run re-queues an existing Crawl (with id)."""
			
 
				+        url = create_test_url()
			
 
				+
			
 
				+        # First create a crawl
			
 
				+        stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], env=cli_env)
			
 
				+        crawl = parse_jsonl_output(stdout1)[0]
			
 
				+
			
 
				+        # Run with the existing crawl
			
 
				+        stdout2, stderr, code = run_archivebox_cmd(
			
 
				+            ['run'],
			
 
				+            stdin=json.dumps(crawl),
			
 
				+            env=cli_env,
			
 
				+            timeout=120,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        records = parse_jsonl_output(stdout2)
			
 
				+        assert len(records) >= 1
			
 
				+
			
 
				+
			
 
				+class TestRunWithSnapshot:
			
 
				+    """Tests for `archivebox run` with Snapshot input."""
			
 
				+
			
 
				+    def test_run_with_new_snapshot(self, cli_env, initialized_archive):
			
 
				+        """Run creates and processes a new Snapshot (no id, just url)."""
			
 
				+        snapshot_record = create_test_snapshot_json()
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['run'],
			
 
				+            stdin=json.dumps(snapshot_record),
			
 
				+            env=cli_env,
			
 
				+            timeout=120,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0, f"Command failed: {stderr}"
			
 
				+
			
 
				+        records = parse_jsonl_output(stdout)
			
 
				+        snapshot_records = [r for r in records if r.get('type') == 'Snapshot']
			
 
				+        assert len(snapshot_records) >= 1
			
 
				+        assert snapshot_records[0].get('id')
			
 
				+
			
 
				+    def test_run_with_existing_snapshot(self, cli_env, initialized_archive):
			
 
				+        """Run re-queues an existing Snapshot (with id)."""
			
 
				+        url = create_test_url()
			
 
				+
			
 
				+        # First create a snapshot
			
 
				+        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
			
 
				+        snapshot = parse_jsonl_output(stdout1)[0]
			
 
				+
			
 
				+        # Run with the existing snapshot
			
 
				+        stdout2, stderr, code = run_archivebox_cmd(
			
 
				+            ['run'],
			
 
				+            stdin=json.dumps(snapshot),
			
 
				+            env=cli_env,
			
 
				+            timeout=120,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        records = parse_jsonl_output(stdout2)
			
 
				+        assert len(records) >= 1
			
 
				+
			
 
				+    def test_run_with_plain_url(self, cli_env, initialized_archive):
			
 
				+        """Run accepts plain URL records (no type field)."""
			
 
				+        url = create_test_url()
			
 
				+        url_record = {'url': url}
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['run'],
			
 
				+            stdin=json.dumps(url_record),
			
 
				+            env=cli_env,
			
 
				+            timeout=120,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        records = parse_jsonl_output(stdout)
			
 
				+        assert len(records) >= 1
			
 
				+
			
 
				+
			
 
				+class TestRunWithArchiveResult:
			
 
				+    """Tests for `archivebox run` with ArchiveResult input."""
			
 
				+
			
 
				+    def test_run_requeues_failed_archiveresult(self, cli_env, initialized_archive):
			
 
				+        """Run re-queues a failed ArchiveResult."""
			
 
				+        url = create_test_url()
			
 
				+
			
 
				+        # Create snapshot and archive result
			
 
				+        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
			
 
				+        snapshot = parse_jsonl_output(stdout1)[0]
			
 
				+
			
 
				+        stdout2, _, _ = run_archivebox_cmd(
			
 
				+            ['archiveresult', 'create', '--plugin=title'],
			
 
				+            stdin=json.dumps(snapshot),
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+        ar = next(r for r in parse_jsonl_output(stdout2) if r.get('type') == 'ArchiveResult')
			
 
				+
			
 
				+        # Update to failed
			
 
				+        ar['status'] = 'failed'
			
 
				+        run_archivebox_cmd(
			
 
				+            ['archiveresult', 'update', '--status=failed'],
			
 
				+            stdin=json.dumps(ar),
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        # Now run should re-queue it
			
 
				+        stdout3, stderr, code = run_archivebox_cmd(
			
 
				+            ['run'],
			
 
				+            stdin=json.dumps(ar),
			
 
				+            env=cli_env,
			
 
				+            timeout=120,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        records = parse_jsonl_output(stdout3)
			
 
				+        ar_records = [r for r in records if r.get('type') == 'ArchiveResult']
			
 
				+        assert len(ar_records) >= 1
			
 
				+
			
 
				+
			
 
				+class TestRunPassThrough:
			
 
				+    """Tests for pass-through behavior in `archivebox run`."""
			
 
				+
			
 
				+    def test_run_passes_through_unknown_types(self, cli_env, initialized_archive):
			
 
				+        """Run passes through records with unknown types."""
			
 
				+        unknown_record = {'type': 'Unknown', 'id': 'fake-id', 'data': 'test'}
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['run'],
			
 
				+            stdin=json.dumps(unknown_record),
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        records = parse_jsonl_output(stdout)
			
 
				+        unknown_records = [r for r in records if r.get('type') == 'Unknown']
			
 
				+        assert len(unknown_records) == 1
			
 
				+        assert unknown_records[0]['data'] == 'test'
			
 
				+
			
 
				+    def test_run_outputs_all_processed_records(self, cli_env, initialized_archive):
			
 
				+        """Run outputs all processed records for chaining."""
			
 
				+        url = create_test_url()
			
 
				+        crawl_record = create_test_crawl_json(urls=[url])
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['run'],
			
 
				+            stdin=json.dumps(crawl_record),
			
 
				+            env=cli_env,
			
 
				+            timeout=120,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        records = parse_jsonl_output(stdout)
			
 
				+        # Should have at least the Crawl in output
			
 
				+        assert len(records) >= 1
			
 
				+
			
 
				+
			
 
				+class TestRunMixedInput:
			
 
				+    """Tests for `archivebox run` with mixed record types."""
			
 
				+
			
 
				+    def test_run_handles_mixed_types(self, cli_env, initialized_archive):
			
 
				+        """Run handles mixed Crawl/Snapshot/ArchiveResult input."""
			
 
				+        crawl = create_test_crawl_json()
			
 
				+        snapshot = create_test_snapshot_json()
			
 
				+        unknown = {'type': 'Tag', 'id': 'fake', 'name': 'test'}
			
 
				+
			
 
				+        stdin = '\n'.join([
			
 
				+            json.dumps(crawl),
			
 
				+            json.dumps(snapshot),
			
 
				+            json.dumps(unknown),
			
 
				+        ])
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['run'],
			
 
				+            stdin=stdin,
			
 
				+            env=cli_env,
			
 
				+            timeout=120,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        records = parse_jsonl_output(stdout)
			
 
				+
			
 
				+        types = set(r.get('type') for r in records)
			
 
				+        # Should have processed Crawl and Snapshot, passed through Tag
			
 
				+        assert 'Crawl' in types or 'Snapshot' in types or 'Tag' in types
			
 
				+
			
 
				+
			
 
				+class TestRunEmpty:
			
 
				+    """Tests for `archivebox run` edge cases."""
			
 
				+
			
 
				+    def test_run_empty_stdin(self, cli_env, initialized_archive):
			
 
				+        """Run with empty stdin returns success."""
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['run'],
			
 
				+            stdin='',
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+
			
 
				+    def test_run_no_records_to_process(self, cli_env, initialized_archive):
			
 
				+        """Run with only pass-through records shows message."""
			
 
				+        unknown = {'type': 'Unknown', 'id': 'fake'}
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['run'],
			
 
				+            stdin=json.dumps(unknown),
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        assert 'No records to process' in stderr
			
--- a/archivebox/tests/test_cli_snapshot.py
+++ b/archivebox/tests/test_cli_snapshot.py
@@ -0,0 +1,274 @@
 
				+"""
			
 
				+Tests for archivebox snapshot CLI command.
			
 
				+
			
 
				+Tests cover:
			
 
				+- snapshot create (from URLs, from Crawl JSONL, pass-through)
			
 
				+- snapshot list (with filters)
			
 
				+- snapshot update
			
 
				+- snapshot delete
			
 
				+"""
			
 
				+
			
 
				+import json
			
 
				+import pytest
			
 
				+
			
 
				+from archivebox.tests.conftest import (
			
 
				+    run_archivebox_cmd,
			
 
				+    parse_jsonl_output,
			
 
				+    assert_jsonl_contains_type,
			
 
				+    create_test_url,
			
 
				+)
			
 
				+
			
 
				+
			
 
				+class TestSnapshotCreate:
			
 
				+    """Tests for `archivebox snapshot create`."""
			
 
				+
			
 
				+    def test_create_from_url_args(self, cli_env, initialized_archive):
			
 
				+        """Create snapshot from URL arguments."""
			
 
				+        url = create_test_url()
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['snapshot', 'create', url],
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0, f"Command failed: {stderr}"
			
 
				+        assert 'Created' in stderr
			
 
				+
			
 
				+        records = parse_jsonl_output(stdout)
			
 
				+        assert len(records) == 1
			
 
				+        assert records[0]['type'] == 'Snapshot'
			
 
				+        assert records[0]['url'] == url
			
 
				+
			
 
				+    def test_create_from_crawl_jsonl(self, cli_env, initialized_archive):
			
 
				+        """Create snapshots from Crawl JSONL input."""
			
 
				+        url = create_test_url()
			
 
				+
			
 
				+        # First create a crawl
			
 
				+        stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], env=cli_env)
			
 
				+        crawl = parse_jsonl_output(stdout1)[0]
			
 
				+
			
 
				+        # Pipe crawl to snapshot create
			
 
				+        stdout2, stderr, code = run_archivebox_cmd(
			
 
				+            ['snapshot', 'create'],
			
 
				+            stdin=json.dumps(crawl),
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0, f"Command failed: {stderr}"
			
 
				+
			
 
				+        records = parse_jsonl_output(stdout2)
			
 
				+        # Should have the Crawl passed through and the Snapshot created
			
 
				+        types = [r.get('type') for r in records]
			
 
				+        assert 'Crawl' in types
			
 
				+        assert 'Snapshot' in types
			
 
				+
			
 
				+        snapshot = next(r for r in records if r['type'] == 'Snapshot')
			
 
				+        assert snapshot['url'] == url
			
 
				+
			
 
				+    def test_create_with_tag(self, cli_env, initialized_archive):
			
 
				+        """Create snapshot with --tag flag."""
			
 
				+        url = create_test_url()
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['snapshot', 'create', '--tag=test-tag', url],
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        records = parse_jsonl_output(stdout)
			
 
				+        assert 'test-tag' in records[0].get('tags_str', '')
			
 
				+
			
 
				+    def test_create_pass_through_other_types(self, cli_env, initialized_archive):
			
 
				+        """Pass-through records of other types unchanged."""
			
 
				+        tag_record = {'type': 'Tag', 'id': 'fake-tag-id', 'name': 'test'}
			
 
				+        url = create_test_url()
			
 
				+        stdin = json.dumps(tag_record) + '\n' + json.dumps({'url': url})
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['snapshot', 'create'],
			
 
				+            stdin=stdin,
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        records = parse_jsonl_output(stdout)
			
 
				+
			
 
				+        types = [r.get('type') for r in records]
			
 
				+        assert 'Tag' in types
			
 
				+        assert 'Snapshot' in types
			
 
				+
			
 
				+    def test_create_multiple_urls(self, cli_env, initialized_archive):
			
 
				+        """Create snapshots from multiple URLs."""
			
 
				+        urls = [create_test_url() for _ in range(3)]
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['snapshot', 'create'] + urls,
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        records = parse_jsonl_output(stdout)
			
 
				+        assert len(records) == 3
			
 
				+
			
 
				+        created_urls = {r['url'] for r in records}
			
 
				+        for url in urls:
			
 
				+            assert url in created_urls
			
 
				+
			
 
				+
			
 
				+class TestSnapshotList:
			
 
				+    """Tests for `archivebox snapshot list`."""
			
 
				+
			
 
				+    def test_list_empty(self, cli_env, initialized_archive):
			
 
				+        """List with no snapshots returns empty."""
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['snapshot', 'list'],
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        assert 'Listed 0 snapshots' in stderr
			
 
				+
			
 
				+    def test_list_returns_created(self, cli_env, initialized_archive):
			
 
				+        """List returns previously created snapshots."""
			
 
				+        url = create_test_url()
			
 
				+        run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['snapshot', 'list'],
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        records = parse_jsonl_output(stdout)
			
 
				+        assert len(records) >= 1
			
 
				+        assert any(r.get('url') == url for r in records)
			
 
				+
			
 
				+    def test_list_filter_by_status(self, cli_env, initialized_archive):
			
 
				+        """Filter snapshots by status."""
			
 
				+        url = create_test_url()
			
 
				+        run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['snapshot', 'list', '--status=queued'],
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        records = parse_jsonl_output(stdout)
			
 
				+        for r in records:
			
 
				+            assert r['status'] == 'queued'
			
 
				+
			
 
				+    def test_list_filter_by_url_contains(self, cli_env, initialized_archive):
			
 
				+        """Filter snapshots by URL contains."""
			
 
				+        url = create_test_url(domain='unique-domain-12345.com')
			
 
				+        run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['snapshot', 'list', '--url__icontains=unique-domain-12345'],
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        records = parse_jsonl_output(stdout)
			
 
				+        assert len(records) == 1
			
 
				+        assert 'unique-domain-12345' in records[0]['url']
			
 
				+
			
 
				+    def test_list_with_limit(self, cli_env, initialized_archive):
			
 
				+        """Limit number of results."""
			
 
				+        for _ in range(3):
			
 
				+            run_archivebox_cmd(['snapshot', 'create', create_test_url()], env=cli_env)
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['snapshot', 'list', '--limit=2'],
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        records = parse_jsonl_output(stdout)
			
 
				+        assert len(records) == 2
			
 
				+
			
 
				+
			
 
				+class TestSnapshotUpdate:
			
 
				+    """Tests for `archivebox snapshot update`."""
			
 
				+
			
 
				+    def test_update_status(self, cli_env, initialized_archive):
			
 
				+        """Update snapshot status."""
			
 
				+        url = create_test_url()
			
 
				+        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
			
 
				+        snapshot = parse_jsonl_output(stdout1)[0]
			
 
				+
			
 
				+        stdout2, stderr, code = run_archivebox_cmd(
			
 
				+            ['snapshot', 'update', '--status=started'],
			
 
				+            stdin=json.dumps(snapshot),
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        assert 'Updated 1 snapshots' in stderr
			
 
				+
			
 
				+        records = parse_jsonl_output(stdout2)
			
 
				+        assert records[0]['status'] == 'started'
			
 
				+
			
 
				+    def test_update_add_tag(self, cli_env, initialized_archive):
			
 
				+        """Update snapshot by adding tag."""
			
 
				+        url = create_test_url()
			
 
				+        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
			
 
				+        snapshot = parse_jsonl_output(stdout1)[0]
			
 
				+
			
 
				+        stdout2, stderr, code = run_archivebox_cmd(
			
 
				+            ['snapshot', 'update', '--tag=new-tag'],
			
 
				+            stdin=json.dumps(snapshot),
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        assert 'Updated 1 snapshots' in stderr
			
 
				+
			
 
				+
			
 
				+class TestSnapshotDelete:
			
 
				+    """Tests for `archivebox snapshot delete`."""
			
 
				+
			
 
				+    def test_delete_requires_yes(self, cli_env, initialized_archive):
			
 
				+        """Delete requires --yes flag."""
			
 
				+        url = create_test_url()
			
 
				+        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
			
 
				+        snapshot = parse_jsonl_output(stdout1)[0]
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['snapshot', 'delete'],
			
 
				+            stdin=json.dumps(snapshot),
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 1
			
 
				+        assert '--yes' in stderr
			
 
				+
			
 
				+    def test_delete_with_yes(self, cli_env, initialized_archive):
			
 
				+        """Delete with --yes flag works."""
			
 
				+        url = create_test_url()
			
 
				+        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
			
 
				+        snapshot = parse_jsonl_output(stdout1)[0]
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['snapshot', 'delete', '--yes'],
			
 
				+            stdin=json.dumps(snapshot),
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        assert 'Deleted 1 snapshots' in stderr
			
 
				+
			
 
				+    def test_delete_dry_run(self, cli_env, initialized_archive):
			
 
				+        """Dry run shows what would be deleted."""
			
 
				+        url = create_test_url()
			
 
				+        stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
			
 
				+        snapshot = parse_jsonl_output(stdout1)[0]
			
 
				+
			
 
				+        stdout, stderr, code = run_archivebox_cmd(
			
 
				+            ['snapshot', 'delete', '--dry-run'],
			
 
				+            stdin=json.dumps(snapshot),
			
 
				+            env=cli_env,
			
 
				+        )
			
 
				+
			
 
				+        assert code == 0
			
 
				+        assert 'Would delete' in stderr
			
--- a/archivebox/workers/supervisord_util.py
+++ b/archivebox/workers/supervisord_util.py
@@ -32,7 +32,7 @@ _supervisord_proc = None
 
				 
			
 
				 ORCHESTRATOR_WORKER = {
			
 
				     "name": "worker_orchestrator",
			
 
				-    "command": "archivebox manage orchestrator",  # runs forever by default
			
 
				+    "command": "archivebox run",  # runs forever by default
			
 
				     "autostart": "true",
			
 
				     "autorestart": "true",
			
 
				     "stdout_logfile": "logs/worker_orchestrator.log",