2 months ago · c1335fed37
--- a/archivebox/api/admin.py
+++ b/archivebox/api/admin.py
@@ -3,16 +3,16 @@ __package__ = 'archivebox.api'
 
				 from signal_webhooks.admin import WebhookAdmin
			
 
				 from signal_webhooks.utils import get_webhook_model
			
 
				 
			
 
				-from archivebox.base_models.admin import ABIDModelAdmin
			
 
				+from archivebox.base_models.admin import BaseModelAdmin
			
 
				 
			
 
				 from api.models import APIToken
			
 
				 
			
 
				 
			
 
				-class APITokenAdmin(ABIDModelAdmin):
			
 
				-    list_display = ('created_at', 'abid', 'created_by', 'token_redacted', 'expires')
			
 
				-    sort_fields = ('abid', 'created_at', 'created_by', 'expires')
			
 
				-    readonly_fields = ('created_at', 'modified_at', 'abid_info')
			
 
				-    search_fields = ('id', 'abid', 'created_by__username', 'token')
			
 
				+class APITokenAdmin(BaseModelAdmin):
			
 
				+    list_display = ('created_at', 'id', 'created_by', 'token_redacted', 'expires')
			
 
				+    sort_fields = ('id', 'created_at', 'created_by', 'expires')
			
 
				+    readonly_fields = ('created_at', 'modified_at')
			
 
				+    search_fields = ('id', 'created_by__username', 'token')
			
 
				     fields = ('created_by', 'token', 'expires', *readonly_fields)
			
 
				 
			
 
				     list_filter = ('created_by',)
			
@@ -20,10 +20,10 @@ class APITokenAdmin(ABIDModelAdmin):
 
				     list_per_page = 100
			
 
				 
			
 
				 
			
 
				-class CustomWebhookAdmin(WebhookAdmin, ABIDModelAdmin):
			
 
				-    list_display = ('created_at', 'created_by', 'abid', *WebhookAdmin.list_display)
			
 
				-    sort_fields = ('created_at', 'created_by', 'abid', 'referenced_model', 'endpoint', 'last_success', 'last_error')
			
 
				-    readonly_fields = ('created_at', 'modified_at', 'abid_info', *WebhookAdmin.readonly_fields)
			
 
				+class CustomWebhookAdmin(WebhookAdmin, BaseModelAdmin):
			
 
				+    list_display = ('created_at', 'created_by', 'id', *WebhookAdmin.list_display)
			
 
				+    sort_fields = ('created_at', 'created_by', 'id', 'referenced_model', 'endpoint', 'last_success', 'last_error')
			
 
				+    readonly_fields = ('created_at', 'modified_at', *WebhookAdmin.readonly_fields)
			
 
				 
			
 
				 
			
 
				 def register_admin(admin_site):
			
--- a/archivebox/api/models.py
+++ b/archivebox/api/models.py
@@ -1,44 +1,25 @@
 
				 __package__ = 'archivebox.api'
			
 
				 
			
 
				 import secrets
			
 
				+from uuid import uuid7
			
 
				 from datetime import timedelta
			
 
				 
			
 
				 from django.conf import settings
			
 
				 from django.db import models
			
 
				 from django.utils import timezone
			
 
				-
			
 
				-from signal_webhooks.models import WebhookBase
			
 
				-
			
 
				 from django_stubs_ext.db.models import TypedModelMeta
			
 
				-
			
 
				-from archivebox.base_models.models import ABIDModel, ABIDField, AutoDateTimeField
			
 
				-
			
 
				+from signal_webhooks.models import WebhookBase
			
 
				 
			
 
				 
			
 
				 def generate_secret_token() -> str:
			
 
				-    # returns cryptographically secure string with len() == 32
			
 
				     return secrets.token_hex(16)
			
 
				 
			
 
				 
			
 
				-class APIToken(ABIDModel):
			
 
				-    """
			
 
				-    A secret key generated by a User that's used to authenticate REST API requests to ArchiveBox.
			
 
				-    """
			
 
				-    # ABID: apt_<created_ts>_<token_hash>_<user_id_hash>_<uuid_rand>
			
 
				-    abid_prefix = 'apt_'
			
 
				-    abid_ts_src = 'self.created_at'
			
 
				-    abid_uri_src = 'self.created_by_id'
			
 
				-    abid_subtype_src = '"01"'
			
 
				-    abid_rand_src = 'self.id'
			
 
				-    abid_drift_allowed = True
			
 
				-
			
 
				-    id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
			
 
				-    abid = ABIDField(prefix=abid_prefix)
			
 
				-
			
 
				+class APIToken(models.Model):
			
 
				+    id = models.UUIDField(primary_key=True, default=uuid7, editable=False, unique=True)
			
 
				     created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False)
			
 
				-    created_at = AutoDateTimeField(default=None, null=False, db_index=True)
			
 
				+    created_at = models.DateTimeField(default=timezone.now, db_index=True)
			
 
				     modified_at = models.DateTimeField(auto_now=True)
			
 
				-
			
 
				     token = models.CharField(max_length=32, default=generate_secret_token, unique=True)
			
 
				     expires = models.DateTimeField(null=True, blank=True)
			
 
				 
			
@@ -49,79 +30,22 @@ class APIToken(ABIDModel):
 
				     def __str__(self) -> str:
			
 
				         return self.token
			
 
				 
			
 
				-    def __repr__(self) -> str:
			
 
				-        return f'<APIToken user={self.created_by.username} token={self.token_redacted}>'
			
 
				-
			
 
				-    def __json__(self) -> dict:
			
 
				-        return {
			
 
				-            "TYPE":             "APIToken",    
			
 
				-            "id":               str(self.pk),
			
 
				-            "abid":             str(self.ABID),
			
 
				-            "created_by_id":    str(self.created_by_id),
			
 
				-            "token":            self.token,
			
 
				-            "created_at":       self.created_at.isoformat(),
			
 
				-            "expires":          self.expires_as_iso8601,
			
 
				-        }
			
 
				-
			
 
				-    @property
			
 
				-    def expires_as_iso8601(self):
			
 
				-        """Returns the expiry date of the token in ISO 8601 format or a date 100 years in the future if none."""
			
 
				-        expiry_date = self.expires or (timezone.now() + timedelta(days=365 * 100))
			
 
				-
			
 
				-        return expiry_date.isoformat()
			
 
				-    
			
 
				     @property
			
 
				     def token_redacted(self):
			
 
				         return f'************{self.token[-4:]}'
			
 
				 
			
 
				     def is_valid(self, for_date=None):
			
 
				-        for_date = for_date or timezone.now()
			
 
				+        return not self.expires or self.expires >= (for_date or timezone.now())
			
 
				 
			
 
				-        if self.expires and self.expires < for_date:
			
 
				-            return False
			
 
				-
			
 
				-        return True
			
 
				-
			
 
				-
			
 
				-
			
 
				-
			
 
				-
			
 
				-
			
 
				-# monkey patch django-signals-webhooks to change how it shows up in Admin UI
			
 
				-
			
 
				-class OutboundWebhook(ABIDModel, WebhookBase):
			
 
				-    """
			
 
				-    Model used in place of (extending) signals_webhooks.models.WebhookModel. Swapped using:
			
 
				-        settings.SIGNAL_WEBHOOKS_CUSTOM_MODEL = 'api.models.OutboundWebhook'
			
 
				-    """
			
 
				-    abid_prefix = 'whk_'
			
 
				-    abid_ts_src = 'self.created_at'
			
 
				-    abid_uri_src = 'self.endpoint'
			
 
				-    abid_subtype_src = 'self.ref'
			
 
				-    abid_rand_src = 'self.id'
			
 
				-    abid_drift_allowed = True
			
 
				-
			
 
				-    id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
			
 
				-    abid = ABIDField(prefix=abid_prefix)
			
 
				 
			
 
				+class OutboundWebhook(models.Model, WebhookBase):
			
 
				+    id = models.UUIDField(primary_key=True, default=uuid7, editable=False, unique=True)
			
 
				     created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False)
			
 
				-    created_at = AutoDateTimeField(default=None, null=False, db_index=True)
			
 
				+    created_at = models.DateTimeField(default=timezone.now, db_index=True)
			
 
				     modified_at = models.DateTimeField(auto_now=True)
			
 
				 
			
 
				-    # More fields here: WebhookBase...
			
 
				-
			
 
				-    WebhookBase._meta.get_field('name').help_text = (
			
 
				-        'Give your webhook a descriptive name (e.g. Notify ACME Slack channel of any new ArchiveResults).')
			
 
				-    WebhookBase._meta.get_field('signal').help_text = (
			
 
				-        'The type of event the webhook should fire for (e.g. Create, Update, Delete).')
			
 
				-    WebhookBase._meta.get_field('ref').help_text = (
			
 
				-        'Dot import notation of the model the webhook should fire for (e.g. core.models.Snapshot or core.models.ArchiveResult).')
			
 
				-    WebhookBase._meta.get_field('endpoint').help_text = (
			
 
				-        'External URL to POST the webhook notification to (e.g. https://someapp.example.com/webhook/some-webhook-receiver).')
			
 
				-
			
 
				     class Meta(WebhookBase.Meta):
			
 
				         verbose_name = 'API Outbound Webhook'
			
 
				 
			
 
				-
			
 
				     def __str__(self) -> str:
			
 
				-        return f'[{self.abid}] {self.ref} -> {self.endpoint}'
			
 
				+        return f'[{self.id}] {self.ref} -> {self.endpoint}'
			
--- a/archivebox/api/v1_api.py
+++ b/archivebox/api/v1_api.py
@@ -70,7 +70,7 @@ class NinjaAPIWithIOCapture(NinjaAPI):
 
				 
			
 
				         response['X-ArchiveBox-Auth-Method'] = getattr(request, '_api_auth_method', None) or 'None'
			
 
				         response['X-ArchiveBox-Auth-Expires'] = token_expiry
			
 
				-        response['X-ArchiveBox-Auth-Token-Id'] = api_token.abid if api_token else 'None'
			
 
				+        response['X-ArchiveBox-Auth-Token-Id'] = str(api_token.id) if api_token else 'None'
			
 
				         response['X-ArchiveBox-Auth-User-Id'] = request.user.pk if request.user.pk else 'None'
			
 
				         response['X-ArchiveBox-Auth-User-Username'] = request.user.username if request.user.pk else 'None'
			
 
				 
			
--- a/archivebox/api/v1_core.py
+++ b/archivebox/api/v1_core.py
@@ -15,24 +15,18 @@ from ninja.pagination import paginate, PaginationBase
 
				 from ninja.errors import HttpError
			
 
				 
			
 
				 from core.models import Snapshot, ArchiveResult, Tag
			
 
				-from api.models import APIToken, OutboundWebhook
			
 
				 from api.v1_crawls import CrawlSchema, SeedSchema
			
 
				 
			
 
				-# from .auth import API_AUTH_METHODS
			
 
				-
			
 
				-
			
 
				 
			
 
				 router = Router(tags=['Core Models'])
			
 
				 
			
 
				 
			
 
				-
			
 
				 class CustomPagination(PaginationBase):
			
 
				     class Input(Schema):
			
 
				         limit: int = 200
			
 
				         offset: int = 0
			
 
				         page: int = 0
			
 
				 
			
 
				-
			
 
				     class Output(Schema):
			
 
				         total_items: int
			
 
				         total_pages: int
			
@@ -64,87 +58,67 @@ class CustomPagination(PaginationBase):
 
				 
			
 
				 class MinimalArchiveResultSchema(Schema):
			
 
				     TYPE: str = 'core.models.ArchiveResult'
			
 
				-
			
 
				     id: UUID
			
 
				-    abid: str
			
 
				-
			
 
				     created_at: datetime | None
			
 
				     modified_at: datetime | None
			
 
				     created_by_id: str
			
 
				     created_by_username: str
			
 
				-
			
 
				     status: str
			
 
				     retry_at: datetime | None
			
 
				-    
			
 
				     extractor: str
			
 
				     cmd_version: str | None
			
 
				     cmd: list[str] | None
			
 
				     pwd: str | None
			
 
				     output: str | None
			
 
				-
			
 
				     start_ts: datetime | None
			
 
				     end_ts: datetime | None
			
 
				 
			
 
				     @staticmethod
			
 
				     def resolve_created_by_id(obj):
			
 
				         return str(obj.created_by_id)
			
 
				-    
			
 
				+
			
 
				     @staticmethod
			
 
				     def resolve_created_by_username(obj) -> str:
			
 
				         User = get_user_model()
			
 
				         return User.objects.filter(pk=obj.created_by_id).values_list('username', flat=True)[0]
			
 
				 
			
 
				-    @staticmethod
			
 
				-    def resolve_abid(obj):
			
 
				-        return str(obj.ABID)
			
 
				+
			
 
				+class ArchiveResultSchema(MinimalArchiveResultSchema):
			
 
				+    TYPE: str = 'core.models.ArchiveResult'
			
 
				+    snapshot_id: UUID
			
 
				+    snapshot_timestamp: str
			
 
				+    snapshot_url: str
			
 
				+    snapshot_tags: List[str]
			
 
				 
			
 
				     @staticmethod
			
 
				     def resolve_snapshot_timestamp(obj):
			
 
				         return obj.snapshot.timestamp
			
 
				-    
			
 
				+
			
 
				     @staticmethod
			
 
				     def resolve_snapshot_url(obj):
			
 
				         return obj.snapshot.url
			
 
				 
			
 
				     @staticmethod
			
 
				     def resolve_snapshot_id(obj):
			
 
				-        return str(obj.snapshot_id)
			
 
				-    
			
 
				-    @staticmethod
			
 
				-    def resolve_snapshot_abid(obj):
			
 
				-        return str(obj.snapshot.ABID)
			
 
				+        return obj.snapshot_id
			
 
				 
			
 
				     @staticmethod
			
 
				     def resolve_snapshot_tags(obj):
			
 
				         return sorted(tag.name for tag in obj.snapshot.tags.all())
			
 
				 
			
 
				-class ArchiveResultSchema(MinimalArchiveResultSchema):
			
 
				-    TYPE: str = 'core.models.ArchiveResult'
			
 
				-
			
 
				-    # ... Extends MinimalArchiveResultSchema fields ...
			
 
				-
			
 
				-    snapshot_id: UUID
			
 
				-    snapshot_abid: str
			
 
				-    snapshot_timestamp: str
			
 
				-    snapshot_url: str
			
 
				-    snapshot_tags: List[str]
			
 
				-
			
 
				 
			
 
				 class ArchiveResultFilterSchema(FilterSchema):
			
 
				-    id: Optional[str] = Field(None, q=['id__startswith', 'abid__icontains', 'snapshot__id__startswith', 'snapshot__abid__icontains', 'snapshot__timestamp__startswith'])
			
 
				-
			
 
				-    search: Optional[str] = Field(None, q=['snapshot__url__icontains', 'snapshot__title__icontains', 'snapshot__tags__name__icontains', 'extractor', 'output__icontains', 'id__startswith', 'abid__icontains', 'snapshot__id__startswith', 'snapshot__abid__icontains', 'snapshot__timestamp__startswith'])
			
 
				-    snapshot_id: Optional[str] = Field(None, q=['snapshot__id__startswith', 'snapshot__abid__icontains', 'snapshot__timestamp__startswith'])
			
 
				+    id: Optional[str] = Field(None, q=['id__startswith', 'snapshot__id__startswith', 'snapshot__timestamp__startswith'])
			
 
				+    search: Optional[str] = Field(None, q=['snapshot__url__icontains', 'snapshot__title__icontains', 'snapshot__tags__name__icontains', 'extractor', 'output__icontains', 'id__startswith', 'snapshot__id__startswith', 'snapshot__timestamp__startswith'])
			
 
				+    snapshot_id: Optional[str] = Field(None, q=['snapshot__id__startswith', 'snapshot__timestamp__startswith'])
			
 
				     snapshot_url: Optional[str] = Field(None, q='snapshot__url__icontains')
			
 
				     snapshot_tag: Optional[str] = Field(None, q='snapshot__tags__name__icontains')
			
 
				-    
			
 
				     status: Optional[str] = Field(None, q='status')
			
 
				     output: Optional[str] = Field(None, q='output__icontains')
			
 
				     extractor: Optional[str] = Field(None, q='extractor__icontains')
			
 
				     cmd: Optional[str] = Field(None, q='cmd__0__icontains')
			
 
				     pwd: Optional[str] = Field(None, q='pwd__icontains')
			
 
				     cmd_version: Optional[str] = Field(None, q='cmd_version')
			
 
				-
			
 
				     created_at: Optional[datetime] = Field(None, q='created_at')
			
 
				     created_at__gte: Optional[datetime] = Field(None, q='created_at__gte')
			
 
				     created_at__lt: Optional[datetime] = Field(None, q='created_at__lt')
			
@@ -154,99 +128,49 @@ class ArchiveResultFilterSchema(FilterSchema):
 
				 @paginate(CustomPagination)
			
 
				 def get_archiveresults(request, filters: ArchiveResultFilterSchema = Query(...)):
			
 
				     """List all ArchiveResult entries matching these filters."""
			
 
				-    qs = ArchiveResult.objects.all()
			
 
				-    results = filters.filter(qs).distinct()
			
 
				-    return results
			
 
				+    return filters.filter(ArchiveResult.objects.all()).distinct()
			
 
				 
			
 
				 
			
 
				 @router.get("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema, url_name="get_archiveresult")
			
 
				 def get_archiveresult(request, archiveresult_id: str):
			
 
				-    """Get a specific ArchiveResult by id or abid."""
			
 
				-    return ArchiveResult.objects.get(Q(id__icontains=archiveresult_id) | Q(abid__icontains=archiveresult_id))
			
 
				-
			
 
				-
			
 
				-# @router.post("/archiveresult", response=ArchiveResultSchema)
			
 
				-# def create_archiveresult(request, payload: ArchiveResultSchema):
			
 
				-#     archiveresult = ArchiveResult.objects.create(**payload.dict())
			
 
				-#     return archiveresult
			
 
				-#
			
 
				-# @router.put("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema)
			
 
				-# def update_archiveresult(request, archiveresult_id: str, payload: ArchiveResultSchema):
			
 
				-#     archiveresult = get_object_or_404(ArchiveResult, id=archiveresult_id)
			
 
				-#   
			
 
				-#     for attr, value in payload.dict().items():
			
 
				-#         setattr(archiveresult, attr, value)
			
 
				-#     archiveresult.save()
			
 
				-#
			
 
				-#     return archiveresult
			
 
				-#
			
 
				-# @router.delete("/archiveresult/{archiveresult_id}")
			
 
				-# def delete_archiveresult(request, archiveresult_id: str):
			
 
				-#     archiveresult = get_object_or_404(ArchiveResult, id=archiveresult_id)
			
 
				-#     archiveresult.delete()
			
 
				-#     return {"success": True}
			
 
				-
			
 
				-
			
 
				-
			
 
				+    """Get a specific ArchiveResult by id."""
			
 
				+    return ArchiveResult.objects.get(Q(id__icontains=archiveresult_id))
			
 
				 
			
 
				 
			
 
				 ### Snapshot #########################################################################
			
 
				 
			
 
				-
			
 
				 class SnapshotSchema(Schema):
			
 
				     TYPE: str = 'core.models.Snapshot'
			
 
				-
			
 
				     id: UUID
			
 
				-    abid: str
			
 
				-
			
 
				     created_by_id: str
			
 
				     created_by_username: str
			
 
				     created_at: datetime
			
 
				     modified_at: datetime
			
 
				-    
			
 
				     status: str
			
 
				     retry_at: datetime | None
			
 
				-
			
 
				     bookmarked_at: datetime
			
 
				     downloaded_at: Optional[datetime]
			
 
				-
			
 
				     url: str
			
 
				     tags: List[str]
			
 
				     title: Optional[str]
			
 
				     timestamp: str
			
 
				     archive_path: str
			
 
				-
			
 
				-    # url_for_admin: str
			
 
				-    # url_for_view: str
			
 
				-
			
 
				     num_archiveresults: int
			
 
				     archiveresults: List[MinimalArchiveResultSchema]
			
 
				 
			
 
				     @staticmethod
			
 
				     def resolve_created_by_id(obj):
			
 
				         return str(obj.created_by_id)
			
 
				-    
			
 
				+
			
 
				     @staticmethod
			
 
				     def resolve_created_by_username(obj):
			
 
				         User = get_user_model()
			
 
				         return User.objects.get(id=obj.created_by_id).username
			
 
				 
			
 
				-    @staticmethod
			
 
				-    def resolve_abid(obj):
			
 
				-        return str(obj.ABID)
			
 
				-
			
 
				     @staticmethod
			
 
				     def resolve_tags(obj):
			
 
				         return sorted(tag.name for tag in obj.tags.all())
			
 
				 
			
 
				-    # @staticmethod
			
 
				-    # def resolve_url_for_admin(obj):
			
 
				-    #     return f"/admin/core/snapshot/{obj.id}/change/"
			
 
				-    
			
 
				-    # @staticmethod
			
 
				-    # def resolve_url_for_view(obj):
			
 
				-    #     return f"/{obj.archive_path}"
			
 
				-
			
 
				     @staticmethod
			
 
				     def resolve_num_archiveresults(obj, context):
			
 
				         return obj.archiveresult_set.all().distinct().count()
			
@@ -259,98 +183,51 @@ class SnapshotSchema(Schema):
 
				 
			
 
				 
			
 
				 class SnapshotFilterSchema(FilterSchema):
			
 
				-    id: Optional[str] = Field(None, q=['id__icontains', 'abid__icontains', 'timestamp__startswith'])
			
 
				-    abid: Optional[str] = Field(None, q='abid__icontains')
			
 
				-
			
 
				+    id: Optional[str] = Field(None, q=['id__icontains', 'timestamp__startswith'])
			
 
				     created_by_id: str = Field(None, q='created_by_id')
			
 
				     created_by_username: str = Field(None, q='created_by__username__icontains')
			
 
				-
			
 
				     created_at__gte: datetime = Field(None, q='created_at__gte')
			
 
				     created_at__lt: datetime = Field(None, q='created_at__lt')
			
 
				     created_at: datetime = Field(None, q='created_at')
			
 
				     modified_at: datetime = Field(None, q='modified_at')
			
 
				     modified_at__gte: datetime = Field(None, q='modified_at__gte')
			
 
				     modified_at__lt: datetime = Field(None, q='modified_at__lt')
			
 
				-
			
 
				-    search: Optional[str] = Field(None, q=['url__icontains', 'title__icontains', 'tags__name__icontains', 'id__icontains', 'abid__icontains', 'timestamp__startswith'])
			
 
				+    search: Optional[str] = Field(None, q=['url__icontains', 'title__icontains', 'tags__name__icontains', 'id__icontains', 'timestamp__startswith'])
			
 
				     url: Optional[str] = Field(None, q='url')
			
 
				     tag: Optional[str] = Field(None, q='tags__name')
			
 
				     title: Optional[str] = Field(None, q='title__icontains')
			
 
				     timestamp: Optional[str] = Field(None, q='timestamp__startswith')
			
 
				-    
			
 
				     bookmarked_at__gte: Optional[datetime] = Field(None, q='bookmarked_at__gte')
			
 
				     bookmarked_at__lt: Optional[datetime] = Field(None, q='bookmarked_at__lt')
			
 
				 
			
 
				 
			
 
				-
			
 
				 @router.get("/snapshots", response=List[SnapshotSchema], url_name="get_snapshots")
			
 
				 @paginate(CustomPagination)
			
 
				-def get_snapshots(request, filters: SnapshotFilterSchema = Query(...), with_archiveresults: bool=False):
			
 
				+def get_snapshots(request, filters: SnapshotFilterSchema = Query(...), with_archiveresults: bool = False):
			
 
				     """List all Snapshot entries matching these filters."""
			
 
				     request.with_archiveresults = with_archiveresults
			
 
				+    return filters.filter(Snapshot.objects.all()).distinct()
			
 
				 
			
 
				-    qs = Snapshot.objects.all()
			
 
				-    results = filters.filter(qs).distinct()
			
 
				-    return results
			
 
				 
			
 
				 @router.get("/snapshot/{snapshot_id}", response=SnapshotSchema, url_name="get_snapshot")
			
 
				-def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
			
 
				-    """Get a specific Snapshot by abid or id."""
			
 
				+def get_snapshot(request, snapshot_id: str, with_archiveresults: bool = True):
			
 
				+    """Get a specific Snapshot by id."""
			
 
				     request.with_archiveresults = with_archiveresults
			
 
				-    snapshot = None
			
 
				     try:
			
 
				-        snapshot = Snapshot.objects.get(Q(abid__startswith=snapshot_id) | Q(id__startswith=snapshot_id) | Q(timestamp__startswith=snapshot_id))
			
 
				+        return Snapshot.objects.get(Q(id__startswith=snapshot_id) | Q(timestamp__startswith=snapshot_id))
			
 
				     except Snapshot.DoesNotExist:
			
 
				-        pass
			
 
				-
			
 
				-    try:
			
 
				-        snapshot = snapshot or Snapshot.objects.get(Q(abid__icontains=snapshot_id) | Q(id__icontains=snapshot_id))
			
 
				-    except Snapshot.DoesNotExist:
			
 
				-        pass
			
 
				-
			
 
				-    if not snapshot:
			
 
				-        raise Snapshot.DoesNotExist
			
 
				-
			
 
				-    return snapshot
			
 
				-
			
 
				-
			
 
				-# @router.post("/snapshot", response=SnapshotSchema)
			
 
				-# def create_snapshot(request, payload: SnapshotSchema):
			
 
				-#     snapshot = Snapshot.objects.create(**payload.dict())
			
 
				-#     return snapshot
			
 
				-#
			
 
				-# @router.put("/snapshot/{snapshot_id}", response=SnapshotSchema)
			
 
				-# def update_snapshot(request, snapshot_id: str, payload: SnapshotSchema):
			
 
				-#     snapshot = get_object_or_404(Snapshot, id=snapshot_id)
			
 
				-#
			
 
				-#     for attr, value in payload.dict().items():
			
 
				-#         setattr(snapshot, attr, value)
			
 
				-#     snapshot.save()
			
 
				-#
			
 
				-#     return snapshot
			
 
				-#
			
 
				-# @router.delete("/snapshot/{snapshot_id}")
			
 
				-# def delete_snapshot(request, snapshot_id: str):
			
 
				-#     snapshot = get_object_or_404(Snapshot, id=snapshot_id)
			
 
				-#     snapshot.delete()
			
 
				-#     return {"success": True}
			
 
				-
			
 
				+        return Snapshot.objects.get(Q(id__icontains=snapshot_id))
			
 
				 
			
 
				 
			
 
				 ### Tag #########################################################################
			
 
				 
			
 
				-
			
 
				 class TagSchema(Schema):
			
 
				     TYPE: str = 'core.models.Tag'
			
 
				-
			
 
				     id: UUID
			
 
				-    abid: str
			
 
				-
			
 
				     modified_at: datetime
			
 
				     created_at: datetime
			
 
				     created_by_id: str
			
 
				     created_by_username: str
			
 
				-
			
 
				     name: str
			
 
				     slug: str
			
 
				     num_snapshots: int
			
@@ -359,12 +236,12 @@ class TagSchema(Schema):
 
				     @staticmethod
			
 
				     def resolve_created_by_id(obj):
			
 
				         return str(obj.created_by_id)
			
 
				-    
			
 
				+
			
 
				     @staticmethod
			
 
				     def resolve_created_by_username(obj):
			
 
				         User = get_user_model()
			
 
				         return User.objects.get(id=obj.created_by_id).username
			
 
				-    
			
 
				+
			
 
				     @staticmethod
			
 
				     def resolve_num_snapshots(obj, context):
			
 
				         return obj.snapshot_set.all().distinct().count()
			
@@ -375,6 +252,7 @@ class TagSchema(Schema):
 
				             return obj.snapshot_set.all().distinct()
			
 
				         return Snapshot.objects.none()
			
 
				 
			
 
				+
			
 
				 @router.get("/tags", response=List[TagSchema], url_name="get_tags")
			
 
				 @paginate(CustomPagination)
			
 
				 def get_tags(request):
			
@@ -382,65 +260,45 @@ def get_tags(request):
 
				     request.with_archiveresults = False
			
 
				     return Tag.objects.all().distinct()
			
 
				 
			
 
				+
			
 
				 @router.get("/tag/{tag_id}", response=TagSchema, url_name="get_tag")
			
 
				-def get_tag(request, tag_id: str, with_snapshots: bool=True):
			
 
				+def get_tag(request, tag_id: str, with_snapshots: bool = True):
			
 
				     request.with_snapshots = with_snapshots
			
 
				     request.with_archiveresults = False
			
 
				-    tag = None
			
 
				     try:
			
 
				-        tag = Tag.objects.get(abid__icontains=tag_id)
			
 
				+        return Tag.objects.get(id__icontains=tag_id)
			
 
				     except (Tag.DoesNotExist, ValidationError):
			
 
				-        pass
			
 
				+        return Tag.objects.get(slug__icontains=tag_id)
			
 
				 
			
 
				-    try:
			
 
				-        tag = tag or Tag.objects.get(id__icontains=tag_id)
			
 
				-    except (Tag.DoesNotExist, ValidationError):
			
 
				-        pass
			
 
				-    return tag
			
 
				 
			
 
				[email protected]("/any/{abid}", response=Union[SnapshotSchema, ArchiveResultSchema, TagSchema, SeedSchema, CrawlSchema], url_name="get_any", summary="Get any object by its ABID or ID (e.g. snapshot, archiveresult, tag, seed, crawl, etc.)")
			
 
				-def get_any(request, abid: str):
			
 
				-    """Get any object by its ABID or ID (e.g. snapshot, archiveresult, tag, seed, crawl, etc.)."""
			
 
				-    
			
 
				[email protected]("/any/{id}", response=Union[SnapshotSchema, ArchiveResultSchema, TagSchema, SeedSchema, CrawlSchema], url_name="get_any", summary="Get any object by its ID")
			
 
				+def get_any(request, id: str):
			
 
				+    """Get any object by its ID (e.g. snapshot, archiveresult, tag, seed, crawl, etc.)."""
			
 
				     request.with_snapshots = False
			
 
				     request.with_archiveresults = False
			
 
				 
			
 
				-    if abid.startswith(APIToken.abid_prefix):
			
 
				-        raise HttpError(403, 'APIToken objects are not accessible via REST API')
			
 
				-    
			
 
				-    if abid.startswith(OutboundWebhook.abid_prefix):
			
 
				-        raise HttpError(403, 'OutboundWebhook objects are not accessible via REST API')
			
 
				-    
			
 
				-    response = None
			
 
				-    try:
			
 
				-        response = response or get_snapshot(request, abid)
			
 
				-    except Exception:
			
 
				-        pass
			
 
				-
			
 
				-    try:
			
 
				-        response = response or get_archiveresult(request, abid)
			
 
				-    except Exception:
			
 
				-        pass
			
 
				+    for getter in [get_snapshot, get_archiveresult, get_tag]:
			
 
				+        try:
			
 
				+            response = getter(request, id)
			
 
				+            if response:
			
 
				+                return redirect(f"/api/v1/{response._meta.app_label}/{response._meta.model_name}/{response.id}?{request.META['QUERY_STRING']}")
			
 
				+        except Exception:
			
 
				+            pass
			
 
				 
			
 
				-    try:
			
 
				-        response = response or get_tag(request, abid)
			
 
				-    except Exception:
			
 
				-        pass
			
 
				-    
			
 
				     try:
			
 
				         from api.v1_crawls import get_seed
			
 
				-        response = response or get_seed(request, abid)
			
 
				+        response = get_seed(request, id)
			
 
				+        if response:
			
 
				+            return redirect(f"/api/v1/{response._meta.app_label}/{response._meta.model_name}/{response.id}?{request.META['QUERY_STRING']}")
			
 
				     except Exception:
			
 
				         pass
			
 
				-    
			
 
				+
			
 
				     try:
			
 
				         from api.v1_crawls import get_crawl
			
 
				-        response = response or get_crawl(request, abid)
			
 
				+        response = get_crawl(request, id)
			
 
				+        if response:
			
 
				+            return redirect(f"/api/v1/{response._meta.app_label}/{response._meta.model_name}/{response.id}?{request.META['QUERY_STRING']}")
			
 
				     except Exception:
			
 
				         pass
			
 
				-    
			
 
				-    if response:
			
 
				-        app_label, model_name = response._meta.app_label, response._meta.model_name
			
 
				-        return redirect(f"/api/v1/{app_label}/{model_name}/{response.abid}?{request.META['QUERY_STRING']}")
			
 
				 
			
 
				-    raise HttpError(404, 'Object with given ABID not found')
			
 
				+    raise HttpError(404, 'Object with given ID not found')
			
--- a/archivebox/api/v1_crawls.py
+++ b/archivebox/api/v1_crawls.py
@@ -21,7 +21,6 @@ class SeedSchema(Schema):
 
				     TYPE: str = 'crawls.models.Seed'
			
 
				 
			
 
				     id: UUID
			
 
				-    abid: str
			
 
				     
			
 
				     modified_at: datetime
			
 
				     created_at: datetime
			
@@ -52,7 +51,7 @@ def get_seed(request, seed_id: str):
 
				     request.with_archiveresults = False
			
 
				     
			
 
				     try:
			
 
				-        seed = Seed.objects.get(Q(abid__icontains=seed_id) | Q(id__icontains=seed_id))
			
 
				+        seed = Seed.objects.get(Q(id__icontains=seed_id))
			
 
				     except Exception:
			
 
				         pass
			
 
				     return seed
			
@@ -62,7 +61,6 @@ class CrawlSchema(Schema):
 
				     TYPE: str = 'crawls.models.Crawl'
			
 
				 
			
 
				     id: UUID
			
 
				-    abid: str
			
 
				 
			
 
				     modified_at: datetime
			
 
				     created_at: datetime
			
@@ -99,21 +97,10 @@ def get_crawls(request):
 
				 
			
 
				 @router.get("/crawl/{crawl_id}", response=CrawlSchema | str, url_name="get_crawl")
			
 
				 def get_crawl(request, crawl_id: str, as_rss: bool=False, with_snapshots: bool=False, with_archiveresults: bool=False):
			
 
				-    """Get a specific Crawl by id or abid."""
			
 
				-    
			
 
				-    crawl = None
			
 
				+    """Get a specific Crawl by id."""
			
 
				     request.with_snapshots = with_snapshots
			
 
				     request.with_archiveresults = with_archiveresults
			
 
				-    
			
 
				-    try:
			
 
				-        crawl = Crawl.objects.get(abid__icontains=crawl_id)
			
 
				-    except Exception:
			
 
				-        pass
			
 
				-
			
 
				-    try:
			
 
				-        crawl = crawl or Crawl.objects.get(id__icontains=crawl_id)
			
 
				-    except Exception:
			
 
				-        pass
			
 
				+    crawl = Crawl.objects.get(id__icontains=crawl_id)
			
 
				     
			
 
				     if crawl and as_rss:
			
 
				         # return snapshots as XML rss feed
			
--- a/archivebox/api/v1_workers.py
+++ b/archivebox/api/v1_workers.py
@@ -13,9 +13,8 @@ router = Router(tags=['Workers and Tasks'])
 
				 
			
 
				 class TaskSchema(Schema):
			
 
				     TYPE: str
			
 
				-    
			
 
				+
			
 
				     id: UUID
			
 
				-    abid: str
			
 
				     description: str
			
 
				 
			
 
				     status: str
			
--- a/archivebox/base_models/abid.py
+++ b/archivebox/base_models/abid.py
@@ -1,223 +0,0 @@
 
				-__package__ = 'archivebox.base_models'
			
 
				-
			
 
				-from typing import NamedTuple, Any, Union, Dict
			
 
				-
			
 
				-import ulid
			
 
				-import uuid6
			
 
				-import hashlib
			
 
				-from urllib.parse import urlparse
			
 
				-
			
 
				-from uuid import UUID
			
 
				-from typeid import TypeID            # type: ignore[import-untyped]
			
 
				-from datetime import datetime
			
 
				-
			
 
				-from archivebox.misc.util import enforce_types
			
 
				-
			
 
				-
			
 
				-ABID_PREFIX_LEN = 4
			
 
				-ABID_SUFFIX_LEN = 26
			
 
				-ABID_LEN = 30
			
 
				-ABID_TS_LEN = 10
			
 
				-ABID_URI_LEN = 8
			
 
				-ABID_SUBTYPE_LEN = 2
			
 
				-ABID_RAND_LEN = 6
			
 
				-
			
 
				-DEFAULT_ABID_PREFIX = 'obj_'
			
 
				-
			
 
				-# allows people to keep their uris secret on a per-instance basis by changing the salt.
			
 
				-# the default means everyone can share the same namespace for URI hashes,
			
 
				-# meaning anyone who has a URI and wants to check if you have it can guess the ABID
			
 
				-DEFAULT_ABID_URI_SALT = '687c2fff14e3a7780faa5a40c237b19b5b51b089'
			
 
				-
			
 
				-
			
 
				-class ABID(NamedTuple):
			
 
				-    """
			
 
				-    e.g. ABID('obj_01HX9FPYTRE4A5CCD901ZYEBQE')
			
 
				-    """
			
 
				-    prefix: str            # e.g. obj_
			
 
				-    ts: str                # e.g. 01HX9FPYTR
			
 
				-    uri: str               # e.g. E4A5CCD9
			
 
				-    subtype: str           # e.g. 01
			
 
				-    rand: str              # e.g. ZYEBQE
			
 
				-    
			
 
				-    # salt: str = DEFAULT_ABID_URI_SALT
			
 
				-
			
 
				-    def __getattr__(self, attr: str) -> Any:
			
 
				-        return getattr(self.ulid, attr)
			
 
				-
			
 
				-    def __eq__(self, other: Any) -> bool:
			
 
				-        try:
			
 
				-            return self.ulid == other.ulid
			
 
				-        except AttributeError:
			
 
				-            return NotImplemented
			
 
				-
			
 
				-    def __str__(self) -> str:
			
 
				-        return self.prefix + self.suffix
			
 
				-
			
 
				-    def __len__(self) -> int:
			
 
				-        return len(self.prefix + self.suffix)
			
 
				-
			
 
				-    @classmethod
			
 
				-    def parse(cls, buffer: Union[str, UUID, ulid.ULID, TypeID, 'ABID'], prefix=DEFAULT_ABID_PREFIX) -> 'ABID':
			
 
				-        assert buffer, f'Attempted to create ABID from null value {buffer}'
			
 
				-
			
 
				-        buffer = str(buffer)
			
 
				-        if '_' in buffer:
			
 
				-            prefix, suffix = buffer.split('_')
			
 
				-        else:
			
 
				-            prefix, suffix = prefix.strip('_'), buffer
			
 
				-
			
 
				-        assert len(prefix) == ABID_PREFIX_LEN - 1   # length without trailing _
			
 
				-        assert len(suffix) == ABID_SUFFIX_LEN, f'Suffix {suffix} from {buffer} was not {ABID_SUFFIX_LEN} chars long'
			
 
				-
			
 
				-        return cls(
			
 
				-            prefix=abid_part_from_prefix(prefix),
			
 
				-            ts=suffix[0:10].upper(),
			
 
				-            uri=suffix[10:18].upper(),
			
 
				-            subtype=suffix[18:20].upper(),
			
 
				-            rand=suffix[20:26].upper(),
			
 
				-        )
			
 
				-    
			
 
				-    @property
			
 
				-    def uri_salt(self) -> str:
			
 
				-        return DEFAULT_ABID_URI_SALT
			
 
				-
			
 
				-    @property
			
 
				-    def suffix(self):
			
 
				-        return ''.join((self.ts, self.uri, self.subtype, self.rand))
			
 
				-    
			
 
				-    @property
			
 
				-    def ulid(self) -> ulid.ULID:
			
 
				-        return ulid.parse(self.suffix)
			
 
				-
			
 
				-    @property
			
 
				-    def uuid(self) -> UUID:
			
 
				-        return self.ulid.uuid
			
 
				-
			
 
				-    @property
			
 
				-    def uuid6(self) -> uuid6.UUID:
			
 
				-        return uuid6.UUID(hex=self.uuid.hex)
			
 
				-
			
 
				-    @property
			
 
				-    def typeid(self) -> TypeID:
			
 
				-        return TypeID.from_uuid(prefix=self.prefix.strip('_'), suffix=self.uuid6)
			
 
				-
			
 
				-    @property
			
 
				-    def datetime(self) -> datetime:
			
 
				-        return self.ulid.timestamp().datetime
			
 
				-
			
 
				-
			
 
				-
			
 
				-####################################################
			
 
				-
			
 
				-
			
 
				-@enforce_types
			
 
				-def uri_hash(uri: Union[str, bytes], salt: str=DEFAULT_ABID_URI_SALT) -> str:
			
 
				-    """
			
 
				-    https://example.com -> 'E4A5CCD9AF4ED2A6E0954DF19FD274E9CDDB4853051F033FD518BFC90AA1AC25' (example.com)
			
 
				-    """
			
 
				-    if isinstance(uri, bytes):
			
 
				-        uri_str: str = uri.decode()
			
 
				-    else:
			
 
				-        uri_str = str(uri)
			
 
				-
			
 
				-    # only hash the domain part of URLs
			
 
				-    if '://' in uri_str:
			
 
				-        try:
			
 
				-            domain = urlparse(uri_str).netloc
			
 
				-            if domain:
			
 
				-                uri_str = domain
			
 
				-        except AttributeError:
			
 
				-            pass
			
 
				-    
			
 
				-    # the uri hash is the sha256 of the domain + salt
			
 
				-    uri_bytes = uri_str.encode('utf-8') + salt.encode('utf-8')
			
 
				-
			
 
				-    return hashlib.sha256(uri_bytes).hexdigest().upper()
			
 
				-
			
 
				-@enforce_types
			
 
				-def abid_part_from_prefix(prefix: str) -> str:
			
 
				-    """
			
 
				-    'snp_'
			
 
				-    """
			
 
				-    # if prefix is None:
			
 
				-    #     return 'obj_'
			
 
				-
			
 
				-    prefix = prefix.strip('_').lower()
			
 
				-    assert len(prefix) == 3
			
 
				-    return prefix + '_'
			
 
				-
			
 
				-@enforce_types
			
 
				-def abid_part_from_uri(uri: Any, salt: str=DEFAULT_ABID_URI_SALT) -> str:
			
 
				-    """
			
 
				-    'E4A5CCD9'     # takes first 8 characters of sha256(url)
			
 
				-    """
			
 
				-    uri = str(uri).strip()
			
 
				-    assert uri not in ('None', '')
			
 
				-    return uri_hash(uri, salt=salt)[:ABID_URI_LEN]
			
 
				-
			
 
				-@enforce_types
			
 
				-def abid_part_from_ts(ts: datetime) -> str:
			
 
				-    """
			
 
				-    '01HX9FPYTR'   # produces 10 character Timestamp section of ulid based on added date
			
 
				-    """
			
 
				-    return str(ulid.from_timestamp(ts))[:ABID_TS_LEN]
			
 
				-
			
 
				-@enforce_types
			
 
				-def ts_from_abid(abid: str) -> datetime:
			
 
				-    return ulid.parse(abid.split('_', 1)[-1]).timestamp().datetime
			
 
				-
			
 
				-@enforce_types
			
 
				-def abid_part_from_subtype(subtype: str | int) -> str:
			
 
				-    """
			
 
				-    Snapshots have 01 type, other objects have other subtypes like wget/media/etc.
			
 
				-    Also allows us to change the ulid spec later by putting special sigil values here.
			
 
				-    """
			
 
				-    subtype = str(subtype)
			
 
				-    if len(subtype) == ABID_SUBTYPE_LEN:
			
 
				-        return subtype
			
 
				-
			
 
				-    return hashlib.sha256(subtype.encode('utf-8')).hexdigest()[:ABID_SUBTYPE_LEN].upper()
			
 
				-
			
 
				-@enforce_types
			
 
				-def abid_part_from_rand(rand: Union[str, UUID, None, int]) -> str:
			
 
				-    """
			
 
				-    'ZYEBQE'   # takes last 6 characters of randomness from existing legacy uuid db field
			
 
				-    """
			
 
				-    if rand is None:
			
 
				-        # if it's None we generate a new random 6 character hex string
			
 
				-        return str(ulid.new())[-ABID_RAND_LEN:]
			
 
				-    elif isinstance(rand, UUID):
			
 
				-        # if it's a uuid we take the last 6 characters of the ULID represation of it
			
 
				-        return str(ulid.from_uuid(rand))[-ABID_RAND_LEN:]
			
 
				-    elif isinstance(rand, int):
			
 
				-        # if it's a BigAutoInteger field we convert it from an int to a 0-padded string
			
 
				-        rand_str = str(rand)[-ABID_RAND_LEN:]
			
 
				-        padding_needed = ABID_RAND_LEN - len(rand_str)
			
 
				-        rand_str = ('0'*padding_needed) + rand_str
			
 
				-        return rand_str
			
 
				-
			
 
				-    # otherwise treat it as a string, take the last 6 characters of it verbatim
			
 
				-    return str(rand)[-ABID_RAND_LEN:].upper()
			
 
				-
			
 
				-
			
 
				-@enforce_types
			
 
				-def abid_hashes_from_values(prefix: str, ts: datetime, uri: Any, subtype: str | int, rand: Union[str, UUID, None, int], salt: str=DEFAULT_ABID_URI_SALT) -> Dict[str, str]:
			
 
				-    return {
			
 
				-        'prefix': abid_part_from_prefix(prefix),
			
 
				-        'ts': abid_part_from_ts(ts),
			
 
				-        'uri': abid_part_from_uri(uri, salt=salt),
			
 
				-        'subtype': abid_part_from_subtype(subtype),
			
 
				-        'rand': abid_part_from_rand(rand),
			
 
				-        # 'salt': don't add this, salt combined with uri above to form a single hash
			
 
				-    }
			
 
				-
			
 
				-@enforce_types
			
 
				-def abid_from_values(prefix: str, ts: datetime, uri: str, subtype: str, rand: Union[str, UUID, None, int], salt: str=DEFAULT_ABID_URI_SALT) -> ABID:
			
 
				-    """
			
 
				-    Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src).
			
 
				-    """
			
 
				-
			
 
				-    abid = ABID(**abid_hashes_from_values(prefix, ts, uri, subtype, rand, salt=salt))
			
 
				-    assert abid.ulid and abid.uuid and abid.typeid, f'Failed to calculate {prefix}_ABID for ts={ts} uri={uri} subtyp={subtype} rand={rand}'
			
 
				-    return abid
			
--- a/archivebox/base_models/admin.py
+++ b/archivebox/base_models/admin.py
@@ -1,174 +1,17 @@
 
				-__package__ = 'archivebox.base_models'
			
 
				-
			
 
				-from typing import Any
			
 
				-
			
 
				-from django.contrib import admin, messages
			
 
				-from django.core.exceptions import ValidationError
			
 
				-from django.utils.html import format_html
			
 
				-from django.utils.safestring import mark_safe
			
 
				-from django.shortcuts import redirect
			
 
				-
			
 
				-from django_object_actions import DjangoObjectActions, action
			
 
				-
			
 
				-from archivebox.misc.util import parse_date
			
 
				-
			
 
				-from .abid import ABID
			
 
				-
			
 
				-
			
 
				-def highlight_diff(display_val: Any, compare_val: Any, invert: bool=False, color_same: str | None=None, color_diff: str | None=None):
			
 
				-    """highlight each character in red that differs with the char at the same index in compare_val"""
			
 
				-
			
 
				-    display_val = str(display_val)
			
 
				-    compare_val = str(compare_val)
			
 
				-
			
 
				-    if len(compare_val) < len(display_val):
			
 
				-        compare_val += ' ' * (len(display_val) - len(compare_val))
			
 
				-
			
 
				-    similar_color, highlighted_color = color_same or 'inherit', color_diff or 'red'
			
 
				-    if invert:
			
 
				-        similar_color, highlighted_color = color_same or 'green', color_diff or 'inherit'
			
 
				-
			
 
				-    return mark_safe(''.join(
			
 
				-        format_html('<span style="color: {};">{}</span>', highlighted_color, display_val[i])
			
 
				-        if display_val[i] != compare_val[i] else
			
 
				-        format_html('<span style="color: {};">{}</span>', similar_color, display_val[i])
			
 
				-        for i in range(len(display_val))
			
 
				-    ))
			
 
				-
			
 
				-def get_abid_info(self, obj, request=None):
			
 
				-    from archivebox.api.auth import get_or_create_api_token
			
 
				-    
			
 
				-    try:
			
 
				-        #abid_diff = f' != obj.ABID: {highlight_diff(obj.ABID, obj.abid)} ❌' if str(obj.ABID) != str(obj.abid) else ' == .ABID ✅'
			
 
				-
			
 
				-        fresh_values = obj.ABID_FRESH_VALUES
			
 
				-        fresh_hashes = obj.ABID_FRESH_HASHES
			
 
				-        fresh_diffs = obj.ABID_FRESH_DIFFS
			
 
				-        fresh_abid = ABID(**fresh_hashes)
			
 
				-        
			
 
				-        fresh_abid_diff = f'❌ != &nbsp; .fresh_abid: {highlight_diff(fresh_abid, obj.ABID)}' if str(fresh_abid) != str(obj.ABID) else '✅'
			
 
				-        fresh_uuid_diff = f'❌ != &nbsp; .fresh_uuid: {highlight_diff(fresh_abid.uuid, obj.ABID.uuid)}' if str(fresh_abid.uuid) != str(obj.ABID.uuid) else '✅'
			
 
				-
			
 
				-        id_pk_diff = f'❌ !=  .pk: {highlight_diff(obj.pk, obj.id)}' if str(obj.pk) != str(obj.id) else '✅'
			
 
				+"""Base admin classes for models using UUIDv7."""
			
 
				 
			
 
				-        fresh_ts = parse_date(fresh_values['ts']) or None
			
 
				-        ts_diff = f'❌ != {highlight_diff( fresh_hashes["ts"], obj.ABID.ts)}' if  fresh_hashes["ts"] != obj.ABID.ts else '✅'
			
 
				-
			
 
				-        derived_uri = fresh_hashes['uri']
			
 
				-        uri_diff = f'❌ != {highlight_diff(derived_uri, obj.ABID.uri)}' if derived_uri != obj.ABID.uri else '✅'
			
 
				-
			
 
				-        derived_subtype = fresh_hashes['subtype']
			
 
				-        subtype_diff = f'❌ != {highlight_diff(derived_subtype, obj.ABID.subtype)}' if derived_subtype != obj.ABID.subtype else '✅'
			
 
				-
			
 
				-        derived_rand = fresh_hashes['rand']
			
 
				-        rand_diff = f'❌ != {highlight_diff(derived_rand, obj.ABID.rand)}' if derived_rand != obj.ABID.rand else '✅'
			
 
				-
			
 
				-        return format_html(
			
 
				-            # URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/>
			
 
				-            '''
			
 
				-            <a href="{}" style="font-size: 16px; font-family: monospace; user-select: all; border-radius: 8px; background-color: #ddf; padding: 3px 5px; border: 1px solid #aaa; margin-bottom: 8px; display: inline-block; vertical-align: top;">{}</a> &nbsp; &nbsp; <a href="{}" style="color: limegreen; font-size: 0.9em; vertical-align: 1px; font-family: monospace;">📖 API DOCS</a>
			
 
				-            <br/><hr/>
			
 
				-            <div style="opacity: 0.8">
			
 
				-            &nbsp; &nbsp; <small style="opacity: 0.8">.id: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;<code style="font-size: 10px; user-select: all">{}</code> &nbsp; &nbsp; {}</small><br/>
			
 
				-            &nbsp; &nbsp; <small style="opacity: 0.8">.abid.uuid: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; <code style="font-size: 10px; user-select: all">{}</code> &nbsp; &nbsp; {}</small><br/>
			
 
				-            &nbsp; &nbsp; <small style="opacity: 0.8">.abid: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; <code style="font-size: 10px; user-select: all">{}</code> &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; {}</small><br/>
			
 
				-            <hr/>
			
 
				-            &nbsp; &nbsp; TS: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;<code style="font-size: 10px;"><b style="user-select: all">{}</b> &nbsp; {}</code> &nbsp; &nbsp; &nbsp;&nbsp; <code style="font-size: 10px;"><b>{}</b></code> {}: <code style="user-select: all">{}</code><br/>
			
 
				-            &nbsp; &nbsp; URI: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; <code style="font-size: 10px;"><b style="user-select: all">{}</b> &nbsp; &nbsp; {}</code> &nbsp;&nbsp; &nbsp; &nbsp; &nbsp;&nbsp; <code style="font-size: 10px;"><b>{}</b></code> <span style="display:inline-block; vertical-align: -4px; width: 330px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">{}: <code style="user-select: all">{}</code></span><br/>
			
 
				-            &nbsp; &nbsp; SUBTYPE: &nbsp; &nbsp; &nbsp; <code style="font-size: 10px;"><b style="user-select: all">{}</b> &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; {}</code> &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; <code style="font-size: 10px;"><b>{}</b></code> {}: <code style="user-select: all">{}</code><br/>
			
 
				-            &nbsp; &nbsp; RAND: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; <code style="font-size: 10px;"><b style="user-select: all">{}</b> &nbsp; &nbsp; &nbsp; {}</code> &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; <code style="font-size: 10px;"><b>{}</b></code> {}: <code style="user-select: all">{}</code></code>
			
 
				-            <br/><hr/>
			
 
				-            <span style="color: #f375a0">{}</span> <code style="color: red"><b>{}</b></code> {}
			
 
				-            </div>
			
 
				-            ''',
			
 
				-            obj.api_url + (f'?api_key={get_or_create_api_token(request.user)}' if request and request.user else ''), obj.api_url, obj.api_docs_url,
			
 
				-            highlight_diff(obj.id, obj.ABID.uuid, invert=True), mark_safe(id_pk_diff),
			
 
				-            highlight_diff(obj.ABID.uuid, obj.id, invert=True), mark_safe(fresh_uuid_diff),
			
 
				-            highlight_diff(obj.abid, fresh_abid), mark_safe(fresh_abid_diff),
			
 
				-            # str(fresh_abid.uuid), mark_safe(fresh_uuid_diff),
			
 
				-            # str(fresh_abid), mark_safe(fresh_abid_diff),
			
 
				-            highlight_diff(obj.ABID.ts,  fresh_hashes['ts']), highlight_diff(str(obj.ABID.uuid)[0:14], str(fresh_abid.uuid)[0:14]), mark_safe(ts_diff), obj.abid_ts_src, fresh_ts and fresh_ts.isoformat(),
			
 
				-            highlight_diff(obj.ABID.uri, derived_uri), highlight_diff(str(obj.ABID.uuid)[14:26], str(fresh_abid.uuid)[14:26]), mark_safe(uri_diff), obj.abid_uri_src, str(fresh_values['uri']),
			
 
				-            highlight_diff(obj.ABID.subtype, derived_subtype), highlight_diff(str(obj.ABID.uuid)[26:28], str(fresh_abid.uuid)[26:28]), mark_safe(subtype_diff), obj.abid_subtype_src, str(fresh_values['subtype']),
			
 
				-            highlight_diff(obj.ABID.rand, derived_rand), highlight_diff(str(obj.ABID.uuid)[28:36], str(fresh_abid.uuid)[28:36]), mark_safe(rand_diff), obj.abid_rand_src, str(fresh_values['rand'])[-7:],
			
 
				-            'Some values the ABID depends on have changed since the ABID was issued:' if fresh_diffs else '',
			
 
				-            ", ".join(diff['abid_src'] for diff in fresh_diffs.values()),
			
 
				-            '(clicking "Regenerate ABID" in the upper right will assign a new ABID, breaking any external references to the old ABID)' if fresh_diffs else '',
			
 
				-        )
			
 
				-    except Exception as e:
			
 
				-        # import ipdb; ipdb.set_trace()
			
 
				-        return str(e)
			
 
				+__package__ = 'archivebox.base_models'
			
 
				 
			
 
				+from django.contrib import admin
			
 
				+from django_object_actions import DjangoObjectActions
			
 
				 
			
 
				-class ABIDModelAdmin(DjangoObjectActions, admin.ModelAdmin):
			
 
				-    list_display = ('created_at', 'created_by', 'abid')
			
 
				-    sort_fields = ('created_at', 'created_by', 'abid')
			
 
				-    readonly_fields = ('created_at', 'modified_at', 'abid_info')
			
 
				-    # fields = [*readonly_fields]
			
 
				-    
			
 
				-    change_actions = ("regenerate_abid",)
			
 
				-    # changelist_actions = ("regenerate_abid",)
			
 
				 
			
 
				-    def _get_obj_does_not_exist_redirect(self, request, opts, object_id):
			
 
				-        try:
			
 
				-            object_pk = self.model.id_from_abid(object_id)
			
 
				-            return redirect(self.request.path.replace(object_id, object_pk), permanent=False)
			
 
				-        except (self.model.DoesNotExist, ValidationError):
			
 
				-            pass
			
 
				-        return super()._get_obj_does_not_exist_redirect(request, opts, object_id)       # type: ignore
			
 
				-    
			
 
				-    def queryset(self, request):
			
 
				-        self.request = request
			
 
				-        return super().queryset(request)                                                # type: ignore
			
 
				-    
			
 
				-    def change_view(self, request, object_id, form_url="", extra_context=None):
			
 
				-        self.request = request
			
 
				-        return super().change_view(request, object_id, form_url, extra_context)
			
 
				+class BaseModelAdmin(DjangoObjectActions, admin.ModelAdmin):
			
 
				+    list_display = ('id', 'created_at', 'created_by')
			
 
				+    readonly_fields = ('id', 'created_at', 'modified_at')
			
 
				 
			
 
				     def get_form(self, request, obj=None, **kwargs):
			
 
				-        self.request = request
			
 
				         form = super().get_form(request, obj, **kwargs)
			
 
				         if 'created_by' in form.base_fields:
			
 
				             form.base_fields['created_by'].initial = request.user
			
 
				-            
			
 
				-        if obj:
			
 
				-            if obj.ABID_FRESH_DIFFS:
			
 
				-                messages.warning(request, "The ABID is not in sync with the object! See the API Identifiers section below for more info...")
			
 
				-
			
 
				         return form
			
 
				-
			
 
				-    def get_formset(self, request, formset=None, obj=None, **kwargs):
			
 
				-        formset = super().get_formset(request, formset, obj, **kwargs)                  # type: ignore
			
 
				-        formset.form.base_fields['created_at'].disabled = True
			
 
				-        
			
 
				-        return formset
			
 
				-
			
 
				-    def save_model(self, request, obj, form, change):
			
 
				-        self.request = request
			
 
				-
			
 
				-        old_abid = getattr(obj, '_previous_abid', None) or obj.abid
			
 
				-
			
 
				-        super().save_model(request, obj, form, change)
			
 
				-        obj.refresh_from_db()
			
 
				-
			
 
				-        new_abid = obj.abid
			
 
				-        if new_abid != old_abid:
			
 
				-            messages.warning(request, f"The object's ABID has been updated! {old_abid} -> {new_abid} (any external references to the old ABID will need to be updated manually)")
			
 
				-        # import ipdb; ipdb.set_trace()
			
 
				-
			
 
				-    @admin.display(description='API Identifiers')
			
 
				-    def abid_info(self, obj):
			
 
				-        return get_abid_info(self, obj, request=self.request)
			
 
				-
			
 
				-    @action(label="Regenerate ABID", description="Re-Generate the ABID based on fresh values")
			
 
				-    def regenerate_abid(self, request, obj):
			
 
				-        old_abid = str(obj.abid)
			
 
				-        obj.abid = obj.issue_new_abid(overwrite=True)
			
 
				-        obj.save()
			
 
				-        obj.refresh_from_db()
			
 
				-        new_abid = str(obj.abid)
			
 
				-
			
 
				-        if new_abid != old_abid:
			
 
				-            messages.warning(request, f"The object's ABID has been updated! {old_abid} -> {new_abid} (any external references to the old ABID will need to be updated manually)")
			
 
				-        else:
			
 
				-            messages.success(request, "The ABID was not regenerated, it is already up-to-date with the object.")
			
--- a/archivebox/base_models/models.py
+++ b/archivebox/base_models/models.py
--- a/archivebox/cli/archivebox_extract.py
+++ b/archivebox/cli/archivebox_extract.py
@@ -22,7 +22,7 @@ ORCHESTRATOR = None
 
				 
			
 
				 @enforce_types
			
 
				 def extract(archiveresult_id: str) -> Generator['ArchiveResult', None, None]:
			
 
				-    archiveresult = ArchiveResult.objects.get(Q(id=archiveresult_id) | Q(abid=archiveresult_id))
			
 
				+    archiveresult = ArchiveResult.objects.get(id=archiveresult_id)
			
 
				     if not archiveresult:
			
 
				         raise Exception(f'ArchiveResult {archiveresult_id} not found')
			
 
				     
			
--- a/archivebox/core/admin_archiveresults.py
+++ b/archivebox/core/admin_archiveresults.py
@@ -16,7 +16,7 @@ import abx
 
				 from archivebox.config import DATA_DIR
			
 
				 from archivebox.config.common import SERVER_CONFIG
			
 
				 from archivebox.misc.paginators import AccelleratedPaginator
			
 
				-from archivebox.base_models.admin import ABIDModelAdmin
			
 
				+from archivebox.base_models.admin import BaseModelAdmin
			
 
				 
			
 
				 
			
 
				 from core.models import ArchiveResult, Snapshot
			
@@ -50,7 +50,7 @@ class ArchiveResultInline(admin.TabularInline):
 
				         try:
			
 
				             return self.parent_model.objects.get(pk=resolved.kwargs['object_id'])
			
 
				         except (self.parent_model.DoesNotExist, ValidationError):
			
 
				-            return self.parent_model.objects.get(pk=self.parent_model.id_from_abid(resolved.kwargs['object_id']))
			
 
				+            return None
			
 
				 
			
 
				     @admin.display(
			
 
				         description='Completed',
			
@@ -60,7 +60,7 @@ class ArchiveResultInline(admin.TabularInline):
 
				         return format_html('<p style="white-space: nowrap">{}</p>', obj.end_ts.strftime('%Y-%m-%d %H:%M:%S'))
			
 
				 
			
 
				     def result_id(self, obj):
			
 
				-        return format_html('<a href="{}"><code style="font-size: 10px">[{}]</code></a>', reverse('admin:core_archiveresult_change', args=(obj.id,)), obj.abid)
			
 
				+        return format_html('<a href="{}"><code style="font-size: 10px">[{}]</code></a>', reverse('admin:core_archiveresult_change', args=(obj.id,)), str(obj.id)[:8])
			
 
				     
			
 
				     def command(self, obj):
			
 
				         return format_html('<small><code>{}</code></small>', " ".join(obj.cmd or []))
			
@@ -103,11 +103,11 @@ class ArchiveResultInline(admin.TabularInline):
 
				 
			
 
				 
			
 
				 
			
 
				-class ArchiveResultAdmin(ABIDModelAdmin):
			
 
				-    list_display = ('abid', 'created_by', 'created_at', 'snapshot_info', 'tags_str', 'status', 'extractor', 'cmd_str', 'output_str')
			
 
				-    sort_fields = ('abid', 'created_by', 'created_at', 'extractor', 'status')
			
 
				-    readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'abid_info', 'output_summary')
			
 
				-    search_fields = ('id', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
			
 
				+class ArchiveResultAdmin(BaseModelAdmin):
			
 
				+    list_display = ('id', 'created_by', 'created_at', 'snapshot_info', 'tags_str', 'status', 'extractor', 'cmd_str', 'output_str')
			
 
				+    sort_fields = ('id', 'created_by', 'created_at', 'extractor', 'status')
			
 
				+    readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'output_summary')
			
 
				+    search_fields = ('id', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
			
 
				     fields = ('snapshot', 'extractor', 'status', 'retry_at', 'start_ts', 'end_ts', 'created_by', 'pwd', 'cmd_version', 'cmd', 'output', *readonly_fields)
			
 
				     autocomplete_fields = ['snapshot']
			
 
				 
			
@@ -135,7 +135,7 @@ class ArchiveResultAdmin(ABIDModelAdmin):
 
				         return format_html(
			
 
				             '<a href="/archive/{}/index.html"><b><code>[{}]</code></b> &nbsp; {} &nbsp; {}</a><br/>',
			
 
				             result.snapshot.timestamp,
			
 
				-            result.snapshot.abid,
			
 
				+            str(result.snapshot.id)[:8],
			
 
				             result.snapshot.bookmarked_at.strftime('%Y-%m-%d %H:%M'),
			
 
				             result.snapshot.url[:128],
			
 
				         )
			
--- a/archivebox/core/admin_snapshots.py
+++ b/archivebox/core/admin_snapshots.py
@@ -22,7 +22,7 @@ from archivebox.search.admin import SearchResultsAdminMixin
 
				 from archivebox.index.html import snapshot_icons
			
 
				 from archivebox.extractors import archive_links
			
 
				 
			
 
				-from archivebox.base_models.admin import ABIDModelAdmin
			
 
				+from archivebox.base_models.admin import BaseModelAdmin
			
 
				 from archivebox.workers.tasks import bg_archive_links, bg_add
			
 
				 
			
 
				 from core.models import Tag
			
@@ -53,11 +53,11 @@ class SnapshotActionForm(ActionForm):
 
				     # )
			
 
				 
			
 
				 
			
 
				-class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
			
 
				+class SnapshotAdmin(SearchResultsAdminMixin, BaseModelAdmin):
			
 
				     list_display = ('created_at', 'title_str', 'status', 'files', 'size', 'url_str')
			
 
				     sort_fields = ('title_str', 'url_str', 'created_at', 'status', 'crawl')
			
 
				-    readonly_fields = ('admin_actions', 'status_info', 'tags_str', 'imported_timestamp', 'created_at', 'modified_at', 'downloaded_at', 'abid_info', 'link_dir')
			
 
				-    search_fields = ('id', 'url', 'abid', 'timestamp', 'title', 'tags__name')
			
 
				+    readonly_fields = ('admin_actions', 'status_info', 'tags_str', 'imported_timestamp', 'created_at', 'modified_at', 'downloaded_at', 'link_dir')
			
 
				+    search_fields = ('id', 'url', 'timestamp', 'title', 'tags__name')
			
 
				     list_filter = ('created_at', 'downloaded_at', 'archiveresult__status', 'created_by', 'tags__name')
			
 
				     fields = ('url', 'title', 'created_by', 'bookmarked_at', 'status', 'retry_at', 'crawl', *readonly_fields)
			
 
				     ordering = ['-created_at']
			
--- a/archivebox/core/admin_tags.py
+++ b/archivebox/core/admin_tags.py
@@ -6,7 +6,7 @@ from django.utils.html import format_html, mark_safe
 
				 import abx
			
 
				 
			
 
				 from archivebox.misc.paginators import AccelleratedPaginator
			
 
				-from archivebox.base_models.admin import ABIDModelAdmin
			
 
				+from archivebox.base_models.admin import BaseModelAdmin
			
 
				 
			
 
				 from core.models import Tag
			
 
				 
			
@@ -47,12 +47,12 @@ class TagInline(admin.TabularInline):
 
				 #         return format_html('<a href="/admin/{}/{}/{}/change"><b>[{}]</b></a>', obj._meta.app_label, obj._meta.model_name, obj.pk, str(obj))
			
 
				 
			
 
				     
			
 
				-class TagAdmin(ABIDModelAdmin):
			
 
				-    list_display = ('created_at', 'created_by', 'abid', 'name', 'num_snapshots', 'snapshots')
			
 
				+class TagAdmin(BaseModelAdmin):
			
 
				+    list_display = ('created_at', 'created_by', 'id', 'name', 'num_snapshots', 'snapshots')
			
 
				     list_filter = ('created_at', 'created_by')
			
 
				-    sort_fields = ('name', 'slug', 'abid', 'created_by', 'created_at')
			
 
				-    readonly_fields = ('slug', 'abid', 'created_at', 'modified_at', 'abid_info', 'snapshots')
			
 
				-    search_fields = ('abid', 'name', 'slug')
			
 
				+    sort_fields = ('name', 'slug', 'id', 'created_by', 'created_at')
			
 
				+    readonly_fields = ('slug', 'id', 'created_at', 'modified_at', 'snapshots')
			
 
				+    search_fields = ('id', 'name', 'slug')
			
 
				     fields = ('name', 'created_by', *readonly_fields)
			
 
				     actions = ['delete_selected', 'merge_tags']
			
 
				     ordering = ['-created_at']
			
--- a/archivebox/core/admin_users.py
+++ b/archivebox/core/admin_users.py
@@ -21,7 +21,7 @@ class CustomUserAdmin(UserAdmin):
 
				             format_html(
			
 
				                 '<code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a></code> <b>📅 {}</b> {}',
			
 
				                 snap.pk,
			
 
				-                snap.abid,
			
 
				+                str(snap.id)[:8],
			
 
				                 snap.downloaded_at.strftime('%Y-%m-%d %H:%M') if snap.downloaded_at else 'pending...',
			
 
				                 snap.url[:64],
			
 
				             )
			
@@ -35,7 +35,7 @@ class CustomUserAdmin(UserAdmin):
 
				             format_html(
			
 
				                 '<code><a href="/admin/core/archiveresult/{}/change"><b>[{}]</b></a></code> <b>📅 {}</b> <b>📄 {}</b> {}',
			
 
				                 result.pk,
			
 
				-                result.abid,
			
 
				+                str(result.id)[:8],
			
 
				                 result.snapshot.downloaded_at.strftime('%Y-%m-%d %H:%M') if result.snapshot.downloaded_at else 'pending...',
			
 
				                 result.extractor,
			
 
				                 result.snapshot.url[:64],
			
@@ -62,7 +62,7 @@ class CustomUserAdmin(UserAdmin):
 
				             format_html(
			
 
				                 '<code><a href="/admin/api/apitoken/{}/change"><b>[{}]</b></a></code> {} (expires {})',
			
 
				                 apitoken.pk,
			
 
				-                apitoken.abid,
			
 
				+                str(apitoken.id)[:8],
			
 
				                 apitoken.token_redacted[:64],
			
 
				                 apitoken.expires,
			
 
				             )
			
@@ -76,7 +76,7 @@ class CustomUserAdmin(UserAdmin):
 
				             format_html(
			
 
				                 '<code><a href="/admin/api/outboundwebhook/{}/change"><b>[{}]</b></a></code> {} -> {}',
			
 
				                 outboundwebhook.pk,
			
 
				-                outboundwebhook.abid,
			
 
				+                str(outboundwebhook.id)[:8],
			
 
				                 outboundwebhook.referenced_model,
			
 
				                 outboundwebhook.endpoint,
			
 
				             )
			
--- a/archivebox/core/models.py
+++ b/archivebox/core/models.py
@@ -1,27 +1,23 @@
 
				 __package__ = 'archivebox.core'
			
 
				 
			
 
				-
			
 
				 from typing import Optional, Dict, Iterable, Any
			
 
				+from uuid import uuid7
			
 
				 from django_stubs_ext.db.models import TypedModelMeta
			
 
				 
			
 
				 import os
			
 
				 import json
			
 
				-
			
 
				 from pathlib import Path
			
 
				 
			
 
				 from django.db import models
			
 
				-from django.db.models import QuerySet
			
 
				-from django.core.validators import MinValueValidator, MaxValueValidator
			
 
				+from django.db.models import QuerySet, Value, Case, When, IntegerField
			
 
				 from django.utils.functional import cached_property
			
 
				 from django.utils.text import slugify
			
 
				 from django.utils import timezone
			
 
				 from django.core.cache import cache
			
 
				 from django.urls import reverse, reverse_lazy
			
 
				-from django.db.models import Case, When, IntegerField
			
 
				 from django.contrib import admin
			
 
				 from django.conf import settings
			
 
				 
			
 
				-
			
 
				 import abx
			
 
				 
			
 
				 from archivebox.config import CONSTANTS
			
@@ -32,46 +28,25 @@ from archivebox.index.schema import Link
 
				 from archivebox.index.html import snapshot_icons
			
 
				 from archivebox.extractors import ARCHIVE_METHODS_INDEXING_PRECEDENCE
			
 
				 from archivebox.base_models.models import (
			
 
				-    ABIDModel, ABIDField, AutoDateTimeField, get_or_create_system_user_pk,
			
 
				-    ModelWithReadOnlyFields, ModelWithSerializers, ModelWithUUID, ModelWithKVTags,  # ModelWithStateMachine
			
 
				-    ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHealthStats
			
 
				+    ModelWithUUID, ModelWithSerializers, ModelWithOutputDir,
			
 
				+    ModelWithConfig, ModelWithNotes, ModelWithHealthStats,
			
 
				+    get_or_create_system_user_pk,
			
 
				 )
			
 
				 from workers.models import ModelWithStateMachine
			
 
				 from workers.tasks import bg_archive_snapshot
			
 
				-from tags.models import KVTag
			
 
				-# from machine.models import Machine, NetworkInterface
			
 
				-
			
 
				-from crawls.models import Seed, Crawl, CrawlSchedule
			
 
				-
			
 
				-
			
 
				-class Tag(ModelWithReadOnlyFields, ModelWithSerializers, ModelWithUUID, ABIDModel):
			
 
				-    """
			
 
				-    Old tag model, loosely based on django-taggit model + ABID base.
			
 
				-    
			
 
				-    Being phazed out in favor of archivebox.tags.models.ATag
			
 
				-    """
			
 
				-    abid_prefix = 'tag_'
			
 
				-    abid_ts_src = 'self.created_at'
			
 
				-    abid_uri_src = 'self.slug'
			
 
				-    abid_subtype_src = '"03"'
			
 
				-    abid_rand_src = 'self.id'
			
 
				-    abid_drift_allowed = True
			
 
				-    
			
 
				-    read_only_fields = ('id', 'abid', 'created_at', 'created_by', 'slug')
			
 
				-
			
 
				-    id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
			
 
				-    abid = ABIDField(prefix=abid_prefix)
			
 
				+from crawls.models import Crawl
			
 
				+from machine.models import NetworkInterface
			
 
				+
			
 
				 
			
 
				+class Tag(ModelWithSerializers):
			
 
				+    id = models.UUIDField(primary_key=True, default=uuid7, editable=False, unique=True)
			
 
				     created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk, null=False, related_name='tag_set')
			
 
				-    created_at = AutoDateTimeField(default=None, null=False, db_index=True)
			
 
				+    created_at = models.DateTimeField(default=timezone.now, db_index=True)
			
 
				     modified_at = models.DateTimeField(auto_now=True)
			
 
				-
			
 
				     name = models.CharField(unique=True, blank=False, max_length=100)
			
 
				     slug = models.SlugField(unique=True, blank=False, max_length=100, editable=False)
			
 
				-    # slug is autoset on save from name, never set it manually
			
 
				 
			
 
				     snapshot_set: models.Manager['Snapshot']
			
 
				-    # crawl_set: models.Manager['Crawl']
			
 
				 
			
 
				     class Meta(TypedModelMeta):
			
 
				         verbose_name = "Tag"
			
@@ -80,52 +55,26 @@ class Tag(ModelWithReadOnlyFields, ModelWithSerializers, ModelWithUUID, ABIDMode
 
				     def __str__(self):
			
 
				         return self.name
			
 
				 
			
 
				-    def slugify(self, tag, i=None):
			
 
				-        slug = slugify(tag)
			
 
				-        if i is not None:
			
 
				-            slug += "_%d" % i
			
 
				-        return slug
			
 
				-    
			
 
				-    def clean(self, *args, **kwargs):
			
 
				-        self.slug = self.slug or self.slugify(self.name)
			
 
				-        super().clean(*args, **kwargs)
			
 
				-
			
 
				     def save(self, *args, **kwargs):
			
 
				         if self._state.adding:
			
 
				-            self.slug = self.slugify(self.name)
			
 
				-
			
 
				-            # if name is different but slug conficts with another tags slug, append a counter
			
 
				-            # with transaction.atomic():
			
 
				-            slugs = set(
			
 
				-                type(self)
			
 
				-                ._default_manager.filter(slug__startswith=self.slug)
			
 
				-                .values_list("slug", flat=True)
			
 
				-            )
			
 
				-
			
 
				+            self.slug = slugify(self.name)
			
 
				+            existing = set(Tag.objects.filter(slug__startswith=self.slug).values_list("slug", flat=True))
			
 
				             i = None
			
 
				             while True:
			
 
				-                slug = self.slugify(self.name, i)
			
 
				-                if slug not in slugs:
			
 
				+                slug = f"{slugify(self.name)}_{i}" if i else slugify(self.name)
			
 
				+                if slug not in existing:
			
 
				                     self.slug = slug
			
 
				-                    return super().save(*args, **kwargs)
			
 
				-                i = 1 if i is None else i+1
			
 
				-        else:
			
 
				-            return super().save(*args, **kwargs)
			
 
				-        
			
 
				-    @property
			
 
				-    def api_url(self) -> str:
			
 
				-        # /api/v1/core/snapshot/{uulid}
			
 
				-        return reverse_lazy('api-1:get_tag', args=[self.abid])  # + f'?api_key={get_or_create_api_token(request.user)}'
			
 
				+                    break
			
 
				+                i = (i or 0) + 1
			
 
				+        super().save(*args, **kwargs)
			
 
				 
			
 
				     @property
			
 
				-    def api_docs_url(self) -> str:
			
 
				-        return '/api/v1/docs#/Core%20Models/api_v1_core_get_tag'
			
 
				-
			
 
				+    def api_url(self) -> str:
			
 
				+        return reverse_lazy('api-1:get_tag', args=[self.id])
			
 
				 
			
 
				 
			
 
				 class SnapshotTag(models.Model):
			
 
				     id = models.AutoField(primary_key=True)
			
 
				-
			
 
				     snapshot = models.ForeignKey('Snapshot', db_column='snapshot_id', on_delete=models.CASCADE, to_field='id')
			
 
				     tag = models.ForeignKey(Tag, db_column='tag_id', on_delete=models.CASCADE, to_field='id')
			
 
				 
			
@@ -134,636 +83,209 @@ class SnapshotTag(models.Model):
 
				         unique_together = [('snapshot', 'tag')]
			
 
				 
			
 
				 
			
 
				-
			
 
				-def validate_timestamp(value):
			
 
				-    assert isinstance(value, str) and value, f'timestamp must be a non-empty string, got: "{value}"'
			
 
				-    assert value.replace('.', '').isdigit(), f'timestamp must be a float str, got: "{value}"'
			
 
				-
			
 
				 class SnapshotManager(models.Manager):
			
 
				     def filter(self, *args, **kwargs):
			
 
				-        """add support for .filter(domain='example.com') to Snapshot queryset"""
			
 
				         domain = kwargs.pop('domain', None)
			
 
				         qs = super().filter(*args, **kwargs)
			
 
				         if domain:
			
 
				             qs = qs.filter(url__icontains=f'://{domain}')
			
 
				         return qs
			
 
				-    
			
 
				+
			
 
				     def get_queryset(self):
			
 
				-        return (
			
 
				-            super().get_queryset()
			
 
				-                .prefetch_related('tags', 'archiveresult_set') 
			
 
				-                # .annotate(archiveresult_count=models.Count('archiveresult')).distinct()
			
 
				-        )
			
 
				-
			
 
				-
			
 
				-class Snapshot(
			
 
				-    ModelWithReadOnlyFields,
			
 
				-    ModelWithSerializers,
			
 
				-    ModelWithUUID,
			
 
				-    ModelWithKVTags,
			
 
				-    ABIDModel,
			
 
				-    ModelWithOutputDir,
			
 
				-    ModelWithConfig,
			
 
				-    ModelWithNotes,
			
 
				-    ModelWithHealthStats,
			
 
				-    ModelWithStateMachine,
			
 
				-):
			
 
				-    
			
 
				-    ### ModelWithSerializers
			
 
				-    # cls.from_dict() -> Self
			
 
				-    # self.as_json() -> dict[str, Any]
			
 
				-    # self.as_jsonl_row() -> str
			
 
				-    # self.as_csv_row() -> str
			
 
				-    # self.as_html_icon(), .as_html_embed(), .as_html_row(), ...
			
 
				-    
			
 
				-    ### ModelWithReadOnlyFields
			
 
				-    read_only_fields = ('id', 'abid', 'created_at', 'created_by_id', 'url', 'timestamp', 'bookmarked_at', 'crawl_id')
			
 
				-    
			
 
				-    ### Immutable fields:
			
 
				-    id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
			
 
				-    abid = ABIDField(prefix=abid_prefix)
			
 
				+        return super().get_queryset().prefetch_related('tags', 'archiveresult_set')
			
 
				+
			
 
				+
			
 
				+class Snapshot(ModelWithSerializers, ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHealthStats, ModelWithStateMachine):
			
 
				+    id = models.UUIDField(primary_key=True, default=uuid7, editable=False, unique=True)
			
 
				     created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False, related_name='snapshot_set', db_index=True)
			
 
				-    created_at = AutoDateTimeField(default=None, null=False, db_index=True)  # loaded from self._init_timestamp
			
 
				-    
			
 
				+    created_at = models.DateTimeField(default=timezone.now, db_index=True)
			
 
				+    modified_at = models.DateTimeField(auto_now=True)
			
 
				+
			
 
				     url = models.URLField(unique=True, db_index=True)
			
 
				-    timestamp = models.CharField(max_length=32, unique=True, db_index=True, editable=False, validators=[validate_timestamp])
			
 
				-    bookmarked_at = AutoDateTimeField(default=None, null=False, editable=True, db_index=True)
			
 
				+    timestamp = models.CharField(max_length=32, unique=True, db_index=True, editable=False)
			
 
				+    bookmarked_at = models.DateTimeField(default=timezone.now, db_index=True)
			
 
				     crawl: Crawl = models.ForeignKey(Crawl, on_delete=models.CASCADE, default=None, null=True, blank=True, related_name='snapshot_set', db_index=True)  # type: ignore
			
 
				-    
			
 
				-    ### Mutable fields:
			
 
				+
			
 
				     title = models.CharField(max_length=512, null=True, blank=True, db_index=True)
			
 
				     downloaded_at = models.DateTimeField(default=None, null=True, editable=False, db_index=True, blank=True)
			
 
				-    modified_at = models.DateTimeField(auto_now=True)
			
 
				-    
			
 
				-    ### ModelWithStateMachine
			
 
				+
			
 
				     retry_at = ModelWithStateMachine.RetryAtField(default=timezone.now)
			
 
				-    status = ModelWithStateMachine.StatusField(choices=StatusChoices, default=StatusChoices.QUEUED)
			
 
				-    
			
 
				-    ### ModelWithConfig
			
 
				+    status = ModelWithStateMachine.StatusField(choices=ModelWithStateMachine.StatusChoices, default=ModelWithStateMachine.StatusChoices.QUEUED)
			
 
				     config = models.JSONField(default=dict, null=False, blank=False, editable=True)
			
 
				-    
			
 
				-    ### ModelWithNotes
			
 
				-    notes = models.TextField(blank=True, null=False, default='', help_text='Any extra notes this snapshot should have')
			
 
				-
			
 
				-    ### ModelWithOutputDir
			
 
				+    notes = models.TextField(blank=True, null=False, default='')
			
 
				     output_dir = models.FilePathField(path=CONSTANTS.ARCHIVE_DIR, recursive=True, match='.*', default=None, null=True, blank=True, editable=True)
			
 
				-    # self.output_dir_parent -> str 'archive/snapshots/<YYYY-MM-DD>/<example.com>'
			
 
				-    # self.output_dir_name -> '<abid>'
			
 
				-    # self.output_dir_str -> 'archive/snapshots/<YYYY-MM-DD>/<example.com>/<abid>'
			
 
				-    # self.OUTPUT_DIR -> Path('/data/archive/snapshots/<YYYY-MM-DD>/<example.com>/<abid>')
			
 
				-    # self.save(): creates OUTPUT_DIR, writes index.json, writes indexes
			
 
				-    
			
 
				-    # old-style tags (dedicated ManyToMany Tag model above):
			
 
				+
			
 
				     tags = models.ManyToManyField(Tag, blank=True, through=SnapshotTag, related_name='snapshot_set', through_fields=('snapshot', 'tag'))
			
 
				-    
			
 
				-    # new-style tags (new key-value tags defined by tags.models.KVTag & ModelWithKVTags):
			
 
				-    kvtag_set = tag_set = GenericRelation(
			
 
				-        KVTag,
			
 
				-        related_query_name="snapshot",
			
 
				-        content_type_field="obj_type",
			
 
				-        object_id_field="obj_id",
			
 
				-        order_by=('created_at',),
			
 
				-    )
			
 
				-    
			
 
				-    ### ABIDModel
			
 
				-    abid_prefix = 'snp_'
			
 
				-    abid_ts_src = 'self.created_at'
			
 
				-    abid_uri_src = 'self.url'
			
 
				-    abid_subtype_src = '"01"'
			
 
				-    abid_rand_src = 'self.id'
			
 
				-    abid_drift_allowed = True
			
 
				-    # self.clean() -> sets self._timestamp
			
 
				-    # self.save() -> issues new ABID if creating new, otherwise uses existing ABID
			
 
				-    # self.ABID -> ABID
			
 
				-    # self.api_url -> '/api/v1/core/snapshot/{uulid}'
			
 
				-    # self.api_docs_url -> '/api/v1/docs#/Core%20Models/api_v1_core_get_snapshot'
			
 
				-    # self.admin_change_url -> '/admin/core/snapshot/{pk}/change/'
			
 
				-    # self.get_absolute_url() -> '/{self.archive_path}'
			
 
				-    # self.update_for_workers() -> bool
			
 
				-    
			
 
				-    ### ModelWithStateMachine
			
 
				+
			
 
				     state_machine_name = 'core.statemachines.SnapshotMachine'
			
 
				     state_field_name = 'status'
			
 
				     retry_at_field_name = 'retry_at'
			
 
				     StatusChoices = ModelWithStateMachine.StatusChoices
			
 
				     active_state = StatusChoices.STARTED
			
 
				-    
			
 
				-    ### Relations & Managers
			
 
				+
			
 
				     objects = SnapshotManager()
			
 
				     archiveresult_set: models.Manager['ArchiveResult']
			
 
				-    
			
 
				+
			
 
				+    class Meta(TypedModelMeta):
			
 
				+        verbose_name = "Snapshot"
			
 
				+        verbose_name_plural = "Snapshots"
			
 
				+
			
 
				+    def __str__(self):
			
 
				+        return f'[{self.id}] {self.url[:64]}'
			
 
				+
			
 
				     def save(self, *args, **kwargs):
			
 
				-        print(f'Snapshot[{self.ABID}].save()')
			
 
				-        if self.pk:
			
 
				-            existing_snapshot = self.__class__.objects.filter(pk=self.pk).first()
			
 
				-            if existing_snapshot and existing_snapshot.status == self.StatusChoices.SEALED:
			
 
				-                if self.as_json() != existing_snapshot.as_json():
			
 
				-                    raise Exception(f'Snapshot {self.pk} is already sealed, it cannot be modified any further. NEW: {self.as_json()} != Existing: {existing_snapshot.as_json()}')
			
 
				-        
			
 
				         if not self.bookmarked_at:
			
 
				-            self.bookmarked_at = self.created_at or self._init_timestamp
			
 
				-            
			
 
				+            self.bookmarked_at = self.created_at or timezone.now()
			
 
				         if not self.timestamp:
			
 
				             self.timestamp = str(self.bookmarked_at.timestamp())
			
 
				-
			
 
				         super().save(*args, **kwargs)
			
 
				-        
			
 
				-        # make sure the crawl has this url in its urls log
			
 
				         if self.crawl and self.url not in self.crawl.urls:
			
 
				             self.crawl.urls += f'\n{self.url}'
			
 
				             self.crawl.save()
			
 
				-            
			
 
				-            
			
 
				+
			
 
				     def output_dir_parent(self) -> str:
			
 
				         return 'archive'
			
 
				-    
			
 
				+
			
 
				     def output_dir_name(self) -> str:
			
 
				         return str(self.timestamp)
			
 
				 
			
 
				     def archive(self, overwrite=False, methods=None):
			
 
				-        result = bg_archive_snapshot(self, overwrite=overwrite, methods=methods)
			
 
				-        return result
			
 
				-
			
 
				-    def __repr__(self) -> str:
			
 
				-        url = self.url or '<no url set>'
			
 
				-        created_at = self.created_at.strftime("%Y-%m-%d %H:%M") if self.created_at else '<no timestamp set>'
			
 
				-        if self.id and self.url:
			
 
				-            return f'[{self.ABID}] {url[:64]} @ {created_at}'
			
 
				-        return f'[{self.abid_prefix}****not*saved*yet****] {url[:64]} @ {created_at}'
			
 
				-
			
 
				-    def __str__(self) -> str:
			
 
				-        return repr(self)
			
 
				-
			
 
				-    @classmethod
			
 
				-    def from_json(cls, fields: dict[str, Any]) -> Self:
			
 
				-        # print('LEGACY from_json()')
			
 
				-        return cls.from_dict(fields)
			
 
				-
			
 
				-    def as_json(self, *args, **kwargs) -> dict:
			
 
				-        json_dict = super().as_json(*args, **kwargs)
			
 
				-        if 'tags' in json_dict:
			
 
				-            json_dict['tags'] = self.tags_str(nocache=False)
			
 
				-        return json_dict
			
 
				+        return bg_archive_snapshot(self, overwrite=overwrite, methods=methods)
			
 
				 
			
 
				     def as_link(self) -> Link:
			
 
				         return Link.from_json(self.as_json())
			
 
				 
			
 
				-    def as_link_with_details(self) -> Link:
			
 
				-        from ..index import load_link_details
			
 
				-        return load_link_details(self.as_link())
			
 
				-
			
 
				     @admin.display(description='Tags')
			
 
				     def tags_str(self, nocache=True) -> str | None:
			
 
				         calc_tags_str = lambda: ','.join(sorted(tag.name for tag in self.tags.all()))
			
 
				-        cache_key = f'{self.pk}-{(self.downloaded_at or self.bookmarked_at).timestamp()}-tags'
			
 
				-        
			
 
				         if hasattr(self, '_prefetched_objects_cache') and 'tags' in self._prefetched_objects_cache:
			
 
				-            # tags are pre-fetched already, use them directly (best because db is always freshest)
			
 
				-            tags_str = calc_tags_str()
			
 
				-            return tags_str
			
 
				-        
			
 
				-        if nocache:
			
 
				-            tags_str = calc_tags_str()
			
 
				-            cache.set(cache_key, tags_str)
			
 
				-            return tags_str
			
 
				-        return cache.get_or_set(cache_key, calc_tags_str)
			
 
				+            return calc_tags_str()
			
 
				+        cache_key = f'{self.pk}-tags'
			
 
				+        return cache.get_or_set(cache_key, calc_tags_str) if not nocache else calc_tags_str()
			
 
				 
			
 
				     def icons(self) -> str:
			
 
				         return snapshot_icons(self)
			
 
				-    
			
 
				+
			
 
				     @property
			
 
				     def api_url(self) -> str:
			
 
				-        # /api/v1/core/snapshot/{uulid}
			
 
				-        return reverse_lazy('api-1:get_snapshot', args=[self.abid])  # + f'?api_key={get_or_create_api_token(request.user)}'
			
 
				-    
			
 
				-    @property
			
 
				-    def api_docs_url(self) -> str:
			
 
				-        return '/api/v1/docs#/Core%20Models/api_v1_core_get_snapshot'
			
 
				-    
			
 
				+        return reverse_lazy('api-1:get_snapshot', args=[self.id])
			
 
				+
			
 
				     def get_absolute_url(self):
			
 
				         return f'/{self.archive_path}'
			
 
				-    
			
 
				-    @cached_property
			
 
				-    def title_stripped(self) -> str:
			
 
				-        return (self.title or '').replace("\n", " ").replace("\r", "")
			
 
				-
			
 
				-    @cached_property
			
 
				-    def extension(self) -> str:
			
 
				-        from archivebox.misc.util import extension
			
 
				-        return extension(self.url)
			
 
				 
			
 
				-    @cached_property
			
 
				-    def bookmarked(self):
			
 
				-        return parse_date(self.timestamp)
			
 
				-
			
 
				-    @cached_property
			
 
				-    def bookmarked_date(self):
			
 
				-        # TODO: remove this
			
 
				-        return self.bookmarked
			
 
				-    
			
 
				     @cached_property
			
 
				     def domain(self) -> str:
			
 
				         return url_domain(self.url)
			
 
				 
			
 
				-    @cached_property
			
 
				-    def is_archived(self):
			
 
				-        return self.as_link().is_archived
			
 
				-
			
 
				-    @cached_property
			
 
				-    def num_outputs(self) -> int:
			
 
				-        # DONT DO THIS: it will trigger a separate query for every snapshot
			
 
				-        # return self.archiveresult_set.filter(status='succeeded').count()
			
 
				-        # this is better:
			
 
				-        return sum((1 for result in self.archiveresult_set.all() if result.status == 'succeeded'))
			
 
				-
			
 
				-    @cached_property
			
 
				-    def base_url(self):
			
 
				-        return base_url(self.url)
			
 
				-
			
 
				     @cached_property
			
 
				     def link_dir(self):
			
 
				         return str(CONSTANTS.ARCHIVE_DIR / self.timestamp)
			
 
				 
			
 
				     @cached_property
			
 
				     def archive_path(self):
			
 
				-        return '{}/{}'.format(CONSTANTS.ARCHIVE_DIR_NAME, self.timestamp)
			
 
				+        return f'{CONSTANTS.ARCHIVE_DIR_NAME}/{self.timestamp}'
			
 
				 
			
 
				     @cached_property
			
 
				     def archive_size(self):
			
 
				-        cache_key = f'{str(self.pk)[:12]}-{(self.downloaded_at or self.bookmarked_at).timestamp()}-size'
			
 
				-
			
 
				-        def calc_dir_size():
			
 
				-            try:
			
 
				-                return get_dir_size(self.link_dir)[0]
			
 
				-            except Exception:
			
 
				-                return 0
			
 
				-
			
 
				-        return cache.get_or_set(cache_key, calc_dir_size)
			
 
				-
			
 
				-    @cached_property
			
 
				-    def thumbnail_url(self) -> Optional[str]:
			
 
				-        if hasattr(self, '_prefetched_objects_cache') and 'archiveresult_set' in self._prefetched_objects_cache:
			
 
				-            result = (sorted(
			
 
				-                (
			
 
				-                    result
			
 
				-                    for result in self.archiveresult_set.all()
			
 
				-                    if result.extractor == 'screenshot' and result.status =='succeeded' and result.output
			
 
				-                ),
			
 
				-                key=lambda result: result.created_at,
			
 
				-            ) or [None])[-1]
			
 
				-        else:
			
 
				-            result = self.archiveresult_set.filter(
			
 
				-                extractor='screenshot',
			
 
				-                status='succeeded'
			
 
				-            ).only('output').last()
			
 
				-
			
 
				-        if result:
			
 
				-            return reverse('Snapshot', args=[f'{str(self.timestamp)}/{result.output}'])
			
 
				-        return None
			
 
				-
			
 
				-    @cached_property
			
 
				-    def headers(self) -> Optional[Dict[str, str]]:
			
 
				         try:
			
 
				-            return json.loads((Path(self.link_dir) / 'headers.json').read_text(encoding='utf-8').strip())
			
 
				+            return get_dir_size(self.link_dir)[0]
			
 
				         except Exception:
			
 
				-            pass
			
 
				-        return None
			
 
				-
			
 
				-    @cached_property
			
 
				-    def status_code(self) -> Optional[str]:
			
 
				-        return self.headers.get('Status-Code') if self.headers else None
			
 
				-
			
 
				-    @cached_property
			
 
				-    def history(self) -> dict:
			
 
				-        # TODO: use ArchiveResult for this instead of json
			
 
				-        return self.as_link_with_details().history
			
 
				-
			
 
				-    @cached_property
			
 
				-    def latest_title(self) -> Optional[str]:
			
 
				-        if self.title:
			
 
				-            return self.title   # whoopdedoo that was easy
			
 
				-
			
 
				-        # check if ArchiveResult set has already been prefetched, if so use it instead of fetching it from db again
			
 
				-        if hasattr(self, '_prefetched_objects_cache') and 'archiveresult_set' in self._prefetched_objects_cache:
			
 
				-            try:
			
 
				-                return (sorted(
			
 
				-                    (
			
 
				-                        result.output.strip()
			
 
				-                        for result in self.archiveresult_set.all()
			
 
				-                        if result.extractor == 'title' and result.status =='succeeded' and result.output
			
 
				-                    ),
			
 
				-                    key=lambda title: len(title),
			
 
				-                ) or [None])[-1]
			
 
				-            except IndexError:
			
 
				-                pass
			
 
				-
			
 
				+            return 0
			
 
				 
			
 
				-        try:
			
 
				-            # take longest successful title from ArchiveResult db history
			
 
				-            return sorted(
			
 
				-                self.archiveresult_set\
			
 
				-                    .filter(extractor='title', status='succeeded', output__isnull=False)\
			
 
				-                    .values_list('output', flat=True),
			
 
				-                key=lambda r: len(r),
			
 
				-            )[-1]
			
 
				-        except IndexError:
			
 
				-            pass
			
 
				-
			
 
				-        try:
			
 
				-            # take longest successful title from Link json index file history
			
 
				-            return sorted(
			
 
				-                (
			
 
				-                    result.output.strip()
			
 
				-                    for result in self.history['title']
			
 
				-                    if result.status == 'succeeded' and result.output.strip()
			
 
				-                ),
			
 
				-                key=lambda r: len(r),
			
 
				-            )[-1]
			
 
				-        except (KeyError, IndexError):
			
 
				-            pass
			
 
				-
			
 
				-        return None
			
 
				-    
			
 
				-    def save_tags(self, tags: Iterable[str]=()) -> None:
			
 
				-        tags_id = []
			
 
				-        for tag in tags:
			
 
				-            if tag.strip():
			
 
				-                tags_id.append(Tag.objects.get_or_create(name=tag)[0].pk)
			
 
				+    def save_tags(self, tags: Iterable[str] = ()) -> None:
			
 
				+        tags_id = [Tag.objects.get_or_create(name=tag)[0].pk for tag in tags if tag.strip()]
			
 
				         self.tags.clear()
			
 
				         self.tags.add(*tags_id)
			
 
				-        
			
 
				+
			
 
				     def pending_archiveresults(self) -> QuerySet['ArchiveResult']:
			
 
				-        pending_archiveresults = self.archiveresult_set.exclude(status__in=ArchiveResult.FINAL_OR_ACTIVE_STATES)
			
 
				-        return pending_archiveresults
			
 
				-    
			
 
				+        return self.archiveresult_set.exclude(status__in=ArchiveResult.FINAL_OR_ACTIVE_STATES)
			
 
				+
			
 
				     def create_pending_archiveresults(self) -> list['ArchiveResult']:
			
 
				         ALL_EXTRACTORS = ['favicon', 'title', 'screenshot', 'headers', 'singlefile', 'dom', 'git', 'archive_org', 'readability', 'mercury', 'pdf', 'wget']
			
 
				-        
			
 
				-        # config = get_scope_config(snapshot=self)
			
 
				-        config = {'EXTRACTORS': ','.join(ALL_EXTRACTORS)}
			
 
				-        
			
 
				-        if config.get('EXTRACTORS', 'auto') == 'auto':
			
 
				-            EXTRACTORS = ALL_EXTRACTORS
			
 
				-        else:
			
 
				-            EXTRACTORS = config.get('EXTRACTORS', '').split(',')
			
 
				-        
			
 
				         archiveresults = []
			
 
				-        for extractor in EXTRACTORS:
			
 
				-            if not extractor:
			
 
				-                continue
			
 
				+        for extractor in ALL_EXTRACTORS:
			
 
				             if ArchiveResult.objects.filter(snapshot=self, extractor=extractor).exists():
			
 
				                 continue
			
 
				-            archiveresult, created = ArchiveResult.objects.get_or_create(
			
 
				-                snapshot=self,
			
 
				-                extractor=extractor,
			
 
				-                defaults={
			
 
				-                    'status': ArchiveResult.INITIAL_STATE,
			
 
				-                    'retry_at': timezone.now(),
			
 
				-                },
			
 
				+            archiveresult, _ = ArchiveResult.objects.get_or_create(
			
 
				+                snapshot=self, extractor=extractor,
			
 
				+                defaults={'status': ArchiveResult.INITIAL_STATE, 'retry_at': timezone.now()},
			
 
				             )
			
 
				             if archiveresult.status == ArchiveResult.INITIAL_STATE:
			
 
				                 archiveresults.append(archiveresult)
			
 
				         return archiveresults
			
 
				-    
			
 
				-
			
 
				-    # def migrate_output_dir(self):
			
 
				-    #     """Move the output files to the new folder structure if needed"""
			
 
				-    #     print(f'{self}.migrate_output_dir()')
			
 
				-    #     self.migrate_from_0_7_2()
			
 
				-    #     self.migrate_from_0_8_6()
			
 
				-    #     # ... future migrations here
			
 
				-    
			
 
				-    # def migrate_from_0_7_2(self):
			
 
				-    #     """Migrate the folder structure from 0.7.2 to the current version"""
			
 
				-    #     # migrate any existing output_dir into data/archiveresults/<extractor>/YYYY-MM-DD/<domain>/<abid>
			
 
				-    #     # create self.output_dir if it doesn't exist
			
 
				-    #     # move loose files in snapshot_dir into self.output_dir
			
 
				-    #     # update self.pwd = self.output_dir
			
 
				-    #     print(f'{self}.migrate_from_0_7_2()')
			
 
				-    
			
 
				-    # def migrate_from_0_8_6(self):
			
 
				-    #     """Migrate the folder structure from 0.8.6 to the current version"""
			
 
				-    #     # ... future migration code here ...
			
 
				-    #     print(f'{self}.migrate_from_0_8_6()')
			
 
				-            
			
 
				-    # def save_json_index(self):
			
 
				-    #     """Save the json index file to ./.index.json"""
			
 
				-    #     print(f'{self}.save_json_index()')
			
 
				-    #     pass
			
 
				-    
			
 
				-    # def save_symlinks_index(self):
			
 
				-    #     """Update the symlink farm idnexes to point to the new location of self.output_dir"""
			
 
				-    #     # ln -s self.output_dir data/index/results_by_type/wget/YYYY-MM-DD/example.com/<abid>
			
 
				-    #     # ln -s self.output_dir data/index/results_by_day/YYYY-MM-DD/example.com/wget/<abid>
			
 
				-    #     # ln -s self.output_dir data/index/results_by_domain/example.com/YYYY-MM-DD/wget/<abid>
			
 
				-    #     # ln -s self.output_dir data/index/results_by_abid/<abid>
			
 
				-    #     # ln -s self.output_dir data/archive/<snapshot_timestamp>/<extractor>
			
 
				-    #     print(f'{self}.save_symlinks_index()')
			
 
				-    
			
 
				-    # def save_html_index(self):
			
 
				-    #     """Save the html index file to ./.index.html"""
			
 
				-    #     print(f'{self}.save_html_index()')
			
 
				-    #     pass
			
 
				-
			
 
				-    # def save_merkle_index(self):
			
 
				-    #     """Calculate the recursive sha256 of all the files in the output path and save it to ./.checksum.json"""
			
 
				-    #     print(f'{self}.save_merkle_index()')
			
 
				-    #     pass
			
 
				-
			
 
				-    # def save_search_index(self):
			
 
				-    #     """Pass any indexable text to the search backend indexer (e.g. sonic, SQLiteFTS5, etc.)"""
			
 
				-    #     print(f'{self}.save_search_index()')
			
 
				-    #     pass
			
 
				-
			
 
				-    # def get_storage_dir(self, create=True, symlink=True) -> Path:
			
 
				-    #     date_str = self.bookmarked_at.strftime('%Y%m%d')
			
 
				-    #     domain_str = domain(self.url)
			
 
				-    #     abs_storage_dir = Path(CONSTANTS.ARCHIVE_DIR) / 'snapshots' / date_str / domain_str / str(self.ulid)
			
 
				-
			
 
				-    #     if create and not abs_storage_dir.is_dir():
			
 
				-    #         abs_storage_dir.mkdir(parents=True, exist_ok=True)
			
 
				-
			
 
				-    #     if symlink:
			
 
				-    #         LINK_PATHS = [
			
 
				-    #             Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid),
			
 
				-    #             # Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'snapshots_by_id' / str(self.ulid),
			
 
				-    #             Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'snapshots_by_date' / date_str / domain_str / str(self.ulid),
			
 
				-    #             Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'snapshots_by_domain' / domain_str / date_str / str(self.ulid),
			
 
				-    #         ]
			
 
				-    #         for link_path in LINK_PATHS:
			
 
				-    #             link_path.parent.mkdir(parents=True, exist_ok=True)
			
 
				-    #             try:
			
 
				-    #                 link_path.symlink_to(abs_storage_dir)
			
 
				-    #             except FileExistsError:
			
 
				-    #                 link_path.unlink()
			
 
				-    #                 link_path.symlink_to(abs_storage_dir)
			
 
				-
			
 
				-    #     return abs_storage_dir
			
 
				 
			
 
				 
			
 
				 class ArchiveResultManager(models.Manager):
			
 
				     def indexable(self, sorted: bool = True):
			
 
				-        """Return only ArchiveResults containing text suitable for full-text search (sorted in order of typical result quality)"""
			
 
				-
			
 
				-        INDEXABLE_METHODS = [ r[0] for r in ARCHIVE_METHODS_INDEXING_PRECEDENCE ]
			
 
				+        INDEXABLE_METHODS = [r[0] for r in ARCHIVE_METHODS_INDEXING_PRECEDENCE]
			
 
				         qs = self.get_queryset().filter(extractor__in=INDEXABLE_METHODS, status='succeeded')
			
 
				-
			
 
				         if sorted:
			
 
				-            precedence = [
			
 
				-                When(extractor=method, then=Value(precedence))
			
 
				-                for method, precedence in ARCHIVE_METHODS_INDEXING_PRECEDENCE
			
 
				-            ]
			
 
				-            qs = qs.annotate(
			
 
				-                indexing_precedence=Case(
			
 
				-                    *precedence,
			
 
				-                    default=Value(1000),
			
 
				-                    output_field=IntegerField()
			
 
				-                )
			
 
				-            ).order_by('indexing_precedence')
			
 
				+            precedence = [When(extractor=method, then=Value(p)) for method, p in ARCHIVE_METHODS_INDEXING_PRECEDENCE]
			
 
				+            qs = qs.annotate(indexing_precedence=Case(*precedence, default=Value(1000), output_field=IntegerField())).order_by('indexing_precedence')
			
 
				         return qs
			
 
				 
			
 
				 
			
 
				-class ArchiveResult(
			
 
				-    ModelWithReadOnlyFields, ModelWithSerializers, ModelWithUUID, ModelWithKVTags, ABIDModel,
			
 
				-    ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHealthStats, ModelWithStateMachine
			
 
				-):
			
 
				-    ### ABIDModel
			
 
				-    abid_prefix = 'res_'
			
 
				-    abid_ts_src = 'self.snapshot.created_at'
			
 
				-    abid_uri_src = 'self.snapshot.url'
			
 
				-    abid_subtype_src = 'self.extractor'
			
 
				-    abid_rand_src = 'self.id'
			
 
				-    abid_drift_allowed = True
			
 
				-    
			
 
				-    ### ModelWithStateMachine
			
 
				+class ArchiveResult(ModelWithSerializers, ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHealthStats, ModelWithStateMachine):
			
 
				     class StatusChoices(models.TextChoices):
			
 
				-        QUEUED = 'queued', 'Queued'                     # pending, initial
			
 
				-        STARTED = 'started', 'Started'                  # active
			
 
				-        
			
 
				-        BACKOFF = 'backoff', 'Waiting to retry'         # pending
			
 
				-        SUCCEEDED = 'succeeded', 'Succeeded'            # final
			
 
				-        FAILED = 'failed', 'Failed'                     # final
			
 
				-        SKIPPED = 'skipped', 'Skipped'                  # final
			
 
				-        
			
 
				-    state_machine_name = 'core.statemachines.ArchiveResultMachine'
			
 
				-    retry_at_field_name = 'retry_at'
			
 
				-    state_field_name = 'status'
			
 
				-    active_state = StatusChoices.STARTED
			
 
				-    
			
 
				+        QUEUED = 'queued', 'Queued'
			
 
				+        STARTED = 'started', 'Started'
			
 
				+        BACKOFF = 'backoff', 'Waiting to retry'
			
 
				+        SUCCEEDED = 'succeeded', 'Succeeded'
			
 
				+        FAILED = 'failed', 'Failed'
			
 
				+        SKIPPED = 'skipped', 'Skipped'
			
 
				+
			
 
				     EXTRACTOR_CHOICES = (
			
 
				-        ('htmltotext', 'htmltotext'),
			
 
				-        ('git', 'git'),
			
 
				-        ('singlefile', 'singlefile'),
			
 
				-        ('media', 'media'),
			
 
				-        ('archive_org', 'archive_org'),
			
 
				-        ('readability', 'readability'),
			
 
				-        ('mercury', 'mercury'),
			
 
				-        ('favicon', 'favicon'),
			
 
				-        ('pdf', 'pdf'),
			
 
				-        ('headers', 'headers'),
			
 
				-        ('screenshot', 'screenshot'),
			
 
				-        ('dom', 'dom'),
			
 
				-        ('title', 'title'),
			
 
				-        ('wget', 'wget'),
			
 
				+        ('htmltotext', 'htmltotext'), ('git', 'git'), ('singlefile', 'singlefile'), ('media', 'media'),
			
 
				+        ('archive_org', 'archive_org'), ('readability', 'readability'), ('mercury', 'mercury'),
			
 
				+        ('favicon', 'favicon'), ('pdf', 'pdf'), ('headers', 'headers'), ('screenshot', 'screenshot'),
			
 
				+        ('dom', 'dom'), ('title', 'title'), ('wget', 'wget'),
			
 
				     )
			
 
				-    
			
 
				-    ### ModelWithReadOnlyFields
			
 
				-    read_only_fields = ('id', 'abid', 'created_at', 'created_by', 'snapshot', 'extractor', 'pwd')
			
 
				-
			
 
				-    ### Immutable fields:
			
 
				-    id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
			
 
				-    abid = ABIDField(prefix=abid_prefix)
			
 
				 
			
 
				+    id = models.UUIDField(primary_key=True, default=uuid7, editable=False, unique=True)
			
 
				     created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False, related_name='archiveresult_set', db_index=True)
			
 
				-    created_at = AutoDateTimeField(default=None, null=False, db_index=True)
			
 
				-    
			
 
				-    snapshot: Snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE)   # type: ignore
			
 
				+    created_at = models.DateTimeField(default=timezone.now, db_index=True)
			
 
				+    modified_at = models.DateTimeField(auto_now=True)
			
 
				+
			
 
				+    snapshot: Snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE)  # type: ignore
			
 
				     extractor = models.CharField(choices=EXTRACTOR_CHOICES, max_length=32, blank=False, null=False, db_index=True)
			
 
				     pwd = models.CharField(max_length=256, default=None, null=True, blank=True)
			
 
				-    
			
 
				-
			
 
				-    ### Mutable fields:
			
 
				     cmd = models.JSONField(default=None, null=True, blank=True)
			
 
				-    modified_at = models.DateTimeField(auto_now=True)
			
 
				     cmd_version = models.CharField(max_length=128, default=None, null=True, blank=True)
			
 
				     output = models.CharField(max_length=1024, default=None, null=True, blank=True)
			
 
				     start_ts = models.DateTimeField(default=None, null=True, blank=True)
			
 
				     end_ts = models.DateTimeField(default=None, null=True, blank=True)
			
 
				-    
			
 
				-    ### ModelWithStateMachine
			
 
				+
			
 
				     status = ModelWithStateMachine.StatusField(choices=StatusChoices.choices, default=StatusChoices.QUEUED)
			
 
				     retry_at = ModelWithStateMachine.RetryAtField(default=timezone.now)
			
 
				-
			
 
				-    ### ModelWithNotes
			
 
				-    notes = models.TextField(blank=True, null=False, default='', help_text='Any extra notes this ArchiveResult should have')
			
 
				-
			
 
				-    ### ModelWithHealthStats
			
 
				-    # ...
			
 
				-
			
 
				-    ### ModelWithKVTags
			
 
				-    # tag_set = GenericRelation(KVTag, related_query_name='archiveresult')
			
 
				-
			
 
				-    ### ModelWithOutputDir
			
 
				+    notes = models.TextField(blank=True, null=False, default='')
			
 
				     output_dir = models.CharField(max_length=256, default=None, null=True, blank=True)
			
 
				+    iface = models.ForeignKey(NetworkInterface, on_delete=models.SET_NULL, null=True, blank=True)
			
 
				 
			
 
				-    # machine = models.ForeignKey(Machine, on_delete=models.SET_NULL, null=True, blank=True, verbose_name='Machine Used')
			
 
				-    iface = models.ForeignKey(NetworkInterface, on_delete=models.SET_NULL, null=True, blank=True, verbose_name='Network Interface Used')
			
 
				+    state_machine_name = 'core.statemachines.ArchiveResultMachine'
			
 
				+    retry_at_field_name = 'retry_at'
			
 
				+    state_field_name = 'status'
			
 
				+    active_state = StatusChoices.STARTED
			
 
				 
			
 
				     objects = ArchiveResultManager()
			
 
				-    
			
 
				-    keys = ('snapshot_id', 'extractor', 'cmd', 'pwd', 'cmd_version', 'output', 'start_ts', 'end_ts', 'created_at', 'status', 'retry_at', 'abid', 'id')
			
 
				 
			
 
				     class Meta(TypedModelMeta):
			
 
				         verbose_name = 'Archive Result'
			
 
				         verbose_name_plural = 'Archive Results Log'
			
 
				 
			
 
				-    def __repr__(self):
			
 
				-        snapshot_id = getattr(self, 'snapshot_id', None)
			
 
				-        url = self.snapshot.url if snapshot_id else '<no url set>'
			
 
				-        created_at = self.snapshot.created_at.strftime("%Y-%m-%d %H:%M") if snapshot_id else '<no timestamp set>'
			
 
				-        extractor = self.extractor or '<no extractor set>'
			
 
				-        if self.id and snapshot_id:
			
 
				-            return f'[{self.ABID}] {url[:64]} @ {created_at} -> {extractor}'
			
 
				-        return f'[{self.abid_prefix}****not*saved*yet****] {url} @ {created_at} -> {extractor}'
			
 
				-
			
 
				     def __str__(self):
			
 
				-        return repr(self)
			
 
				-    
			
 
				-    def save(self, *args, write_indexes: bool=False, **kwargs):
			
 
				-        print(f'ArchiveResult[{self.ABID}].save()')
			
 
				-        # if (self.pk and self.__class__.objects.filter(pk=self.pk).values_list('status', flat=True)[0] in [self.StatusChoices.FAILED, self.StatusChoices.SUCCEEDED, self.StatusChoices.SKIPPED]):
			
 
				-        #     raise Exception(f'ArchiveResult {self.pk} is in a final state, it cannot be modified any further.')
			
 
				-        if self.pk:
			
 
				-            existing_archiveresult = self.__class__.objects.filter(pk=self.pk).first()
			
 
				-            if existing_archiveresult and existing_archiveresult.status in [self.StatusChoices.FAILED, self.StatusChoices.SUCCEEDED, self.StatusChoices.SKIPPED]:
			
 
				-                if self.as_json() != existing_archiveresult.as_json():
			
 
				-                    raise Exception(f'ArchiveResult {self.pk} is in a final state, it cannot be modified any further. NEW: {self.as_json()} != Existing: {existing_archiveresult.as_json()}')
			
 
				-        super().save(*args, **kwargs)
			
 
				-        # DONT DO THIS:
			
 
				-        # self.snapshot.update_for_workers()   # this should be done manually wherever its needed, not in here as a side-effect on save()
			
 
				-
			
 
				-
			
 
				-    # TODO: finish connecting machine.models
			
 
				-    # @cached_property
			
 
				-    # def machine(self):
			
 
				-    #     return self.iface.machine if self.iface else None
			
 
				+        return f'[{self.id}] {self.snapshot.url[:64]} -> {self.extractor}'
			
 
				 
			
 
				     @cached_property
			
 
				     def snapshot_dir(self):
			
 
				         return Path(self.snapshot.link_dir)
			
 
				-    
			
 
				+
			
 
				     @cached_property
			
 
				     def url(self):
			
 
				         return self.snapshot.url
			
 
				 
			
 
				     @property
			
 
				     def api_url(self) -> str:
			
 
				-        # /api/v1/core/archiveresult/{uulid}
			
 
				-        return reverse_lazy('api-1:get_archiveresult', args=[self.abid])  # + f'?api_key={get_or_create_api_token(request.user)}'
			
 
				-
			
 
				-    @property
			
 
				-    def api_docs_url(self) -> str:
			
 
				-        return '/api/v1/docs#/Core%20Models/api_v1_core_get_archiveresult'
			
 
				+        return reverse_lazy('api-1:get_archiveresult', args=[self.id])
			
 
				 
			
 
				     def get_absolute_url(self):
			
 
				         return f'/{self.snapshot.archive_path}/{self.extractor}'
			
@@ -772,252 +294,24 @@ class ArchiveResult(
 
				     def extractor_module(self) -> Any | None:
			
 
				         return abx.as_dict(abx.pm.hook.get_EXTRACTORS()).get(self.extractor, None)
			
 
				 
			
 
				-    @property
			
 
				-    def EXTRACTOR(self) -> object:
			
 
				-        # return self.extractor_module
			
 
				-        return self.extractor_module(archiveresult=self)
			
 
				-
			
 
				-    def embed_path(self) -> str | None:
			
 
				-        """
			
 
				-        return the actual runtime-calculated path to the file on-disk that
			
 
				-        should be used for user-facing iframe embeds of this result
			
 
				-        """
			
 
				-
			
 
				-        try:
			
 
				-            return self.extractor_module.get_embed_path(self)
			
 
				-        except Exception as e:
			
 
				-            print(f'Error getting embed path for {self.extractor} extractor: {e}')
			
 
				-            return None
			
 
				-
			
 
				-    def legacy_output_path(self):
			
 
				-        return self.canonical_outputs().get(f'{self.extractor}_path')
			
 
				-
			
 
				     def output_exists(self) -> bool:
			
 
				-        output_path = Path(self.snapshot_dir) / self.extractor
			
 
				-        return os.path.exists(output_path)
			
 
				-            
			
 
				+        return os.path.exists(Path(self.snapshot_dir) / self.extractor)
			
 
				+
			
 
				     def create_output_dir(self):
			
 
				         output_dir = Path(self.snapshot_dir) / self.extractor
			
 
				         output_dir.mkdir(parents=True, exist_ok=True)
			
 
				         return output_dir
			
 
				-        
			
 
				-    def canonical_outputs(self) -> Dict[str, Optional[str]]:
			
 
				-        """Predict the expected output paths that should be present after archiving"""
			
 
				-        # You'll need to implement the actual logic based on your requirements
			
 
				-        # TODO: banish this awful duplication from the codebase and import these
			
 
				-        # from their respective extractor files
			
 
				-
			
 
				-
			
 
				-        from abx_plugin_favicon.config import FAVICON_CONFIG
			
 
				-        canonical = {
			
 
				-            'index_path': 'index.html',
			
 
				-            'favicon_path': 'favicon.ico',
			
 
				-            'google_favicon_path': FAVICON_CONFIG.FAVICON_PROVIDER.format(self.domain),
			
 
				-            'wget_path': f'warc/{self.timestamp}',
			
 
				-            'warc_path': 'warc/',
			
 
				-            'singlefile_path': 'singlefile.html',
			
 
				-            'readability_path': 'readability/content.html',
			
 
				-            'mercury_path': 'mercury/content.html',
			
 
				-            'htmltotext_path': 'htmltotext.txt',
			
 
				-            'pdf_path': 'output.pdf',
			
 
				-            'screenshot_path': 'screenshot.png',
			
 
				-            'dom_path': 'output.html',
			
 
				-            'archive_org_path': f'https://web.archive.org/web/{self.base_url}',
			
 
				-            'git_path': 'git/',
			
 
				-            'media_path': 'media/',
			
 
				-            'headers_path': 'headers.json',
			
 
				-        }
			
 
				-        
			
 
				-        if self.is_static:
			
 
				-            static_path = f'warc/{self.timestamp}'
			
 
				-            canonical.update({
			
 
				-                'title': self.basename,
			
 
				-                'wget_path': static_path,
			
 
				-                'pdf_path': static_path,
			
 
				-                'screenshot_path': static_path,
			
 
				-                'dom_path': static_path,
			
 
				-                'singlefile_path': static_path,
			
 
				-                'readability_path': static_path,
			
 
				-                'mercury_path': static_path,
			
 
				-                'htmltotext_path': static_path,
			
 
				-            })
			
 
				-        return canonical
			
 
				-        
			
 
				+
			
 
				     @property
			
 
				     def output_dir_name(self) -> str:
			
 
				         return self.extractor
			
 
				-        
			
 
				+
			
 
				     @property
			
 
				     def output_dir_parent(self) -> str:
			
 
				         return str(self.snapshot.OUTPUT_DIR.relative_to(CONSTANTS.DATA_DIR))
			
 
				-        
			
 
				-    @cached_property
			
 
				-    def output_files(self) -> dict[str, dict]:
			
 
				-        dir_info = get_dir_info(self.OUTPUT_DIR, max_depth=6)
			
 
				-        with open(self.OUTPUT_DIR / '.hashes.json', 'w') as f:
			
 
				-            json.dump(dir_info, f)
			
 
				-        return dir_info
			
 
				-    
			
 
				-    def announce_event(self, output_type: str, event: dict):
			
 
				-        event = {
			
 
				-            **event,
			
 
				-            'type': output_type,
			
 
				-        }
			
 
				-        
			
 
				-        # if event references a file, make sure it exists on disk
			
 
				-        if 'path' in event:
			
 
				-            file_path = Path(self.OUTPUT_DIR) / event['path']
			
 
				-            assert file_path.exists(), f'ArchiveResult[{self.ABID}].announce_event(): File does not exist: {file_path} ({event})'
			
 
				-            
			
 
				-        with open(self.OUTPUT_DIR / '.events.jsonl', 'a') as f:
			
 
				-            f.write(json.dumps(event, sort_keys=True, default=str) + '\n')
			
 
				-            
			
 
				-    def events(self, filter_type: str | None=None) -> list[dict]:
			
 
				-        events = []
			
 
				-        try:
			
 
				-            with open(self.OUTPUT_DIR / '.events.jsonl', 'r') as f:
			
 
				-                for line in f:
			
 
				-                    event = json.loads(line)
			
 
				-                    if filter_type is None or event['type'] == filter_type:
			
 
				-                        events.append(event)
			
 
				-        except FileNotFoundError:
			
 
				-            pass
			
 
				-        return events
			
 
				-        
			
 
				+
			
 
				     def write_indexes(self):
			
 
				-        """Write the ArchiveResult json, html, and merkle indexes to output dir, and pass searchable text to the search backend"""
			
 
				         super().write_indexes()
			
 
				-        self.save_search_index()
			
 
				-        # self.save_outlinks_to_crawl()
			
 
				-        
			
 
				-    # def save_outlinks_to_crawl(self):
			
 
				-    #     """Save the output of this ArchiveResult to the Crawl's urls field"""
			
 
				-    #     if self.output_urls:
			
 
				-    #     self.snapshot.crawl.urls += f'\n{self.url}'
			
 
				-    #     self.snapshot.crawl.save()
			
 
				-
			
 
				-    # def migrate_output_dir(self):
			
 
				-    #     """Move the output files to the new folder structure if needed"""
			
 
				-    #     print(f'{self}.migrate_output_dir()')
			
 
				-    #     self.migrate_from_0_7_2()
			
 
				-    #     self.migrate_from_0_8_6()
			
 
				-    #     # ... future migrations here
			
 
				-    
			
 
				-    # def migrate_from_0_7_2(self):
			
 
				-    #     """Migrate the folder structure from 0.7.2 to the current version"""
			
 
				-    #     # migrate any existing output_dir into data/archiveresults/<extractor>/YYYY-MM-DD/<domain>/<abid>
			
 
				-    #     # create self.output_dir if it doesn't exist
			
 
				-    #     # move loose files in snapshot_dir into self.output_dir
			
 
				-    #     # update self.pwd = self.output_dir
			
 
				-    #     print(f'{self}.migrate_from_0_7_2()')
			
 
				-    
			
 
				-    # def migrate_from_0_8_6(self):
			
 
				-    #     """Migrate the folder structure from 0.8.6 to the current version"""
			
 
				-    #     # ... future migration code here ...
			
 
				-    #     print(f'{self}.migrate_from_0_8_6()')
			
 
				-            
			
 
				-    # def save_json_index(self):
			
 
				-    #     """Save the json index file to ./.index.json"""
			
 
				-    #     print(f'{self}.save_json_index()')
			
 
				-    #     pass
			
 
				-    
			
 
				-    # def save_symlinks_index(self):
			
 
				-    #     """Update the symlink farm idnexes to point to the new location of self.output_dir"""
			
 
				-    #     # ln -s self.output_dir data/index/results_by_type/wget/YYYY-MM-DD/example.com/<abid>
			
 
				-    #     # ln -s self.output_dir data/index/results_by_day/YYYY-MM-DD/example.com/wget/<abid>
			
 
				-    #     # ln -s self.output_dir data/index/results_by_domain/example.com/YYYY-MM-DD/wget/<abid>
			
 
				-    #     # ln -s self.output_dir data/index/results_by_abid/<abid>
			
 
				-    #     # ln -s self.output_dir data/archive/<snapshot_timestamp>/<extractor>
			
 
				-    #     print(f'{self}.save_symlinks_index()')
			
 
				-    
			
 
				-    # def save_html_index(self):
			
 
				-    #     """Save the html index file to ./.index.html"""
			
 
				-    #     print(f'{self}.save_html_index()')
			
 
				-    #     pass
			
 
				-
			
 
				-    # def save_merkle_index(self):
			
 
				-    #     """Calculate the recursive sha256 of all the files in the output path and save it to ./.checksum.json"""
			
 
				-    #     print(f'{self}.save_merkle_index()')
			
 
				-    #     pass
			
 
				 
			
 
				     def save_search_index(self):
			
 
				-        """Pass any indexable text to the search backend indexer (e.g. sonic, SQLiteFTS5, etc.)"""
			
 
				-        print(f'{self}.save_search_index()')
			
 
				         pass
			
 
				-
			
 
				-
			
 
				-    # def get_storage_dir(self, create=True, symlink=True):
			
 
				-    #     date_str = self.snapshot.bookmarked_at.strftime('%Y%m%d')
			
 
				-    #     domain_str = domain(self.snapshot.url)
			
 
				-    #     abs_storage_dir = Path(CONSTANTS.ARCHIVE_DIR) / 'results' / date_str / domain_str / self.extractor / str(self.ulid)
			
 
				-
			
 
				-    #     if create and not abs_storage_dir.is_dir():
			
 
				-    #         abs_storage_dir.mkdir(parents=True, exist_ok=True)
			
 
				-
			
 
				-    #     if symlink:
			
 
				-    #         LINK_PATHS = [
			
 
				-    #             Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid),
			
 
				-    #             # Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'results_by_id' / str(self.ulid),
			
 
				-    #             # Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'results_by_date' / date_str / domain_str / self.extractor / str(self.ulid),
			
 
				-    #             Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'results_by_domain' / domain_str / date_str / self.extractor / str(self.ulid),
			
 
				-    #             Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'results_by_type' / self.extractor / date_str / domain_str / str(self.ulid),
			
 
				-    #         ]
			
 
				-    #         for link_path in LINK_PATHS:
			
 
				-    #             link_path.parent.mkdir(parents=True, exist_ok=True)
			
 
				-    #             try:
			
 
				-    #                 link_path.symlink_to(abs_storage_dir)
			
 
				-    #             except FileExistsError:
			
 
				-    #                 link_path.unlink()
			
 
				-    #                 link_path.symlink_to(abs_storage_dir)
			
 
				-
			
 
				-    #     return abs_storage_dir
			
 
				-
			
 
				-    # def symlink_index(self, create=True):
			
 
				-    #     abs_result_dir = self.get_storage_dir(create=create)
			
 
				-
			
 
				-
			
 
				-
			
 
				-
			
 
				-
			
 
				-        
			
 
				-# @abx.hookimpl.on_archiveresult_created
			
 
				-# def exec_archiveresult_extractor_effects(archiveresult):
			
 
				-#     config = get_scope_config(...)
			
 
				-    
			
 
				-#     # abx.archivebox.writes.update_archiveresult_started(archiveresult, start_ts=timezone.now())
			
 
				-#     # abx.archivebox.events.on_archiveresult_updated(archiveresult)
			
 
				-    
			
 
				-#     # check if it should be skipped
			
 
				-#     if not abx.archivebox.reads.get_archiveresult_should_run(archiveresult, config):
			
 
				-#         abx.archivebox.writes.update_archiveresult_skipped(archiveresult, status='skipped')
			
 
				-#         abx.archivebox.events.on_archiveresult_skipped(archiveresult, config)
			
 
				-#         return
			
 
				-    
			
 
				-#     # run the extractor method and save the output back to the archiveresult
			
 
				-#     try:
			
 
				-#         output = abx.archivebox.effects.exec_archiveresult_extractor(archiveresult, config)
			
 
				-#         abx.archivebox.writes.update_archiveresult_succeeded(archiveresult, output=output, error=None, end_ts=timezone.now())
			
 
				-#     except Exception as e:
			
 
				-#         abx.archivebox.writes.update_archiveresult_failed(archiveresult, error=e, end_ts=timezone.now())
			
 
				-    
			
 
				-#     # bump the modified time on the archiveresult and Snapshot
			
 
				-#     abx.archivebox.events.on_archiveresult_updated(archiveresult)
			
 
				-#     abx.archivebox.events.on_snapshot_updated(archiveresult.snapshot)
			
 
				-    
			
 
				-
			
 
				-# @abx.hookimpl.reads.get_outlink_parents
			
 
				-# def get_outlink_parents(url, crawl_pk=None, config=None):
			
 
				-#     scope = Q(dst=url)
			
 
				-#     if crawl_pk:
			
 
				-#         scope = scope | Q(via__snapshot__crawl_id=crawl_pk)
			
 
				-    
			
 
				-#     parent = list(Outlink.objects.filter(scope))
			
 
				-#     if not parent:
			
 
				-#         # base case: we reached the top of the chain, no more parents left
			
 
				-#         return []
			
 
				-    
			
 
				-#     # recursive case: there is another parent above us, get its parents
			
 
				-#     yield parent[0]
			
 
				-#     yield from get_outlink_parents(parent[0].src, crawl_pk=crawl_pk, config=config)
			
 
				-
			
 
				-
			
--- a/archivebox/core/statemachines.py
+++ b/archivebox/core/statemachines.py
@@ -43,7 +43,7 @@ class SnapshotMachine(StateMachine, strict_states=True):
 
				         super().__init__(snapshot, *args, **kwargs)
			
 
				         
			
 
				     def __repr__(self) -> str:
			
 
				-        return f'[grey53]Snapshot\\[{self.snapshot.ABID}] 🏃‍♂️ Worker\\[pid={os.getpid()}].tick()[/grey53] [blue]{self.snapshot.status.upper()}[/blue] ⚙️ [grey37]Machine[/grey37]'
			
 
				+        return f'[grey53]Snapshot\\[{self.snapshot.id}] 🏃‍♂️ Worker\\[pid={os.getpid()}].tick()[/grey53] [blue]{self.snapshot.status.upper()}[/blue] ⚙️ [grey37]Machine[/grey37]'
			
 
				     
			
 
				     def __str__(self) -> str:
			
 
				         return self.__repr__()
			
@@ -93,11 +93,6 @@ class SnapshotMachine(StateMachine, strict_states=True):
 
				             status=Snapshot.StatusChoices.STARTED,
			
 
				         )
			
 
				         
			
 
				-        # run_subcommand([
			
 
				-        #     'archivebox', 'snapshot', self.snapshot.ABID,
			
 
				-        #     '--start',
			
 
				-        # ])
			
 
				-        
			
 
				     @sealed.enter
			
 
				     def enter_sealed(self):
			
 
				         print(f'{self}.on_sealed() ↳ snapshot.retry_at=None')
			
@@ -160,7 +155,7 @@ class ArchiveResultMachine(StateMachine, strict_states=True):
 
				         super().__init__(archiveresult, *args, **kwargs)
			
 
				     
			
 
				     def __repr__(self) -> str:
			
 
				-        return f'[grey53]ArchiveResult\\[{self.archiveresult.ABID}] 🏃‍♂️ Worker\\[pid={os.getpid()}].tick()[/grey53] [blue]{self.archiveresult.status.upper()}[/blue] ⚙️ [grey37]Machine[/grey37]'
			
 
				+        return f'[grey53]ArchiveResult\\[{self.archiveresult.id}] 🏃‍♂️ Worker\\[pid={os.getpid()}].tick()[/grey53] [blue]{self.archiveresult.status.upper()}[/blue] ⚙️ [grey37]Machine[/grey37]'
			
 
				     
			
 
				     def __str__(self) -> str:
			
 
				         return self.__repr__()
			
@@ -207,11 +202,7 @@ class ArchiveResultMachine(StateMachine, strict_states=True):
 
				             status=ArchiveResult.StatusChoices.QUEUED,
			
 
				             start_ts=timezone.now(),
			
 
				         )   # lock the obj for the next ~30s to limit racing with other workers
			
 
				-        
			
 
				-        # run_subcommand([
			
 
				-        #     'archivebox', 'extract', self.archiveresult.ABID,
			
 
				-        # ])
			
 
				-        
			
 
				+
			
 
				         # create the output directory and fork the new extractor job subprocess
			
 
				         self.archiveresult.create_output_dir()
			
 
				         # self.archiveresult.extract(background=True)
			
--- a/archivebox/core/views.py
+++ b/archivebox/core/views.py
@@ -205,7 +205,7 @@ class SnapshotView(View):
 
				                     format_html(
			
 
				                         (
			
 
				                             '<center><br/><br/><br/>'
			
 
				-                            'No Snapshot directories match the given timestamp/ID/ABID: <code>{}</code><br/><br/>'
			
 
				+                            'No Snapshot directories match the given timestamp/ID: <code>{}</code><br/><br/>'
			
 
				                             'You can <a href="/add/" target="_top">add a new Snapshot</a>, or return to the <a href="/" target="_top">Main Index</a>'
			
 
				                             '</center>'
			
 
				                         ),
			
@@ -230,7 +230,7 @@ class SnapshotView(View):
 
				                 return HttpResponse(
			
 
				                     format_html(
			
 
				                         (
			
 
				-                            'Multiple Snapshots match the given timestamp/ID/ABID <code>{}</code><br/><pre>'
			
 
				+                            'Multiple Snapshots match the given timestamp/ID <code>{}</code><br/><pre>'
			
 
				                         ),
			
 
				                         slug,
			
 
				                     ) + snapshot_hrefs + format_html(
			
@@ -282,34 +282,12 @@ class SnapshotView(View):
 
				                     status=404,
			
 
				                 )
			
 
				             
			
 
				-        # # slud is an ID
			
 
				-        # ulid = slug.split('_', 1)[-1]
			
 
				-        # try:
			
 
				-        #     try:
			
 
				-        #         snapshot = snapshot or Snapshot.objects.get(Q(abid=ulid) | Q(id=ulid))
			
 
				-        #     except Snapshot.DoesNotExist:
			
 
				-        #         pass
			
 
				-
			
 
				-        #     try:
			
 
				-        #         snapshot = Snapshot.objects.get(Q(abid__startswith=slug) | Q(abid__startswith=Snapshot.abid_prefix + slug) | Q(id__startswith=slug))
			
 
				-        #     except (Snapshot.DoesNotExist, Snapshot.MultipleObjectsReturned):
			
 
				-        #         pass
			
 
				-
			
 
				-        #     try:
			
 
				-        #         snapshot = snapshot or Snapshot.objects.get(Q(abid__icontains=snapshot_id) | Q(id__icontains=snapshot_id))
			
 
				-        #     except Snapshot.DoesNotExist:
			
 
				-        #         pass
			
 
				-        #     return redirect(f'/archive/{snapshot.timestamp}/index.html')
			
 
				-        # except Snapshot.DoesNotExist:
			
 
				-        #     pass
			
 
				-
			
 
				         # slug is a URL
			
 
				         try:
			
 
				             try:
			
 
				-                # try exact match on full url / ABID first
			
 
				+                # try exact match on full url / ID first
			
 
				                 snapshot = Snapshot.objects.get(
			
 
				-                    Q(url='http://' + path) | Q(url='https://' + path) | Q(id__startswith=path)
			
 
				-                    | Q(abid__icontains=path) | Q(id__icontains=path)
			
 
				+                    Q(url='http://' + path) | Q(url='https://' + path) | Q(id__icontains=path)
			
 
				                 )
			
 
				             except Snapshot.DoesNotExist:
			
 
				                 # fall back to match on exact base_url
			
@@ -345,7 +323,7 @@ class SnapshotView(View):
 
				                 format_html(
			
 
				                     '{} <code style="font-size: 0.8em">{}</code> <a href="/archive/{}/index.html"><b><code>{}</code></b></a> {} <b>{}</b>',
			
 
				                     snap.bookmarked_at.strftime('%Y-%m-%d %H:%M:%S'),
			
 
				-                    snap.abid,
			
 
				+                    str(snap.id)[:8],
			
 
				                     snap.timestamp,
			
 
				                     snap.timestamp,
			
 
				                     snap.url,
			
@@ -353,7 +331,7 @@ class SnapshotView(View):
 
				                 )
			
 
				                 for snap in Snapshot.objects.filter(
			
 
				                     Q(url__startswith='http://' + base_url(path)) | Q(url__startswith='https://' + base_url(path))
			
 
				-                    | Q(abid__icontains=path) | Q(id__icontains=path)
			
 
				+                    | Q(id__icontains=path)
			
 
				                 ).only('url', 'timestamp', 'title', 'bookmarked_at').order_by('-bookmarked_at')
			
 
				             )
			
 
				             return HttpResponse(
			
--- a/archivebox/crawls/admin.py
+++ b/archivebox/crawls/admin.py
@@ -5,18 +5,18 @@ from django.contrib import admin
 
				 
			
 
				 from archivebox import DATA_DIR
			
 
				 
			
 
				-from archivebox.base_models.admin import ABIDModelAdmin
			
 
				+from archivebox.base_models.admin import BaseModelAdmin
			
 
				 
			
 
				 from core.models import Snapshot
			
 
				 from crawls.models import Seed, Crawl, CrawlSchedule
			
 
				 
			
 
				 
			
 
				-class SeedAdmin(ABIDModelAdmin):
			
 
				-    list_display = ('abid', 'created_at', 'created_by', 'label', 'notes', 'uri', 'extractor', 'tags_str', 'crawls', 'num_crawls', 'num_snapshots')
			
 
				-    sort_fields = ('abid', 'created_at', 'created_by', 'label', 'notes', 'uri', 'extractor', 'tags_str')
			
 
				-    search_fields = ('abid', 'created_by__username', 'label', 'notes', 'uri', 'extractor', 'tags_str')
			
 
				-    
			
 
				-    readonly_fields = ('created_at', 'modified_at', 'abid_info', 'scheduled_crawls', 'crawls', 'snapshots', 'contents')
			
 
				+class SeedAdmin(BaseModelAdmin):
			
 
				+    list_display = ('id', 'created_at', 'created_by', 'label', 'notes', 'uri', 'extractor', 'tags_str', 'crawls', 'num_crawls', 'num_snapshots')
			
 
				+    sort_fields = ('id', 'created_at', 'created_by', 'label', 'notes', 'uri', 'extractor', 'tags_str')
			
 
				+    search_fields = ('id', 'created_by__username', 'label', 'notes', 'uri', 'extractor', 'tags_str')
			
 
				+
			
 
				+    readonly_fields = ('created_at', 'modified_at', 'scheduled_crawls', 'crawls', 'snapshots', 'contents')
			
 
				     fields = ('label', 'notes', 'uri', 'extractor', 'tags_str', 'config', 'created_by', *readonly_fields)
			
 
				 
			
 
				     list_filter = ('extractor', 'created_by')
			
@@ -64,12 +64,12 @@ class SeedAdmin(ABIDModelAdmin):
 
				 
			
 
				 
			
 
				 
			
 
				-class CrawlAdmin(ABIDModelAdmin):
			
 
				-    list_display = ('abid', 'created_at', 'created_by', 'max_depth', 'label', 'notes', 'seed_str', 'schedule_str', 'status', 'retry_at', 'num_snapshots')
			
 
				-    sort_fields = ('abid', 'created_at', 'created_by', 'max_depth', 'label', 'notes', 'seed_str', 'schedule_str', 'status', 'retry_at')
			
 
				-    search_fields = ('abid', 'created_by__username', 'max_depth', 'label', 'notes', 'seed_id', 'seed__abid', 'schedule_id', 'schedule__abid', 'status', 'seed__uri')
			
 
				-    
			
 
				-    readonly_fields = ('created_at', 'modified_at', 'abid_info', 'snapshots', 'seed_contents')
			
 
				+class CrawlAdmin(BaseModelAdmin):
			
 
				+    list_display = ('id', 'created_at', 'created_by', 'max_depth', 'label', 'notes', 'seed_str', 'schedule_str', 'status', 'retry_at', 'num_snapshots')
			
 
				+    sort_fields = ('id', 'created_at', 'created_by', 'max_depth', 'label', 'notes', 'seed_str', 'schedule_str', 'status', 'retry_at')
			
 
				+    search_fields = ('id', 'created_by__username', 'max_depth', 'label', 'notes', 'seed_id', 'schedule_id', 'status', 'seed__uri')
			
 
				+
			
 
				+    readonly_fields = ('created_at', 'modified_at', 'snapshots', 'seed_contents')
			
 
				     fields = ('label', 'notes', 'urls', 'status', 'retry_at', 'max_depth', 'seed', 'schedule', 'created_by', *readonly_fields)
			
 
				 
			
 
				     list_filter = ('max_depth', 'seed', 'schedule', 'created_by', 'status', 'retry_at')
			
@@ -116,12 +116,12 @@ class CrawlAdmin(ABIDModelAdmin):
 
				 
			
 
				 
			
 
				 
			
 
				-class CrawlScheduleAdmin(ABIDModelAdmin):
			
 
				-    list_display = ('abid', 'created_at', 'created_by', 'label', 'notes', 'template_str', 'crawls', 'num_crawls', 'num_snapshots')
			
 
				-    sort_fields = ('abid', 'created_at', 'created_by', 'label', 'notes', 'template_str')
			
 
				-    search_fields = ('abid', 'created_by__username', 'label', 'notes', 'schedule_id', 'schedule__abid', 'template_id', 'template__abid', 'template__seed__uri')
			
 
				-    
			
 
				-    readonly_fields = ('created_at', 'modified_at', 'abid_info', 'crawls', 'snapshots')
			
 
				+class CrawlScheduleAdmin(BaseModelAdmin):
			
 
				+    list_display = ('id', 'created_at', 'created_by', 'label', 'notes', 'template_str', 'crawls', 'num_crawls', 'num_snapshots')
			
 
				+    sort_fields = ('id', 'created_at', 'created_by', 'label', 'notes', 'template_str')
			
 
				+    search_fields = ('id', 'created_by__username', 'label', 'notes', 'schedule_id', 'template_id', 'template__seed__uri')
			
 
				+
			
 
				+    readonly_fields = ('created_at', 'modified_at', 'crawls', 'snapshots')
			
 
				     fields = ('label', 'notes', 'schedule', 'template', 'created_by', *readonly_fields)
			
 
				 
			
 
				     list_filter = ('created_by',)
			
--- a/archivebox/crawls/models.py
+++ b/archivebox/crawls/models.py
@@ -1,493 +1,173 @@
 
				 __package__ = 'archivebox.crawls'
			
 
				 
			
 
				 from typing import TYPE_CHECKING, Iterable
			
 
				+from uuid import uuid7
			
 
				 from pathlib import Path
			
 
				-from django_stubs_ext.db.models import TypedModelMeta
			
 
				 
			
 
				 from django.db import models
			
 
				 from django.db.models import QuerySet
			
 
				-from django.core.validators import MaxValueValidator, MinValueValidator 
			
 
				+from django.core.validators import MaxValueValidator, MinValueValidator
			
 
				 from django.conf import settings
			
 
				 from django.urls import reverse_lazy
			
 
				 from django.utils import timezone
			
 
				+from django_stubs_ext.db.models import TypedModelMeta
			
 
				 
			
 
				 from archivebox.config import CONSTANTS
			
 
				-from base_models.models import ModelWithReadOnlyFields, ModelWithSerializers, ModelWithUUID, ModelWithKVTags, ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ABIDModel, ABIDField, AutoDateTimeField, ModelWithHealthStats, get_or_create_system_user_pk
			
 
				+from archivebox.base_models.models import ModelWithSerializers, ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHealthStats, get_or_create_system_user_pk
			
 
				 from workers.models import ModelWithStateMachine
			
 
				-from tags.models import KVTag, GenericRelation
			
 
				 
			
 
				 if TYPE_CHECKING:
			
 
				     from core.models import Snapshot, ArchiveResult
			
 
				 
			
 
				 
			
 
				-
			
 
				-
			
 
				-class Seed(ModelWithReadOnlyFields, ModelWithSerializers, ModelWithUUID, ModelWithKVTags, ABIDModel, ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHealthStats):
			
 
				-    """
			
 
				-    A fountain that produces URLs (+metadata) each time it's queried e.g.
			
 
				-        - file:///data/sources/2024-01-02_11-57-51__cli_add.txt
			
 
				-        - file:///data/sources/2024-01-02_11-57-51__web_ui_add.txt
			
 
				-        - file:///Users/squash/Library/Application Support/Google/Chrome/Default/Bookmarks
			
 
				-        - https://getpocket.com/user/nikisweeting/feed
			
 
				-        - https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
			
 
				-        - ...
			
 
				-    Each query of a Seed can produce the same list of URLs, or a different list each time.
			
 
				-    The list of URLs it returns is used to create a new Crawl and seed it with new pending Snapshots.
			
 
				-        
			
 
				-    When a crawl is created, a root_snapshot is initially created with a URI set to the Seed URI.
			
 
				-    The seed's preferred extractor is executed on that URI, which produces an ArchiveResult containing outlinks.
			
 
				-    The outlinks then get turned into new pending Snapshots under the same crawl,
			
 
				-    and the cycle repeats until Crawl.max_depth.
			
 
				-
			
 
				-    Each consumption of a Seed by an Extractor can produce new urls, as Seeds can point to
			
 
				-    stateful remote services, files with contents that change, directories that have new files within, etc.
			
 
				-    """
			
 
				-    
			
 
				-    ### ModelWithReadOnlyFields:
			
 
				-    read_only_fields = ('id', 'abid', 'created_at', 'created_by', 'uri')
			
 
				-    
			
 
				-    ### Immutable fields
			
 
				-    id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
			
 
				-    abid = ABIDField(prefix=abid_prefix)
			
 
				-    created_at = AutoDateTimeField(default=None, null=False, db_index=True)                  # unique source location where URLs will be loaded from
			
 
				+class Seed(ModelWithSerializers, ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHealthStats):
			
 
				+    id = models.UUIDField(primary_key=True, default=uuid7, editable=False, unique=True)
			
 
				+    created_at = models.DateTimeField(default=timezone.now, db_index=True)
			
 
				     created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk, null=False)
			
 
				-    
			
 
				-    ### Mutable fields:
			
 
				-    extractor = models.CharField(default='auto', max_length=32, help_text='The parser / extractor to use to load URLs from this source (default: auto)')
			
 
				-    tags_str = models.CharField(max_length=255, null=False, blank=True, default='', help_text='An optional comma-separated list of tags to attach to any URLs that come from this source')
			
 
				-    label = models.CharField(max_length=255, null=False, blank=True, default='', help_text='A human-readable label for this seed')
			
 
				     modified_at = models.DateTimeField(auto_now=True)
			
 
				 
			
 
				-    ### ModelWithConfig:
			
 
				-    config = models.JSONField(default=dict, help_text='An optional JSON object containing extra config to put in scope when loading URLs from this source')
			
 
				-
			
 
				-    ### ModelWithOutputDir:
			
 
				-    output_dir = models.CharField(max_length=255, null=False, blank=True, default='', help_text='The directory to store the output of this seed')
			
 
				-
			
 
				-    ### ModelWithNotes:
			
 
				-    notes = models.TextField(blank=True, null=False, default='', help_text='Any extra notes this seed should have')
			
 
				-
			
 
				-    ### ModelWithKVTags:
			
 
				-    tag_set = GenericRelation(
			
 
				-        KVTag,
			
 
				-        related_query_name="seed",
			
 
				-        content_type_field="obj_type",
			
 
				-        object_id_field="obj_id",
			
 
				-        order_by=('name',),
			
 
				-    )
			
 
				-    
			
 
				-    ### ABIDModel:
			
 
				-    abid_prefix = 'src_'
			
 
				-    abid_ts_src = 'self.created_at'
			
 
				-    abid_uri_src = 'self.uri'
			
 
				-    abid_subtype_src = 'self.extractor'
			
 
				-    abid_rand_src = 'self.id'
			
 
				-    abid_drift_allowed = True
			
 
				-    
			
 
				-    ### ModelWithOutputDir:
			
 
				-    output_dir = models.FilePathField(path=settings.ARCHIVE_DIR, null=False, blank=True, default='', help_text='The directory to store the output of this crawl')
			
 
				-    output_dir_template = 'archive/seeds/{self.created_at.strftime("%Y%m%d")}/{self.abid}'
			
 
				-    output_dir_symlinks = [
			
 
				-        ('index.json',      'self.as_json()'),
			
 
				-        ('config.toml',     'benedict(self.config).as_toml()'),
			
 
				-        ('seed/',           'self.seed.output_dir.relative_to(self.output_dir)'),
			
 
				-        ('persona/',        'self.persona.output_dir.relative_to(self.output_dir)'),
			
 
				-        ('created_by/',     'self.created_by.output_dir.relative_to(self.output_dir)'),
			
 
				-        ('schedule/',       'self.schedule.output_dir.relative_to(self.output_dir)'),
			
 
				-        ('sessions/',       '[session.output_dir for session in self.session_set.all()]'),
			
 
				-        ('snapshots/',      '[snapshot.output_dir for snapshot in self.snapshot_set.all()]'),
			
 
				-        ('archiveresults/', '[archiveresult.output_dir for archiveresult in self.archiveresult_set.all()]'),
			
 
				-    ]
			
 
				-    
			
 
				-    ### Managers:
			
 
				+    uri = models.URLField(max_length=2048)
			
 
				+    extractor = models.CharField(default='auto', max_length=32)
			
 
				+    tags_str = models.CharField(max_length=255, null=False, blank=True, default='')
			
 
				+    label = models.CharField(max_length=255, null=False, blank=True, default='')
			
 
				+    config = models.JSONField(default=dict)
			
 
				+    output_dir = models.FilePathField(path=settings.ARCHIVE_DIR, null=False, blank=True, default='')
			
 
				+    notes = models.TextField(blank=True, null=False, default='')
			
 
				+
			
 
				     crawl_set: models.Manager['Crawl']
			
 
				 
			
 
				     class Meta:
			
 
				         verbose_name = 'Seed'
			
 
				         verbose_name_plural = 'Seeds'
			
 
				-        
			
 
				-        unique_together = (('created_by', 'uri', 'extractor'),('created_by', 'label'))
			
 
				+        unique_together = (('created_by', 'uri', 'extractor'), ('created_by', 'label'))
			
 
				 
			
 
				+    def __str__(self):
			
 
				+        return f'[{self.id}] {self.uri[:64]}'
			
 
				 
			
 
				     @classmethod
			
 
				-    def from_file(cls, source_file: Path, label: str='', parser: str='auto', tag: str='', created_by: int|None=None, config: dict|None=None):
			
 
				+    def from_file(cls, source_file: Path, label: str = '', parser: str = 'auto', tag: str = '', created_by=None, config=None):
			
 
				         source_path = str(source_file.resolve()).replace(str(CONSTANTS.DATA_DIR), '/data')
			
 
				-        
			
 
				         seed, _ = cls.objects.get_or_create(
			
 
				-            label=label or source_file.name,
			
 
				-            uri=f'file://{source_path}',
			
 
				+            label=label or source_file.name, uri=f'file://{source_path}',
			
 
				             created_by_id=getattr(created_by, 'pk', created_by) or get_or_create_system_user_pk(),
			
 
				-            extractor=parser,
			
 
				-            tags_str=tag,
			
 
				-            config=config or {},
			
 
				+            extractor=parser, tags_str=tag, config=config or {},
			
 
				         )
			
 
				-        seed.save()
			
 
				         return seed
			
 
				 
			
 
				     @property
			
 
				     def source_type(self):
			
 
				-        # e.g. http/https://
			
 
				-        #      file://
			
 
				-        #      pocketapi://
			
 
				-        #      s3://
			
 
				-        #      etc..
			
 
				         return self.uri.split('://', 1)[0].lower()
			
 
				 
			
 
				     @property
			
 
				     def api_url(self) -> str:
			
 
				-        # /api/v1/core/seed/{uulid}
			
 
				-        return reverse_lazy('api-1:get_seed', args=[self.abid])  # + f'?api_key={get_or_create_api_token(request.user)}'
			
 
				-
			
 
				-    @property
			
 
				-    def api_docs_url(self) -> str:
			
 
				-        return '/api/v1/docs#/Core%20Models/api_v1_core_get_seed'
			
 
				-
			
 
				-    @property
			
 
				-    def scheduled_crawl_set(self) -> QuerySet['CrawlSchedule']:
			
 
				-        from crawls.models import CrawlSchedule
			
 
				-        return CrawlSchedule.objects.filter(template__seed_id=self.pk)
			
 
				+        return reverse_lazy('api-1:get_seed', args=[self.id])
			
 
				 
			
 
				     @property
			
 
				     def snapshot_set(self) -> QuerySet['Snapshot']:
			
 
				         from core.models import Snapshot
			
 
				-        
			
 
				-        crawl_ids = self.crawl_set.values_list('pk', flat=True)
			
 
				-        return Snapshot.objects.filter(crawl_id__in=crawl_ids)
			
 
				-
			
 
				-
			
 
				-
			
 
				-
			
 
				-class CrawlSchedule(ModelWithReadOnlyFields, ModelWithSerializers, ModelWithUUID, ModelWithKVTags, ABIDModel, ModelWithNotes, ModelWithHealthStats):
			
 
				-    """
			
 
				-    A record for a job that should run repeatedly on a given schedule.
			
 
				-    
			
 
				-    It pulls from a given Seed and creates a new Crawl for each scheduled run.
			
 
				-    The new Crawl will inherit all the properties of the crawl_template Crawl.
			
 
				-    """
			
 
				-    ### ABIDModel:
			
 
				-    abid_prefix = 'cws_'
			
 
				-    abid_ts_src = 'self.created_at'
			
 
				-    abid_uri_src = 'self.template.seed.uri'
			
 
				-    abid_subtype_src = 'self.template.persona'
			
 
				-    abid_rand_src = 'self.id'
			
 
				-    abid_drift_allowed = True
			
 
				-    abid = ABIDField(prefix=abid_prefix)
			
 
				-    
			
 
				-    ### ModelWithReadOnlyFields:
			
 
				-    read_only_fields = ('id', 'abid', 'created_at', 'created_by', 'template_id')
			
 
				-    
			
 
				-    ### Immutable fields:
			
 
				-    id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
			
 
				-    created_at = AutoDateTimeField(default=None, null=False, db_index=True)
			
 
				+        return Snapshot.objects.filter(crawl_id__in=self.crawl_set.values_list('pk', flat=True))
			
 
				+
			
 
				+
			
 
				+class CrawlSchedule(ModelWithSerializers, ModelWithNotes, ModelWithHealthStats):
			
 
				+    id = models.UUIDField(primary_key=True, default=uuid7, editable=False, unique=True)
			
 
				+    created_at = models.DateTimeField(default=timezone.now, db_index=True)
			
 
				     created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk, null=False)
			
 
				-    template: 'Crawl' = models.ForeignKey('Crawl', on_delete=models.CASCADE, null=False, blank=False, help_text='The base crawl that each new scheduled job should copy as a template')  # type: ignore
			
 
				-    
			
 
				-    ### Mutable fields
			
 
				-    schedule = models.CharField(max_length=64, blank=False, null=False, help_text='The schedule to run this crawl on in CRON syntax e.g. 0 0 * * * (see https://crontab.guru/)')
			
 
				-    is_enabled = models.BooleanField(default=True)
			
 
				-    label = models.CharField(max_length=64, blank=True, null=False, default='', help_text='A human-readable label for this scheduled crawl')
			
 
				-    notes = models.TextField(blank=True, null=False, default='', help_text='Any extra notes this crawl should have')
			
 
				     modified_at = models.DateTimeField(auto_now=True)
			
 
				-    
			
 
				-    ### ModelWithKVTags:
			
 
				-    tag_set = GenericRelation(
			
 
				-        KVTag,
			
 
				-        related_query_name="crawlschedule",
			
 
				-        content_type_field="obj_type",
			
 
				-        object_id_field="obj_id",
			
 
				-        order_by=('name',),
			
 
				-    )
			
 
				-    
			
 
				-    ### Managers:
			
 
				+
			
 
				+    template: 'Crawl' = models.ForeignKey('Crawl', on_delete=models.CASCADE, null=False, blank=False)  # type: ignore
			
 
				+    schedule = models.CharField(max_length=64, blank=False, null=False)
			
 
				+    is_enabled = models.BooleanField(default=True)
			
 
				+    label = models.CharField(max_length=64, blank=True, null=False, default='')
			
 
				+    notes = models.TextField(blank=True, null=False, default='')
			
 
				+
			
 
				     crawl_set: models.Manager['Crawl']
			
 
				-    
			
 
				+
			
 
				     class Meta(TypedModelMeta):
			
 
				         verbose_name = 'Scheduled Crawl'
			
 
				         verbose_name_plural = 'Scheduled Crawls'
			
 
				-        
			
 
				+
			
 
				     def __str__(self) -> str:
			
 
				-        uri = (self.template and self.template.seed and self.template.seed.uri) or '<no url set>'
			
 
				-        crawl_label = self.label or (self.template and self.template.seed and self.template.seed.label) or 'Untitled Crawl'
			
 
				-        if self.id and self.template:
			
 
				-            return f'[{self.ABID}] {uri[:64]} @ {self.schedule} (Scheduled {crawl_label})'
			
 
				-        return f'[{self.abid_prefix}****not*saved*yet****] {uri[:64]} @ {self.schedule} (Scheduled {crawl_label})'
			
 
				-    
			
 
				+        return f'[{self.id}] {self.template.seed.uri[:64] if self.template and self.template.seed else ""} @ {self.schedule}'
			
 
				+
			
 
				     @property
			
 
				     def api_url(self) -> str:
			
 
				-        # /api/v1/core/crawlschedule/{uulid}
			
 
				-        return reverse_lazy('api-1:get_any', args=[self.abid])  # + f'?api_key={get_or_create_api_token(request.user)}'
			
 
				+        return reverse_lazy('api-1:get_any', args=[self.id])
			
 
				 
			
 
				-    @property
			
 
				-    def api_docs_url(self) -> str:
			
 
				-        return '/api/v1/docs#/Core%20Models/api_v1_core_get_any'
			
 
				-    
			
 
				     def save(self, *args, **kwargs):
			
 
				-        self.label = self.label or self.template.seed.label or self.template.seed.uri
			
 
				+        self.label = self.label or (self.template.seed.label if self.template and self.template.seed else '')
			
 
				         super().save(*args, **kwargs)
			
 
				-        
			
 
				-        # make sure the template crawl points to this schedule as its schedule
			
 
				-        self.template.schedule = self
			
 
				-        self.template.save()
			
 
				-        
			
 
				-    @property
			
 
				-    def snapshot_set(self) -> QuerySet['Snapshot']:
			
 
				-        from core.models import Snapshot
			
 
				-        
			
 
				-        crawl_ids = self.crawl_set.values_list('pk', flat=True)
			
 
				-        return Snapshot.objects.filter(crawl_id__in=crawl_ids)
			
 
				-    
			
 
				-
			
 
				-class CrawlManager(models.Manager):
			
 
				-    pass
			
 
				-
			
 
				-class CrawlQuerySet(models.QuerySet):
			
 
				-    """
			
 
				-    Enhanced QuerySet for Crawl that adds some useful methods.
			
 
				-    
			
 
				-    To get all the snapshots for a given set of Crawls:
			
 
				-        Crawl.objects.filter(seed__uri='https://example.com/some/rss.xml').snapshots() -> QuerySet[Snapshot]
			
 
				-    
			
 
				-    To get all the archiveresults for a given set of Crawls:
			
 
				-        Crawl.objects.filter(seed__uri='https://example.com/some/rss.xml').archiveresults() -> QuerySet[ArchiveResult]
			
 
				-    
			
 
				-    To export the list of Crawls as a CSV or JSON:
			
 
				-        Crawl.objects.filter(seed__uri='https://example.com/some/rss.xml').export_as_csv() -> str
			
 
				-        Crawl.objects.filter(seed__uri='https://example.com/some/rss.xml').export_as_json() -> str
			
 
				-    """
			
 
				-    def snapshots(self, **filter_kwargs) -> QuerySet['Snapshot']:
			
 
				-        return Snapshot.objects.filter(crawl_id__in=self.values_list('pk', flat=True), **filter_kwargs)
			
 
				-    
			
 
				-    def archiveresults(self) -> QuerySet['ArchiveResult']:
			
 
				-        return ArchiveResult.objects.filter(snapshot__crawl_id__in=self.values_list('pk', flat=True))
			
 
				-    
			
 
				-    def as_csv_str(self, keys: Iterable[str]=()) -> str:
			
 
				-        return '\n'.join(
			
 
				-            row.as_csv(keys=keys)
			
 
				-            for row in self.all()
			
 
				-        )
			
 
				-    
			
 
				-    def as_jsonl_str(self, keys: Iterable[str]=()) -> str:
			
 
				-        return '\n'.join([
			
 
				-            row.as_jsonl_row(keys=keys)
			
 
				-            for row in self.all()
			
 
				-        ])
			
 
				-
			
 
				-
			
 
				-
			
 
				-class Crawl(ModelWithReadOnlyFields, ModelWithSerializers, ModelWithUUID, ModelWithKVTags, ABIDModel, ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWithStateMachine):
			
 
				-    """
			
 
				-    A single session of URLs to archive starting from a given Seed and expanding outwards. An "archiving session" so to speak.
			
 
				-
			
 
				-    A new Crawl should be created for each loading from a Seed (because it can produce a different set of URLs every time its loaded).
			
 
				-    E.g. every scheduled import from an RSS feed should create a new Crawl, and more loadings from the same seed each create a new Crawl
			
 
				-    
			
 
				-    Every "Add" task triggered from the Web UI, CLI, or Scheduled Crawl should create a new Crawl with the seed set to a 
			
 
				-    file URI e.g. file:///sources/<date>_{ui,cli}_add.txt containing the user's input.
			
 
				-    """
			
 
				-    
			
 
				-    ### ModelWithReadOnlyFields:
			
 
				-    read_only_fields = ('id', 'abid', 'created_at', 'created_by', 'seed')
			
 
				-    
			
 
				-    ### Immutable fields:
			
 
				-    id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
			
 
				-    abid = ABIDField(prefix=abid_prefix)
			
 
				-    created_at = AutoDateTimeField(default=None, null=False, db_index=True)
			
 
				+        if self.template:
			
 
				+            self.template.schedule = self
			
 
				+            self.template.save()
			
 
				+
			
 
				+
			
 
				+class Crawl(ModelWithSerializers, ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWithStateMachine):
			
 
				+    id = models.UUIDField(primary_key=True, default=uuid7, editable=False, unique=True)
			
 
				+    created_at = models.DateTimeField(default=timezone.now, db_index=True)
			
 
				     created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk, null=False)
			
 
				+    modified_at = models.DateTimeField(auto_now=True)
			
 
				+
			
 
				     seed = models.ForeignKey(Seed, on_delete=models.PROTECT, related_name='crawl_set', null=False, blank=False)
			
 
				-    
			
 
				-    ### Mutable fields:
			
 
				-    urls = models.TextField(blank=True, null=False, default='', help_text='The log of URLs discovered in this crawl, one per line, should be 1:1 with snapshot_set')
			
 
				+    urls = models.TextField(blank=True, null=False, default='')
			
 
				     config = models.JSONField(default=dict)
			
 
				     max_depth = models.PositiveSmallIntegerField(default=0, validators=[MinValueValidator(0), MaxValueValidator(4)])
			
 
				     tags_str = models.CharField(max_length=1024, blank=True, null=False, default='')
			
 
				-    persona_id = models.UUIDField(null=True, blank=True)  # TODO: replace with self.persona = models.ForeignKey(Persona, on_delete=models.SET_NULL, null=True, blank=True, editable=True)
			
 
				-    label = models.CharField(max_length=64, blank=True, null=False, default='', help_text='A human-readable label for this crawl')
			
 
				-    notes = models.TextField(blank=True, null=False, default='', help_text='Any extra notes this crawl should have')
			
 
				+    persona_id = models.UUIDField(null=True, blank=True)
			
 
				+    label = models.CharField(max_length=64, blank=True, null=False, default='')
			
 
				+    notes = models.TextField(blank=True, null=False, default='')
			
 
				     schedule = models.ForeignKey(CrawlSchedule, on_delete=models.SET_NULL, null=True, blank=True, editable=True)
			
 
				-    modified_at = models.DateTimeField(auto_now=True)
			
 
				-    
			
 
				-    ### ModelWithKVTags:
			
 
				-    tag_set = GenericRelation(
			
 
				-        KVTag,
			
 
				-        related_query_name="crawl",
			
 
				-        content_type_field="obj_type",
			
 
				-        object_id_field="obj_id",
			
 
				-        order_by=('name',),
			
 
				-    )
			
 
				-    
			
 
				-    ### ModelWithStateMachine:
			
 
				+    output_dir = models.FilePathField(path=settings.ARCHIVE_DIR, null=False, blank=True, default='')
			
 
				+
			
 
				+    status = ModelWithStateMachine.StatusField(choices=ModelWithStateMachine.StatusChoices, default=ModelWithStateMachine.StatusChoices.QUEUED)
			
 
				+    retry_at = ModelWithStateMachine.RetryAtField(default=timezone.now)
			
 
				+
			
 
				     state_machine_name = 'crawls.statemachines.CrawlMachine'
			
 
				     retry_at_field_name = 'retry_at'
			
 
				     state_field_name = 'status'
			
 
				     StatusChoices = ModelWithStateMachine.StatusChoices
			
 
				     active_state = StatusChoices.STARTED
			
 
				-    
			
 
				-    status = ModelWithStateMachine.StatusField(choices=StatusChoices, default=StatusChoices.QUEUED)
			
 
				-    retry_at = ModelWithStateMachine.RetryAtField(default=timezone.now)
			
 
				 
			
 
				-    ### ABIDModel:
			
 
				-    abid_prefix = 'cwl_'
			
 
				-    abid_ts_src = 'self.created_at'
			
 
				-    abid_uri_src = 'self.seed.uri'
			
 
				-    abid_subtype_src = 'self.persona'
			
 
				-    abid_rand_src = 'self.id'
			
 
				-    abid_drift_allowed = True
			
 
				-    
			
 
				-    ### ModelWithOutputDir:
			
 
				-    output_dir = models.FilePathField(path=settings.ARCHIVE_DIR, null=False, blank=True, default='', help_text='The directory to store the output of this crawl')
			
 
				-    output_dir_template = 'archive/crawls/{getattr(crawl, crawl.abid_ts_src).strftime("%Y%m%d")}/{crawl.abid}'
			
 
				-    output_dir_symlinks = [
			
 
				-        ('index.json', 'self.as_json'),
			
 
				-        ('seed/', 'self.seed.output_dir'),
			
 
				-        ('persona/', 'self.persona.output_dir'),
			
 
				-        ('created_by/', 'self.created_by.output_dir'),
			
 
				-        ('schedule/', 'self.schedule.output_dir'),
			
 
				-        ('sessions/', '[session.output_dir for session in self.session_set.all()]'),
			
 
				-        ('snapshots/', '[snapshot.output_dir for snapshot in self.snapshot_set.all()]'),
			
 
				-        ('archiveresults/', '[archiveresult.output_dir for archiveresult in self.archiveresult_set.all()]'),
			
 
				-    ]
			
 
				-    
			
 
				-    ### Managers:    
			
 
				     snapshot_set: models.Manager['Snapshot']
			
 
				-    
			
 
				-    # @property
			
 
				-    # def persona(self) -> Persona:
			
 
				-    #     # TODO: replace with self.persona = models.ForeignKey(Persona, on_delete=models.SET_NULL, null=True, blank=True, editable=True)
			
 
				-    #     return self.persona_id
			
 
				-    
			
 
				 
			
 
				     class Meta(TypedModelMeta):
			
 
				         verbose_name = 'Crawl'
			
 
				         verbose_name_plural = 'Crawls'
			
 
				-        
			
 
				+
			
 
				     def __str__(self):
			
 
				-        url = (self.seed and self.seed.uri) or '<no url set>'
			
 
				-        parser = (self.seed and self.seed.extractor) or 'auto'
			
 
				-        created_at = self.created_at.strftime("%Y-%m-%d %H:%M") if self.created_at else '<no timestamp set>'
			
 
				-        if self.id and self.seed:
			
 
				-            return f'[{self.ABID}] {url[:64]} ({parser}) @ {created_at} ({self.label or "Untitled Crawl"})'
			
 
				-        return f'[{self.abid_prefix}****not*saved*yet****] {url[:64]} ({parser}) @ {created_at} ({self.label or "Untitled Crawl"})'
			
 
				-        
			
 
				+        return f'[{self.id}] {self.seed.uri[:64] if self.seed else ""}'
			
 
				+
			
 
				     @classmethod
			
 
				-    def from_seed(cls, seed: Seed, max_depth: int=0, persona: str='Default', tags_str: str='', config: dict|None=None, created_by: int|None=None):
			
 
				+    def from_seed(cls, seed: Seed, max_depth: int = 0, persona: str = 'Default', tags_str: str = '', config=None, created_by=None):
			
 
				         crawl, _ = cls.objects.get_or_create(
			
 
				-            seed=seed,
			
 
				-            max_depth=max_depth,
			
 
				-            tags_str=tags_str or seed.tags_str,
			
 
				-            persona=persona or seed.config.get('DEFAULT_PERSONA') or 'Default',
			
 
				+            seed=seed, max_depth=max_depth, tags_str=tags_str or seed.tags_str,
			
 
				             config=seed.config or config or {},
			
 
				             created_by_id=getattr(created_by, 'pk', created_by) or seed.created_by_id,
			
 
				         )
			
 
				-        crawl.save()
			
 
				         return crawl
			
 
				-        
			
 
				-    @property
			
 
				-    def template(self):
			
 
				-        """If this crawl was created under a ScheduledCrawl, returns the original template Crawl it was based off"""
			
 
				-        if not self.schedule:
			
 
				-            return None
			
 
				-        return self.schedule.template
			
 
				 
			
 
				     @property
			
 
				     def api_url(self) -> str:
			
 
				-        # /api/v1/core/crawl/{uulid}
			
 
				-        # TODO: implement get_crawl
			
 
				-        return reverse_lazy('api-1:get_crawl', args=[self.abid])  # + f'?api_key={get_or_create_api_token(request.user)}'
			
 
				+        return reverse_lazy('api-1:get_crawl', args=[self.id])
			
 
				 
			
 
				-    @property
			
 
				-    def api_docs_url(self) -> str:
			
 
				-        return '/api/v1/docs#/Core%20Models/api_v1_core_get_crawl'
			
 
				-    
			
 
				-    def pending_snapshots(self) -> QuerySet['Snapshot']:
			
 
				-        return self.snapshot_set.filter(retry_at__isnull=False)
			
 
				-    
			
 
				-    def pending_archiveresults(self) -> QuerySet['ArchiveResult']:
			
 
				-        from core.models import ArchiveResult
			
 
				-        
			
 
				-        snapshot_ids = self.snapshot_set.values_list('id', flat=True)
			
 
				-        pending_archiveresults = ArchiveResult.objects.filter(snapshot_id__in=snapshot_ids, retry_at__isnull=False)
			
 
				-        return pending_archiveresults
			
 
				-    
			
 
				     def create_root_snapshot(self) -> 'Snapshot':
			
 
				-        print(f'Crawl[{self.ABID}].create_root_snapshot()')
			
 
				         from core.models import Snapshot
			
 
				-        
			
 
				         try:
			
 
				             return Snapshot.objects.get(crawl=self, url=self.seed.uri)
			
 
				         except Snapshot.DoesNotExist:
			
 
				             pass
			
 
				-
			
 
				         root_snapshot, _ = Snapshot.objects.update_or_create(
			
 
				-            crawl=self,
			
 
				-            url=self.seed.uri,
			
 
				-            defaults={
			
 
				-                'status': Snapshot.INITIAL_STATE,
			
 
				-                'retry_at': timezone.now(),
			
 
				-                'timestamp': str(timezone.now().timestamp()),
			
 
				-                # 'config': self.seed.config,
			
 
				-            },
			
 
				+            crawl=self, url=self.seed.uri,
			
 
				+            defaults={'status': Snapshot.INITIAL_STATE, 'retry_at': timezone.now(), 'timestamp': str(timezone.now().timestamp())},
			
 
				         )
			
 
				-        root_snapshot.save()
			
 
				         return root_snapshot
			
 
				 
			
 
				 
			
 
				-class Outlink(ModelWithReadOnlyFields, ModelWithSerializers, ModelWithUUID, ModelWithKVTags):
			
 
				-    """A record of a link found on a page, pointing to another page."""
			
 
				-    read_only_fields = ('id', 'src', 'dst', 'crawl', 'via')
			
 
				-    
			
 
				-    id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
			
 
				-    
			
 
				-    src = models.URLField()   # parent page where the outlink/href was found       e.g. https://example.com/downloads
			
 
				-    dst = models.URLField()   # remote location the child outlink/href points to   e.g. https://example.com/downloads/some_file.pdf
			
 
				-    
			
 
				+class Outlink(ModelWithSerializers):
			
 
				+    id = models.UUIDField(primary_key=True, default=uuid7, editable=False, unique=True)
			
 
				+    src = models.URLField()
			
 
				+    dst = models.URLField()
			
 
				     crawl = models.ForeignKey(Crawl, on_delete=models.CASCADE, null=False, blank=False, related_name='outlink_set')
			
 
				     via = models.ForeignKey('core.ArchiveResult', on_delete=models.SET_NULL, null=True, blank=True, related_name='outlink_set')
			
 
				 
			
 
				     class Meta:
			
 
				         unique_together = (('src', 'dst', 'via'),)
			
 
				-
			
 
				-
			
 
				-
			
 
				-
			
 
				-        
			
 
				-# @abx.hookimpl.on_archiveresult_created
			
 
				-# def exec_archiveresult_extractor_effects(archiveresult):
			
 
				-#     config = get_scope_config(...)
			
 
				-    
			
 
				-#     # abx.archivebox.writes.update_archiveresult_started(archiveresult, start_ts=timezone.now())
			
 
				-#     # abx.archivebox.events.on_archiveresult_updated(archiveresult)
			
 
				-    
			
 
				-#     # check if it should be skipped
			
 
				-#     if not abx.archivebox.reads.get_archiveresult_should_run(archiveresult, config):
			
 
				-#         abx.archivebox.writes.update_archiveresult_skipped(archiveresult, status='skipped')
			
 
				-#         abx.archivebox.events.on_archiveresult_skipped(archiveresult, config)
			
 
				-#         return
			
 
				-    
			
 
				-#     # run the extractor method and save the output back to the archiveresult
			
 
				-#     try:
			
 
				-#         output = abx.archivebox.effects.exec_archiveresult_extractor(archiveresult, config)
			
 
				-#         abx.archivebox.writes.update_archiveresult_succeeded(archiveresult, output=output, error=None, end_ts=timezone.now())
			
 
				-#     except Exception as e:
			
 
				-#         abx.archivebox.writes.update_archiveresult_failed(archiveresult, error=e, end_ts=timezone.now())
			
 
				-    
			
 
				-#     # bump the modified time on the archiveresult and Snapshot
			
 
				-#     abx.archivebox.events.on_archiveresult_updated(archiveresult)
			
 
				-#     abx.archivebox.events.on_snapshot_updated(archiveresult.snapshot)
			
 
				-    
			
 
				-
			
 
				-# @abx.hookimpl.reads.get_outlink_parents
			
 
				-# def get_outlink_parents(url, crawl_pk=None, config=None):
			
 
				-#     scope = Q(dst=url)
			
 
				-#     if crawl_pk:
			
 
				-#         scope = scope | Q(via__snapshot__crawl_id=crawl_pk)
			
 
				-    
			
 
				-#     parent = list(Outlink.objects.filter(scope))
			
 
				-#     if not parent:
			
 
				-#         # base case: we reached the top of the chain, no more parents left
			
 
				-#         return []
			
 
				-    
			
 
				-#     # recursive case: there is another parent above us, get its parents
			
 
				-#     yield parent[0]
			
 
				-#     yield from get_outlink_parents(parent[0].src, crawl_pk=crawl_pk, config=config)
			
 
				-
			
 
				-
			
--- a/archivebox/crawls/statemachines.py
+++ b/archivebox/crawls/statemachines.py
@@ -36,7 +36,7 @@ class CrawlMachine(StateMachine, strict_states=True):
 
				         super().__init__(crawl, *args, **kwargs)
			
 
				     
			
 
				     def __repr__(self) -> str:
			
 
				-        return f'[grey53]Crawl\\[{self.crawl.ABID}] 🏃‍♂️ Worker\\[pid={os.getpid()}].tick()[/grey53] [blue]{self.crawl.status.upper()}[/blue] ⚙️ [grey37]Machine[/grey37]'
			
 
				+        return f'[grey53]Crawl\\[{self.crawl.id}] 🏃‍♂️ Worker\\[pid={os.getpid()}].tick()[/grey53] [blue]{self.crawl.status.upper()}[/blue] ⚙️ [grey37]Machine[/grey37]'
			
 
				     
			
 
				     def __str__(self) -> str:
			
 
				         return self.__repr__()
			
--- a/archivebox/machine/admin.py
+++ b/archivebox/machine/admin.py
@@ -5,18 +5,15 @@ import abx
 
				 from django.contrib import admin
			
 
				 from django.utils.html import format_html
			
 
				 
			
 
				-from archivebox.base_models.admin import ABIDModelAdmin
			
 
				-
			
 
				+from archivebox.base_models.admin import BaseModelAdmin
			
 
				 from machine.models import Machine, NetworkInterface, InstalledBinary
			
 
				 
			
 
				 
			
 
				+class MachineAdmin(BaseModelAdmin):
			
 
				+    list_display = ('id', 'created_at', 'hostname', 'ips', 'os_platform', 'hw_in_docker', 'hw_in_vm', 'hw_manufacturer', 'hw_product', 'os_arch', 'os_family', 'os_release', 'hw_uuid', 'health')
			
 
				+    sort_fields = ('id', 'created_at', 'hostname', 'ips', 'os_platform', 'hw_in_docker', 'hw_in_vm', 'hw_manufacturer', 'hw_product', 'os_arch', 'os_family', 'os_release', 'hw_uuid')
			
 
				 
			
 
				-class MachineAdmin(ABIDModelAdmin):
			
 
				-    list_display = ('abid', 'created_at', 'hostname', 'ips', 'os_platform', 'hw_in_docker', 'hw_in_vm', 'hw_manufacturer', 'hw_product', 'os_arch', 'os_family', 'os_release', 'hw_uuid', 'health')
			
 
				-    sort_fields = ('abid', 'created_at', 'hostname', 'ips', 'os_platform', 'hw_in_docker', 'hw_in_vm', 'hw_manufacturer', 'hw_product', 'os_arch', 'os_family', 'os_release', 'hw_uuid')
			
 
				-    # search_fields = ('id', 'abid', 'guid', 'hostname', 'hw_manufacturer', 'hw_product', 'hw_uuid', 'os_arch', 'os_family', 'os_platform', 'os_kernel', 'os_release')
			
 
				-    
			
 
				-    readonly_fields = ('guid', 'created_at', 'modified_at', 'abid_info', 'ips')
			
 
				+    readonly_fields = ('guid', 'created_at', 'modified_at', 'ips')
			
 
				     fields = (*readonly_fields, 'hostname', 'hw_in_docker', 'hw_in_vm', 'hw_manufacturer', 'hw_product', 'hw_uuid', 'os_arch', 'os_family', 'os_platform', 'os_kernel', 'os_release', 'stats', 'num_uses_succeeded', 'num_uses_failed')
			
 
				 
			
 
				     list_filter = ('hw_in_docker', 'hw_in_vm', 'os_arch', 'os_family', 'os_platform')
			
@@ -24,23 +21,20 @@ class MachineAdmin(ABIDModelAdmin):
 
				     list_per_page = 100
			
 
				     actions = ["delete_selected"]
			
 
				 
			
 
				-    @admin.display(
			
 
				-        description='Public IP',
			
 
				-        ordering='networkinterface__ip_public',
			
 
				-    )
			
 
				+    @admin.display(description='Public IP', ordering='networkinterface__ip_public')
			
 
				     def ips(self, machine):
			
 
				         return format_html(
			
 
				             '<a href="/admin/machine/networkinterface/?q={}"><b><code>{}</code></b></a>',
			
 
				-            machine.abid,
			
 
				-            ', '.join(machine.networkinterface_set.values_list('ip_public', flat=True)),
			
 
				+            machine.id, ', '.join(machine.networkinterface_set.values_list('ip_public', flat=True)),
			
 
				         )
			
 
				 
			
 
				-class NetworkInterfaceAdmin(ABIDModelAdmin):
			
 
				-    list_display = ('abid', 'created_at', 'machine_info', 'ip_public', 'dns_server', 'isp', 'country', 'region', 'city', 'iface', 'ip_local', 'mac_address', 'health')
			
 
				-    sort_fields = ('abid', 'created_at', 'machine_info', 'ip_public', 'dns_server', 'isp', 'country', 'region', 'city', 'iface', 'ip_local', 'mac_address')
			
 
				-    search_fields = ('abid', 'machine__abid', 'iface', 'ip_public', 'ip_local', 'mac_address', 'dns_server', 'hostname', 'isp', 'city', 'region', 'country')
			
 
				-    
			
 
				-    readonly_fields = ('machine', 'created_at', 'modified_at', 'abid_info', 'mac_address', 'ip_public', 'ip_local', 'dns_server')
			
 
				+
			
 
				+class NetworkInterfaceAdmin(BaseModelAdmin):
			
 
				+    list_display = ('id', 'created_at', 'machine_info', 'ip_public', 'dns_server', 'isp', 'country', 'region', 'city', 'iface', 'ip_local', 'mac_address', 'health')
			
 
				+    sort_fields = ('id', 'created_at', 'machine_info', 'ip_public', 'dns_server', 'isp', 'country', 'region', 'city', 'iface', 'ip_local', 'mac_address')
			
 
				+    search_fields = ('id', 'machine__id', 'iface', 'ip_public', 'ip_local', 'mac_address', 'dns_server', 'hostname', 'isp', 'city', 'region', 'country')
			
 
				+
			
 
				+    readonly_fields = ('machine', 'created_at', 'modified_at', 'mac_address', 'ip_public', 'ip_local', 'dns_server')
			
 
				     fields = (*readonly_fields, 'iface', 'hostname', 'isp', 'city', 'region', 'country', 'num_uses_succeeded', 'num_uses_failed')
			
 
				 
			
 
				     list_filter = ('isp', 'country', 'region')
			
@@ -48,24 +42,20 @@ class NetworkInterfaceAdmin(ABIDModelAdmin):
 
				     list_per_page = 100
			
 
				     actions = ["delete_selected"]
			
 
				 
			
 
				-    @admin.display(
			
 
				-        description='Machine',
			
 
				-        ordering='machine__abid',
			
 
				-    )
			
 
				+    @admin.display(description='Machine', ordering='machine__id')
			
 
				     def machine_info(self, iface):
			
 
				         return format_html(
			
 
				             '<a href="/admin/machine/machine/{}/change"><b><code>[{}]</code></b> &nbsp; {}</a>',
			
 
				-            iface.machine.id,
			
 
				-            iface.machine.abid,
			
 
				-            iface.machine.hostname,
			
 
				+            iface.machine.id, str(iface.machine.id)[:8], iface.machine.hostname,
			
 
				         )
			
 
				 
			
 
				-class InstalledBinaryAdmin(ABIDModelAdmin):
			
 
				-    list_display = ('abid', 'created_at', 'machine_info', 'name', 'binprovider', 'version', 'abspath', 'sha256', 'health')
			
 
				-    sort_fields = ('abid', 'created_at', 'machine_info', 'name', 'binprovider', 'version', 'abspath', 'sha256')
			
 
				-    search_fields = ('abid', 'machine__abid', 'name', 'binprovider', 'version', 'abspath', 'sha256')
			
 
				-    
			
 
				-    readonly_fields = ('created_at', 'modified_at', 'abid_info')
			
 
				+
			
 
				+class InstalledBinaryAdmin(BaseModelAdmin):
			
 
				+    list_display = ('id', 'created_at', 'machine_info', 'name', 'binprovider', 'version', 'abspath', 'sha256', 'health')
			
 
				+    sort_fields = ('id', 'created_at', 'machine_info', 'name', 'binprovider', 'version', 'abspath', 'sha256')
			
 
				+    search_fields = ('id', 'machine__id', 'name', 'binprovider', 'version', 'abspath', 'sha256')
			
 
				+
			
 
				+    readonly_fields = ('created_at', 'modified_at')
			
 
				     fields = ('machine', 'name', 'binprovider', 'abspath', 'version', 'sha256', *readonly_fields, 'num_uses_succeeded', 'num_uses_failed')
			
 
				 
			
 
				     list_filter = ('name', 'binprovider', 'machine_id')
			
@@ -73,20 +63,14 @@ class InstalledBinaryAdmin(ABIDModelAdmin):
 
				     list_per_page = 100
			
 
				     actions = ["delete_selected"]
			
 
				 
			
 
				-    @admin.display(
			
 
				-        description='Machine',
			
 
				-        ordering='machine__abid',
			
 
				-    )
			
 
				+    @admin.display(description='Machine', ordering='machine__id')
			
 
				     def machine_info(self, installed_binary):
			
 
				         return format_html(
			
 
				             '<a href="/admin/machine/machine/{}/change"><b><code>[{}]</code></b> &nbsp; {}</a>',
			
 
				-            installed_binary.machine.id,
			
 
				-            installed_binary.machine.abid,
			
 
				-            installed_binary.machine.hostname,
			
 
				+            installed_binary.machine.id, str(installed_binary.machine.id)[:8], installed_binary.machine.hostname,
			
 
				         )
			
 
				 
			
 
				 
			
 
				-
			
 
				 @abx.hookimpl
			
 
				 def register_admin(admin_site):
			
 
				     admin_site.register(Machine, MachineAdmin)
			
--- a/archivebox/machine/models.py
+++ b/archivebox/machine/models.py
@@ -6,7 +6,7 @@ import signal
 
				 import socket
			
 
				 import subprocess
			
 
				 import multiprocessing
			
 
				-
			
 
				+from uuid import uuid7
			
 
				 from datetime import timedelta
			
 
				 from pathlib import Path
			
 
				 
			
@@ -16,21 +16,17 @@ from django.utils.functional import cached_property
 
				 
			
 
				 import abx
			
 
				 import archivebox
			
 
				-
			
 
				 from abx_pkg import Binary, BinProvider
			
 
				-from archivebox.base_models.models import ABIDModel, ABIDField, AutoDateTimeField, ModelWithHealthStats
			
 
				-
			
 
				+from archivebox.base_models.models import ModelWithHealthStats
			
 
				 from .detect import get_host_guid, get_os_info, get_vm_info, get_host_network, get_host_stats
			
 
				 
			
 
				-_CURRENT_MACHINE = None                              # global cache for the current machine
			
 
				-_CURRENT_INTERFACE = None                            # global cache for the current network interface
			
 
				-_CURRENT_BINARIES = {}                               # global cache for the currently installed binaries
			
 
				-
			
 
				-
			
 
				-MACHINE_RECHECK_INTERVAL = 7 * 24 * 60 * 60         # 1 week (how often should we check for OS/hardware changes?)
			
 
				-NETWORK_INTERFACE_RECHECK_INTERVAL = 1 * 60 * 60    # 1 hour (how often should we check for public IP/private IP/DNS changes?)
			
 
				-INSTALLED_BINARY_RECHECK_INTERVAL = 1 * 30 * 60     # 30min  (how often should we check for changes to locally installed binaries?)
			
 
				+_CURRENT_MACHINE = None
			
 
				+_CURRENT_INTERFACE = None
			
 
				+_CURRENT_BINARIES = {}
			
 
				 
			
 
				+MACHINE_RECHECK_INTERVAL = 7 * 24 * 60 * 60
			
 
				+NETWORK_INTERFACE_RECHECK_INTERVAL = 1 * 60 * 60
			
 
				+INSTALLED_BINARY_RECHECK_INTERVAL = 1 * 30 * 60
			
 
				 
			
 
				 
			
 
				 class MachineManager(models.Manager):
			
@@ -38,393 +34,177 @@ class MachineManager(models.Manager):
 
				         return Machine.current()
			
 
				 
			
 
				 
			
 
				-class Machine(ABIDModel, ModelWithHealthStats):
			
 
				-    """Audit log entry for a physical machine that was used to do archiving."""
			
 
				-    
			
 
				-    abid_prefix = 'mcn_'
			
 
				-    abid_ts_src = 'self.created_at'
			
 
				-    abid_uri_src = 'self.guid'
			
 
				-    abid_subtype_src = '"01"'
			
 
				-    abid_rand_src = 'self.id'
			
 
				-    abid_drift_allowed = False
			
 
				-    
			
 
				-    read_only_fields = ('id', 'abid', 'created_at', 'guid', 'hw_in_docker', 'hw_in_vm', 'hw_manufacturer', 'hw_product', 'hw_uuid', 'os_arch', 'os_family')
			
 
				-
			
 
				-    id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
			
 
				-    abid = ABIDField(prefix=abid_prefix)
			
 
				-
			
 
				-    created_at = AutoDateTimeField(default=None, null=False, db_index=True)
			
 
				+class Machine(models.Model, ModelWithHealthStats):
			
 
				+    id = models.UUIDField(primary_key=True, default=uuid7, editable=False, unique=True)
			
 
				+    created_at = models.DateTimeField(default=timezone.now, db_index=True)
			
 
				     modified_at = models.DateTimeField(auto_now=True)
			
 
				+    guid = models.CharField(max_length=64, default=None, null=False, unique=True, editable=False)
			
 
				+    hostname = models.CharField(max_length=63, default=None, null=False)
			
 
				+    hw_in_docker = models.BooleanField(default=False, null=False)
			
 
				+    hw_in_vm = models.BooleanField(default=False, null=False)
			
 
				+    hw_manufacturer = models.CharField(max_length=63, default=None, null=False)
			
 
				+    hw_product = models.CharField(max_length=63, default=None, null=False)
			
 
				+    hw_uuid = models.CharField(max_length=255, default=None, null=False)
			
 
				+    os_arch = models.CharField(max_length=15, default=None, null=False)
			
 
				+    os_family = models.CharField(max_length=15, default=None, null=False)
			
 
				+    os_platform = models.CharField(max_length=63, default=None, null=False)
			
 
				+    os_release = models.CharField(max_length=63, default=None, null=False)
			
 
				+    os_kernel = models.CharField(max_length=255, default=None, null=False)
			
 
				+    stats = models.JSONField(default=dict, null=False)
			
 
				+    num_uses_failed = models.PositiveIntegerField(default=0)
			
 
				+    num_uses_succeeded = models.PositiveIntegerField(default=0)
			
 
				 
			
 
				-    # IMMUTABLE PROPERTIES
			
 
				-    guid = models.CharField(max_length=64, default=None, null=False, unique=True, editable=False)  # 64char sha256 hash of machine's unique hardware ID
			
 
				-    
			
 
				-    # MUTABLE PROPERTIES
			
 
				-    hostname = models.CharField(max_length=63, default=None, null=False)        # e.g. somehost.subdomain.example.com
			
 
				-    hw_in_docker = models.BooleanField(default=False, null=False)               # e.g. False
			
 
				-    hw_in_vm = models.BooleanField(default=False, null=False)                   # e.g. False
			
 
				-    hw_manufacturer = models.CharField(max_length=63, default=None, null=False) # e.g. Apple
			
 
				-    hw_product = models.CharField(max_length=63, default=None, null=False)      # e.g. Mac Studio Mac13,1
			
 
				-    hw_uuid = models.CharField(max_length=255, default=None, null=False)        # e.g. 39A12B50-...-...-...-...
			
 
				-    
			
 
				-    os_arch = models.CharField(max_length=15, default=None, null=False)         # e.g. arm64
			
 
				-    os_family = models.CharField(max_length=15, default=None, null=False)       # e.g. darwin
			
 
				-    os_platform = models.CharField(max_length=63, default=None, null=False)     # e.g. macOS-14.6.1-arm64-arm-64bit
			
 
				-    os_release = models.CharField(max_length=63, default=None, null=False)      # e.g. macOS 14.6.1
			
 
				-    os_kernel = models.CharField(max_length=255, default=None, null=False)      # e.g. Darwin Kernel Version 23.6.0: Mon Jul 29 21:14:30 PDT 2024; root:xnu-10063.141.2~1/RELEASE_ARM64_T6000
			
 
				-    
			
 
				-    # STATS COUNTERS
			
 
				-    stats = models.JSONField(default=dict, null=False)                    # e.g. {"cpu_load": [1.25, 2.4, 1.4], "mem_swap_used_pct": 56, ...}
			
 
				-    
			
 
				-    # num_uses_failed = models.PositiveIntegerField(default=0)                  # from ModelWithHealthStats
			
 
				-    # num_uses_succeeded = models.PositiveIntegerField(default=0)
			
 
				-    
			
 
				     objects: MachineManager = MachineManager()
			
 
				-    
			
 
				     networkinterface_set: models.Manager['NetworkInterface']
			
 
				 
			
 
				     @classmethod
			
 
				     def current(cls) -> 'Machine':
			
 
				-        """Get the current machine that ArchiveBox is running on."""
			
 
				-        
			
 
				         global _CURRENT_MACHINE
			
 
				         if _CURRENT_MACHINE:
			
 
				-            expires_at = _CURRENT_MACHINE.modified_at + timedelta(seconds=MACHINE_RECHECK_INTERVAL)
			
 
				-            if timezone.now() < expires_at:
			
 
				-                # assume current machine cant change *while archivebox is actively running on it*
			
 
				-                # it's not strictly impossible to swap hardware while code is running,
			
 
				-                # but its rare and unusual so we check only once per week
			
 
				-                # (e.g. VMWare can live-migrate a VM to a new host while it's running)
			
 
				+            if timezone.now() < _CURRENT_MACHINE.modified_at + timedelta(seconds=MACHINE_RECHECK_INTERVAL):
			
 
				                 return _CURRENT_MACHINE
			
 
				-            else:
			
 
				-                _CURRENT_MACHINE = None
			
 
				-        
			
 
				-        _CURRENT_MACHINE, _created = cls.objects.update_or_create(
			
 
				+            _CURRENT_MACHINE = None
			
 
				+        _CURRENT_MACHINE, _ = cls.objects.update_or_create(
			
 
				             guid=get_host_guid(),
			
 
				-            defaults={
			
 
				-                'hostname': socket.gethostname(),
			
 
				-                **get_os_info(),
			
 
				-                **get_vm_info(),
			
 
				-                'stats': get_host_stats(),
			
 
				-            },
			
 
				-        )        
			
 
				-        _CURRENT_MACHINE.save()  # populate ABID
			
 
				-        
			
 
				+            defaults={'hostname': socket.gethostname(), **get_os_info(), **get_vm_info(), 'stats': get_host_stats()},
			
 
				+        )
			
 
				         return _CURRENT_MACHINE
			
 
				 
			
 
				 
			
 
				-
			
 
				 class NetworkInterfaceManager(models.Manager):
			
 
				     def current(self) -> 'NetworkInterface':
			
 
				         return NetworkInterface.current()
			
 
				 
			
 
				 
			
 
				-class NetworkInterface(ABIDModel, ModelWithHealthStats):
			
 
				-    """Audit log entry for a physical network interface / internet connection that was used to do archiving."""
			
 
				-    
			
 
				-    abid_prefix = 'net_'
			
 
				-    abid_ts_src = 'self.machine.created_at'
			
 
				-    abid_uri_src = 'self.machine.guid'
			
 
				-    abid_subtype_src = 'self.iface'
			
 
				-    abid_rand_src = 'self.id'
			
 
				-    abid_drift_allowed = False
			
 
				-    
			
 
				-    read_only_fields = ('id', 'abid', 'created_at', 'machine', 'mac_address', 'ip_public', 'ip_local', 'dns_server')
			
 
				-    
			
 
				-    id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
			
 
				-    abid = ABIDField(prefix=abid_prefix)
			
 
				-
			
 
				-    created_at = AutoDateTimeField(default=None, null=False, db_index=True)
			
 
				+class NetworkInterface(models.Model, ModelWithHealthStats):
			
 
				+    id = models.UUIDField(primary_key=True, default=uuid7, editable=False, unique=True)
			
 
				+    created_at = models.DateTimeField(default=timezone.now, db_index=True)
			
 
				     modified_at = models.DateTimeField(auto_now=True)
			
 
				-    
			
 
				-    machine = models.ForeignKey(Machine, on_delete=models.CASCADE, default=None, null=False)  # e.g. Machine(id=...)
			
 
				-
			
 
				-    # IMMUTABLE PROPERTIES
			
 
				-    mac_address = models.CharField(max_length=17, default=None, null=False, editable=False)   # e.g. ab:cd:ef:12:34:56
			
 
				-    ip_public = models.GenericIPAddressField(default=None, null=False, editable=False)        # e.g. 123.123.123.123 or 2001:0db8:85a3:0000:0000:8a2e:0370:7334
			
 
				-    ip_local = models.GenericIPAddressField(default=None, null=False, editable=False)         # e.g. 192.168.2.18    or 2001:0db8:85a3:0000:0000:8a2e:0370:7334
			
 
				-    dns_server = models.GenericIPAddressField(default=None, null=False, editable=False)       # e.g. 8.8.8.8         or 2001:0db8:85a3:0000:0000:8a2e:0370:7334
			
 
				-    
			
 
				-    # MUTABLE PROPERTIES
			
 
				-    hostname = models.CharField(max_length=63, default=None, null=False)                      # e.g. somehost.sub.example.com
			
 
				-    iface = models.CharField(max_length=15, default=None, null=False)                         # e.g. en0
			
 
				-    isp = models.CharField(max_length=63, default=None, null=False)                           # e.g. AS-SONICTELECOM
			
 
				-    city = models.CharField(max_length=63, default=None, null=False)                          # e.g. Berkeley
			
 
				-    region = models.CharField(max_length=63, default=None, null=False)                        # e.g. California
			
 
				-    country = models.CharField(max_length=63, default=None, null=False)                       # e.g. United States
			
 
				-
			
 
				-    # STATS COUNTERS (inherited from ModelWithHealthStats)
			
 
				-    # num_uses_failed = models.PositiveIntegerField(default=0)
			
 
				-    # num_uses_succeeded = models.PositiveIntegerField(default=0)
			
 
				+    machine = models.ForeignKey(Machine, on_delete=models.CASCADE, default=None, null=False)
			
 
				+    mac_address = models.CharField(max_length=17, default=None, null=False, editable=False)
			
 
				+    ip_public = models.GenericIPAddressField(default=None, null=False, editable=False)
			
 
				+    ip_local = models.GenericIPAddressField(default=None, null=False, editable=False)
			
 
				+    dns_server = models.GenericIPAddressField(default=None, null=False, editable=False)
			
 
				+    hostname = models.CharField(max_length=63, default=None, null=False)
			
 
				+    iface = models.CharField(max_length=15, default=None, null=False)
			
 
				+    isp = models.CharField(max_length=63, default=None, null=False)
			
 
				+    city = models.CharField(max_length=63, default=None, null=False)
			
 
				+    region = models.CharField(max_length=63, default=None, null=False)
			
 
				+    country = models.CharField(max_length=63, default=None, null=False)
			
 
				+    num_uses_failed = models.PositiveIntegerField(default=0)
			
 
				+    num_uses_succeeded = models.PositiveIntegerField(default=0)
			
 
				 
			
 
				     objects: NetworkInterfaceManager = NetworkInterfaceManager()
			
 
				-    
			
 
				+
			
 
				     class Meta:
			
 
				-        unique_together = (
			
 
				-            # if *any* of these change, it's considered a different interface
			
 
				-            # because we might get different downloaded content as a result,
			
 
				-            # this forces us to store an audit trail whenever these things change
			
 
				-            ('machine', 'ip_public', 'ip_local', 'mac_address', 'dns_server'),
			
 
				-        )
			
 
				-        
			
 
				+        unique_together = (('machine', 'ip_public', 'ip_local', 'mac_address', 'dns_server'),)
			
 
				+
			
 
				     @classmethod
			
 
				     def current(cls) -> 'NetworkInterface':
			
 
				-        """Get the current network interface for the current machine."""
			
 
				-        
			
 
				         global _CURRENT_INTERFACE
			
 
				         if _CURRENT_INTERFACE:
			
 
				-            # assume the current network interface (public IP, DNS servers, etc.) wont change more than once per hour
			
 
				-            expires_at = _CURRENT_INTERFACE.modified_at + timedelta(seconds=NETWORK_INTERFACE_RECHECK_INTERVAL)
			
 
				-            if timezone.now() < expires_at:
			
 
				+            if timezone.now() < _CURRENT_INTERFACE.modified_at + timedelta(seconds=NETWORK_INTERFACE_RECHECK_INTERVAL):
			
 
				                 return _CURRENT_INTERFACE
			
 
				-            else:
			
 
				-                _CURRENT_INTERFACE = None
			
 
				-        
			
 
				+            _CURRENT_INTERFACE = None
			
 
				         machine = Machine.objects.current()
			
 
				         net_info = get_host_network()
			
 
				-        _CURRENT_INTERFACE, _created = cls.objects.update_or_create(
			
 
				-            machine=machine,
			
 
				-            ip_public=net_info.pop('ip_public'),
			
 
				-            ip_local=net_info.pop('ip_local'),
			
 
				-            mac_address=net_info.pop('mac_address'),
			
 
				-            dns_server=net_info.pop('dns_server'),
			
 
				-            defaults=net_info,
			
 
				+        _CURRENT_INTERFACE, _ = cls.objects.update_or_create(
			
 
				+            machine=machine, ip_public=net_info.pop('ip_public'), ip_local=net_info.pop('ip_local'),
			
 
				+            mac_address=net_info.pop('mac_address'), dns_server=net_info.pop('dns_server'), defaults=net_info,
			
 
				         )
			
 
				-        _CURRENT_INTERFACE.save()  # populate ABID
			
 
				-
			
 
				         return _CURRENT_INTERFACE
			
 
				 
			
 
				 
			
 
				 class InstalledBinaryManager(models.Manager):
			
 
				     def get_from_db_or_cache(self, binary: Binary) -> 'InstalledBinary':
			
 
				-        """Get or create an InstalledBinary record for a Binary on the local machine"""
			
 
				-        
			
 
				         global _CURRENT_BINARIES
			
 
				-        cached_binary = _CURRENT_BINARIES.get(binary.name)
			
 
				-        if cached_binary:
			
 
				-            expires_at = cached_binary.modified_at + timedelta(seconds=INSTALLED_BINARY_RECHECK_INTERVAL)
			
 
				-            if timezone.now() < expires_at:
			
 
				-                is_loaded = binary.abspath and binary.version and binary.sha256
			
 
				-                if is_loaded:
			
 
				-                    # if the caller took did the (expensive) job of loading the binary from the filesystem already
			
 
				-                    # then their in-memory version is certainly more up-to-date than any potential cached version
			
 
				-                    # use this opportunity to invalidate the cache in case if anything has changed
			
 
				-                    is_different_from_cache = (
			
 
				-                        binary.abspath != cached_binary.abspath
			
 
				-                        or binary.version != cached_binary.version
			
 
				-                        or binary.sha256 != cached_binary.sha256
			
 
				-                    )
			
 
				-                    if is_different_from_cache:
			
 
				-                        _CURRENT_BINARIES.pop(binary.name)
			
 
				-                    else:
			
 
				-                        return cached_binary
			
 
				-                else:
			
 
				-                    # if they have not yet loaded the binary
			
 
				-                    # but our cache is recent enough and not expired, assume cached version is good enough
			
 
				-                    # it will automatically reload when the cache expires
			
 
				-                    # cached_binary will be stale/bad for up to 30min if binary was updated/removed on host system
			
 
				-                    return cached_binary
			
 
				-            else:
			
 
				-                # cached binary is too old, reload it from scratch
			
 
				-                _CURRENT_BINARIES.pop(binary.name)
			
 
				-        
			
 
				+        cached = _CURRENT_BINARIES.get(binary.name)
			
 
				+        if cached and timezone.now() < cached.modified_at + timedelta(seconds=INSTALLED_BINARY_RECHECK_INTERVAL):
			
 
				+            return cached
			
 
				         if not binary.abspath or not binary.version or not binary.sha256:
			
 
				-            # if binary was not yet loaded from filesystem, do it now
			
 
				-            # this is expensive, we have to find it's abspath, version, and sha256, but it's necessary
			
 
				-            # to make sure we have a good, up-to-date record of it in the DB & in-memroy cache
			
 
				             binary = archivebox.pm.hook.binary_load(binary=binary, fresh=True)
			
 
				-
			
 
				-        assert binary.loaded_binprovider and binary.loaded_abspath and binary.loaded_version and binary.loaded_sha256, f'Failed to load binary {binary.name} abspath, version, and sha256'
			
 
				-        
			
 
				-        _CURRENT_BINARIES[binary.name], _created = self.update_or_create(
			
 
				-            machine=Machine.objects.current(),
			
 
				-            name=binary.name,
			
 
				-            binprovider=binary.loaded_binprovider.name,
			
 
				-            version=str(binary.loaded_version),
			
 
				-            abspath=str(binary.loaded_abspath),
			
 
				-            sha256=str(binary.loaded_sha256),
			
 
				+        _CURRENT_BINARIES[binary.name], _ = self.update_or_create(
			
 
				+            machine=Machine.objects.current(), name=binary.name, binprovider=binary.loaded_binprovider.name,
			
 
				+            version=str(binary.loaded_version), abspath=str(binary.loaded_abspath), sha256=str(binary.loaded_sha256),
			
 
				         )
			
 
				-        cached_binary = _CURRENT_BINARIES[binary.name]
			
 
				-        cached_binary.save()   # populate ABID
			
 
				-        
			
 
				-        # if we get this far make sure DB record matches in-memroy cache
			
 
				-        assert str(cached_binary.binprovider) == str(binary.loaded_binprovider.name)
			
 
				-        assert str(cached_binary.abspath) == str(binary.loaded_abspath)
			
 
				-        assert str(cached_binary.version) == str(binary.loaded_version)
			
 
				-        assert str(cached_binary.sha256) == str(binary.loaded_sha256)
			
 
				-        
			
 
				-        return cached_binary
			
 
				-    
			
 
				-
			
 
				-
			
 
				-class InstalledBinary(ABIDModel, ModelWithHealthStats):
			
 
				-    abid_prefix = 'bin_'
			
 
				-    abid_ts_src = 'self.machine.created_at'
			
 
				-    abid_uri_src = 'self.machine.guid'
			
 
				-    abid_subtype_src = 'self.binprovider'
			
 
				-    abid_rand_src = 'self.id'
			
 
				-    abid_drift_allowed = False
			
 
				-    
			
 
				-    read_only_fields = ('id', 'abid', 'created_at', 'machine', 'name', 'binprovider', 'abspath', 'version', 'sha256')
			
 
				-    
			
 
				-    id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
			
 
				-    abid = ABIDField(prefix=abid_prefix)
			
 
				-
			
 
				-    created_at = AutoDateTimeField(default=None, null=False, db_index=True)
			
 
				+        return _CURRENT_BINARIES[binary.name]
			
 
				+
			
 
				+
			
 
				+class InstalledBinary(models.Model, ModelWithHealthStats):
			
 
				+    id = models.UUIDField(primary_key=True, default=uuid7, editable=False, unique=True)
			
 
				+    created_at = models.DateTimeField(default=timezone.now, db_index=True)
			
 
				     modified_at = models.DateTimeField(auto_now=True)
			
 
				-    
			
 
				-    # IMMUTABLE PROPERTIES
			
 
				     machine = models.ForeignKey(Machine, on_delete=models.CASCADE, default=None, null=False, blank=True)
			
 
				     name = models.CharField(max_length=63, default=None, null=False, blank=True)
			
 
				     binprovider = models.CharField(max_length=31, default=None, null=False, blank=True)
			
 
				     abspath = models.CharField(max_length=255, default=None, null=False, blank=True)
			
 
				     version = models.CharField(max_length=32, default=None, null=False, blank=True)
			
 
				     sha256 = models.CharField(max_length=64, default=None, null=False, blank=True)
			
 
				-    
			
 
				-    # MUTABLE PROPERTIES (TODO)
			
 
				-    # is_pinned = models.BooleanField(default=False)    # i.e. should this binary superceede other binaries with the same name on the host?
			
 
				-    # is_valid = models.BooleanField(default=True)      # i.e. is this binary still available on the host?
			
 
				-    
			
 
				-    # STATS COUNTERS (inherited from ModelWithHealthStats)
			
 
				-    # num_uses_failed = models.PositiveIntegerField(default=0)
			
 
				-    # num_uses_succeeded = models.PositiveIntegerField(default=0)
			
 
				-    
			
 
				+    num_uses_failed = models.PositiveIntegerField(default=0)
			
 
				+    num_uses_succeeded = models.PositiveIntegerField(default=0)
			
 
				+
			
 
				     objects: InstalledBinaryManager = InstalledBinaryManager()
			
 
				-    
			
 
				+
			
 
				     class Meta:
			
 
				         verbose_name = 'Installed Binary'
			
 
				         verbose_name_plural = 'Installed Binaries'
			
 
				-        unique_together = (
			
 
				-            ('machine', 'name', 'abspath', 'version', 'sha256'),
			
 
				-        )
			
 
				+        unique_together = (('machine', 'name', 'abspath', 'version', 'sha256'),)
			
 
				 
			
 
				     def __str__(self) -> str:
			
 
				         return f'{self.name}@{self.binprovider}+{self.abspath}@{self.version}'
			
 
				-    
			
 
				-    def clean(self, *args, **kwargs) -> None:
			
 
				-        assert self.name or self.abspath
			
 
				-        self.name = str(self.name or self.abspath)
			
 
				-        assert self.name
			
 
				-
			
 
				-        if not hasattr(self, 'machine'):
			
 
				-            self.machine = Machine.objects.current()
			
 
				-        if not self.binprovider:
			
 
				-            all_known_binproviders = list(abx.as_dict(archivebox.pm.hook.get_BINPROVIDERS()).values())
			
 
				-            binary = archivebox.pm.hook.binary_load(binary=Binary(name=self.name, binproviders=all_known_binproviders), fresh=True)
			
 
				-            self.binprovider = binary.loaded_binprovider.name if binary.loaded_binprovider else None
			
 
				-        if not self.abspath:
			
 
				-            self.abspath = self.BINPROVIDER.get_abspath(self.name)
			
 
				-        if not self.version:
			
 
				-            self.version = self.BINPROVIDER.get_version(self.name, abspath=self.abspath)
			
 
				-        if not self.sha256:
			
 
				-            self.sha256 = self.BINPROVIDER.get_sha256(self.name, abspath=self.abspath)
			
 
				-            
			
 
				-        super().clean(*args, **kwargs)
			
 
				 
			
 
				     @cached_property
			
 
				     def BINARY(self) -> Binary:
			
 
				         for binary in abx.as_dict(archivebox.pm.hook.get_BINARIES()).values():
			
 
				             if binary.name == self.name:
			
 
				                 return binary
			
 
				-        raise Exception(f'Orphaned InstalledBinary {self.name} {self.binprovider} was found in DB, could not find any plugin that defines it')
			
 
				-        # TODO: we could technically reconstruct it from scratch, but why would we ever want to do that?
			
 
				+        raise Exception(f'Binary {self.name} not found')
			
 
				 
			
 
				     @cached_property
			
 
				     def BINPROVIDER(self) -> BinProvider:
			
 
				-        for binprovider in abx.as_dict(archivebox.pm.hook.get_BINPROVIDERS()).values():
			
 
				-            if binprovider.name == self.binprovider:
			
 
				-                return binprovider
			
 
				-        raise Exception(f'Orphaned InstalledBinary(name={self.name}) was found in DB, could not find any plugin that defines BinProvider(name={self.binprovider})')
			
 
				-
			
 
				-    # maybe not a good idea to provide this? Binary in DB is a record of the binary's config
			
 
				-    # whereas a loaded binary is a not-yet saved instance that may not have the same config
			
 
				-    # why would we want to load a binary record from the db when it could be freshly loaded?
			
 
				-    def load_from_db(self) -> Binary:
			
 
				-        # TODO: implement defaults arg in abx_pkg
			
 
				-        # return self.BINARY.load(defaults={
			
 
				-        #     'binprovider': self.BINPROVIDER,
			
 
				-        #     'abspath': Path(self.abspath),
			
 
				-        #     'version': self.version,
			
 
				-        #     'sha256': self.sha256,
			
 
				-        # })
			
 
				-        
			
 
				-        return Binary.model_validate({
			
 
				-            **self.BINARY.model_dump(),
			
 
				-            'abspath': self.abspath and Path(self.abspath),
			
 
				-            'version': self.version,
			
 
				-            'sha256': self.sha256,
			
 
				-            'loaded_binprovider': self.BINPROVIDER,
			
 
				-            'binproviders_supported': self.BINARY.binproviders_supported,
			
 
				-            'overrides': self.BINARY.overrides,
			
 
				-        })
			
 
				-
			
 
				-    def load_fresh(self) -> Binary:
			
 
				-        return archivebox.pm.hook.binary_load(binary=self.BINARY, fresh=True)
			
 
				-
			
 
				-
			
 
				+        for bp in abx.as_dict(archivebox.pm.hook.get_BINPROVIDERS()).values():
			
 
				+            if bp.name == self.binprovider:
			
 
				+                return bp
			
 
				+        raise Exception(f'BinProvider {self.binprovider} not found')
			
 
				 
			
 
				 
			
 
				 def spawn_process(proc_id: str):
			
 
				-    proc = Process.objects.get(id=proc_id)
			
 
				-    proc.spawn()
			
 
				-    
			
 
				+    Process.objects.get(id=proc_id).spawn()
			
 
				+
			
 
				 
			
 
				 class ProcessManager(models.Manager):
			
 
				     pass
			
 
				 
			
 
				+
			
 
				 class ProcessQuerySet(models.QuerySet):
			
 
				-    """
			
 
				-    Enhanced QuerySet for Process model, usage:
			
 
				-        Process.objects.queued() -> QuerySet[Process] [Process(pid=None, returncode=None), Process(pid=None, returncode=None)]
			
 
				-        Process.objects.running() -> QuerySet[Process] [Process(pid=123, returncode=None), Process(pid=456, returncode=None)]
			
 
				-        Process.objects.exited() -> QuerySet[Process] [Process(pid=789, returncode=0), Process(pid=101, returncode=1)]
			
 
				-        Process.objects.running().pids() -> [456]
			
 
				-        Process.objects.kill() -> 1
			
 
				-    """
			
 
				-    
			
 
				     def queued(self):
			
 
				         return self.filter(pid__isnull=True, returncode__isnull=True)
			
 
				-    
			
 
				+
			
 
				     def running(self):
			
 
				         return self.filter(pid__isnull=False, returncode__isnull=True)
			
 
				-            
			
 
				+
			
 
				     def exited(self):
			
 
				         return self.filter(returncode__isnull=False)
			
 
				-    
			
 
				+
			
 
				     def kill(self):
			
 
				-        total_killed = 0
			
 
				+        count = 0
			
 
				         for proc in self.running():
			
 
				             proc.kill()
			
 
				-            total_killed += 1
			
 
				-        return total_killed
			
 
				-    
			
 
				+            count += 1
			
 
				+        return count
			
 
				+
			
 
				     def pids(self):
			
 
				         return self.values_list('pid', flat=True)
			
 
				 
			
 
				 
			
 
				-class Process(ABIDModel):
			
 
				-    abid_prefix = 'pid_'
			
 
				-    abid_ts_src = 'self.created_at'
			
 
				-    abid_uri_src = 'self.cmd'
			
 
				-    abid_subtype_src = 'self.actor_type or "00"'
			
 
				-    abid_rand_src = 'self.id'
			
 
				-    abid_drift_allowed = False
			
 
				-    
			
 
				-    read_only_fields = ('id', 'abid', 'created_at', 'cmd', 'cwd', 'actor_type', 'timeout')
			
 
				-    
			
 
				-    id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
			
 
				-    abid = ABIDField(prefix=abid_prefix)
			
 
				-    
			
 
				-    # immutable state
			
 
				-    cmd = models.JSONField(default=list)                             # shell argv
			
 
				-    cwd = models.CharField(max_length=255)                           # working directory
			
 
				-    actor_type = models.CharField(max_length=255, null=True)         # python ActorType that this process is running
			
 
				-    timeout = models.PositiveIntegerField(null=True, default=None)   # seconds to wait before killing the process if it's still running
			
 
				-    
			
 
				+class Process(models.Model):
			
 
				+    id = models.UUIDField(primary_key=True, default=uuid7, editable=False, unique=True)
			
 
				+    cmd = models.JSONField(default=list)
			
 
				+    cwd = models.CharField(max_length=255)
			
 
				+    actor_type = models.CharField(max_length=255, null=True)
			
 
				+    timeout = models.PositiveIntegerField(null=True, default=None)
			
 
				     created_at = models.DateTimeField(null=False, default=timezone.now, editable=False)
			
 
				     modified_at = models.DateTimeField(null=False, default=timezone.now, editable=False)
			
 
				-
			
 
				-    # mutable fields
			
 
				     machine = models.ForeignKey(Machine, on_delete=models.CASCADE)
			
 
				     pid = models.IntegerField(null=True)
			
 
				     launched_at = models.DateTimeField(null=True)
			
@@ -433,14 +213,6 @@ class Process(ABIDModel):
 
				     stdout = models.TextField(default='', null=False)
			
 
				     stderr = models.TextField(default='', null=False)
			
 
				 
			
 
				-    machine_id: str
			
 
				-
			
 
				-    # optional mutable state that can be used to trace what the process is doing
			
 
				-    # active_event = models.ForeignKey('Event', null=True, on_delete=models.SET_NULL)
			
 
				-    
			
 
				-    emitted_events: models.RelatedManager['Event']
			
 
				-    claimed_events: models.RelatedManager['Event']
			
 
				-    
			
 
				     objects: ProcessManager = ProcessManager.from_queryset(ProcessQuerySet)()
			
 
				 
			
 
				     @classmethod
			
@@ -448,60 +220,32 @@ class Process(ABIDModel):
 
				         proc_id = os.environ.get('PROCESS_ID', '').strip()
			
 
				         if not proc_id:
			
 
				             proc = cls.objects.create(
			
 
				-                cmd=sys.argv,
			
 
				-                cwd=os.getcwd(),
			
 
				-                actor_type=None,
			
 
				-                timeout=None,
			
 
				-                machine=Machine.objects.current(),
			
 
				-                pid=os.getpid(),
			
 
				-                launched_at=timezone.now(),
			
 
				-                finished_at=None,
			
 
				-                returncode=None,
			
 
				-                stdout='',
			
 
				-                stderr='',
			
 
				+                cmd=sys.argv, cwd=os.getcwd(), machine=Machine.objects.current(),
			
 
				+                pid=os.getpid(), launched_at=timezone.now(),
			
 
				             )
			
 
				             os.environ['PROCESS_ID'] = str(proc.id)
			
 
				             return proc
			
 
				-        
			
 
				         proc = cls.objects.get(id=proc_id)
			
 
				-        if proc.pid:
			
 
				-            assert os.getpid() == proc.pid, f'Process ID mismatch: {proc.pid} != {os.getpid()}'
			
 
				-        else:
			
 
				-            proc.pid = os.getpid()
			
 
				-
			
 
				+        proc.pid = proc.pid or os.getpid()
			
 
				         proc.machine = Machine.current()
			
 
				-        proc.cwd = os.getcwd()    
			
 
				+        proc.cwd = os.getcwd()
			
 
				         proc.cmd = sys.argv
			
 
				         proc.launched_at = proc.launched_at or timezone.now()
			
 
				         proc.save()
			
 
				-        
			
 
				-        return proc
			
 
				-
			
 
				-    @classmethod
			
 
				-    def create_and_fork(cls, **kwargs):
			
 
				-        proc = cls.objects.create(**kwargs)
			
 
				-        proc.fork()
			
 
				         return proc
			
 
				 
			
 
				     def fork(self):
			
 
				         if self.pid:
			
 
				-            raise Exception(f'Process is already running, cannot fork again: {self}')
			
 
				-        
			
 
				-        # fork the process in the background
			
 
				+            raise Exception(f'Process already running: {self}')
			
 
				         multiprocessing.Process(target=spawn_process, args=(self.id,)).start()
			
 
				 
			
 
				     def spawn(self):
			
 
				         if self.pid:
			
 
				-            raise Exception(f'Process already running, cannot spawn again: {self}')
			
 
				-        
			
 
				-        # spawn the process in the foreground and block until it exits
			
 
				+            raise Exception(f'Process already running: {self}')
			
 
				         proc = subprocess.Popen(self.cmd, cwd=self.cwd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
			
 
				         self.pid = proc.pid
			
 
				         self.launched_at = timezone.now()
			
 
				         self.save()
			
 
				-        # Event.dispatch('PROC_UPDATED', {'process_id': self.id})
			
 
				-        
			
 
				-        # block until the process exits
			
 
				         proc.wait()
			
 
				         self.finished_at = timezone.now()
			
 
				         self.returncode = proc.returncode
			
@@ -509,36 +253,13 @@ class Process(ABIDModel):
 
				         self.stderr = proc.stderr.read()
			
 
				         self.pid = None
			
 
				         self.save()
			
 
				-        # Event.dispatch('PROC_UPDATED', {'process_id': self.id})
			
 
				-        
			
 
				-    def kill(self):
			
 
				-        if not self.is_running: return
			
 
				-        assert self.machine == Machine.current(), f'Cannot kill actor on another machine: {self.machine_id} != {Machine.current().id}'
			
 
				-        
			
 
				-        os.kill(self.pid, signal.SIGKILL)
			
 
				-        self.pid = None
			
 
				-        self.save()
			
 
				-        # Event.dispatch('PROC_UPDATED', {'process_id': self.id})
			
 
				 
			
 
				-    @property
			
 
				-    def is_pending(self):
			
 
				-        return (self.pid is None) and (self.returncode is None)
			
 
				+    def kill(self):
			
 
				+        if self.pid and self.returncode is None:
			
 
				+            os.kill(self.pid, signal.SIGKILL)
			
 
				+            self.pid = None
			
 
				+            self.save()
			
 
				 
			
 
				     @property
			
 
				     def is_running(self):
			
 
				-        return (self.pid is not None) and (self.returncode is None)
			
 
				-    
			
 
				-    @property
			
 
				-    def is_failed(self):
			
 
				-        return self.returncode not in (None, 0)
			
 
				-    
			
 
				-    @property
			
 
				-    def is_succeeded(self):
			
 
				-        return self.returncode == 0
			
 
				-    
			
 
				-    # @property
			
 
				-    # def is_idle(self):
			
 
				-    #     if not self.actor_type:
			
 
				-    #         raise Exception(f'Process {self.id} has no actor_type set, can only introspect active events if Process.actor_type is set to the Actor its running')
			
 
				-    #     return self.active_event is None
			
 
				-
			
 
				+        return self.pid is not None and self.returncode is None
			
--- a/archivebox/tags/models.py
+++ b/archivebox/tags/models.py
@@ -1,328 +1,6 @@
 
				-__package__ = 'archivebox.tags'
			
 
				-
			
 
				-import uuid
			
 
				-from typing import Type, ClassVar, Iterable, Any
			
 
				-
			
 
				-from benedict import benedict
			
 
				-
			
 
				-from django.db import models, transaction
			
 
				-from django.db.models import QuerySet, F
			
 
				-from django.db.models.functions import Substr, StrIndex, Concat
			
 
				-from django.conf import settings
			
 
				-
			
 
				-from django.utils.text import slugify
			
 
				-from django.utils.functional import classproperty              # type: ignore
			
 
				-from django.contrib.auth.models import User
			
 
				-from django.contrib.contenttypes.fields import GenericForeignKey
			
 
				-from django.contrib.contenttypes.models import ContentType
			
 
				-from django.contrib.contenttypes.fields import GenericRelation
			
 
				-
			
 
				-
			
 
				-from base_models.models import ABIDModel, ABIDField, AutoDateTimeField, get_or_create_system_user_pk
			
 
				-
			
 
				-FORBIDDEN_TAG_CHARS = ('=', '\n', '\t', '\r', ',', '\'', '"', '\\')
			
 
				-
			
 
				-
			
 
				-class KVTagManager(models.Manager):
			
 
				-    pass
			
 
				-
			
 
				-class KVTagQuerySet(models.QuerySet):
			
 
				-    """
			
 
				-    Enhanced QuerySet for KVTag objects.
			
 
				-    
			
 
				-    To list all unique tag names:
			
 
				-        KVTag.objects.filter(obj__created_by_id=123).names() -> {'tag1', 'tag2', 'tag3'}
			
 
				-    
			
 
				-    To list all the Snapshot objects with a given tag:
			
 
				-        KVTag.objects.filter(name='tag1').objects(Snapshot) -> QuerySet[Snapshot]: [snapshot1, snapshot2, snapshot3]
			
 
				-
			
 
				-    To rename a tag "abcd" to "xyz":
			
 
				-        KVTag.objects.filter(name='abcd').rename(name='xyz') -> QuerySet[KVTag]: [xyz, xyz, xyz]
			
 
				-    """
			
 
				-    
			
 
				-    def kvtags(self) -> 'KVTagQuerySet':
			
 
				-        return self.filter(value__isnull=False)
			
 
				-    
			
 
				-    def non_kvtags(self) -> 'KVTagQuerySet':
			
 
				-        return self.filter(value__isnull=True)
			
 
				-    
			
 
				-    def rename(self, name: str) -> 'KVTagQuerySet':
			
 
				-        self.update(name=name)
			
 
				-        return self._clone()
			
 
				-
			
 
				-    def names(self) -> set[str]:
			
 
				-        """get the unique set of names of tags in this queryset"""
			
 
				-        return set(self.non_kvtags().values('name').distinct().values_list('name', flat=True))
			
 
				-    
			
 
				-    def keys(self) -> set[str]:
			
 
				-        """get the unique set of keys of tags in this queryset"""
			
 
				-        return set(self.kvtags().values('name').distinct().values_list('name', flat=True))
			
 
				-
			
 
				-    def values(self) -> set[str]:
			
 
				-        """get the unique set of values of tags in this queryset"""
			
 
				-        return set(self.kvtags().values_list('value').distinct().values_list('value', flat=True))
			
 
				-    
			
 
				-    def tag_dict(self) -> dict[str, str]:
			
 
				-        """
			
 
				-        Returns a dictionary of dictionaries, where the outer key is the obj_id and the inner key is the tag name.
			
 
				-        {
			
 
				-            'abcd-2345-2343-234234': {
			
 
				-                'uuid': 'abcd-2345-2343-234234',
			
 
				-                'sha256': 'abc123k3j423kj423kl4j23',
			
 
				-                'path': '/data/sources/2024-01-02_11-57-51__cli_add.txt',
			
 
				-                'some-flat-tag': None,
			
 
				-                'some-other-tag': None,
			
 
				-            },
			
 
				-            'efgh-2345-2343-234234': {
			
 
				-                ...
			
 
				-            },
			
 
				-        }
			
 
				-        """
			
 
				-        tag_dict = {}
			
 
				-        for tag in self:
			
 
				-            tag_dict[tag.obj_id] = tag_dict.get(tag.obj_id, {})
			
 
				-            tag_dict[tag.obj_id][tag.key] = tag_dict[tag.obj_id].get(tag.key, tag.value)
			
 
				-
			
 
				-        return benedict(tag_dict)
			
 
				-
			
 
				-    def model_classes(self) -> list[Type[models.Model]]:
			
 
				-        """get the unique set of Model classes of objects in this queryset"""
			
 
				-        obj_types = set(self.values('obj_type').distinct().values_list('obj_type', flat=True))
			
 
				-        return [obj_type.model_class() for obj_type in obj_types]
			
 
				-    
			
 
				-    def model_class(self) -> Type[models.Model]:
			
 
				-        """get the single Model class of objects in this queryset (or raise an error if there are multiple types)"""
			
 
				-        model_classes = self.model_classes()
			
 
				-        assert len(model_classes) == 1, f'KVTagQuerySet.model_class() can only be called when the queried objects are all a single type (found multiple types: {model_classes})'
			
 
				-        return model_classes[0]
			
 
				-    
			
 
				-    def objects(self, model_class: Type[models.Model] | ContentType | None = None) -> QuerySet:
			
 
				-        """Get the queryset of objects that have the tags we've selected (pass a Model or ContentType to filter by obj_type)"""
			
 
				-        Model: Type[models.Model]
			
 
				-        
			
 
				-        if isinstance(model_class, ContentType):
			
 
				-            Model = model_class.model_class()
			
 
				-        elif model_class is None:
			
 
				-            # if no explicit obj_type is provided, try to infer it from the queryset (raises error if queryset is a mixture of multiple types)
			
 
				-            Model = self.model_class()
			
 
				-        else:
			
 
				-            Model = model_class
			
 
				+"""
			
 
				+The main Tag model is defined in core/models.py
			
 
				+This file is kept for backwards compatibility but contains no models.
			
 
				+"""
			
 
				 
			
 
				-        # at this point model_class should be a model class
			
 
				-        assert issubclass(Model, models.Model)
			
 
				-        
			
 
				-        # the the queryset of objects that have the tags we've selected
			
 
				-        obj_ids = self.values_list('obj_id', flat=True)
			
 
				-        return Model.objects.filter(id__in=obj_ids)
			
 
				-    
			
 
				-
			
 
				-    # In the future, consider:
			
 
				-    # def delete(self) -> None:
			
 
				-    #    self.update(deleted_at=timezone.now())
			
 
				-
			
 
				-
			
 
				-
			
 
				-class KVTag(ModelWithReadOnlyFields):
			
 
				-    """
			
 
				-    Very flexible K:V tagging system that allows you to tag any model with any tag.
			
 
				-    e.g. to tag a Snapshot with 3 tags:
			
 
				-        KVTag.objects.create(obj=snapshot1, name='tag1-simple some text')
			
 
				-        snapshot1.tags.create(name='tag1-simple some text')  <- this duplicate would be blocked by an IntegrityError (obj_id + name must be unique)
			
 
				-        
			
 
				-        snapshot1.tags.create(name='ABID', value='snp_abc123k3j423kj423kl4j23')
			
 
				-        snapshot1.tags.create(name='SHA256', value='1234234abc123k3j423kj423kl4j23')
			
 
				-        snapshot1.tags.create(name='SAVE_WGET', value='False')
			
 
				-        snapshot1.tags.create(name='URI', value='file:///data/sources/2024-01-02_11-57-51__cli_add.txt')
			
 
				-    """
			
 
				-    
			
 
				-    ####################### All fields are immutable! ###########################
			
 
				-    #                  enforced by ModelWithReadOnlyFields
			
 
				-    read_only_fields = ('id', 'created_at', 'name', 'value', 'obj_type', 'obj_id')
			
 
				-    #############################################################################
			
 
				-    
			
 
				-    id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
			
 
				-    created_at = AutoDateTimeField(default=None, null=False, db_index=True)
			
 
				-
			
 
				-    name = models.CharField(null=False, blank=False, max_length=255, db_index=True)
			
 
				-    value = models.TextField(null=True, blank=True, db_default=Substr('name', StrIndex('name', '=')))
			
 
				-
			
 
				-    obj_type = models.ForeignKey(ContentType, on_delete=models.CASCADE, null=False, blank=False, default=None, db_index=True)
			
 
				-    obj_id = models.UUIDField(null=False, blank=False, default=None, db_index=True)
			
 
				-    obj = GenericForeignKey('obj_type', 'obj_id')
			
 
				-
			
 
				-    objects: KVTagManager = KVTagManager.from_queryset(KVTagQuerySet)()
			
 
				-
			
 
				-    class Meta:
			
 
				-        db_table = 'core_KVTags'
			
 
				-        unique_together = [('obj_id', 'name')]
			
 
				-    
			
 
				-    def __str__(self) -> str:
			
 
				-        return self.keyval_str if self.name else '<new-KVTag>'
			
 
				-    
			
 
				-    def __repr__(self) -> str:
			
 
				-        return f'#{self.name}'
			
 
				-
			
 
				-    @property
			
 
				-    def key(self) -> str:
			
 
				-        self.clean()
			
 
				-        return self.name
			
 
				-    
			
 
				-    @property
			
 
				-    def val(self) -> str | None:
			
 
				-        self.clean()
			
 
				-        return self.value
			
 
				-    
			
 
				-    @property
			
 
				-    def keyval_str(self) -> str:
			
 
				-        self.clean()
			
 
				-        return f'{self.key}={self.value}' if self.value else self.key
			
 
				-    
			
 
				-    @staticmethod
			
 
				-    def parse_keyval_str(keyval_str: str) -> tuple[str, str | None]:
			
 
				-        name, value = keyval_str.split('=', 1) if ('=' in keyval_str) else (keyval_str, '')
			
 
				-        return name.strip(), value.strip() or None
			
 
				-    
			
 
				-    def clean(self) -> None:
			
 
				-        # check that the object being tagged is not a KVTag object itself
			
 
				-        kvtag_obj_type = ContentType.objects.get_for_model(self.__class__)
			
 
				-        assert self.obj_type != kvtag_obj_type, f'A KVTag(obj_type={self.obj_type}).obj -> {self.obj} points to another KVTag object (you cannot tag a KVTag with another KVTag)'
			
 
				-        
			
 
				-        # check that the object being tagged inherits from ModelWithKVTags
			
 
				-        assert isinstance(self.obj, ModelWithKVTags), f"A KVTag(obj_type={self.obj_type}).obj -> {self.obj} points to an object that doesn't support tags (you can only tag models that inherit from ModelWithKVTags)"
			
 
				-
			
 
				-        # parse key, value from name if it contains an = sign, otherwise key = name & val = None
			
 
				-        name, value = self.parse_keyval_str(self.name)
			
 
				-        
			
 
				-        # update values with cleaned values
			
 
				-        self.name = self.name or name
			
 
				-        self.value = self.value or value
			
 
				-        
			
 
				-        assert isinstance(self.name, str) and self.name.strip(), f'KVTag(name={self.name}).name must be a non-empty string'
			
 
				-        
			
 
				-        # check if tag is a simple key
			
 
				-        if self.value is None:
			
 
				-            # basic (lax) check for forbidden characters
			
 
				-            unallowed_chars = [char for char in self.name if char in FORBIDDEN_TAG_CHARS]
			
 
				-            assert not unallowed_chars, f'KVTag(name={self.name}).name contains symbols or whitespace that are not allowed: {unallowed_chars[0]}'
			
 
				-            
			
 
				-        # check if tag is a key=value pair
			
 
				-        else:
			
 
				-            # strict check that key is a valid identifier
			
 
				-            assert self.name.isidentifier(), f'KVTag(name={self.value}).name must be a valid identifier string (a-Z, 0-9, _)'
			
 
				-            
			
 
				-            # basic (lax) check for forbidden characters in value
			
 
				-            unallowed_chars = [char for char in self.name if char in FORBIDDEN_TAG_CHARS]
			
 
				-            assert isinstance(self.value, str) and self.value.strip() and not unallowed_chars, f'KVTag(value={self.value}).value must be a non-empty string (with no newlines, commas, = signs, quotes, or forward slashes)'
			
 
				-
			
 
				-    def save(self, *args, **kwargs) -> None:
			
 
				-        self.clean()        
			
 
				-        super().save(*args, **kwargs)
			
 
				-    
			
 
				-    @property
			
 
				-    def slug(self) -> str:
			
 
				-        return slugify(self.name)
			
 
				-    
			
 
				-    @property
			
 
				-    def created_by_id(self) -> User:
			
 
				-        if self.obj and hasattr(self.obj, 'created_by_id'):
			
 
				-            return self.obj.created_by_id
			
 
				-        return get_or_create_system_user_pk()
			
 
				-    
			
 
				-    @property
			
 
				-    def created_by(self) -> User:
			
 
				-        return User.objects.get(pk=self.created_by_id)
			
 
				-
			
 
				-
			
 
				-class ModelWithKVTags(ModelWithReadOnlyFields):
			
 
				-    """
			
 
				-    A base class for models that have tags, adds 0 additional storage overhead to models with 0 tags.
			
 
				-    
			
 
				-    Snapshot.objects.get(id='...').tags.clear()
			
 
				-    Snapshot.objects.get(id='...').tags.create(name='tag1')
			
 
				-    Snapshot.objects.get(id='...').tags.create(name='tag2', value='some-value')
			
 
				-    Snapshot.objects.get(id='...').tags.create(name='tag3')
			
 
				-    Snapshot.objects.get(id='...').tags.filter(name='tag3').delete()
			
 
				-    snapshot.objects.get(id='...').tag_names -> ['tag1', 'tag2']
			
 
				-    snapshot.objects.get(id='...').tag_dict -> {'tag1': None, 'tag2': 'some-value'}
			
 
				-    snapshot.objects.get(id='...').tag_csv -> 'tag1,tag2'
			
 
				-    """
			
 
				-    
			
 
				-    read_only_fields = ('id',)
			
 
				-    
			
 
				-    id = models.UUIDField(primary_key=True, default=uuid.uuid4, null=False, editable=False, unique=True, verbose_name='ID')
			
 
				-    
			
 
				-    tag_set = GenericRelation(
			
 
				-        KVTag,
			
 
				-        # related_query_name="snapshot",       set this in subclasses, allows queries like KVTag.objects.filter(snapshot__url='https://example.com')
			
 
				-        content_type_field="obj_type",
			
 
				-        object_id_field="obj_id",
			
 
				-        order_by=('name',),
			
 
				-    )
			
 
				-    kvtag_set = tag_set
			
 
				-    
			
 
				-    class Meta:
			
 
				-        abstract = True
			
 
				-
			
 
				-    @classproperty
			
 
				-    def content_type(cls) -> ContentType:
			
 
				-        return ContentType.objects.get_for_model(cls)
			
 
				-    
			
 
				-    @property
			
 
				-    def tag_dict(self) -> dict[str, str]:
			
 
				-        """
			
 
				-        {
			
 
				-            '⭐️': None,
			
 
				-            'some-other-tag': None,
			
 
				-            'some tag/testing 234[po4]': None,
			
 
				-            'uuid': 'abcd-2345-2343-234234',
			
 
				-            'sha256': 'abc123k3j423kj423kl4j23',
			
 
				-            'file': '/data/sources/2024-01-02_11-57-51__cli_add.txt',
			
 
				-        }
			
 
				-        """
			
 
				-        return benedict({
			
 
				-            tag.key: tag.value
			
 
				-            for tag in self.tag_set.order_by('created_at')
			
 
				-        })
			
 
				-        
			
 
				-    def get_tag_value(self, tag_name: str) -> str | None:
			
 
				-        """get the value of a tag with the given name pointing to this object, or None if no matching tag exists"""
			
 
				-        tag = self.tag_set.filter(name=tag_name).order_by('created_at').last()
			
 
				-        return tag and tag.value
			
 
				-    
			
 
				-    def set_tag_value(self, tag_name: str, tag_value: str | None) -> KVTag:
			
 
				-        """create or update a Tag pointing to this objects with the given name, to the given value"""
			
 
				-        with transaction.atomic():
			
 
				-            tag, _created = KVTag.objects.update_or_create(obj=self, name=tag_name, defaults={'value': tag_value})
			
 
				-            tag.save()
			
 
				-        return tag
			
 
				-    
			
 
				-    @property
			
 
				-    def tag_names(self) -> list[str]:
			
 
				-        return [str(tag) for tag in self.tag_set.order_by('created_at')]
			
 
				-    
			
 
				-    @tag_names.setter
			
 
				-    def tag_names_setter(self, tag_names: list[str]) -> None:
			
 
				-        kvtags = []
			
 
				-        for tag_name in tag_names:
			
 
				-            key, value = KVTag.parse_keyval_str(tag_name)
			
 
				-            kvtags.append(self.set_tag_value(key, value))
			
 
				-        self.tag_set.set(kvtags)
			
 
				-    
			
 
				-    @property
			
 
				-    def tags_csv(self) -> str:
			
 
				-        return ','.join(self.tag_names)
			
 
				-
			
 
				-    # Meh, not really needed:
			
 
				-    # @tags_csv.setter
			
 
				-    # def tags_csv_setter(self, tags_csv: str) -> None:
			
 
				-    #     with transaction.atomic():
			
 
				-    #         # delete all existing tags
			
 
				-    #         self.tag_set.delete()
			
 
				-    #
			
 
				-    #         # add a new tag for each comma-separated value in tags_str
			
 
				-    #         new_kvtags = []
			
 
				-    #         for tag_name in tags_csv.split(','):
			
 
				-    #             new_kvtags.append(KVTag(obj=self, name=tag_name))
			
 
				-    #
			
 
				-    #         KVTag.objects.bulk_create(new_kvtags)
			
 
				-    #         self.tag_set.set(new_kvtags)
			
 
				+__package__ = 'archivebox.tags'
			
--- a/archivebox/workers/models.py
+++ b/archivebox/workers/models.py
@@ -13,7 +13,6 @@ from django.core import checks
 
				 from django.utils import timezone
			
 
				 from django.utils.functional import classproperty
			
 
				 
			
 
				-from base_models.models import ABIDModel, ABIDField
			
 
				 from machine.models import Process
			
 
				 
			
 
				 from statemachine import registry, StateMachine, State
			
@@ -340,23 +339,8 @@ class EventQuerySet(models.QuerySet):
 
				         return self.filter(claimed_at__lt=timezone.now() - timedelta(seconds=older_than))
			
 
				 
			
 
				 
			
 
				-class Event(ABIDModel):
			
 
				-    abid_prefix = 'evn_'
			
 
				-    abid_ts_src = 'self.deliver_at'                  # e.g. 'self.created_at'
			
 
				-    abid_uri_src = 'self.name'                       # e.g. 'self.uri'                (MUST BE SET)
			
 
				-    abid_subtype_src = 'self.emitted_by'             # e.g. 'self.extractor'
			
 
				-    abid_rand_src = 'self.id'                        # e.g. 'self.uuid' or 'self.id'
			
 
				-    abid_drift_allowed: bool = False                 # set to True to allow abid_field values to change after a fixed ABID has been issued (NOT RECOMMENDED: means values can drift out of sync from original ABID)
			
 
				-
			
 
				-    read_only_fields = ('id', 'deliver_at', 'name', 'kwargs', 'timeout', 'parent', 'emitted_by', 'on_success', 'on_failure')
			
 
				-
			
 
				-    id = models.UUIDField(primary_key=True, default=uuid.uuid4, null=False, editable=False, unique=True, verbose_name='ID')
			
 
				-    
			
 
				-    # disable these fields from inherited models, they're not needed / take up too much room
			
 
				-    abid = None
			
 
				-    created_at = None
			
 
				-    created_by = None
			
 
				-    created_by_id = None
			
 
				+class Event(models.Model):
			
 
				+    id = models.UUIDField(primary_key=True, default=uuid.uuid4, null=False, editable=False, unique=True)
			
 
				     
			
 
				     # immutable fields
			
 
				     deliver_at = models.DateTimeField(default=timezone.now, null=False, editable=False, unique=True, db_index=True)
			
--- a/archivebox/workers/orchestrator.py
+++ b/archivebox/workers/orchestrator.py
@@ -173,7 +173,7 @@ class Orchestrator:
 
				         
			
 
				                     next_obj = queue.first()
			
 
				                     print()
			
 
				-                    print(f'🏃‍♂️ {self}.runloop() {actor_type.__name__.ljust(20)} queue={str(queue.count()).ljust(3)} next={next_obj.abid if next_obj else "None"} {next_obj.status if next_obj else "None"} {(timezone.now() - next_obj.retry_at).total_seconds() if next_obj and next_obj.retry_at else "None"}')
			
 
				+                    print(f'🏃‍♂️ {self}.runloop() {actor_type.__name__.ljust(20)} queue={str(queue.count()).ljust(3)} next={next_obj.id if next_obj else "None"} {next_obj.status if next_obj else "None"} {(timezone.now() - next_obj.retry_at).total_seconds() if next_obj and next_obj.retry_at else "None"}')
			
 
				                     self.idle_count = 0
			
 
				                     try:
			
 
				                         existing_actors = actor_type.get_running_actors()
			
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
 
				 [project]
			
 
				 name = "archivebox"
			
 
				 version = "0.8.6rc3"
			
 
				-requires-python = ">=3.10"
			
 
				+requires-python = ">=3.14"
			
 
				 description = "Self-hosted internet archiving solution."
			
 
				 authors = [{name = "Nick Sweeting", email = "[email protected]"}]
			
 
				 license = {text = "MIT"}
			
@@ -22,9 +22,7 @@ classifiers = [
 
				     "Natural Language :: English",
			
 
				     "Operating System :: OS Independent",
			
 
				     "Programming Language :: Python :: 3",
			
 
				-    "Programming Language :: Python :: 3.10",
			
 
				-    "Programming Language :: Python :: 3.11",
			
 
				-    "Programming Language :: Python :: 3.12",
			
 
				+    "Programming Language :: Python :: 3.14",
			
 
				     "Topic :: Internet :: WWW/HTTP",
			
 
				     "Topic :: Internet :: WWW/HTTP :: Indexing/Search",
			
 
				     "Topic :: Internet :: WWW/HTTP :: WSGI :: Application",
			
@@ -41,7 +39,7 @@ classifiers = [
 
				 dependencies = [
			
 
				     ### Django libraries
			
 
				     "setuptools>=74.1.0",   # for: django 5 on python >=3.12, distutils is no longer in stdlib but django 5.1 expects distutils (TODO: check if this can be removed eventually)
			
 
				-    "django>=5.1.4,<6.0",
			
 
				+    "django>=6.0",
			
 
				     "channels[daphne]>=4.1.0",
			
 
				     "django-ninja>=1.3.0",
			
 
				     "django-extensions>=3.2.3",
			
@@ -50,7 +48,6 @@ dependencies = [
 
				     "django-signal-webhooks>=0.3.0",
			
 
				     "django-admin-data-views>=0.4.1",
			
 
				     "django-object-actions>=4.3.0",
			
 
				-    "django-charid-field>=0.4",  # TODO: remove this and dedicated ABID field in favor of using KVTag for charids
			
 
				     "django-taggit==6.1.0",     # TODO: remove this in favor of KVTags only
			
 
				 
			
 
				     ### State Management
			
@@ -77,9 +74,6 @@ dependencies = [
 
				     "pydantic>=2.8.0",       # for: archivebox.api (django-ninja), Binary & BinProvider (abx-pkg), archivebox.config (pydantic-settings), and archivebox.index.schema (pydantic)
			
 
				     "pydantic-settings>=2.5.2", # for: archivebox.config
			
 
				     "python-benedict[io,parse]>=0.33.2", # for: dict replacement all over the codebase to allow .attr-style access
			
 
				-    "ulid-py>=1.1.0",        # TODO: remove this in favor of pure ABID / UUID4
			
 
				-    "typeid-python>=0.3.1",  # TODO: remove this in favor of pure ABID / UUID4
			
 
				-    "base32-crockford==0.3.0",  # TODO: remove this in favor of pure ABID / UUID4
			
 
				     "blake3>=1.0.0",         # TODO: remove this in favor of sha256 everywhere?
			
 
				     
			
 
				     ### Static Typing