Переглянути джерело

switch everywhere to use Snapshot.pk and ArchiveResult.pk instead of id

Nick Sweeting 1 рік тому
батько
коміт
0420662174

+ 9 - 6
archivebox/abid_utils/abid.py

@@ -48,6 +48,8 @@ class ABID(NamedTuple):
 
 
     @classmethod
     @classmethod
     def parse(cls, buffer: Union[str, UUID, ulid.ULID, TypeID, 'ABID'], prefix=DEFAULT_ABID_PREFIX) -> 'ABID':
     def parse(cls, buffer: Union[str, UUID, ulid.ULID, TypeID, 'ABID'], prefix=DEFAULT_ABID_PREFIX) -> 'ABID':
+        assert buffer, f'Attempted to create ABID from null value {buffer}'
+
         buffer = str(buffer)
         buffer = str(buffer)
         if '_' in buffer:
         if '_' in buffer:
             prefix, suffix = buffer.split('_')
             prefix, suffix = buffer.split('_')
@@ -55,7 +57,7 @@ class ABID(NamedTuple):
             prefix, suffix = prefix.strip('_'), buffer
             prefix, suffix = prefix.strip('_'), buffer
 
 
         assert len(prefix) == ABID_PREFIX_LEN - 1   # length without trailing _
         assert len(prefix) == ABID_PREFIX_LEN - 1   # length without trailing _
-        assert len(suffix) == ABID_SUFFIX_LEN
+        assert len(suffix) == ABID_SUFFIX_LEN, f'Suffix {suffix} from {buffer} was not {ABID_SUFFIX_LEN} chars long'
 
 
         return cls(
         return cls(
             prefix=abid_part_from_prefix(prefix),
             prefix=abid_part_from_prefix(prefix),
@@ -118,6 +120,7 @@ def abid_part_from_uri(uri: str) -> str:
     """
     """
     'E4A5CCD9'     # takes first 8 characters of sha256(url)
     'E4A5CCD9'     # takes first 8 characters of sha256(url)
     """
     """
+    uri = str(uri)
     return uri_hash(uri)[:ABID_URI_LEN]
     return uri_hash(uri)[:ABID_URI_LEN]
 
 
 def abid_part_from_ts(ts: Optional[datetime]) -> str:
 def abid_part_from_ts(ts: Optional[datetime]) -> str:
@@ -131,10 +134,11 @@ def abid_part_from_subtype(subtype: str) -> str:
     Snapshots have 01 type, other objects have other subtypes like wget/media/etc.
     Snapshots have 01 type, other objects have other subtypes like wget/media/etc.
     Also allows us to change the ulid spec later by putting special sigil values here.
     Also allows us to change the ulid spec later by putting special sigil values here.
     """
     """
+    subtype = str(subtype)
     if len(subtype) == ABID_SUBTYPE_LEN:
     if len(subtype) == ABID_SUBTYPE_LEN:
         return subtype
         return subtype
 
 
-    return hashlib.sha256(subtype.encode('utf-8')).hexdigest()[:ABID_SUBTYPE_LEN]
+    return hashlib.sha256(subtype.encode('utf-8')).hexdigest()[:ABID_SUBTYPE_LEN].upper()
 
 
 def abid_part_from_rand(rand: Union[str, UUID, None, int]) -> str:
 def abid_part_from_rand(rand: Union[str, UUID, None, int]) -> str:
     """
     """
@@ -146,16 +150,15 @@ def abid_part_from_rand(rand: Union[str, UUID, None, int]) -> str:
     elif isinstance(rand, UUID):
     elif isinstance(rand, UUID):
         # if it's a uuid we take the last 6 characters of the ULID represation of it
         # if it's a uuid we take the last 6 characters of the ULID represation of it
         return str(ulid.from_uuid(rand))[-ABID_RAND_LEN:]
         return str(ulid.from_uuid(rand))[-ABID_RAND_LEN:]
-    elif isinstance(rand, str):
-        # if it's a string we take the last 6 characters of it verbatim
-        return rand[-ABID_RAND_LEN:]
     elif isinstance(rand, int):
     elif isinstance(rand, int):
         # if it's a BigAutoInteger field we convert it from an int to a 0-padded string
         # if it's a BigAutoInteger field we convert it from an int to a 0-padded string
         rand_str = str(rand)[-ABID_RAND_LEN:]
         rand_str = str(rand)[-ABID_RAND_LEN:]
         padding_needed = ABID_RAND_LEN - len(rand_str)
         padding_needed = ABID_RAND_LEN - len(rand_str)
         rand_str = ('0'*padding_needed) + rand_str
         rand_str = ('0'*padding_needed) + rand_str
         return rand_str
         return rand_str
-    raise NotImplementedError('Random component of an ABID can only be computed from a str or UUID')
+
+    # otherwise treat it as a string, take the last 6 characters of it verbatim
+    return str(rand)[-ABID_RAND_LEN:].upper()
 
 
 
 
 def abid_from_values(prefix, ts, uri, subtype, rand) -> ABID:
 def abid_from_values(prefix, ts, uri, subtype, rand) -> ABID:

+ 9 - 6
archivebox/abid_utils/models.py

@@ -28,14 +28,16 @@ from .abid import (
 # Database Field for typeid/ulid style IDs with a prefix, e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ
 # Database Field for typeid/ulid style IDs with a prefix, e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ
 ABIDField = partial(
 ABIDField = partial(
     CharIDField,
     CharIDField,
-    default=ulid.new,
     max_length=ABID_LEN,
     max_length=ABID_LEN,
-    help_text="ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)"
+    help_text="ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)",
+    default=None,
+    null=True,
+    blank=True,
+    db_index=True,
+    unique=True,
 )
 )
 
 
 
 
-
-
 class ABIDModel(models.Model):
 class ABIDModel(models.Model):
     abid_prefix: str = DEFAULT_ABID_PREFIX  # e.g. 'tag_'
     abid_prefix: str = DEFAULT_ABID_PREFIX  # e.g. 'tag_'
     abid_ts_src = 'None'                    # e.g. 'self.created'
     abid_ts_src = 'None'                    # e.g. 'self.created'
@@ -54,7 +56,8 @@ class ABIDModel(models.Model):
 
 
     def save(self, *args: Any, **kwargs: Any) -> None:
     def save(self, *args: Any, **kwargs: Any) -> None:
         if hasattr(self, 'abid'):
         if hasattr(self, 'abid'):
-            self.abid: ABID = self.abid or self.calculate_abid()
+            # self.abid = ABID.parse(self.abid) if self.abid else self.calculate_abid()
+            self.abid = self.calculate_abid()
         else:
         else:
             print(f'[!] WARNING: {self.__class__.__name__}.abid is not a DB field so ABID will not be persisted!')
             print(f'[!] WARNING: {self.__class__.__name__}.abid is not a DB field so ABID will not be persisted!')
             self.abid = self.calculate_abid()
             self.abid = self.calculate_abid()
@@ -106,7 +109,7 @@ class ABIDModel(models.Model):
         """
         """
         ULIDParts(timestamp='01HX9FPYTR', url='E4A5CCD9', subtype='00', randomness='ZYEBQE')
         ULIDParts(timestamp='01HX9FPYTR', url='E4A5CCD9', subtype='00', randomness='ZYEBQE')
         """
         """
-        return ABID.parse(self.abid) if self.abid else self.calculate_abid()
+        return ABID.parse(self.abid) if getattr(self, 'abid', None) else self.calculate_abid()
 
 
     @property
     @property
     def ULID(self) -> ulid.ULID:
     def ULID(self) -> ulid.ULID:

+ 19 - 4
archivebox/api/models.py

@@ -12,7 +12,7 @@ from signal_webhooks.models import WebhookBase
 
 
 from django_stubs_ext.db.models import TypedModelMeta
 from django_stubs_ext.db.models import TypedModelMeta
 
 
-from abid_utils.models import ABIDModel
+from abid_utils.models import ABIDModel, ABIDField
 
 
 
 
 def generate_secret_token() -> str:
 def generate_secret_token() -> str:
@@ -21,7 +21,15 @@ def generate_secret_token() -> str:
 
 
 
 
 class APIToken(ABIDModel):
 class APIToken(ABIDModel):
+    abid_prefix = 'apt'
+    abid_ts_src = 'self.created'
+    abid_uri_src = 'self.token'
+    abid_subtype_src = 'self.user_id'
+    abid_rand_src = 'self.id'
+
     id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True)
     id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True)
+    uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True)
+    abid = ABIDField(prefix=abid_prefix)
 
 
     user = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE)
     user = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE)
     token = models.CharField(max_length=32, default=generate_secret_token, unique=True)
     token = models.CharField(max_length=32, default=generate_secret_token, unique=True)
@@ -42,7 +50,8 @@ class APIToken(ABIDModel):
     def __json__(self) -> dict:
     def __json__(self) -> dict:
         return {
         return {
             "TYPE":             "APIToken",    
             "TYPE":             "APIToken",    
-            "id":               str(self.id),
+            "uuid":             str(self.id),
+            "abid":             str(self.calculate_abid()),
             "user_id":          str(self.user.id),
             "user_id":          str(self.user.id),
             "user_username":    self.user.username,
             "user_username":    self.user.username,
             "token":            self.token,
             "token":            self.token,
@@ -77,9 +86,14 @@ class OutboundWebhook(ABIDModel, WebhookBase):
     Model used in place of (extending) signals_webhooks.models.WebhookModel. Swapped using:
     Model used in place of (extending) signals_webhooks.models.WebhookModel. Swapped using:
         settings.SIGNAL_WEBHOOKS_CUSTOM_MODEL = 'api.models.OutboundWebhook'
         settings.SIGNAL_WEBHOOKS_CUSTOM_MODEL = 'api.models.OutboundWebhook'
     """
     """
-    ID_PREFIX = 'whk'
+    abid_prefix = 'whk'
+    abid_ts_src = 'self.created'
+    abid_uri_src = 'self.endpoint'
+    abid_subtype_src = 'self.ref'
+    abid_rand_src = 'self.id'
 
 
-    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True)
+    uuid = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True)
+    abid = ABIDField(prefix=abid_prefix)
 
 
     WebhookBase._meta.get_field('name').help_text = (
     WebhookBase._meta.get_field('name').help_text = (
         'Give your webhook a descriptive name (e.g. Notify ACME Slack channel of any new ArchiveResults).')
         'Give your webhook a descriptive name (e.g. Notify ACME Slack channel of any new ArchiveResults).')
@@ -92,3 +106,4 @@ class OutboundWebhook(ABIDModel, WebhookBase):
 
 
     class Meta(WebhookBase.Meta):
     class Meta(WebhookBase.Meta):
         verbose_name = 'API Outbound Webhook'
         verbose_name = 'API Outbound Webhook'
+

+ 1 - 1
archivebox/api/v1_auth.py

@@ -47,6 +47,6 @@ def check_api_token(request, token_data: TokenAuthSchema):
         request=request,
         request=request,
     )
     )
     if user:
     if user:
-        return {"success": True, "user_id": str(user.id)}
+        return {"success": True, "user_id": str(user.pk)}
     
     
     return {"success": False, "user_id": None}
     return {"success": False, "user_id": None}

+ 46 - 20
archivebox/api/v1_core.py

@@ -10,7 +10,7 @@ from ninja import Router, Schema, FilterSchema, Field, Query
 from ninja.pagination import paginate
 from ninja.pagination import paginate
 
 
 from core.models import Snapshot, ArchiveResult, Tag
 from core.models import Snapshot, ArchiveResult, Tag
-
+from abid_utils.abid import ABID
 
 
 router = Router(tags=['Core Models'])
 router = Router(tags=['Core Models'])
 
 
@@ -20,9 +20,12 @@ router = Router(tags=['Core Models'])
 ### ArchiveResult #########################################################################
 ### ArchiveResult #########################################################################
 
 
 class ArchiveResultSchema(Schema):
 class ArchiveResultSchema(Schema):
-    id: UUID
+    pk: str
+    uuid: UUID
+    abid: str
+
+    snapshot_abid: str
 
 
-    snapshot_id: UUID
     snapshot_url: str
     snapshot_url: str
     snapshot_tags: str
     snapshot_tags: str
 
 
@@ -36,8 +39,16 @@ class ArchiveResultSchema(Schema):
     created: datetime
     created: datetime
 
 
     @staticmethod
     @staticmethod
-    def resolve_id(obj):
-        return obj.uuid
+    def resolve_pk(obj):
+        return str(obj.pk)
+
+    @staticmethod
+    def resolve_uuid(obj):
+        return str(obj.uuid)
+
+    @staticmethod
+    def resolve_abid(obj):
+        return str(obj.ABID)
 
 
     @staticmethod
     @staticmethod
     def resolve_created(obj):
     def resolve_created(obj):
@@ -47,16 +58,21 @@ class ArchiveResultSchema(Schema):
     def resolve_snapshot_url(obj):
     def resolve_snapshot_url(obj):
         return obj.snapshot.url
         return obj.snapshot.url
 
 
+    @staticmethod
+    def resolve_snapshot_abid(obj):
+        return str(obj.snapshot.ABID)
+
     @staticmethod
     @staticmethod
     def resolve_snapshot_tags(obj):
     def resolve_snapshot_tags(obj):
         return obj.snapshot.tags_str()
         return obj.snapshot.tags_str()
 
 
 
 
 class ArchiveResultFilterSchema(FilterSchema):
 class ArchiveResultFilterSchema(FilterSchema):
-    id: Optional[UUID] = Field(None, q='uuid')
+    uuid: Optional[UUID] = Field(None, q='uuid')
+    # abid: Optional[str] = Field(None, q='abid')
 
 
     search: Optional[str] = Field(None, q=['snapshot__url__icontains', 'snapshot__title__icontains', 'snapshot__tags__name__icontains', 'extractor', 'output__icontains'])
     search: Optional[str] = Field(None, q=['snapshot__url__icontains', 'snapshot__title__icontains', 'snapshot__tags__name__icontains', 'extractor', 'output__icontains'])
-    snapshot_id: Optional[UUID] = Field(None, q='snapshot_id')
+    snapshot_uuid: Optional[UUID] = Field(None, q='snapshot_uuid')
     snapshot_url: Optional[str] = Field(None, q='snapshot__url')
     snapshot_url: Optional[str] = Field(None, q='snapshot__url')
     snapshot_tag: Optional[str] = Field(None, q='snapshot__tags__name')
     snapshot_tag: Optional[str] = Field(None, q='snapshot__tags__name')
     
     
@@ -115,7 +131,9 @@ def get_archiveresult(request, archiveresult_id: str):
 
 
 
 
 class SnapshotSchema(Schema):
 class SnapshotSchema(Schema):
-    id: UUID
+    pk: str
+    uuid: UUID
+    abid: str
 
 
     url: str
     url: str
     tags: str
     tags: str
@@ -128,9 +146,17 @@ class SnapshotSchema(Schema):
 
 
     archiveresults: List[ArchiveResultSchema]
     archiveresults: List[ArchiveResultSchema]
 
 
-    # @staticmethod
-    # def resolve_id(obj):
-    #     return str(obj.id)
+    @staticmethod
+    def resolve_pk(obj):
+        return str(obj.pk)
+
+    @staticmethod
+    def resolve_uuid(obj):
+        return str(obj.uuid)
+
+    @staticmethod
+    def resolve_abid(obj):
+        return str(obj.ABID)
 
 
     @staticmethod
     @staticmethod
     def resolve_tags(obj):
     def resolve_tags(obj):
@@ -167,10 +193,10 @@ def list_snapshots(request, filters: SnapshotFilterSchema = Query(...), with_arc
     results = filters.filter(qs)
     results = filters.filter(qs)
     return results
     return results
 
 
-@router.get("/snapshot/{snapshot_id}", response=SnapshotSchema)
-def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
+@router.get("/snapshot/{snapshot_uuid}", response=SnapshotSchema)
+def get_snapshot(request, snapshot_uuid: str, with_archiveresults: bool=True):
     request.with_archiveresults = with_archiveresults
     request.with_archiveresults = with_archiveresults
-    snapshot = get_object_or_404(Snapshot, id=snapshot_id)
+    snapshot = get_object_or_404(Snapshot, uuid=snapshot_uuid)
     return snapshot
     return snapshot
 
 
 
 
@@ -179,9 +205,9 @@ def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
 #     snapshot = Snapshot.objects.create(**payload.dict())
 #     snapshot = Snapshot.objects.create(**payload.dict())
 #     return snapshot
 #     return snapshot
 #
 #
-# @router.put("/snapshot/{snapshot_id}", response=SnapshotSchema)
-# def update_snapshot(request, snapshot_id: str, payload: SnapshotSchema):
-#     snapshot = get_object_or_404(Snapshot, id=snapshot_id)
+# @router.put("/snapshot/{snapshot_uuid}", response=SnapshotSchema)
+# def update_snapshot(request, snapshot_uuid: str, payload: SnapshotSchema):
+#     snapshot = get_object_or_404(Snapshot, uuid=snapshot_uuid)
 #
 #
 #     for attr, value in payload.dict().items():
 #     for attr, value in payload.dict().items():
 #         setattr(snapshot, attr, value)
 #         setattr(snapshot, attr, value)
@@ -189,9 +215,9 @@ def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
 #
 #
 #     return snapshot
 #     return snapshot
 #
 #
-# @router.delete("/snapshot/{snapshot_id}")
-# def delete_snapshot(request, snapshot_id: str):
-#     snapshot = get_object_or_404(Snapshot, id=snapshot_id)
+# @router.delete("/snapshot/{snapshot_uuid}")
+# def delete_snapshot(request, snapshot_uuid: str):
+#     snapshot = get_object_or_404(Snapshot, uuid=snapshot_uuid)
 #     snapshot.delete()
 #     snapshot.delete()
 #     return {"success": True}
 #     return {"success": True}
 
 

+ 32 - 28
archivebox/core/admin.py

@@ -164,7 +164,7 @@ class SnapshotActionForm(ActionForm):
 class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
 class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
     list_display = ('added', 'title_str', 'files', 'size', 'url_str')
     list_display = ('added', 'title_str', 'files', 'size', 'url_str')
     sort_fields = ('title_str', 'url_str', 'added', 'files')
     sort_fields = ('title_str', 'url_str', 'added', 'files')
-    readonly_fields = ('info', 'bookmarked', 'added', 'updated')
+    readonly_fields = ('info', 'pk', 'uuid', 'abid', 'calculate_abid', 'bookmarked', 'added', 'updated')
     search_fields = ('id', 'url', 'timestamp', 'title', 'tags__name')
     search_fields = ('id', 'url', 'timestamp', 'title', 'tags__name')
     fields = ('timestamp', 'url', 'title', 'tags', *readonly_fields)
     fields = ('timestamp', 'url', 'title', 'tags', *readonly_fields)
     list_filter = ('added', 'updated', 'tags', 'archiveresult__status')
     list_filter = ('added', 'updated', 'tags', 'archiveresult__status')
@@ -213,12 +213,14 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
     #             </form>
     #             </form>
     #         ''',
     #         ''',
     #         csrf.get_token(self.request),
     #         csrf.get_token(self.request),
-    #         obj.id,
+    #         obj.pk,
     #     )
     #     )
 
 
     def info(self, obj):
     def info(self, obj):
         return format_html(
         return format_html(
             '''
             '''
+            PK: <code style="font-size: 10px; user-select: all">{}</code> &nbsp; &nbsp;
+            ABID: <code style="font-size: 10px; user-select: all">{}</code> &nbsp; &nbsp;
             UUID: <code style="font-size: 10px; user-select: all">{}</code> &nbsp; &nbsp;
             UUID: <code style="font-size: 10px; user-select: all">{}</code> &nbsp; &nbsp;
             Timestamp: <code style="font-size: 10px; user-select: all">{}</code> &nbsp; &nbsp;
             Timestamp: <code style="font-size: 10px; user-select: all">{}</code> &nbsp; &nbsp;
             URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/>
             URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/>
@@ -230,9 +232,11 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
             Extension: {} &nbsp; &nbsp;
             Extension: {} &nbsp; &nbsp;
             <br/><br/>
             <br/><br/>
             <a href="/archive/{}">View Snapshot index ➡️</a> &nbsp; &nbsp;
             <a href="/archive/{}">View Snapshot index ➡️</a> &nbsp; &nbsp;
-            <a href="/admin/core/snapshot/?id__exact={}">View actions ⚙️</a>
+            <a href="/admin/core/snapshot/?uuid__exact={}">View actions ⚙️</a>
             ''',
             ''',
-            obj.id,
+            obj.pk,
+            obj.ABID,
+            obj.uuid,
             obj.timestamp,
             obj.timestamp,
             obj.url_hash,
             obj.url_hash,
             '✅' if obj.is_archived else '❌',
             '✅' if obj.is_archived else '❌',
@@ -244,7 +248,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
             obj.headers and obj.headers.get('Content-Type') or '?',
             obj.headers and obj.headers.get('Content-Type') or '?',
             obj.extension or '?',
             obj.extension or '?',
             obj.timestamp,
             obj.timestamp,
-            obj.id,
+            obj.uuid,
         )
         )
 
 
     @admin.display(
     @admin.display(
@@ -411,38 +415,38 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
 class TagAdmin(admin.ModelAdmin):
 class TagAdmin(admin.ModelAdmin):
     list_display = ('slug', 'name', 'num_snapshots', 'snapshots', 'id')
     list_display = ('slug', 'name', 'num_snapshots', 'snapshots', 'id')
     sort_fields = ('id', 'name', 'slug')
     sort_fields = ('id', 'name', 'slug')
-    readonly_fields = ('id', 'num_snapshots', 'snapshots')
+    readonly_fields = ('id', 'pk', 'abid', 'calculate_abid', 'num_snapshots', 'snapshots')
     search_fields = ('id', 'name', 'slug')
     search_fields = ('id', 'name', 'slug')
     fields = (*readonly_fields, 'name', 'slug')
     fields = (*readonly_fields, 'name', 'slug')
     actions = ['delete_selected']
     actions = ['delete_selected']
     ordering = ['-id']
     ordering = ['-id']
 
 
-    def num_snapshots(self, obj):
+    def num_snapshots(self, tag):
         return format_html(
         return format_html(
             '<a href="/admin/core/snapshot/?tags__id__exact={}">{} total</a>',
             '<a href="/admin/core/snapshot/?tags__id__exact={}">{} total</a>',
-            obj.id,
-            obj.snapshot_set.count(),
+            tag.id,
+            tag.snapshot_set.count(),
         )
         )
 
 
-    def snapshots(self, obj):
-        total_count = obj.snapshot_set.count()
+    def snapshots(self, tag):
+        total_count = tag.snapshot_set.count()
         return mark_safe('<br/>'.join(
         return mark_safe('<br/>'.join(
             format_html(
             format_html(
                 '{} <code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a> {}</code>',
                 '{} <code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a> {}</code>',
                 snap.updated.strftime('%Y-%m-%d %H:%M') if snap.updated else 'pending...',
                 snap.updated.strftime('%Y-%m-%d %H:%M') if snap.updated else 'pending...',
-                snap.id,
-                snap.timestamp,
+                snap.pk,
+                snap.abid,
                 snap.url,
                 snap.url,
             )
             )
-            for snap in obj.snapshot_set.order_by('-updated')[:10]
-        ) + (f'<br/><a href="/admin/core/snapshot/?tags__id__exact={obj.id}">and {total_count-10} more...<a>' if obj.snapshot_set.count() > 10 else ''))
+            for snap in tag.snapshot_set.order_by('-updated')[:10]
+        ) + (f'<br/><a href="/admin/core/snapshot/?tags__id__exact={tag.id}">and {total_count-10} more...<a>' if tag.snapshot_set.count() > 10 else ''))
 
 
 
 
 @admin.register(ArchiveResult, site=archivebox_admin)
 @admin.register(ArchiveResult, site=archivebox_admin)
 class ArchiveResultAdmin(admin.ModelAdmin):
 class ArchiveResultAdmin(admin.ModelAdmin):
     list_display = ('id', 'start_ts', 'extractor', 'snapshot_str', 'tags_str', 'cmd_str', 'status', 'output_str')
     list_display = ('id', 'start_ts', 'extractor', 'snapshot_str', 'tags_str', 'cmd_str', 'status', 'output_str')
     sort_fields = ('start_ts', 'extractor', 'status')
     sort_fields = ('start_ts', 'extractor', 'status')
-    readonly_fields = ('id', 'uuid', 'snapshot_str', 'tags_str')
+    readonly_fields = ('id', 'ABID', 'snapshot_str', 'tags_str')
     search_fields = ('id', 'uuid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
     search_fields = ('id', 'uuid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
     fields = (*readonly_fields, 'snapshot', 'extractor', 'status', 'start_ts', 'end_ts', 'output', 'pwd', 'cmd', 'cmd_version')
     fields = (*readonly_fields, 'snapshot', 'extractor', 'status', 'start_ts', 'end_ts', 'output', 'pwd', 'cmd', 'cmd_version')
     autocomplete_fields = ['snapshot']
     autocomplete_fields = ['snapshot']
@@ -454,31 +458,31 @@ class ArchiveResultAdmin(admin.ModelAdmin):
     @admin.display(
     @admin.display(
         description='snapshot'
         description='snapshot'
     )
     )
-    def snapshot_str(self, obj):
+    def snapshot_str(self, result):
         return format_html(
         return format_html(
             '<a href="/archive/{}/index.html"><b><code>[{}]</code></b></a><br/>'
             '<a href="/archive/{}/index.html"><b><code>[{}]</code></b></a><br/>'
             '<small>{}</small>',
             '<small>{}</small>',
-            obj.snapshot.timestamp,
-            obj.snapshot.timestamp,
-            obj.snapshot.url[:128],
+            result.snapshot.timestamp,
+            result.snapshot.timestamp,
+            result.snapshot.url[:128],
         )
         )
 
 
     @admin.display(
     @admin.display(
         description='tags'
         description='tags'
     )
     )
-    def tags_str(self, obj):
-        return obj.snapshot.tags_str()
+    def tags_str(self, result):
+        return result.snapshot.tags_str()
 
 
-    def cmd_str(self, obj):
+    def cmd_str(self, result):
         return format_html(
         return format_html(
             '<pre>{}</pre>',
             '<pre>{}</pre>',
-            ' '.join(obj.cmd) if isinstance(obj.cmd, list) else str(obj.cmd),
+            ' '.join(result.cmd) if isinstance(result.cmd, list) else str(result.cmd),
         )
         )
 
 
-    def output_str(self, obj):
+    def output_str(self, result):
         return format_html(
         return format_html(
             '<a href="/archive/{}/{}" class="output-link">↗️</a><pre>{}</pre>',
             '<a href="/archive/{}/{}" class="output-link">↗️</a><pre>{}</pre>',
-            obj.snapshot.timestamp,
-            obj.output if (obj.status == 'succeeded') and obj.extractor not in ('title', 'archive_org') else 'index.html',
-            obj.output,
+            result.snapshot.timestamp,
+            result.output if (result.status == 'succeeded') and result.extractor not in ('title', 'archive_org') else 'index.html',
+            result.output,
         )
         )

+ 13 - 10
archivebox/core/models.py

@@ -6,6 +6,7 @@ from django_stubs_ext.db.models import TypedModelMeta
 
 
 import json
 import json
 
 
+import uuid
 from uuid import uuid4
 from uuid import uuid4
 from pathlib import Path
 from pathlib import Path
 
 
@@ -17,7 +18,7 @@ from django.urls import reverse
 from django.db.models import Case, When, Value, IntegerField
 from django.db.models import Case, When, Value, IntegerField
 from django.contrib.auth.models import User   # noqa
 from django.contrib.auth.models import User   # noqa
 
 
-from abid_utils.models import ABIDModel
+from abid_utils.models import ABIDModel, ABIDField
 
 
 from ..config import ARCHIVE_DIR, ARCHIVE_DIR_NAME
 from ..config import ARCHIVE_DIR, ARCHIVE_DIR_NAME
 from ..system import get_dir_size
 from ..system import get_dir_size
@@ -58,6 +59,8 @@ class Tag(ABIDModel):
     abid_rand_src = 'self.id'
     abid_rand_src = 'self.id'
 
 
     id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID')
     id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID')
+    abid = ABIDField(prefix=abid_prefix)
+    # no uuid on Tags
 
 
     name = models.CharField(unique=True, blank=False, max_length=100)
     name = models.CharField(unique=True, blank=False, max_length=100)
 
 
@@ -108,9 +111,9 @@ class Snapshot(ABIDModel):
     abid_subtype_src = '"01"'
     abid_subtype_src = '"01"'
     abid_rand_src = 'self.id'
     abid_rand_src = 'self.id'
 
 
-    id = models.UUIDField(primary_key=True, default=uuid4, editable=True)
-
-    # ulid = models.CharField(max_length=26, null=True, blank=True, db_index=True, unique=True)
+    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)  # legacy pk
+    uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True)
+    abid = ABIDField(prefix=abid_prefix)
 
 
     url = models.URLField(unique=True, db_index=True)
     url = models.URLField(unique=True, db_index=True)
     timestamp = models.CharField(max_length=32, unique=True, db_index=True)
     timestamp = models.CharField(max_length=32, unique=True, db_index=True)
@@ -153,7 +156,7 @@ class Snapshot(ABIDModel):
         return load_link_details(self.as_link())
         return load_link_details(self.as_link())
 
 
     def tags_str(self, nocache=True) -> str | None:
     def tags_str(self, nocache=True) -> str | None:
-        cache_key = f'{self.id}-{(self.updated or self.added).timestamp()}-tags'
+        cache_key = f'{self.pk}-{(self.updated or self.added).timestamp()}-tags'
         calc_tags_str = lambda: ','.join(self.tags.order_by('name').values_list('name', flat=True))
         calc_tags_str = lambda: ','.join(self.tags.order_by('name').values_list('name', flat=True))
         if nocache:
         if nocache:
             tags_str = calc_tags_str()
             tags_str = calc_tags_str()
@@ -200,7 +203,7 @@ class Snapshot(ABIDModel):
 
 
     @cached_property
     @cached_property
     def archive_size(self):
     def archive_size(self):
-        cache_key = f'{str(self.id)[:12]}-{(self.updated or self.added).timestamp()}-size'
+        cache_key = f'{str(self.pk)[:12]}-{(self.updated or self.added).timestamp()}-size'
 
 
         def calc_dir_size():
         def calc_dir_size():
             try:
             try:
@@ -272,7 +275,7 @@ class Snapshot(ABIDModel):
         tags_id = []
         tags_id = []
         for tag in tags:
         for tag in tags:
             if tag.strip():
             if tag.strip():
-                tags_id.append(Tag.objects.get_or_create(name=tag)[0].id)
+                tags_id.append(Tag.objects.get_or_create(name=tag)[0].pk)
         self.tags.clear()
         self.tags.clear()
         self.tags.add(*tags_id)
         self.tags.add(*tags_id)
 
 
@@ -322,9 +325,9 @@ class ArchiveResult(ABIDModel):
     abid_rand_src = 'self.uuid'
     abid_rand_src = 'self.uuid'
     EXTRACTOR_CHOICES = EXTRACTOR_CHOICES
     EXTRACTOR_CHOICES = EXTRACTOR_CHOICES
 
 
-    id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID')
-    uuid = models.UUIDField(default=uuid4, editable=True)
-    # ulid = models.CharField(max_length=26, null=True, blank=True, db_index=True, unique=True)
+    id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID')   # legacy pk
+    uuid = models.UUIDField(default=uuid.uuid4, editable=False, unique=True)      # legacy uuid
+    abid = ABIDField(prefix=abid_prefix)
 
 
     snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE)
     snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE)
     extractor = models.CharField(choices=EXTRACTOR_CHOICES, max_length=32)
     extractor = models.CharField(choices=EXTRACTOR_CHOICES, max_length=32)

+ 15 - 14
archivebox/core/settings.py

@@ -62,13 +62,13 @@ INSTALLED_APPS = [
     'django.contrib.staticfiles',
     'django.contrib.staticfiles',
     'django.contrib.admin',
     'django.contrib.admin',
 
 
+    'signal_webhooks',
     'abid_utils',
     'abid_utils',
     'core',
     'core',
     'api',
     'api',
 
 
     'admin_data_views',
     'admin_data_views',
 
 
-    'signal_webhooks',
     'django_extensions',
     'django_extensions',
 ]
 ]
 
 
@@ -248,26 +248,27 @@ DATABASES = {
         'TIME_ZONE': TIMEZONE,
         # DB setup is sometimes modified at runtime by setup_django() in config.py
     },
-    'cache': {
-        'ENGINE': 'django.db.backends.sqlite3',
-        'NAME': CACHE_DB_PATH,
-        'OPTIONS': {
-            'timeout': 60,
-            'check_same_thread': False,
-        },
-        'TIME_ZONE': TIMEZONE,
-    },
+    # 'cache': {
+    #     'ENGINE': 'django.db.backends.sqlite3',
+    #     'NAME': CACHE_DB_PATH,
+    #     'OPTIONS': {
+    #         'timeout': 60,
+    #         'check_same_thread': False,
+    #     },
+    #     'TIME_ZONE': TIMEZONE,
+    # },
 }
+MIGRATION_MODULES = {'signal_webhooks': None}
 
 # as much as I'd love this to be a UUID or ULID field, it's not supported yet as of Django 5.0
 DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'
 
 
 CACHES = {
-    'default': {'BACKEND': 'django.core.cache.backends.db.DatabaseCache', 'LOCATION': 'cache'},
-    'dummy': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'},
-    'locmem': {'BACKEND': 'django.core.cache.backends.locmem.LocMemCache'},
-    'filebased': {"BACKEND": "django.core.cache.backends.filebased.FileBasedCache", "LOCATION": CACHE_DIR / 'cache_filebased'},
+    'default': {'BACKEND': 'django.core.cache.backends.locmem.LocMemCache'},
+    # 'sqlite': {'BACKEND': 'django.core.cache.backends.db.DatabaseCache', 'LOCATION': 'cache'},
+    # 'dummy': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'},
+    # 'filebased': {"BACKEND": "django.core.cache.backends.filebased.FileBasedCache", "LOCATION": CACHE_DIR / 'cache_filebased'},
 }
 
 EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'

+ 2 - 2
archivebox/core/views.py

@@ -226,8 +226,8 @@ class SnapshotView(View):
                             '<i><b>Next steps:</i></b><br/>'
                             f'- list all the <a href="/archive/{snapshot.timestamp}/" target="_top">Snapshot files <code>.*</code></a><br/>'
                             f'- view the <a href="/archive/{snapshot.timestamp}/index.html" target="_top">Snapshot <code>./index.html</code></a><br/>'
-                            f'- go to the <a href="/admin/core/snapshot/{snapshot.id}/change/" target="_top">Snapshot admin</a> to edit<br/>'
-                            f'- go to the <a href="/admin/core/snapshot/?id__startswith={snapshot.id}" target="_top">Snapshot actions</a> to re-archive<br/>'
+                            f'- go to the <a href="/admin/core/snapshot/{snapshot.pk}/change/" target="_top">Snapshot admin</a> to edit<br/>'
+                            f'- go to the <a href="/admin/core/snapshot/?uuid__startswith={snapshot.uuid}" target="_top">Snapshot actions</a> to re-archive<br/>'
                             '- or return to <a href="/" target="_top">the main index...</a></div>'
                             '</center>'
                         ),

+ 1 - 1
archivebox/extractors/__init__.py

@@ -160,7 +160,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
                     # bump the updated time on the main Snapshot here, this is critical
                     # to be able to cache summaries of the ArchiveResults for a given
                     # snapshot without having to load all the results from the DB each time.
-                    # (we use {Snapshot.id}-{Snapshot.updated} as the cache key and assume
+                    # (we use {Snapshot.pk}-{Snapshot.updated} as the cache key and assume
                     # ArchiveResults are unchanged as long as the updated timestamp is unchanged)
                     snapshot.save()
                 else:

+ 1 - 1
archivebox/index/html.py

@@ -118,7 +118,7 @@ def render_django_template(template: str, context: Mapping[str, str]) -> str:
 
 
 def snapshot_icons(snapshot) -> str:
-    cache_key = f'{snapshot.id}-{(snapshot.updated or snapshot.added).timestamp()}-snapshot-icons'
+    cache_key = f'{snapshot.pk}-{(snapshot.updated or snapshot.added).timestamp()}-snapshot-icons'
     
     def calc_snapshot_icons():
         from core.models import EXTRACTOR_CHOICES

+ 17 - 2
archivebox/index/schema.py

@@ -192,6 +192,9 @@ class Link:
         if extended:
             info.update({
                 'snapshot_id': self.snapshot_id,
+                'snapshot_uuid': self.snapshot_uuid,
+                'snapshot_abid': self.snapshot_abid,
+
                 'link_dir': self.link_dir,
                 'archive_path': self.archive_path,
                 
@@ -261,9 +264,21 @@ class Link:
         return to_csv(self, cols=cols or self.field_names(), separator=separator, ljust=ljust)
 
     @cached_property
-    def snapshot_id(self):
+    def snapshot(self):
         from core.models import Snapshot
-        return str(Snapshot.objects.only('id').get(url=self.url).id)
+        return Snapshot.objects.only('uuid').get(url=self.url)
+
+    @cached_property
+    def snapshot_id(self):
+        return str(self.snapshot.pk)
+
+    @cached_property
+    def snapshot_uuid(self):
+        return str(self.snapshot.uuid)
+
+    @cached_property
+    def snapshot_abid(self):
+        return str(self.snapshot.ABID)
 
     @classmethod
     def field_names(cls):

+ 4 - 3
archivebox/index/sql.py

@@ -45,7 +45,8 @@ def write_link_to_sql_index(link: Link):
     info.pop('tags')
 
     try:
-        info["timestamp"] = Snapshot.objects.get(url=link.url).timestamp
+        snapshot = Snapshot.objects.get(url=link.url)
+        info["timestamp"] = snapshot.timestamp
     except Snapshot.DoesNotExist:
         while Snapshot.objects.filter(timestamp=info["timestamp"]).exists():
             info["timestamp"] = str(float(info["timestamp"]) + 1.0)
@@ -57,7 +58,7 @@ def write_link_to_sql_index(link: Link):
         for entry in entries:
             if isinstance(entry, dict):
                 result, _ = ArchiveResult.objects.get_or_create(
-                    snapshot_id=snapshot.id,
+                    snapshot_id=snapshot.pk,
                     extractor=extractor,
                     start_ts=parse_date(entry['start_ts']),
                     defaults={
@@ -71,7 +72,7 @@ def write_link_to_sql_index(link: Link):
                 )
             else:
                 result, _ = ArchiveResult.objects.update_or_create(
-                    snapshot_id=snapshot.id,
+                    snapshot_id=snapshot.pk,
                     extractor=extractor,
                     start_ts=parse_date(entry.start_ts),
                     defaults={

+ 5 - 5
archivebox/search/__init__.py

@@ -39,7 +39,7 @@ def write_search_index(link: Link, texts: Union[List[str], None]=None, out_dir:
         backend = import_backend()
         if snap:
             try:
-                backend.index(snapshot_id=str(snap.id), texts=texts)
+                backend.index(snapshot_id=str(snap.pk), texts=texts)
             except Exception as err:
                 stderr()
                 stderr(
@@ -54,7 +54,7 @@ def query_search_index(query: str, out_dir: Path=OUTPUT_DIR) -> QuerySet:
     if search_backend_enabled():
         backend = import_backend()
         try:
-            snapshot_ids = backend.search(query)
+            snapshot_pks = backend.search(query)
         except Exception as err:
             stderr()
             stderr(
@@ -64,7 +64,7 @@ def query_search_index(query: str, out_dir: Path=OUTPUT_DIR) -> QuerySet:
             raise
         else:
             # TODO preserve ordering from backend
-            qsearch = Snapshot.objects.filter(pk__in=snapshot_ids)
+            qsearch = Snapshot.objects.filter(pk__in=snapshot_pks)
             return qsearch
     
     return Snapshot.objects.none()
@@ -74,9 +74,9 @@ def flush_search_index(snapshots: QuerySet):
     if not indexing_enabled() or not snapshots:
         return
     backend = import_backend()
-    snapshot_ids=(str(pk) for pk in snapshots.values_list('pk',flat=True))
+    snapshot_pks = (str(pk) for pk in snapshots.values_list('pk', flat=True))
     try:
-        backend.flush(snapshot_ids)
+        backend.flush(snapshot_pks)
     except Exception as err:
         stderr()
         stderr(

+ 1 - 1
archivebox/templates/admin/snapshots_grid.html

@@ -147,7 +147,7 @@
     {% for obj in results %}
       <div class="card">
           <div class="card-info">
-            <a href="{% url 'admin:core_snapshot_change' obj.id %}">
+            <a href="{% url 'admin:core_snapshot_change' obj.pk %}">
               <span class="timestamp">{{obj.added}}</span>
             </a>
             <label>