2
0
Эх сурвалжийг харах

add ulid and typeid to Snapshot and ArchiveResult

Nick Sweeting 1 жил өмнө
parent
commit
33bc4622a0

+ 82 - 3
archivebox/core/models.py

@@ -2,10 +2,13 @@ __package__ = 'archivebox.core'
 
 
 import uuid
+import ulid
 import json
+import hashlib
+from typeid import TypeID
 
 from pathlib import Path
-from typing import Optional, List
+from typing import Optional, List, NamedTuple
 from importlib import import_module
 
 from django.db import models
@@ -37,6 +40,13 @@ except AttributeError:
     JSONField = jsonfield.JSONField
 
 
+class ULIDParts(NamedTuple):  # the four string segments that ulid_tuple builds and ulid joins into one ULID string
+    timestamp: str  # populated from ulid_from_timestamp
+    url: str  # populated from ulid_from_urlhash
+    subtype: str  # populated from ulid_from_type
+    randomness: str  # populated from ulid_from_randomness
+
+
 class Tag(models.Model):
     """
     Based on django-taggit model
@@ -99,6 +109,38 @@ class Snapshot(models.Model):
 
     keys = ('url', 'timestamp', 'title', 'tags', 'updated')
 
+    @property
+    def ulid_from_timestamp(self):  # timestamp segment of this snapshot's composite ULID
+        return str(ulid.from_timestamp(self.added))[:10]  # leading 10 characters of a ULID generated from self.added
+
+    @property
+    def ulid_from_urlhash(self):  # URL-derived segment of this snapshot's composite ULID
+        return str(ulid.from_randomness(self.url_hash))[10:18]  # characters 10-17 of a ULID whose randomness is built from self.url_hash; NOTE(review): confirm how ulid.from_randomness interprets the 16-character hex string
+
+    @property
+    def ulid_from_type(self):  # subtype segment of the composite ULID
+        return '00'  # snapshots always use this constant two-character value
+
+    @property
+    def ulid_from_randomness(self):  # trailing segment of the composite ULID
+        return str(ulid.from_uuid(self.id))[20:]  # characters from index 20 onward of self.id rendered as a ULID; assumes self.id is a UUID — TODO confirm
+
+    @property
+    def ulid_tuple(self) -> ULIDParts:  # the four segments in ULIDParts field order
+        return ULIDParts(self.ulid_from_timestamp, self.ulid_from_urlhash, self.ulid_from_type, self.ulid_from_randomness)  # positional: timestamp, url, subtype, randomness
+
+    @property
+    def ulid(self):  # composite ULID object for this snapshot
+        return ulid.parse(''.join(self.ulid_tuple))  # concatenate the segments and parse the string; `ulid` here resolves to the imported module, not this property
+
+    @property
+    def uuid(self):  # UUID form of the composite ULID
+        return self.ulid.uuid  # computed from self.ulid rather than read from self.id
+
+    @property
+    def typeid(self):  # TypeID for this snapshot
+        return TypeID.from_uuid(prefix='snapshot', suffix=self.ulid.uuid)  # 'snapshot' prefix with the composite ULID's UUID as suffix
+
     def __repr__(self) -> str:  # debug representation built from timestamp, URL and title
         title = self.title or '-'  # fall back to '-' when title is falsy (empty or None)
         return f'[{self.timestamp}] {self.url[:64]} ({title[:64]})'  # URL and title are each cut to at most 64 characters
@@ -163,7 +205,10 @@ class Snapshot(models.Model):
 
     @cached_property
     def url_hash(self):
-        return hashurl(self.url)
+        # previous implementation: return hashurl(self.url)
+        url_hash = hashlib.new('sha256')  # SHA-256 hasher; this local reuses the property's own name
+        url_hash.update(self.url.encode('utf-8'))  # feed the UTF-8 bytes of the snapshot URL
+        return url_hash.hexdigest()[:16]  # keep only the first 16 hex characters of the digest
 
     @cached_property
     def base_url(self):
@@ -271,7 +316,7 @@ class ArchiveResult(models.Model):
     EXTRACTOR_CHOICES = EXTRACTOR_CHOICES
 
     id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID')
-    uuid = models.UUIDField(default=uuid.uuid4, editable=False)
+    uuid = models.UUIDField(default=uuid.uuid4, editable=True)  # editable flipped from False to True; NOTE(review): presumably to allow migrating it to the ULID-derived value mentioned in the TODO inside ulid — confirm
 
     snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE)
     extractor = models.CharField(choices=EXTRACTOR_CHOICES, max_length=32)
@@ -292,6 +337,40 @@ class ArchiveResult(models.Model):
     def snapshot_dir(self):
         return Path(self.snapshot.link_dir)
 
+    @property
+    def ulid_from_timestamp(self):  # timestamp segment, taken from the related snapshot
+        return self.snapshot.ulid_from_timestamp  # delegate so the result shares the snapshot's segment
+
+    @property
+    def ulid_from_urlhash(self):  # URL-derived segment, taken from the related snapshot
+        return self.snapshot.ulid_from_urlhash  # delegate so the result shares the snapshot's segment
+
+    @property
+    def ulid_from_snapshot(self):  # leading part of the related snapshot's ULID
+        return str(self.snapshot.ulid)[:18]  # first 18 characters of the snapshot's ULID string; not referenced by ulid_tuple in this class
+
+    @property
+    def ulid_from_type(self):  # subtype segment derived from the extractor name
+        return hashlib.sha256(self.extractor.encode('utf-8')).hexdigest()[:2]  # first 2 hex characters of the SHA-256 of self.extractor
+
+    @property
+    def ulid_from_randomness(self):  # trailing segment of the composite ULID
+        return str(ulid.from_uuid(self.uuid))[20:]  # characters from index 20 onward of the stored self.uuid rendered as a ULID
+
+    @property
+    def ulid_tuple(self) -> ULIDParts:  # the four segments in ULIDParts field order
+        return ULIDParts(self.ulid_from_timestamp, self.ulid_from_urlhash, self.ulid_from_type, self.ulid_from_randomness)  # positional: timestamp, url, subtype, randomness
+
+    @property
+    def ulid(self):  # composite ULID object for this archive result
+        final_ulid = ulid.parse(''.join(self.ulid_tuple))  # join the segments and parse; `ulid` here resolves to the imported module, not this property
+        # TODO: migrate self.uuid to match this new uuid
+        # self.uuid = final_ulid.uuid
+        return final_ulid  # the stored self.uuid is not modified here (the assignment above is commented out)
+
+    @property
+    def typeid(self):  # TypeID for this archive result
+        return TypeID.from_uuid(prefix='result', suffix=self.ulid.uuid)  # 'result' prefix with the composite ULID's UUID as suffix
 
     @property
     def extractor_module(self):

+ 1 - 1
archivebox/core/settings.py

@@ -263,7 +263,7 @@ CACHES = {
     'default': {'BACKEND': 'django.core.cache.backends.db.DatabaseCache', 'LOCATION': 'cache'},
     'dummy': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'},
     'locmem': {'BACKEND': 'django.core.cache.backends.locmem.LocMemCache'},
-    # 'filebased': {"BACKEND": "django.core.cache.backends.filebased.FileBasedCache", "LOCATION": CACHE_DIR / 'cache_filebased'},
+    'filebased': {"BACKEND": "django.core.cache.backends.filebased.FileBasedCache", "LOCATION": CACHE_DIR / 'cache_filebased'},
 }
 
 EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'

+ 2 - 0
pyproject.toml

@@ -37,6 +37,8 @@ dependencies = [
     #  - See Github issues for more...
     "django-signal-webhooks>=0.3.0",
     "django-admin-data-views>=0.3.1",
+    "ulid-py>=1.1.0",
+    "typeid-python>=0.3.0",
 ]
 
 homepage = "https://github.com/ArchiveBox/ArchiveBox"