浏览代码

abid gradual improvements, some regrets

Nick Sweeting 1 年之前
父节点
当前提交
d060eaa499
共有 4 个文件被更改,包括 51 次插入 和 31 次删除
  1. 28 14
      archivebox/abid_utils/abid.py
  2. 12 13
      archivebox/abid_utils/admin.py
  3. 2 1
      archivebox/core/admin.py
  4. 9 3
      archivebox/index/html.py

+ 28 - 14
archivebox/abid_utils/abid.py

@@ -1,4 +1,6 @@
-from typing import NamedTuple, Any, Union, Optional
+__package__ = 'archivebox.abid_utils'
+
+from typing import NamedTuple, Any, Union, Optional, Dict
 
 import ulid
 import uuid6
@@ -9,6 +11,7 @@ from uuid import UUID
 from typeid import TypeID            # type: ignore[import-untyped]
 from datetime import datetime
 
+from ..util import enforce_types
 
 
 ABID_PREFIX_LEN = 4
@@ -108,6 +111,7 @@ class ABID(NamedTuple):
 ####################################################
 
 
+@enforce_types
 def uri_hash(uri: Union[str, bytes], salt: str=DEFAULT_ABID_URI_SALT) -> str:
     """
     'E4A5CCD9AF4ED2A6E0954DF19FD274E9CDDB4853051F033FD518BFC90AA1AC25'
@@ -130,17 +134,19 @@ def uri_hash(uri: Union[str, bytes], salt: str=DEFAULT_ABID_URI_SALT) -> str:
 
     return hashlib.sha256(uri_bytes).hexdigest().upper()
 
-def abid_part_from_prefix(prefix: Optional[str]) -> str:
+@enforce_types
+def abid_part_from_prefix(prefix: str) -> str:
     """
     'snp_'
     """
-    if prefix is None:
-        return 'obj_'
+    # if prefix is None:
+    #     return 'obj_'
 
     prefix = prefix.strip('_').lower()
     assert len(prefix) == 3
     return prefix + '_'
 
+@enforce_types
 def abid_part_from_uri(uri: str, salt: str=DEFAULT_ABID_URI_SALT) -> str:
     """
     'E4A5CCD9'     # takes first 8 characters of sha256(url)
@@ -148,12 +154,14 @@ def abid_part_from_uri(uri: str, salt: str=DEFAULT_ABID_URI_SALT) -> str:
     uri = str(uri)
     return uri_hash(uri, salt=salt)[:ABID_URI_LEN]
 
-def abid_part_from_ts(ts: Optional[datetime]) -> str:
+@enforce_types
+def abid_part_from_ts(ts: datetime) -> str:
     """
     '01HX9FPYTR'   # produces 10 character Timestamp section of ulid based on added date
     """
-    return str(ulid.from_timestamp(ts) if ts else ulid.new())[:ABID_TS_LEN]
+    return str(ulid.from_timestamp(ts))[:ABID_TS_LEN]
 
+@enforce_types
 def abid_part_from_subtype(subtype: str) -> str:
     """
     Snapshots have 01 type, other objects have other subtypes like wget/media/etc.
@@ -165,6 +173,7 @@ def abid_part_from_subtype(subtype: str) -> str:
 
     return hashlib.sha256(subtype.encode('utf-8')).hexdigest()[:ABID_SUBTYPE_LEN].upper()
 
+@enforce_types
 def abid_part_from_rand(rand: Union[str, UUID, None, int]) -> str:
     """
     'ZYEBQE'   # takes last 6 characters of randomness from existing legacy uuid db field
@@ -186,17 +195,22 @@ def abid_part_from_rand(rand: Union[str, UUID, None, int]) -> str:
     return str(rand)[-ABID_RAND_LEN:].upper()
 
 
-def abid_from_values(prefix, ts, uri, subtype, rand, salt=DEFAULT_ABID_URI_SALT) -> ABID:
+@enforce_types
+def abid_hashes_from_values(prefix: str, ts: datetime, uri: str, subtype: str, rand: Union[str, UUID, None, int], salt: str=DEFAULT_ABID_URI_SALT) -> Dict[str, str]:
+    return {
+        'prefix': abid_part_from_prefix(prefix),
+        'ts': abid_part_from_ts(ts),
+        'uri': abid_part_from_uri(uri, salt=salt),
+        'subtype': abid_part_from_subtype(subtype),
+        'rand': abid_part_from_rand(rand),
+    }
+
+@enforce_types
+def abid_from_values(prefix: str, ts: datetime, uri: str, subtype: str, rand: Union[str, UUID, None, int], salt: str=DEFAULT_ABID_URI_SALT) -> ABID:
     """
     Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src).
     """
 
-    abid = ABID(
-        prefix=abid_part_from_prefix(prefix),
-        ts=abid_part_from_ts(ts),
-        uri=abid_part_from_uri(uri, salt=salt),
-        subtype=abid_part_from_subtype(subtype),
-        rand=abid_part_from_rand(rand),
-    )
+    abid = ABID(**abid_hashes_from_values(prefix, ts, uri, subtype, rand, salt=salt))
     assert abid.ulid and abid.uuid and abid.typeid, f'Failed to calculate {prefix}_ABID for ts={ts} uri={uri} subtyp={subtype} rand={rand}'
     return abid

+ 12 - 13
archivebox/abid_utils/admin.py

@@ -16,21 +16,20 @@ def highlight_diff(display_val, compare_val):
     display_val = str(display_val)
     compare_val = str(compare_val)
 
-    diff_chars = mark_safe('').join(
+    return mark_safe(''.join(
         format_html('<span style="color: red;">{}</span>', display_val[i])
         if display_val[i] != compare_val[i] else
         format_html('<span display="color: black">{}</span>', display_val[i])
         for i in range(len(display_val))
-    )
-    return diff_chars
+    ))
 
 def get_abid_info(self, obj, request=None):
     try:
         abid_diff = f' != obj.ABID: {highlight_diff(obj.ABID, obj.abid)} ❌' if str(obj.ABID) != str(obj.abid) else ' == .ABID ✅'
 
-        fresh_abid = obj.generate_abid()
-        fresh_abid_diff = f' != &nbsp; .fresh_abid: {highlight_diff(obj.ABID, fresh_abid)} ❌' if str(fresh_abid) != str(obj.ABID) else '✅'
-        fresh_uuid_diff = f' != &nbsp; .fresh_uuid: {highlight_diff(obj.ABID.uuid, fresh_abid.uuid)} ❌' if str(fresh_abid.uuid) != str(obj.ABID.uuid) else '✅'
+        fresh_abid = obj.ABID_FRESH
+        fresh_abid_diff = f' != &nbsp; .fresh_abid: {highlight_diff(fresh_abid, obj.ABID)} ❌' if str(fresh_abid) != str(obj.ABID) else '✅'
+        fresh_uuid_diff = f' != &nbsp; .fresh_uuid: {highlight_diff(fresh_abid.uuid, obj.ABID.uuid)} ❌' if str(fresh_abid.uuid) != str(obj.ABID.uuid) else '✅'
 
         id_fresh_abid_diff = f' != .fresh_abid ❌' if str(fresh_abid.uuid) != str(obj.id) else ' == .fresh_abid ✅'
         id_abid_diff = f' !=  .abid.uuid: {highlight_diff(obj.ABID.uuid, obj.id)} ❌' if str(obj.id) != str(obj.ABID.uuid) else ' == .abid ✅'
@@ -74,16 +73,16 @@ def get_abid_info(self, obj, request=None):
             </div>
             ''',
             obj.api_url + (f'?api_key={get_or_create_api_token(request.user)}' if request and request.user else ''), obj.api_url, obj.api_docs_url,
-            str(obj.abid), mark_safe(fresh_abid_diff),
-            str(obj.ABID.uuid), mark_safe(fresh_uuid_diff),
+            highlight_diff(obj.abid, fresh_abid), mark_safe(fresh_abid_diff),
+            highlight_diff(obj.ABID.uuid, fresh_abid.uuid), mark_safe(fresh_uuid_diff),
             str(obj.id), mark_safe(id_pk_diff + id_abid_diff + id_fresh_abid_diff),
             # str(fresh_abid.uuid), mark_safe(fresh_uuid_diff),
             # str(fresh_abid), mark_safe(fresh_abid_diff),
-            obj.ABID.ts, str(obj.ABID.uuid)[0:14], mark_safe(ts_diff), obj.abid_ts_src, source_ts_val and source_ts_val.isoformat(),
-            obj.ABID.uri, str(obj.ABID.uuid)[14:26], mark_safe(uri_diff), obj.abid_uri_src, str(obj.abid_values['uri']),
-            obj.ABID.subtype, str(obj.ABID.uuid)[26:28], mark_safe(subtype_diff), obj.abid_subtype_src, str(obj.abid_values['subtype']),
-            obj.ABID.rand, str(obj.ABID.uuid)[28:36], mark_safe(rand_diff), obj.abid_rand_src, str(obj.abid_values['rand'])[-7:],
-            str(getattr(obj, 'old_id', '')),
+            highlight_diff(obj.ABID.ts, derived_ts), highlight_diff(str(obj.ABID.uuid)[0:14], str(fresh_abid.uuid)[0:14]), mark_safe(ts_diff), obj.abid_ts_src, source_ts_val and source_ts_val.isoformat(),
+            highlight_diff(obj.ABID.uri, derived_uri), highlight_diff(str(obj.ABID.uuid)[14:26], str(fresh_abid.uuid)[14:26]), mark_safe(uri_diff), obj.abid_uri_src, str(obj.abid_values['uri']),
+            highlight_diff(obj.ABID.subtype, derived_subtype), highlight_diff(str(obj.ABID.uuid)[26:28], str(fresh_abid.uuid)[26:28]), mark_safe(subtype_diff), obj.abid_subtype_src, str(obj.abid_values['subtype']),
+            highlight_diff(obj.ABID.rand, derived_rand), highlight_diff(str(obj.ABID.uuid)[28:36], str(fresh_abid.uuid)[28:36]), mark_safe(rand_diff), obj.abid_rand_src, str(obj.abid_values['rand'])[-7:],
+            highlight_diff(getattr(obj, 'old_id', ''), obj.pk),
         )
     except Exception as e:
         return str(e)

+ 2 - 1
archivebox/core/admin.py

@@ -352,7 +352,7 @@ class SnapshotActionForm(ActionForm):
 @admin.register(Snapshot, site=archivebox_admin)
 class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
     list_display = ('added', 'title_str', 'files', 'size', 'url_str')
-    sort_fields = ('title_str', 'url_str', 'added', 'files')
+    sort_fields = ('title_str', 'url_str', 'added')
     readonly_fields = ('tags_str', 'timestamp', 'admin_actions', 'status_info', 'bookmarked', 'added', 'updated', 'created', 'modified', 'API', 'link_dir')
     search_fields = ('id', 'url', 'abid', 'old_id', 'timestamp', 'title', 'tags__name')
     list_filter = ('added', 'updated', 'archiveresult__status', 'created_by', 'tags__name')
@@ -510,6 +510,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
         # ordering='archiveresult_count',
     )
     def files(self, obj):
+        # return '-'
         return snapshot_icons(obj)
 
 

+ 9 - 3
archivebox/index/html.py

@@ -118,7 +118,7 @@ def render_django_template(template: str, context: Mapping[str, str]) -> str:
 
 
 def snapshot_icons(snapshot) -> str:
-    cache_key = f'{snapshot.pk}-{(snapshot.updated or snapshot.added).timestamp()}-snapshot-icons'
+    cache_key = f'result_icons:{snapshot.pk}:{(snapshot.modified or snapshot.created or snapshot.added).timestamp()}'
     
     def calc_snapshot_icons():
         from core.models import ArchiveResult
@@ -133,6 +133,7 @@ def snapshot_icons(snapshot) -> str:
         else:
             archive_results = snapshot.archiveresult_set.filter(status="succeeded", output__isnull=False)
 
+        # import ipdb; ipdb.set_trace()
         link = snapshot.as_link()
         path = link.archive_path
         canon = link.canonical_outputs()
@@ -197,7 +198,12 @@ def snapshot_icons(snapshot) -> str:
         # print(((end - start).total_seconds()*1000) // 1, 'ms')
         return result
 
-    return cache.get_or_set(cache_key, calc_snapshot_icons)
-    # return calc_snapshot_icons()
+    cache_result = cache.get(cache_key)
+    if cache_result:
+        return cache_result
+    
+    fresh_result = calc_snapshot_icons()
+    cache.set(cache_key, fresh_result, timeout=60 * 60 * 24)
+    return fresh_result