config and attr access improvements

Nick Sweeting · 1 year ago
commit 0285aa52a0

+ 1 - 1
archivebox/abid_utils/abid.py

@@ -115,7 +115,7 @@ def uri_hash(uri: Union[str, bytes], salt: str=DEFAULT_ABID_URI_SALT) -> str:
     if isinstance(uri, bytes):
         uri_str: str = uri.decode()
     else:
-        uri_str = uri
+        uri_str = str(uri)
 
     # only hash the domain part of URLs
     if '://' in uri_str:
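
Note on the change above: coercing with str(uri) presumably lets non-str URI-like values (a Path, a lazy string, etc.) pass through uri_hash() cleanly, and is a no-op for plain strings. A minimal standalone sketch of just that normalization step (function name invented for the sketch):

    def normalize_uri(uri) -> str:
        # mirror the branch in uri_hash(): decode bytes, coerce everything else to str
        if isinstance(uri, bytes):
            return uri.decode()
        return str(uri)

    assert normalize_uri(b'https://example.com/page') == 'https://example.com/page'
    assert normalize_uri('https://example.com/page') == 'https://example.com/page'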

+ 11 - 8
archivebox/abid_utils/models.py

@@ -15,6 +15,7 @@ from charidfield import CharIDField  # type: ignore[import-untyped]
 
 from django.conf import settings
 from django.db import models
+from django.utils import timezone
 from django.db.utils import OperationalError
 from django.contrib.auth import get_user_model
 
@@ -115,7 +116,8 @@ class ABIDModel(models.Model):
             raise Exception(f'{self.__class__.__name__}.abid_prefix must be defined to calculate ABIDs (suggested: {suggested_abid})')
 
         if not ts:
-            ts = datetime.utcfromtimestamp(0)
+            # default to unix epoch with 00:00:00 UTC
+            ts = datetime.fromtimestamp(0, timezone.utc)     # equivalent to: ts = datetime.utcfromtimestamp(0)
             print(f'[!] WARNING: Generating ABID with ts=0000000000 placeholder because {self.__class__.__name__}.abid_ts_src={self.abid_ts_src} is unset!', ts.isoformat())
 
         if not uri:
@@ -146,7 +148,13 @@ class ABIDModel(models.Model):
         """
         ULIDParts(timestamp='01HX9FPYTR', url='E4A5CCD9', subtype='00', randomness='ZYEBQE')
         """
-        abid = None
+        
+        # if object is not yet saved to DB, always generate fresh ABID from values
+        if self._state.adding:
+            return self.generate_abid()
+        
+        # otherwise DB is single source of truth, load ABID from existing db pk
+        abid: ABID | None = None
         try:
             abid = abid or ABID.parse(self.pk)
         except Exception:
@@ -158,12 +166,7 @@ class ABIDModel(models.Model):
             pass
 
         try:
-            abid = abid or ABID.parse(self.uuid)
-        except Exception:
-            pass
-
-        try:
-            abid = abid or ABID.parse(self.abid)
+            abid = abid or ABID.parse(cast(str, self.abid))
         except Exception:
             pass
 

+ 6 - 3
archivebox/api/auth.py

@@ -1,6 +1,6 @@
 __package__ = 'archivebox.api'
 
-from typing import Optional
+from typing import Optional, cast
 
 from django.http import HttpRequest
 from django.contrib.auth import login
@@ -18,12 +18,13 @@ def auth_using_token(token, request: Optional[HttpRequest]=None) -> Optional[Abs
 
     submitted_empty_form = token in ('string', '', None)
     if submitted_empty_form:
+        assert request is not None, 'No request provided for API key authentication'
         user = request.user       # see if user is authed via django session and use that as the default
     else:
         try:
             token = APIToken.objects.get(token=token)
             if token.is_valid():
-                user = token.user
+                user = token.created_by
         except APIToken.DoesNotExist:
             pass
 
@@ -38,6 +39,7 @@ def auth_using_password(username, password, request: Optional[HttpRequest]=None)
     
     submitted_empty_form = (username, password) in (('string', 'string'), ('', ''), (None, None))
     if submitted_empty_form:
+        assert request is not None, 'No request provided for API key authentication'
         user = request.user       # see if user is authed via django session and use that as the default
     else:
         user = authenticate(
@@ -47,8 +49,9 @@ def auth_using_password(username, password, request: Optional[HttpRequest]=None)
 
     if not user:
         print('[❌] Failed to authenticate API user using API Key:', request)
+        user = None
 
-    return user
+    return cast(AbstractBaseUser | None, user)
 
 
 ### Base Auth Types
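
The return value is now narrowed with typing.cast instead of being left to inference. cast() has no runtime effect; it simply returns its second argument and only tells the type checker what type to assume from that point on. A standalone illustration (names invented for the sketch):

    from typing import Optional, cast

    def narrow(value: object) -> Optional[str]:
        # no runtime conversion happens here; cast() just hands the value back
        return cast(Optional[str], value)

    assert narrow('session-user') == 'session-user'
    assert narrow(None) is None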

+ 13 - 13
archivebox/api/models.py

@@ -12,7 +12,8 @@ from signal_webhooks.models import WebhookBase
 
 from django_stubs_ext.db.models import TypedModelMeta
 
-from abid_utils.models import ABIDModel, ABIDField
+from abid_utils.models import ABIDModel, ABIDField, get_or_create_system_user_pk
+
 
 
 def generate_secret_token() -> str:
@@ -32,15 +33,13 @@ class APIToken(ABIDModel):
     abid_rand_src = 'self.id'
 
     id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
-    uuid = models.UUIDField(blank=True, null=True, editable=False, unique=True)
     abid = ABIDField(prefix=abid_prefix)
 
-    created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE)
-    token = models.CharField(max_length=32, default=generate_secret_token, unique=True)
-    
+    created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk)
     created = models.DateTimeField(auto_now_add=True)
+
+    token = models.CharField(max_length=32, default=generate_secret_token, unique=True)
     expires = models.DateTimeField(null=True, blank=True)
-    
 
     class Meta(TypedModelMeta):
         verbose_name = "API Key"
@@ -50,7 +49,7 @@ class APIToken(ABIDModel):
         return self.token
 
     def __repr__(self) -> str:
-        return f'<APIToken user={self.user.username} token=************{self.token[-4:]}>'
+        return f'<APIToken user={self.created_by.username} token=************{self.token[-4:]}>'
 
     def __json__(self) -> dict:
         return {
@@ -63,10 +62,6 @@ class APIToken(ABIDModel):
             "expires":          self.expires_as_iso8601,
         }
 
-    @property
-    def ulid(self):
-        return self.get_abid().ulid
-
     @property
     def expires_as_iso8601(self):
         """Returns the expiry date of the token in ISO 8601 format or a date 100 years in the future if none."""
@@ -100,10 +95,15 @@ class OutboundWebhook(ABIDModel, WebhookBase):
     abid_subtype_src = 'self.ref'
     abid_rand_src = 'self.id'
 
-    id = models.UUIDField(blank=True, null=True, unique=True, editable=True)
-    uuid = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True)
+    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
     abid = ABIDField(prefix=abid_prefix)
 
+    created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk)
+    created = models.DateTimeField(auto_now_add=True)
+    modified = models.DateTimeField(auto_now=True)
+
+    # More fields here: WebhookBase...
+
     WebhookBase._meta.get_field('name').help_text = (
         'Give your webhook a descriptive name (e.g. Notify ACME Slack channel of any new ArchiveResults).')
     WebhookBase._meta.get_field('signal').help_text = (

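Worth noting for the new created_by and token defaults above: Django invokes a callable passed as default= each time a new object needs a value, which is why get_or_create_system_user_pk and generate_secret_token are passed uncalled rather than evaluated once at import time. A rough stand-in (the real generate_secret_token body is not shown in this diff and may differ):

    import secrets

    def generate_secret_token() -> str:
        # hypothetical body: 16 random bytes -> 32 hex chars, matching max_length=32
        return secrets.token_hex(16)

    assert len(generate_secret_token()) == 32
    assert generate_secret_token() != generate_secret_token()   # fresh value per call, hence per row
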
+ 6 - 6
archivebox/api/v1_core.py

@@ -309,9 +309,9 @@ def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
 #     snapshot = Snapshot.objects.create(**payload.dict())
 #     return snapshot
 #
-# @router.put("/snapshot/{snapshot_uuid}", response=SnapshotSchema)
-# def update_snapshot(request, snapshot_uuid: str, payload: SnapshotSchema):
-#     snapshot = get_object_or_404(Snapshot, uuid=snapshot_uuid)
+# @router.put("/snapshot/{snapshot_id}", response=SnapshotSchema)
+# def update_snapshot(request, snapshot_id: str, payload: SnapshotSchema):
+#     snapshot = get_object_or_404(Snapshot, uuid=snapshot_id)
 #
 #     for attr, value in payload.dict().items():
 #         setattr(snapshot, attr, value)
@@ -319,9 +319,9 @@ def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
 #
 #     return snapshot
 #
-# @router.delete("/snapshot/{snapshot_uuid}")
-# def delete_snapshot(request, snapshot_uuid: str):
-#     snapshot = get_object_or_404(Snapshot, uuid=snapshot_uuid)
+# @router.delete("/snapshot/{snapshot_id}")
+# def delete_snapshot(request, snapshot_id: str):
+#     snapshot = get_object_or_404(Snapshot, uuid=snapshot_id)
 #     snapshot.delete()
 #     return {"success": True}
 

+ 37 - 8
archivebox/config.py

@@ -44,6 +44,7 @@ from collections import defaultdict
 import importlib.metadata
 
 from .config_stubs import (
+    AttrDict,
     SimpleConfigValueDict,
     ConfigValue,
     ConfigDict,
@@ -379,6 +380,29 @@ ALLOWED_IN_OUTPUT_DIR = {
 ALLOWDENYLIST_REGEX_FLAGS: int = re.IGNORECASE | re.UNICODE | re.MULTILINE
 
 
+CONSTANTS = {
+    "PACKAGE_DIR_NAME":             {'default': lambda c: PACKAGE_DIR_NAME},
+    "TEMPLATES_DIR_NAME":           {'default': lambda c: TEMPLATES_DIR_NAME},
+    "ARCHIVE_DIR_NAME":             {'default': lambda c: ARCHIVE_DIR_NAME},
+    "SOURCES_DIR_NAME":             {'default': lambda c: SOURCES_DIR_NAME},
+    "LOGS_DIR_NAME":                {'default': lambda c: LOGS_DIR_NAME},
+    "CACHE_DIR_NAME":               {'default': lambda c: CACHE_DIR_NAME},
+    "PERSONAS_DIR_NAME":            {'default': lambda c: PERSONAS_DIR_NAME},
+    "CRONTABS_DIR_NAME":            {'default': lambda c: CRONTABS_DIR_NAME},
+    "SQL_INDEX_FILENAME":           {'default': lambda c: SQL_INDEX_FILENAME},
+    "JSON_INDEX_FILENAME":          {'default': lambda c: JSON_INDEX_FILENAME},
+    "HTML_INDEX_FILENAME":          {'default': lambda c: HTML_INDEX_FILENAME},
+    "ROBOTS_TXT_FILENAME":          {'default': lambda c: ROBOTS_TXT_FILENAME},
+    "FAVICON_FILENAME":             {'default': lambda c: FAVICON_FILENAME},
+    "CONFIG_FILENAME":              {'default': lambda c: CONFIG_FILENAME},
+    "DEFAULT_CLI_COLORS":           {'default': lambda c: DEFAULT_CLI_COLORS},
+    "ANSI":                         {'default': lambda c: ANSI},
+    "COLOR_DICT":                   {'default': lambda c: COLOR_DICT},
+    "STATICFILE_EXTENSIONS":        {'default': lambda c: STATICFILE_EXTENSIONS},
+    "ALLOWED_IN_OUTPUT_DIR":        {'default': lambda c: ALLOWED_IN_OUTPUT_DIR},
+    "ALLOWDENYLIST_REGEX_FLAGS":    {'default': lambda c: ALLOWDENYLIST_REGEX_FLAGS},
+}
+
 ############################## Version Config ##################################
 
 def get_system_user() -> str:
@@ -498,9 +522,13 @@ def can_upgrade(config):
 
 ############################## Derived Config ##################################
 
+
+
 # These are derived/computed values calculated *after* all user-provided config values are ingested
 # they appear in `archivebox config` output and are intended to be read-only for the user
 DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
+    **CONSTANTS,
+
     'TERM_WIDTH':               {'default': lambda c: lambda: shutil.get_terminal_size((100, 10)).columns},
     'USER':                     {'default': lambda c: get_system_user()},
     'ANSI':                     {'default': lambda c: DEFAULT_CLI_COLORS if c['USE_COLOR'] else {k: '' for k in DEFAULT_CLI_COLORS.keys()}},
@@ -678,28 +706,29 @@ def load_config_val(key: str,
     raise Exception('Config values can only be str, bool, int, or json')
 
 
-def load_config_file(out_dir: str=None) -> Optional[Dict[str, str]]:
+def load_config_file(out_dir: str | None=None) -> Optional[ConfigDict]:
     """load the ini-formatted config file from OUTPUT_DIR/Archivebox.conf"""
 
     out_dir = out_dir or Path(os.getenv('OUTPUT_DIR', '.')).resolve()
+    assert out_dir and out_dir.is_dir()
     config_path = Path(out_dir) / CONFIG_FILENAME
     if config_path.exists():
         config_file = ConfigParser()
         config_file.optionxform = str
         config_file.read(config_path)
         # flatten into one namespace
-        config_file_vars = {
+        config_file_vars = ConfigDict({
             key.upper(): val
             for section, options in config_file.items()
                 for key, val in options.items()
-        }
+        })
         # print('[i] Loaded config file', os.path.abspath(config_path))
         # print(config_file_vars)
         return config_file_vars
     return None
 
 
-def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:
+def write_config_file(config: Dict[str, str], out_dir: str | None=None) -> ConfigDict:
     """load the ini-formatted config file from OUTPUT_DIR/Archivebox.conf"""
 
     from .system import atomic_write
@@ -740,7 +769,7 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:
             existing_config = dict(config_file[section])
         else:
             existing_config = {}
-        config_file[section] = {**existing_config, key: val}
+        config_file[section] = ConfigDict({**existing_config, key: val})
 
     # always make sure there's a SECRET_KEY defined for Django
     existing_secret_key = None
@@ -815,7 +844,7 @@ def load_config(defaults: ConfigDefaultDict,
             # raise
             raise SystemExit(2)
 
-    return extended_config
+    return AttrDict(extended_config)
 
 
 def parse_version_string(version: str) -> Tuple[int, int, int]:
@@ -1198,14 +1227,14 @@ def get_chrome_info(config: ConfigDict) -> ConfigValue:
 
 
 def load_all_config():
-    CONFIG: ConfigDict = {}
+    CONFIG: ConfigDict = ConfigDict()
     for section_name, section_config in CONFIG_SCHEMA.items():
         CONFIG = load_config(section_config, CONFIG)
 
     return load_config(DYNAMIC_CONFIG_SCHEMA, CONFIG)
 
 # add all final config values in CONFIG to globals in this file
-CONFIG = load_all_config()
+CONFIG: ConfigDict = load_all_config()
 globals().update(CONFIG)
 # this lets us do:  from .config import DEBUG, MEDIA_TIMEOUT, ...
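
The CONSTANTS block above wraps plain module-level values in the same {'default': lambda c: ...} shape as the derived options, so they flow through load_config() unchanged and appear in archivebox config output alongside everything else. A rough sketch of that resolution pattern, assuming entries are evaluated in order and later defaults can read earlier resolved values through c (the real load_config() also layers in env vars, the config file, and type coercion):

    SCHEMA = {
        'OUTPUT_DIR':  {'default': lambda c: '/data'},
        'ARCHIVE_DIR': {'default': lambda c: c['OUTPUT_DIR'] + '/archive'},
    }

    config: dict = {}
    for key, entry in SCHEMA.items():
        config[key] = entry['default'](config)   # each default sees the config resolved so far

    assert config == {'OUTPUT_DIR': '/data', 'ARCHIVE_DIR': '/data/archive'}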
 

+ 6 - 1
archivebox/config_stubs.py

@@ -9,11 +9,15 @@ SimpleConfigValueDict = Dict[str, SimpleConfigValue]
 SimpleConfigValueGetter = Callable[[], SimpleConfigValue]
 ConfigValue = Union[SimpleConfigValue, SimpleConfigValueDict, SimpleConfigValueGetter]
 
+class AttrDict(dict):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.__dict__ = self
 
 class BaseConfig(TypedDict):
     pass
 
-class ConfigDict(BaseConfig, total=False):
+class ConfigDict(BaseConfig, AttrDict, total=False):
     """
     # Regenerate by pasting this quine into `archivebox shell` 🥚
     from archivebox.config import ConfigDict, CONFIG_DEFAULTS
@@ -28,6 +32,7 @@ class ConfigDict(BaseConfig, total=False):
                 print(f'    {key}: {Type.__name__}')
         print()
     """
+
     IS_TTY: bool
     USE_COLOR: bool
     SHOW_PROGRESS: bool
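
AttrDict is what makes the CONFIG.SNAPSHOTS_PER_PAGE-style attribute access used throughout this commit work: pointing __dict__ at the dict itself means every key is also reachable as an attribute, including keys added after construction. A small demonstration:

    class AttrDict(dict):
        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.__dict__ = self    # keys double as instance attributes

    CONFIG = AttrDict({'OUTPUT_DIR': '/data', 'USE_COLOR': True})
    CONFIG['TIMEOUT'] = 60          # added later, still visible as an attribute

    assert CONFIG.OUTPUT_DIR == CONFIG['OUTPUT_DIR'] == '/data'
    assert CONFIG.TIMEOUT == 60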

+ 23 - 23
archivebox/core/admin.py

@@ -7,6 +7,7 @@ from io import StringIO
 from pathlib import Path
 from contextlib import redirect_stdout
 from datetime import datetime, timezone
+from typing import Dict, Any
 
 from django.contrib import admin
 from django.db.models import Count, Q
@@ -16,10 +17,12 @@ from django.utils.safestring import mark_safe
 from django.shortcuts import render, redirect
 from django.contrib.auth import get_user_model
 from django.core.exceptions import ValidationError
+from django.conf import settings
 from django import forms
 
 
-from signal_webhooks.admin import WebhookAdmin, get_webhook_model
+from signal_webhooks.admin import WebhookAdmin
+from signal_webhooks.utils import get_webhook_model
 # from plugantic.admin import CustomPlugin
 
 from ..util import htmldecode, urldecode, ansi_to_html
@@ -34,16 +37,11 @@ from index.html import snapshot_icons
 from logging_util import printable_filesize
 from main import add, remove
 from extractors import archive_links
-from config import (
-    OUTPUT_DIR,
-    SNAPSHOTS_PER_PAGE,
-    VERSION,
-    VERSIONS_AVAILABLE,
-    CAN_UPGRADE
-)
 
 
-GLOBAL_CONTEXT = {'VERSION': VERSION, 'VERSIONS_AVAILABLE': VERSIONS_AVAILABLE, 'CAN_UPGRADE': CAN_UPGRADE}
+CONFIG = settings.CONFIG
+
+GLOBAL_CONTEXT = {'VERSION': CONFIG.VERSION, 'VERSIONS_AVAILABLE': CONFIG.VERSIONS_AVAILABLE, 'CAN_UPGRADE': CONFIG.CAN_UPGRADE}
 
 # Admin URLs
 # /admin/
@@ -74,7 +72,7 @@ class ArchiveBoxAdmin(admin.AdminSite):
             return redirect(f'/admin/login/?next={request.path}')
 
         request.current_app = self.name
-        context = {
+        context: Dict[str, Any] = {
             **self.each_context(request),
             'title': 'Add URLs',
         }
@@ -92,7 +90,7 @@ class ArchiveBoxAdmin(admin.AdminSite):
                     "urls": url,
                     "depth": depth,
                     "update_all": False,
-                    "out_dir": OUTPUT_DIR,
+                    "out_dir": CONFIG.OUTPUT_DIR,
                 }
                 add_stdout = StringIO()
                 with redirect_stdout(add_stdout):
@@ -101,7 +99,7 @@ class ArchiveBoxAdmin(admin.AdminSite):
 
                 context.update({
                     "stdout": ansi_to_html(add_stdout.getvalue().strip()),
-                    "form": AddLinkForm()
+                    "form": AddLinkForm(),
                 })
             else:
                 context["form"] = form
@@ -118,12 +116,14 @@ archivebox_admin.disable_action('delete_selected')
 # archivebox_admin.register(CustomPlugin)
 
 # patch admin with methods to add data views (implemented by admin_data_views package)
+# https://github.com/MrThearMan/django-admin-data-views
+# https://mrthearman.github.io/django-admin-data-views/setup/
 ############### Additional sections are defined in settings.ADMIN_DATA_VIEWS #########
 from admin_data_views.admin import get_app_list, admin_data_index_view, get_admin_data_urls, get_urls
 
 archivebox_admin.get_app_list = get_app_list.__get__(archivebox_admin, ArchiveBoxAdmin)
-archivebox_admin.admin_data_index_view = admin_data_index_view.__get__(archivebox_admin, ArchiveBoxAdmin)
-archivebox_admin.get_admin_data_urls = get_admin_data_urls.__get__(archivebox_admin, ArchiveBoxAdmin)
+archivebox_admin.admin_data_index_view = admin_data_index_view.__get__(archivebox_admin, ArchiveBoxAdmin)       # type: ignore
+archivebox_admin.get_admin_data_urls = get_admin_data_urls.__get__(archivebox_admin, ArchiveBoxAdmin)           # type: ignore
 archivebox_admin.get_urls = get_urls(archivebox_admin.get_urls).__get__(archivebox_admin, ArchiveBoxAdmin)
 
 
@@ -146,7 +146,7 @@ class ArchiveResultInline(admin.TabularInline):
 
 
 class TagInline(admin.TabularInline):
-    model = Tag.snapshot_set.through
+    model = Tag.snapshot_set.through       # type: ignore
     # fk_name = 'snapshot'
     fields = ('id', 'tag')
     extra = 1
@@ -241,7 +241,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
     actions = ['add_tags', 'remove_tags', 'update_titles', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots']
     autocomplete_fields = ['tags']
     inlines = [TagInline, ArchiveResultInline]
-    list_per_page = SNAPSHOTS_PER_PAGE
+    list_per_page = CONFIG.SNAPSHOTS_PER_PAGE
 
     action_form = SnapshotActionForm
 
@@ -433,7 +433,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
 
         # Monkey patch here plus core_tags.py
         self.change_list_template = 'private_index_grid.html'
-        self.list_per_page = SNAPSHOTS_PER_PAGE
+        self.list_per_page = CONFIG.SNAPSHOTS_PER_PAGE
         self.list_max_show_all = self.list_per_page
 
         # Call monkey patched view
@@ -458,7 +458,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
         archive_links([
             snapshot.as_link()
             for snapshot in queryset
-        ], out_dir=OUTPUT_DIR)
+        ], out_dir=CONFIG.OUTPUT_DIR)
 
     @admin.action(
         description="⬇️ Title"
@@ -467,7 +467,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
         archive_links([
             snapshot.as_link()
             for snapshot in queryset
-        ], overwrite=True, methods=('title','favicon'), out_dir=OUTPUT_DIR)
+        ], overwrite=True, methods=('title','favicon'), out_dir=CONFIG.OUTPUT_DIR)
 
     @admin.action(
         description="Re-Snapshot"
@@ -485,13 +485,13 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
         archive_links([
             snapshot.as_link()
             for snapshot in queryset
-        ], overwrite=True, out_dir=OUTPUT_DIR)
+        ], overwrite=True, out_dir=CONFIG.OUTPUT_DIR)
 
     @admin.action(
         description="Delete"
     )
     def delete_snapshots(self, request, queryset):
-        remove(snapshots=queryset, yes=True, delete=True, out_dir=OUTPUT_DIR)
+        remove(snapshots=queryset, yes=True, delete=True, out_dir=CONFIG.OUTPUT_DIR)
 
 
     @admin.action(
@@ -578,7 +578,7 @@ class ArchiveResultAdmin(admin.ModelAdmin):
 
     list_filter = ('status', 'extractor', 'start_ts', 'cmd_version')
     ordering = ['-start_ts']
-    list_per_page = SNAPSHOTS_PER_PAGE
+    list_per_page = CONFIG.SNAPSHOTS_PER_PAGE
 
     @admin.display(
         description='Snapshot Info'
@@ -620,7 +620,7 @@ class ArchiveResultAdmin(admin.ModelAdmin):
         )
 
     def output_summary(self, result):
-        snapshot_dir = Path(OUTPUT_DIR) / str(result.pwd).split('data/', 1)[-1]
+        snapshot_dir = Path(CONFIG.OUTPUT_DIR) / str(result.pwd).split('data/', 1)[-1]
         output_str = format_html(
             '<pre style="display: inline-block">{}</pre><br/>',
             result.output,

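One pattern above that is easy to misread: get_app_list.__get__(archivebox_admin, ArchiveBoxAdmin) uses the descriptor protocol to bind a plain function to an existing instance, which is how the admin_data_views functions get grafted onto archivebox_admin as methods. A minimal standalone illustration:

    class Site:
        name = 'archivebox'

    def describe(self):
        return 'admin site: ' + self.name

    site = Site()
    site.describe = describe.__get__(site, Site)    # bind the function to this one instance

    assert site.describe() == 'admin site: archivebox'
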
+ 22 - 19
archivebox/core/models.py

@@ -1,7 +1,7 @@
 __package__ = 'archivebox.core'
 
 
-from typing import Optional, List, Dict
+from typing import Optional, List, Dict, Iterable
 from django_stubs_ext.db.models import TypedModelMeta
 
 import json
@@ -17,10 +17,10 @@ from django.utils.text import slugify
 from django.core.cache import cache
 from django.urls import reverse, reverse_lazy
 from django.db.models import Case, When, Value, IntegerField
+from django.conf import settings
 
 from abid_utils.models import ABIDModel, ABIDField
 
-from ..config import ARCHIVE_DIR, ARCHIVE_DIR_NAME
 from ..system import get_dir_size
 from ..util import parse_date, base_url
 from ..index.schema import Link
@@ -72,6 +72,7 @@ class Tag(ABIDModel):
     slug = models.SlugField(unique=True, blank=False, max_length=100, editable=False)
     # slug is autoset on save from name, never set it manually
 
+    snapshot_set: models.Manager['Snapshot']
 
     class Meta(TypedModelMeta):
         verbose_name = "Tag"
@@ -154,6 +155,8 @@ class Snapshot(ABIDModel):
 
     keys = ('url', 'timestamp', 'title', 'tags', 'updated')
 
+    archiveresult_set: models.Manager['ArchiveResult']
+
     @property
     def uuid(self):
         return self.id
@@ -246,11 +249,11 @@ class Snapshot(ABIDModel):
 
     @cached_property
     def link_dir(self):
-        return str(ARCHIVE_DIR / self.timestamp)
+        return str(settings.CONFIG.ARCHIVE_DIR / self.timestamp)
 
     @cached_property
     def archive_path(self):
-        return '{}/{}'.format(ARCHIVE_DIR_NAME, self.timestamp)
+        return '{}/{}'.format(settings.CONFIG.ARCHIVE_DIR_NAME, self.timestamp)
 
     @cached_property
     def archive_size(self):
@@ -284,7 +287,7 @@ class Snapshot(ABIDModel):
 
     @cached_property
     def status_code(self) -> Optional[str]:
-        return self.headers and self.headers.get('Status-Code')
+        return self.headers.get('Status-Code') if self.headers else None
 
     @cached_property
     def history(self) -> dict:
@@ -322,7 +325,7 @@ class Snapshot(ABIDModel):
 
         return None
 
-    def save_tags(self, tags: List[str]=()) -> None:
+    def save_tags(self, tags: Iterable[str]=()) -> None:
         tags_id = []
         for tag in tags:
             if tag.strip():
@@ -334,17 +337,17 @@ class Snapshot(ABIDModel):
     # def get_storage_dir(self, create=True, symlink=True) -> Path:
     #     date_str = self.added.strftime('%Y%m%d')
     #     domain_str = domain(self.url)
-    #     abs_storage_dir = Path(ARCHIVE_DIR) / 'snapshots' / date_str / domain_str / str(self.ulid)
+    #     abs_storage_dir = Path(settings.CONFIG.ARCHIVE_DIR) / 'snapshots' / date_str / domain_str / str(self.ulid)
 
     #     if create and not abs_storage_dir.is_dir():
     #         abs_storage_dir.mkdir(parents=True, exist_ok=True)
 
     #     if symlink:
     #         LINK_PATHS = [
-    #             Path(ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid),
-    #             # Path(ARCHIVE_DIR).parent / 'index' / 'snapshots_by_id' / str(self.ulid),
-    #             Path(ARCHIVE_DIR).parent / 'index' / 'snapshots_by_date' / date_str / domain_str / str(self.ulid),
-    #             Path(ARCHIVE_DIR).parent / 'index' / 'snapshots_by_domain' / domain_str / date_str / str(self.ulid),
+    #             Path(settings.CONFIG.ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid),
+    #             # Path(settings.CONFIG.ARCHIVE_DIR).parent / 'index' / 'snapshots_by_id' / str(self.ulid),
+    #             Path(settings.CONFIG.ARCHIVE_DIR).parent / 'index' / 'snapshots_by_date' / date_str / domain_str / str(self.ulid),
+    #             Path(settings.CONFIG.ARCHIVE_DIR).parent / 'index' / 'snapshots_by_domain' / domain_str / date_str / str(self.ulid),
     #         ]
     #         for link_path in LINK_PATHS:
     #             link_path.parent.mkdir(parents=True, exist_ok=True)
@@ -439,8 +442,8 @@ class ArchiveResult(ABIDModel):
         should be used for user-facing iframe embeds of this result
         """
 
-        if hasattr(self.extractor_module, 'get_embed_path'):
-            return self.extractor_module.get_embed_path(self)
+        if get_embed_path_func := getattr(self.extractor_module, 'get_embed_path', None):
+            return get_embed_path_func(self)
 
         return self.extractor_module.get_output_path()
 
@@ -455,18 +458,18 @@ class ArchiveResult(ABIDModel):
     # def get_storage_dir(self, create=True, symlink=True):
     #     date_str = self.snapshot.added.strftime('%Y%m%d')
     #     domain_str = domain(self.snapshot.url)
-    #     abs_storage_dir = Path(ARCHIVE_DIR) / 'results' / date_str / domain_str / self.extractor / str(self.ulid)
+    #     abs_storage_dir = Path(settings.CONFIG.ARCHIVE_DIR) / 'results' / date_str / domain_str / self.extractor / str(self.ulid)
 
     #     if create and not abs_storage_dir.is_dir():
     #         abs_storage_dir.mkdir(parents=True, exist_ok=True)
 
     #     if symlink:
     #         LINK_PATHS = [
-    #             Path(ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid),
-    #             # Path(ARCHIVE_DIR).parent / 'index' / 'results_by_id' / str(self.ulid),
-    #             # Path(ARCHIVE_DIR).parent / 'index' / 'results_by_date' / date_str / domain_str / self.extractor / str(self.ulid),
-    #             Path(ARCHIVE_DIR).parent / 'index' / 'results_by_domain' / domain_str / date_str / self.extractor / str(self.ulid),
-    #             Path(ARCHIVE_DIR).parent / 'index' / 'results_by_type' / self.extractor / date_str / domain_str / str(self.ulid),
+    #             Path(settings.CONFIG.ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid),
+    #             # Path(settings.CONFIG.ARCHIVE_DIR).parent / 'index' / 'results_by_id' / str(self.ulid),
+    #             # Path(settings.CONFIG.ARCHIVE_DIR).parent / 'index' / 'results_by_date' / date_str / domain_str / self.extractor / str(self.ulid),
+    #             Path(settings.CONFIG.ARCHIVE_DIR).parent / 'index' / 'results_by_domain' / domain_str / date_str / self.extractor / str(self.ulid),
+    #             Path(settings.CONFIG.ARCHIVE_DIR).parent / 'index' / 'results_by_type' / self.extractor / date_str / domain_str / str(self.ulid),
     #         ]
     #         for link_path in LINK_PATHS:
     #             link_path.parent.mkdir(parents=True, exist_ok=True)

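The embed_path() change above swaps hasattr plus a second attribute access for a single getattr with a default, caught in a walrus assignment, so the extractor module's hook is looked up exactly once. The shape of that pattern in isolation:

    import math

    def embed_path(extractor_module) -> str:
        # use the module's get_embed_path() hook if it defines one, else fall back
        if get_embed_path := getattr(extractor_module, 'get_embed_path', None):
            return get_embed_path()
        return 'default-output-path'

    assert embed_path(math) == 'default-output-path'   # math defines no get_embed_path hook
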
+ 44 - 58
archivebox/core/settings.py

@@ -9,32 +9,9 @@ import tempfile
 from pathlib import Path
 from django.utils.crypto import get_random_string
 
-from ..config import (
-    CONFIG,
-    DEBUG,
-    SECRET_KEY,
-    ALLOWED_HOSTS,
-    PACKAGE_DIR,
-    TEMPLATES_DIR_NAME,
-    CUSTOM_TEMPLATES_DIR,
-    SQL_INDEX_FILENAME,
-    OUTPUT_DIR,
-    ARCHIVE_DIR,
-    LOGS_DIR,
-    CACHE_DIR,
-    TIMEZONE,
-
-    LDAP,
-    LDAP_SERVER_URI,
-    LDAP_BIND_DN,
-    LDAP_BIND_PASSWORD,
-    LDAP_USER_BASE,
-    LDAP_USER_FILTER,
-    LDAP_USERNAME_ATTR,
-    LDAP_FIRSTNAME_ATTR,
-    LDAP_LASTNAME_ATTR,
-    LDAP_EMAIL_ATTR,
-)
+from ..config import CONFIG
+from ..config_stubs import AttrDict
+assert isinstance(CONFIG, AttrDict)
 
 IS_MIGRATING = 'makemigrations' in sys.argv[:3] or 'migrate' in sys.argv[:3]
 IS_TESTING = 'test' in sys.argv[:3] or 'PYTEST_CURRENT_TEST' in os.environ
@@ -53,12 +30,12 @@ LOGOUT_REDIRECT_URL = os.environ.get('LOGOUT_REDIRECT_URL', '/')
 PASSWORD_RESET_URL = '/accounts/password_reset/'
 APPEND_SLASH = True
 
-DEBUG = DEBUG or ('--debug' in sys.argv)
+DEBUG = CONFIG.DEBUG or ('--debug' in sys.argv)
 
 
 # add plugins folders to system path, and load plugins in installed_apps
-BUILTIN_PLUGINS_DIR = PACKAGE_DIR / 'plugins'
-USER_PLUGINS_DIR = OUTPUT_DIR / 'plugins'
+BUILTIN_PLUGINS_DIR = CONFIG.PACKAGE_DIR / 'plugins'
+USER_PLUGINS_DIR = CONFIG.OUTPUT_DIR / 'plugins'
 sys.path.insert(0, str(BUILTIN_PLUGINS_DIR))
 sys.path.insert(0, str(USER_PLUGINS_DIR))
 
@@ -127,7 +104,7 @@ AUTHENTICATION_BACKENDS = [
     'django.contrib.auth.backends.ModelBackend',
 ]
 
-if LDAP:
+if CONFIG.LDAP:
     try:
         import ldap
         from django_auth_ldap.config import LDAPSearch
@@ -138,23 +115,23 @@ if LDAP:
         global AUTH_LDAP_USER_SEARCH
         global AUTH_LDAP_USER_ATTR_MAP
 
-        AUTH_LDAP_SERVER_URI = LDAP_SERVER_URI
-        AUTH_LDAP_BIND_DN = LDAP_BIND_DN
-        AUTH_LDAP_BIND_PASSWORD = LDAP_BIND_PASSWORD
+        AUTH_LDAP_SERVER_URI = CONFIG.LDAP_SERVER_URI
+        AUTH_LDAP_BIND_DN = CONFIG.LDAP_BIND_DN
+        AUTH_LDAP_BIND_PASSWORD = CONFIG.LDAP_BIND_PASSWORD
 
-        assert AUTH_LDAP_SERVER_URI and LDAP_USERNAME_ATTR and LDAP_USER_FILTER, 'LDAP_* config options must all be set if LDAP=True'
+        assert AUTH_LDAP_SERVER_URI and CONFIG.LDAP_USERNAME_ATTR and CONFIG.LDAP_USER_FILTER, 'LDAP_* config options must all be set if LDAP=True'
 
         AUTH_LDAP_USER_SEARCH = LDAPSearch(
-            LDAP_USER_BASE,
+            CONFIG.LDAP_USER_BASE,
             ldap.SCOPE_SUBTREE,
-            '(&(' + LDAP_USERNAME_ATTR + '=%(user)s)' + LDAP_USER_FILTER + ')',
+            '(&(' + CONFIG.LDAP_USERNAME_ATTR + '=%(user)s)' + CONFIG.LDAP_USER_FILTER + ')',
         )
 
         AUTH_LDAP_USER_ATTR_MAP = {
-            'username': LDAP_USERNAME_ATTR,
-            'first_name': LDAP_FIRSTNAME_ATTR,
-            'last_name': LDAP_LASTNAME_ATTR,
-            'email': LDAP_EMAIL_ATTR,
+            'username': CONFIG.LDAP_USERNAME_ATTR,
+            'first_name': CONFIG.LDAP_FIRSTNAME_ATTR,
+            'last_name': CONFIG.LDAP_LASTNAME_ATTR,
+            'email': CONFIG.LDAP_EMAIL_ATTR,
         }
 
         AUTHENTICATION_BACKENDS = [
@@ -206,6 +183,15 @@ if DEBUG_TOOLBAR:
     ]
     MIDDLEWARE = [*MIDDLEWARE, 'debug_toolbar.middleware.DebugToolbarMiddleware']
 
+if DEBUG:
+    from django_autotyping.typing import AutotypingSettingsDict
+
+    INSTALLED_APPS += ['django_autotyping']
+    AUTOTYPING: AutotypingSettingsDict = {
+        "STUBS_GENERATION": {
+            "LOCAL_STUBS_DIR": Path(CONFIG.PACKAGE_DIR) / "typings",
+        }
+    }
 
 # https://github.com/bensi94/Django-Requests-Tracker (improved version of django-debug-toolbar)
 # Must delete archivebox/templates/admin to use because it relies on some things we override
@@ -224,15 +210,15 @@ if DEBUG_REQUESTS_TRACKER:
 STATIC_URL = '/static/'
 
 STATICFILES_DIRS = [
-    *([str(CUSTOM_TEMPLATES_DIR / 'static')] if CUSTOM_TEMPLATES_DIR else []),
-    str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / 'static'),
+    *([str(CONFIG.CUSTOM_TEMPLATES_DIR / 'static')] if CONFIG.CUSTOM_TEMPLATES_DIR else []),
+    str(Path(CONFIG.PACKAGE_DIR) / CONFIG.TEMPLATES_DIR_NAME / 'static'),
 ]
 
 TEMPLATE_DIRS = [
-    *([str(CUSTOM_TEMPLATES_DIR)] if CUSTOM_TEMPLATES_DIR else []),
-    str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / 'core'),
-    str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / 'admin'),
-    str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME),
+    *([str(CONFIG.CUSTOM_TEMPLATES_DIR)] if CONFIG.CUSTOM_TEMPLATES_DIR else []),
+    str(Path(CONFIG.PACKAGE_DIR) / CONFIG.TEMPLATES_DIR_NAME / 'core'),
+    str(Path(CONFIG.PACKAGE_DIR) / CONFIG.TEMPLATES_DIR_NAME / 'admin'),
+    str(Path(CONFIG.PACKAGE_DIR) / CONFIG.TEMPLATES_DIR_NAME),
 ]
 
 TEMPLATES = [
@@ -258,10 +244,10 @@ TEMPLATES = [
 
 
 CACHE_DB_FILENAME = 'cache.sqlite3'
-CACHE_DB_PATH = CACHE_DIR / CACHE_DB_FILENAME
+CACHE_DB_PATH = CONFIG.CACHE_DIR / CACHE_DB_FILENAME
 CACHE_DB_TABLE = 'django_cache'
 
-DATABASE_FILE = Path(OUTPUT_DIR) / SQL_INDEX_FILENAME
+DATABASE_FILE = Path(CONFIG.OUTPUT_DIR) / CONFIG.SQL_INDEX_FILENAME
 DATABASE_NAME = os.environ.get("ARCHIVEBOX_DATABASE_NAME", str(DATABASE_FILE))
 
 DATABASES = {
@@ -272,7 +258,7 @@ DATABASES = {
             'timeout': 60,
             'check_same_thread': False,
         },
-        'TIME_ZONE': TIMEZONE,
+        'TIME_ZONE': CONFIG.TIMEZONE,
         # DB setup is sometimes modified at runtime by setup_django() in config.py
     },
     # 'cache': {
@@ -282,7 +268,7 @@ DATABASES = {
     #         'timeout': 60,
     #         'check_same_thread': False,
     #     },
-    #     'TIME_ZONE': TIMEZONE,
+    #     'TIME_ZONE': CONFIG.TIMEZONE,
     # },
 }
 MIGRATION_MODULES = {'signal_webhooks': None}
@@ -312,7 +298,7 @@ STORAGES = {
         "BACKEND": "django.core.files.storage.FileSystemStorage",
         "OPTIONS": {
             "base_url": "/archive/",
-            "location": ARCHIVE_DIR,
+            "location": CONFIG.ARCHIVE_DIR,
         },
     },
     # "personas": {
@@ -328,9 +314,9 @@ STORAGES = {
 ### Security Settings
 ################################################################################
 
-SECRET_KEY = SECRET_KEY or get_random_string(50, 'abcdefghijklmnopqrstuvwxyz0123456789_')
+SECRET_KEY = CONFIG.SECRET_KEY or get_random_string(50, 'abcdefghijklmnopqrstuvwxyz0123456789_')
 
-ALLOWED_HOSTS = ALLOWED_HOSTS.split(',')
+ALLOWED_HOSTS = CONFIG.ALLOWED_HOSTS.split(',')
 
 SECURE_BROWSER_XSS_FILTER = True
 SECURE_CONTENT_TYPE_NOSNIFF = True
@@ -361,7 +347,7 @@ SHELL_PLUS_PRINT_SQL = False
 IPYTHON_ARGUMENTS = ['--no-confirm-exit', '--no-banner']
 IPYTHON_KERNEL_DISPLAY_NAME = 'ArchiveBox Django Shell'
 if IS_SHELL:
-    os.environ['PYTHONSTARTUP'] = str(Path(PACKAGE_DIR) / 'core' / 'welcome_message.py')
+    os.environ['PYTHONSTARTUP'] = str(Path(CONFIG.PACKAGE_DIR) / 'core' / 'welcome_message.py')
 
 
 ################################################################################
@@ -373,10 +359,10 @@ USE_I18N = True
 USE_TZ = True
 DATETIME_FORMAT = 'Y-m-d g:iA'
 SHORT_DATETIME_FORMAT = 'Y-m-d h:iA'
-TIME_ZONE = TIMEZONE        # django convention is TIME_ZONE, archivebox config uses TIMEZONE, they are equivalent
+TIME_ZONE = CONFIG.TIMEZONE        # django convention is TIME_ZONE, archivebox config uses TIMEZONE, they are equivalent
 
 
-from django.conf.locale.en import formats as en_formats
+from django.conf.locale.en import formats as en_formats    # type: ignore
 
 en_formats.DATETIME_FORMAT = DATETIME_FORMAT
 en_formats.SHORT_DATETIME_FORMAT = SHORT_DATETIME_FORMAT
@@ -410,8 +396,8 @@ class NoisyRequestsFilter(logging.Filter):
 
         return 1
 
-if LOGS_DIR.exists():
-    ERROR_LOG = (LOGS_DIR / 'errors.log')
+if CONFIG.LOGS_DIR.exists():
+    ERROR_LOG = (CONFIG.LOGS_DIR / 'errors.log')
 else:
     # historically too many edge cases here around creating log dir w/ correct permissions early on
     # if there's an issue on startup, we trash the log and let user figure it out via stdout/stderr
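
The LDAP block above only swaps bare names for CONFIG.* attribute access; the search filter it assembles is unchanged. With example values plugged in (the attribute values here are assumptions, not ArchiveBox defaults), the filter string that django-auth-ldap later templates the login username into looks like this:

    LDAP_USERNAME_ATTR = 'uid'
    LDAP_USER_FILTER = '(objectClass=person)'

    # %(user)s is left in place for django-auth-ldap to substitute at bind time
    search_filter = '(&(' + LDAP_USERNAME_ATTR + '=%(user)s)' + LDAP_USER_FILTER + ')'

    assert search_filter == '(&(uid=%(user)s)(objectClass=person))'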

+ 1 - 1
archivebox/core/urls.py

@@ -46,7 +46,7 @@ urlpatterns = [
     # path('jet_api/', include('jet_django.urls')),  Enable to use https://www.jetadmin.io/integrations/django
 
     path('index.html', RedirectView.as_view(url='/')),
-    path('index.json', static.serve, {'document_root': settings.OUTPUT_DIR, 'path': 'index.json'}),
+    path('index.json', static.serve, {'document_root': settings.CONFIG.OUTPUT_DIR, 'path': 'index.json'}),
     path('', HomepageView.as_view(), name='Home'),
 ]
 urlpatterns += staticfiles_urlpatterns()

+ 16 - 23
archivebox/extractors/favicon.py

@@ -11,27 +11,18 @@ from ..util import (
     domain,
     dedupe,
 )
-from ..config import (
-    TIMEOUT,
-    SAVE_FAVICON,
-    FAVICON_PROVIDER,
-    CURL_BINARY,
-    CURL_ARGS,
-    CURL_EXTRA_ARGS,
-    CURL_VERSION,
-    CHECK_SSL_VALIDITY,
-    CURL_USER_AGENT,
-)
+from ..config import CONFIG
 from ..logging_util import TimedProgress
 
 
 @enforce_types
-def should_save_favicon(link: Link, out_dir: Optional[str]=None, overwrite: Optional[bool]=False) -> bool:
-    out_dir = out_dir or Path(link.link_dir)
+def should_save_favicon(link: Link, out_dir: str | Path | None=None, overwrite: bool=False) -> bool:
+    assert link.link_dir
+    out_dir = Path(out_dir or link.link_dir)
     if not overwrite and (out_dir / 'favicon.ico').exists():
         return False
 
-    return SAVE_FAVICON
+    return CONFIG.SAVE_FAVICON
 
 @enforce_types
 def get_output_path():
@@ -39,24 +30,26 @@ def get_output_path():
 
 
 @enforce_types
-def save_favicon(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult:
+def save_favicon(link: Link, out_dir: str | Path | None=None, timeout: int=CONFIG.TIMEOUT) -> ArchiveResult:
     """download site favicon from google's favicon api"""
 
-    out_dir = out_dir or link.link_dir
+    out_dir = Path(out_dir or link.link_dir)
+    assert out_dir.exists()
+
     output: ArchiveOutput = 'favicon.ico'
     # later options take precedence
     options = [
-        *CURL_ARGS,
-        *CURL_EXTRA_ARGS,
+        *CONFIG.CURL_ARGS,
+        *CONFIG.CURL_EXTRA_ARGS,
         '--max-time', str(timeout),
         '--output', str(output),
-        *(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []),
-        *([] if CHECK_SSL_VALIDITY else ['--insecure']),
+        *(['--user-agent', '{}'.format(CONFIG.CURL_USER_AGENT)] if CONFIG.CURL_USER_AGENT else []),
+        *([] if CONFIG.CHECK_SSL_VALIDITY else ['--insecure']),
     ]
     cmd = [
-        CURL_BINARY,
+        CONFIG.CURL_BINARY,
         *dedupe(options),
-        FAVICON_PROVIDER.format(domain(link.url)),
+        CONFIG.FAVICON_PROVIDER.format(domain(link.url)),
     ]
     status = 'failed'
     timer = TimedProgress(timeout, prefix='      ')
@@ -72,7 +65,7 @@ def save_favicon(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT)
     return ArchiveResult(
         cmd=cmd,
         pwd=str(out_dir),
-        cmd_version=CURL_VERSION,
+        cmd_version=CONFIG.CURL_VERSION,
         output=output,
         status=status,
         **timer.stats,

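should_save_favicon() and save_favicon() now accept str, Path, or None for out_dir and normalize to a Path up front instead of assuming the caller passed the right type. That normalization step on its own (helper name invented for the sketch):

    from pathlib import Path

    def resolve_out_dir(out_dir: str | Path | None, link_dir: str) -> Path:
        # fall back to the link's own directory, and always hand back a Path
        return Path(out_dir or link_dir)

    assert resolve_out_dir(None, '/data/archive/1712345678') == Path('/data/archive/1712345678')
    assert resolve_out_dir('/tmp/favicons', '/data/archive/1712345678') == Path('/tmp/favicons')
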
+ 8 - 16
archivebox/extractors/git.py

@@ -14,15 +14,7 @@ from ..util import (
     without_query,
     without_fragment,
 )
-from ..config import (
-    TIMEOUT,
-    SAVE_GIT,
-    GIT_BINARY,
-    GIT_ARGS,
-    GIT_VERSION,
-    GIT_DOMAINS,
-    CHECK_SSL_VALIDITY
-)
+from ..config import CONFIG
 from ..logging_util import TimedProgress
 
 
@@ -50,17 +42,17 @@ def should_save_git(link: Link, out_dir: Optional[Path]=None, overwrite: Optiona
         return False
 
     is_clonable_url = (
-        (domain(link.url) in GIT_DOMAINS)
+        (domain(link.url) in CONFIG.GIT_DOMAINS)
         or (extension(link.url) == 'git')
     )
     if not is_clonable_url:
         return False
 
-    return SAVE_GIT
+    return CONFIG.SAVE_GIT
 
 
 @enforce_types
-def save_git(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult:
+def save_git(link: Link, out_dir: Optional[Path]=None, timeout: int=CONFIG.TIMEOUT) -> ArchiveResult:
     """download full site using git"""
 
     out_dir = out_dir or Path(link.link_dir)
@@ -68,10 +60,10 @@ def save_git(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
     output_path = out_dir / output
     output_path.mkdir(exist_ok=True)
     cmd = [
-        GIT_BINARY,
+        CONFIG.GIT_BINARY,
         'clone',
-        *GIT_ARGS,
-        *([] if CHECK_SSL_VALIDITY else ['-c', 'http.sslVerify=false']),
+        *CONFIG.GIT_ARGS,
+        *([] if CONFIG.CHECK_SSL_VALIDITY else ['-c', 'http.sslVerify=false']),
         without_query(without_fragment(link.url)),
     ]
     status = 'succeeded'
@@ -96,7 +88,7 @@ def save_git(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
     return ArchiveResult(
         cmd=cmd,
         pwd=str(out_dir),
-        cmd_version=GIT_VERSION,
+        cmd_version=CONFIG.GIT_VERSION,
         output=output,
         status=status,
         **timer.stats,

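Context for the unchanged without_query(without_fragment(link.url)) line above: the clone URL is stripped of its query string and fragment before being handed to git. A rough stdlib equivalent, shown only to illustrate the intent (the real archivebox.util helpers may handle edge cases differently):

    from urllib.parse import urlsplit, urlunsplit

    def strip_query_and_fragment(url: str) -> str:
        parts = urlsplit(url)
        return urlunsplit((parts.scheme, parts.netloc, parts.path, '', ''))

    cleaned = strip_query_and_fragment('https://github.com/user/repo.git?tab=readme#top')
    assert cleaned == 'https://github.com/user/repo.git'
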
+ 6 - 6
archivebox/index/schema.py

@@ -192,12 +192,12 @@ class Link:
         if extended:
             info.update({
                 'snapshot_id': self.snapshot_id,
-                'snapshot_uuid': self.snapshot_uuid,
+                'snapshot_old_id': self.snapshot_old_id,
                 'snapshot_abid': self.snapshot_abid,
 
                 'link_dir': self.link_dir,
                 'archive_path': self.archive_path,
-                
+
                 'hash': self.url_hash,
                 'base_url': self.base_url,
                 'scheme': self.scheme,
@@ -206,7 +206,7 @@ class Link:
                 'basename': self.basename,
                 'extension': self.extension,
                 'is_static': self.is_static,
-                
+
                 'tags_str': (self.tags or '').strip(','),   # only used to render static index in index/html.py, remove if no longer needed there
                 'icons': None,           # only used to render static index in index/html.py, remove if no longer needed there
 
@@ -266,15 +266,15 @@ class Link:
     @cached_property
     def snapshot(self):
         from core.models import Snapshot
-        return Snapshot.objects.only('id').get(url=self.url)
+        return Snapshot.objects.only('id', 'old_id', 'abid').get(url=self.url)
 
     @cached_property
     def snapshot_id(self):
         return str(self.snapshot.pk)
 
     @cached_property
-    def snapshot_uuid(self):
-        return str(self.snapshot.id)
+    def snapshot_old_id(self):
+        return str(self.snapshot.old_id)
 
     @cached_property
     def snapshot_abid(self):

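The snapshot property above now fetches old_id and abid alongside id because the renamed snapshot_old_id (and snapshot_abid) read them; loading them in the same .only() avoids an extra deferred-field query later, and cached_property keeps the lookup to one query per Link instance. The caching behavior in isolation (toy class, not the real Link):

    from functools import cached_property

    class LinkSketch:
        lookups = 0     # stands in for the DB query counter

        @cached_property
        def snapshot(self):
            LinkSketch.lookups += 1
            return {'pk': 1, 'old_id': 2, 'abid': 'placeholder'}

    link = LinkSketch()
    _ = (link.snapshot, link.snapshot, link.snapshot)
    assert LinkSketch.lookups == 1      # computed once, then served from the instance cache
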
+ 3 - 1
archivebox/manage.py

@@ -7,7 +7,9 @@ if __name__ == '__main__':
     # versions of ./manage.py commands whenever possible. When that's not possible
     # (e.g. makemigrations), you can comment out this check temporarily
 
-    if not ('makemigrations' in sys.argv or 'migrate' in sys.argv or 'startapp' in sys.argv or 'squashmigrations' in sys.argv):
+    allowed_commands = ['makemigrations', 'migrate', 'startapp','squashmigrations', 'generate_stubs']
+
+    if not any(cmd in sys.argv for cmd in allowed_commands):
         print("[X] Don't run ./manage.py directly (unless you are a developer running makemigrations):")
         print()
         print('    Hint: Use these archivebox CLI commands instead of the ./manage.py equivalents:')
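
The rewritten guard is equivalent to the old chain of "'makemigrations' in sys.argv or 'migrate' in sys.argv or ..." checks, just driven by a list so new commands like generate_stubs only need one edit. The check on its own:

    allowed_commands = ['makemigrations', 'migrate', 'startapp', 'squashmigrations', 'generate_stubs']

    def is_allowed(argv: list[str]) -> bool:
        return any(cmd in argv for cmd in allowed_commands)

    assert is_allowed(['./manage.py', 'migrate', '--noinput'])
    assert not is_allowed(['./manage.py', 'runserver'])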