
massively improve Snapshot admin list view query performance

Nick Sweeting 1 year ago
parent
commit
24fe958ff3
5 changed files with 194 additions and 39 deletions
  1. archivebox/config.py (+1 -1)
  2. archivebox/core/admin.py (+108 -25)
  3. archivebox/core/models.py (+64 -11)
  4. archivebox/extractors/wget.py (+12 -1)
  5. archivebox/index/html.py (+9 -1)

+ 1 - 1
archivebox/config.py

@@ -103,7 +103,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
         'PUBLIC_SNAPSHOTS':          {'type': bool,  'default': True},
         'PUBLIC_ADD_VIEW':           {'type': bool,  'default': False},
         'FOOTER_INFO':               {'type': str,   'default': 'Content is hosted for personal archiving purposes only.  Contact server owner for any takedown requests.'},
-        'SNAPSHOTS_PER_PAGE':        {'type': int,   'default': 40},
+        'SNAPSHOTS_PER_PAGE':        {'type': int,   'default': 100},
         'CUSTOM_TEMPLATES_DIR':      {'type': str,   'default': None},
         'TIME_ZONE':                 {'type': str,   'default': 'UTC'},
         'TIMEZONE':                  {'type': str,   'default': 'UTC'},

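The only config change is the default admin page size, raised from 40 to 100 snapshots per page. Raising it is safe because the admin.py change below both clamps whatever value is configured and switches to a much cheaper paginator. A tiny illustrative sketch of that clamp (mirroring list_per_page = min(max(5, CONFIG.SNAPSHOTS_PER_PAGE), 5000) in core/admin.py; the standalone function is not part of the commit):

    def clamped_per_page(configured: int, floor: int = 5, ceiling: int = 5000) -> int:
        """Keep the admin page size within sane bounds regardless of the configured value."""
        return min(max(floor, configured), ceiling)

    assert clamped_per_page(100) == 100       # the new default passes through unchanged
    assert clamped_per_page(0) == 5           # too-small values are raised to the floor
    assert clamped_per_page(10**6) == 5000    # too-large values are capped at the ceiling
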
+ 108 - 25
archivebox/core/admin.py

@@ -10,12 +10,15 @@ from datetime import datetime, timezone
 from typing import Dict, Any
 
 from django.contrib import admin
-from django.db.models import Count, Q
-from django.urls import path, reverse
+from django.db.models import Count, Q, Prefetch
+from django.urls import path, reverse, resolve
+from django.utils import timezone
+from django.utils.functional import cached_property
 from django.utils.html import format_html
 from django.utils.safestring import mark_safe
 from django.shortcuts import render, redirect
 from django.contrib.auth import get_user_model
+from django.core.paginator import Paginator
 from django.core.exceptions import ValidationError
 from django.conf import settings
 from django import forms
@@ -126,22 +129,99 @@ archivebox_admin.get_admin_data_urls = get_admin_data_urls.__get__(archivebox_ad
 archivebox_admin.get_urls = get_urls(archivebox_admin.get_urls).__get__(archivebox_admin, ArchiveBoxAdmin)


+class AccelleratedPaginator(Paginator):
+    """
+    Accellerated Pagniator ignores DISTINCT when counting total number of rows.
+    Speeds up SELECT Count(*) on Admin views by >20x.
+    https://hakibenita.com/optimizing-the-django-admin-paginator
+    """
+
+    @cached_property
+    def count(self):
+        if self.object_list._has_filters():
+            # fallback to normal count method on filtered queryset
+            return super().count
+        else:
+            # otherwise count total rows in a separate fast query
+            return self.object_list.model.objects.count()
+    
+        # Alternative approach for PostgreSQL: fallback count takes > 200ms
+        # from django.db import connection, transaction, OperationalError
+        # with transaction.atomic(), connection.cursor() as cursor:
+        #     cursor.execute('SET LOCAL statement_timeout TO 200;')
+        #     try:
+        #         return super().count
+        #     except OperationalError:
+        #         return 9999999999999
+
+
 class ArchiveResultInline(admin.TabularInline):
     name = 'Archive Results Log'
     model = ArchiveResult
+    parent_model = Snapshot
     # fk_name = 'snapshot'
-    extra = 1
-    readonly_fields = ('result_id', 'start_ts', 'end_ts', 'extractor', 'command', 'cmd_version')
-    fields = ('id', *readonly_fields, 'status', 'output')
+    extra = 0
+    sort_fields = ('end_ts', 'extractor', 'output', 'status', 'cmd_version')
+    readonly_fields = ('result_id', 'completed', 'extractor', 'command', 'version')
+    fields = ('id', 'start_ts', 'end_ts', *readonly_fields, 'cmd', 'cmd_version', 'pwd', 'created_by', 'status', 'output')
+    # exclude = ('id',)
+    ordering = ('end_ts',)
     show_change_link = True
     # # classes = ['collapse']
     # # list_display_links = ['abid']

+    def get_parent_object_from_request(self, request):
+        resolved = resolve(request.path_info)
+        return self.parent_model.objects.get(pk=resolved.kwargs['object_id'])
+
+    @admin.display(
+        description='Completed',
+        ordering='end_ts',
+    )
+    def completed(self, obj):
+        return format_html('<p style="white-space: nowrap">{}</p>', obj.end_ts.strftime('%Y-%m-%d %H:%M:%S'))
+
     def result_id(self, obj):
-        return format_html('<a href="{}"><small><code>[{}]</code></small></a>', reverse('admin:core_archiveresult_change', args=(obj.id,)), obj.abid)
+        return format_html('<a href="{}"><code style="font-size: 10px">[{}]</code></a>', reverse('admin:core_archiveresult_change', args=(obj.id,)), obj.abid)
     
     
     def command(self, obj):
         return format_html('<small><code>{}</code></small>', " ".join(obj.cmd or []))
+    
+    def version(self, obj):
+        return format_html('<small><code>{}</code></small>', obj.cmd_version or '-')
+    
+    def get_formset(self, request, obj=None, **kwargs):
+        formset = super().get_formset(request, obj, **kwargs)
+        snapshot = self.get_parent_object_from_request(request)
+
+        # import ipdb; ipdb.set_trace()
+        formset.form.base_fields['id'].widget = formset.form.base_fields['id'].hidden_widget()
+        
+        # default values for new entries
+        formset.form.base_fields['status'].initial = 'succeeded'
+        formset.form.base_fields['start_ts'].initial = timezone.now()
+        formset.form.base_fields['end_ts'].initial = timezone.now()
+        formset.form.base_fields['cmd_version'].initial = '-'
+        formset.form.base_fields['pwd'].initial = str(snapshot.link_dir)
+        formset.form.base_fields['created_by'].initial = request.user
+        formset.form.base_fields['cmd'] = forms.JSONField(initial=['-'])
+        formset.form.base_fields['output'].initial = 'Manually recorded cmd output...'
+        
+        if obj is not None:
+            # hidden values for existing entries and new entries
+            formset.form.base_fields['start_ts'].widget = formset.form.base_fields['start_ts'].hidden_widget()
+            formset.form.base_fields['end_ts'].widget = formset.form.base_fields['end_ts'].hidden_widget()
+            formset.form.base_fields['cmd'].widget = formset.form.base_fields['cmd'].hidden_widget()
+            formset.form.base_fields['pwd'].widget = formset.form.base_fields['pwd'].hidden_widget()
+            formset.form.base_fields['created_by'].widget = formset.form.base_fields['created_by'].hidden_widget()
+            formset.form.base_fields['cmd_version'].widget = formset.form.base_fields['cmd_version'].hidden_widget()
+        return formset
+    
+    def get_readonly_fields(self, request, obj=None):
+        if obj is not None:
+            return self.readonly_fields
+        else:
+            return []
 
 
 
 
 class TagInline(admin.TabularInline):
@@ -222,25 +302,22 @@ def get_abid_info(self, obj):
 
 
 @admin.register(Snapshot, site=archivebox_admin)
 class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
-    class Meta:
-        model = Snapshot
-
     list_display = ('added', 'title_str', 'files', 'size', 'url_str')
-    # list_editable = ('title',)
     sort_fields = ('title_str', 'url_str', 'added', 'files')
-    readonly_fields = ('tags', 'timestamp', 'admin_actions', 'status_info', 'bookmarked', 'added', 'updated', 'created', 'modified', 'API', 'link_dir')
+    readonly_fields = ('tags_str', 'timestamp', 'admin_actions', 'status_info', 'bookmarked', 'added', 'updated', 'created', 'modified', 'API', 'link_dir')
     search_fields = ('id', 'url', 'abid', 'old_id', 'timestamp', 'title', 'tags__name')
-    list_filter = ('added', 'updated', 'archiveresult__status', 'created_by', 'tags')
+    list_filter = ('added', 'updated', 'archiveresult__status', 'created_by', 'tags__name')
     fields = ('url', 'created_by', 'title', *readonly_fields)
     ordering = ['-added']
     actions = ['add_tags', 'remove_tags', 'update_titles', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots']
-    autocomplete_fields = ['tags']
     inlines = [TagInline, ArchiveResultInline]
-    list_per_page = CONFIG.SNAPSHOTS_PER_PAGE
+    list_per_page = min(max(5, CONFIG.SNAPSHOTS_PER_PAGE), 5000)
 
 
     action_form = SnapshotActionForm
+    paginator = AccelleratedPaginator
 
 
     save_on_top = True
+    show_full_result_count = False
 
 
     def changelist_view(self, request, extra_context=None):
         extra_context = extra_context or {}
@@ -286,12 +363,15 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
         ]
         return custom_urls + urls

-    def get_queryset(self, request):
-        self.request = request
-        return super().get_queryset(request).prefetch_related('tags', 'archiveresult_set').annotate(archiveresult_count=Count('archiveresult'))
+    # def get_queryset(self, request):
+    #     # tags_qs = SnapshotTag.objects.all().select_related('tag')
+    #     # prefetch = Prefetch('snapshottag_set', queryset=tags_qs)
+
+    #     self.request = request
+    #     return super().get_queryset(request).prefetch_related('archiveresult_set').distinct()  # .annotate(archiveresult_count=Count('archiveresult'))
 
 
     def tag_list(self, obj):
-        return ', '.join(obj.tags.values_list('name', flat=True))
+        return ', '.join(tag.name for tag in obj.tags.all())
 
 
     # TODO: figure out a different way to do this, you cant nest forms so this doenst work
     # def action(self, obj):
@@ -360,21 +440,20 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
         ordering='title',
     )
     def title_str(self, obj):
-        canon = obj.as_link().canonical_outputs()
         tags = ''.join(
-            format_html('<a href="/admin/core/snapshot/?tags__id__exact={}"><span class="tag">{}</span></a> ', tag.id, tag)
+            format_html('<a href="/admin/core/snapshot/?tags__id__exact={}"><span class="tag">{}</span></a> ', tag.pk, tag.name)
             for tag in obj.tags.all()
-            if str(tag).strip()
+            if str(tag.name).strip()
         )
         return format_html(
             '<a href="/{}">'
-                '<img src="/{}/{}" class="favicon" onerror="this.remove()">'
+                '<img src="/{}/favicon.ico" class="favicon" onerror="this.remove()">'
             '</a>'
             '<a href="/{}/index.html">'
                 '<b class="status-{}">{}</b>'
             '</a>',
             obj.archive_path,
-            obj.archive_path, canon['favicon_path'],
+            obj.archive_path,
             obj.archive_path,
             'fetched' if obj.latest_title or obj.title else 'pending',
             urldecode(htmldecode(obj.latest_title or obj.title or ''))[:128] or 'Pending...'
@@ -382,14 +461,14 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
 
 
     @admin.display(
         description='Files Saved',
-        ordering='archiveresult_count',
+        # ordering='archiveresult_count',
     )
     def files(self, obj):
         return snapshot_icons(obj)
 
 
 
 
     @admin.display(
-        ordering='archiveresult_count'
+        # ordering='archiveresult_count'
     )
     def size(self, obj):
         archive_size = (Path(obj.link_dir) / 'index.html').exists() and obj.archive_size
@@ -536,6 +615,8 @@ class TagAdmin(ABIDModelAdmin):
     actions = ['delete_selected']
     ordering = ['-created']

+    paginator = AccelleratedPaginator
+
     def API(self, obj):
         try:
             return get_abid_info(self, obj)
@@ -574,6 +655,8 @@ class ArchiveResultAdmin(ABIDModelAdmin):
     list_filter = ('status', 'extractor', 'start_ts', 'cmd_version')
     ordering = ['-start_ts']
     list_per_page = CONFIG.SNAPSHOTS_PER_PAGE
+    
+    paginator = AccelleratedPaginator
 
 
     @admin.display(
     @admin.display(
         description='Snapshot Info'
         description='Snapshot Info'
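The admin.py changes attack two costs on the Snapshot changelist: the COUNT(*) with DISTINCT and joins that Django runs on every page load (see the hakibenita.com article linked in the AccelleratedPaginator docstring), and the per-row queries for tags and archive results, which now come from the prefetching manager added in core/models.py. A rough standalone sketch of the paginator pattern and how it plugs into a ModelAdmin (class and model names here are illustrative, not ArchiveBox's real ones):

    from django.core.paginator import Paginator
    from django.utils.functional import cached_property

    class FastCountPaginator(Paginator):
        """Skip the exact, expensive count when the changelist is unfiltered."""

        @cached_property
        def count(self):
            # _has_filters() is private QuerySet API; the commit above relies on it the same way
            if self.object_list._has_filters():
                # a search or filter is active: fall back to Django's exact count
                return super().count
            # unfiltered changelist: a plain COUNT(*) on the table is far cheaper
            # than COUNT(DISTINCT ...) over the joined, annotated queryset
            return self.object_list.model.objects.count()

    # Wiring it into an admin class (sketch):
    #
    # @admin.register(MyModel)
    # class MyModelAdmin(admin.ModelAdmin):
    #     paginator = FastCountPaginator
    #     show_full_result_count = False   # skip the second full count used for the "x of y" label
    #     list_per_page = 100
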

+ 64 - 11
archivebox/core/models.py

@@ -125,6 +125,12 @@ class SnapshotTag(models.Model):
         db_table = 'core_snapshot_tags'
         db_table = 'core_snapshot_tags'
         unique_together = [('snapshot', 'tag')]
         unique_together = [('snapshot', 'tag')]
 
 
+
+class SnapshotManager(models.Manager):
+    def get_queryset(self):
+        return super().get_queryset().prefetch_related('tags', 'archiveresult_set')  # .annotate(archiveresult_count=models.Count('archiveresult')).distinct()
+
+
 class Snapshot(ABIDModel):
 class Snapshot(ABIDModel):
     abid_prefix = 'snp_'
     abid_prefix = 'snp_'
     abid_ts_src = 'self.added'
     abid_ts_src = 'self.added'
@@ -150,6 +156,8 @@ class Snapshot(ABIDModel):
 
 
     archiveresult_set: models.Manager['ArchiveResult']
     archiveresult_set: models.Manager['ArchiveResult']
 
 
+    objects = SnapshotManager()
+
     @property
     @property
     def uuid(self):
     def uuid(self):
         return self.id
         return self.id
@@ -177,8 +185,7 @@ class Snapshot(ABIDModel):
     def as_json(self, *args) -> dict:
     def as_json(self, *args) -> dict:
         args = args or self.keys
         args = args or self.keys
         return {
         return {
-            key: getattr(self, key)
-            if key != 'tags' else self.tags_str()
+            key: getattr(self, key) if key != 'tags' else self.tags_str(nocache=False)
             for key in args
             for key in args
         }
         }
 
 
@@ -190,8 +197,14 @@ class Snapshot(ABIDModel):
         return load_link_details(self.as_link())
         return load_link_details(self.as_link())
 
 
     def tags_str(self, nocache=True) -> str | None:
     def tags_str(self, nocache=True) -> str | None:
+        calc_tags_str = lambda: ','.join(sorted(tag.name for tag in self.tags.all()))
         cache_key = f'{self.pk}-{(self.updated or self.added).timestamp()}-tags'
         cache_key = f'{self.pk}-{(self.updated or self.added).timestamp()}-tags'
-        calc_tags_str = lambda: ','.join(self.tags.order_by('name').values_list('name', flat=True))
+        
+        if hasattr(self, '_prefetched_objects_cache') and 'tags' in self._prefetched_objects_cache:
+            # tags are pre-fetched already, use them directly (best because db is always freshest)
+            tags_str = calc_tags_str()
+            return tags_str
+        
         if nocache:
         if nocache:
             tags_str = calc_tags_str()
             tags_str = calc_tags_str()
             cache.set(cache_key, tags_str)
             cache.set(cache_key, tags_str)
@@ -234,7 +247,10 @@ class Snapshot(ABIDModel):
 
 
     @cached_property
     def num_outputs(self) -> int:
-        return self.archiveresult_set.filter(status='succeeded').count()
+        # DONT DO THIS: it will trigger a separate query for every snapshot
+        # return self.archiveresult_set.filter(status='succeeded').count()
+        # this is better:
+        return sum((1 for result in self.archiveresult_set.all() if result.status == 'succeeded'))
 
 
     @cached_property
     def base_url(self):
@@ -262,10 +278,21 @@ class Snapshot(ABIDModel):
 
 
     @cached_property
     def thumbnail_url(self) -> Optional[str]:
-        result = self.archiveresult_set.filter(
-            extractor='screenshot',
-            status='succeeded'
-        ).only('output').last()
+        if hasattr(self, '_prefetched_objects_cache') and 'archiveresult_set' in self._prefetched_objects_cache:
+            result = (sorted(
+                (
+                    result
+                    for result in self.archiveresult_set.all()
+                    if result.extractor == 'screenshot' and result.status =='succeeded' and result.output
+                ),
+                key=lambda result: result.created,
+            ) or [None])[-1]
+        else:
+            result = self.archiveresult_set.filter(
+                extractor='screenshot',
+                status='succeeded'
+            ).only('output').last()
+
         if result:
             return reverse('Snapshot', args=[f'{str(self.timestamp)}/{result.output}'])
         return None
@@ -292,6 +319,21 @@ class Snapshot(ABIDModel):
         if self.title:
             return self.title   # whoopdedoo that was easy

+        # check if ArchiveResult set has already been prefetched, if so use it instead of fetching it from db again
+        if hasattr(self, '_prefetched_objects_cache') and 'archiveresult_set' in self._prefetched_objects_cache:
+            try:
+                return (sorted(
+                    (
+                        result.output.strip()
+                        for result in self.archiveresult_set.all()
+                        if result.extractor == 'title' and result.status =='succeeded' and result.output
+                    ),
+                    key=lambda title: len(title),
+                ) or [None])[-1]
+            except IndexError:
+                pass
+        
+
         try:
             # take longest successful title from ArchiveResult db history
             return sorted(
@@ -355,12 +397,23 @@ class Snapshot(ABIDModel):
 
 
 class ArchiveResultManager(models.Manager):
     def indexable(self, sorted: bool = True):
+        """Return only ArchiveResults containing text suitable for full-text search (sorted in order of typical result quality)"""
+        
         INDEXABLE_METHODS = [ r[0] for r in ARCHIVE_METHODS_INDEXING_PRECEDENCE ]
-        qs = self.get_queryset().filter(extractor__in=INDEXABLE_METHODS,status='succeeded')
+        qs = self.get_queryset().filter(extractor__in=INDEXABLE_METHODS, status='succeeded')
 
 
         if sorted:
-            precedence = [ When(extractor=method, then=Value(precedence)) for method, precedence in ARCHIVE_METHODS_INDEXING_PRECEDENCE ]
-            qs = qs.annotate(indexing_precedence=Case(*precedence, default=Value(1000),output_field=IntegerField())).order_by('indexing_precedence')
+            precedence = [
+                When(extractor=method, then=Value(precedence))
+                for method, precedence in ARCHIVE_METHODS_INDEXING_PRECEDENCE
+            ]
+            qs = qs.annotate(
+                indexing_precedence=Case(
+                    *precedence,
+                    default=Value(1000),
+                    output_field=IntegerField()
+                )
+            ).order_by('indexing_precedence')
         return qs

 class ArchiveResult(ABIDModel):

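All of the models.py changes follow one pattern: SnapshotManager prefetches tags and archiveresult_set for the whole page up front, and the per-snapshot helpers (num_outputs, thumbnail_url, the latest-title lookup, tags_str) check _prefetched_objects_cache so they can answer from rows already in memory instead of issuing another query per Snapshot row. A simplified, hypothetical sketch of the pattern outside ArchiveBox (Item/SubItem are stand-ins, and the settings.configure() boilerplate is only there so the snippet runs on its own):

    import django
    from django.conf import settings

    settings.configure(
        INSTALLED_APPS=[],
        DATABASES={"default": {"ENGINE": "django.db.backends.sqlite3", "NAME": ":memory:"}},
    )
    django.setup()

    from django.db import models

    class ItemManager(models.Manager):
        def get_queryset(self):
            # fetch the related rows for every item on the page in one extra query,
            # the same role SnapshotManager plays above
            return super().get_queryset().prefetch_related("subitem_set")

    class Item(models.Model):
        name = models.CharField(max_length=64)
        objects = ItemManager()

        class Meta:
            app_label = "demo"

        @property
        def num_succeeded(self) -> int:
            if "subitem_set" in getattr(self, "_prefetched_objects_cache", {}):
                # already in memory: filter in Python, zero extra queries per row
                return sum(1 for s in self.subitem_set.all() if s.status == "succeeded")
            # not prefetched: fall back to a single targeted query
            return self.subitem_set.filter(status="succeeded").count()

    class SubItem(models.Model):
        item = models.ForeignKey(Item, on_delete=models.CASCADE)
        status = models.CharField(max_length=32)

        class Meta:
            app_label = "demo"
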
+ 12 - 1
archivebox/extractors/wget.py

@@ -197,7 +197,7 @@ def unsafe_wget_output_path(link: Link) -> Optional[str]:
 
 
 
 
 @enforce_types
-def wget_output_path(link: Link) -> Optional[str]:
+def wget_output_path(link: Link, nocache: bool=False) -> Optional[str]:
     """calculate the path to the wgetted .html file, since wget may
     """calculate the path to the wgetted .html file, since wget may
     adjust some paths to be different than the base_url path.
     adjust some paths to be different than the base_url path.
 
 
@@ -245,6 +245,15 @@ def wget_output_path(link: Link) -> Optional[str]:
     #    https://example.com/abc/test/?v=zzVa_tX1OiI
     #       > example.com/abc/test/index.html@v=zzVa_tX1OiI.html

+    cache_key = f'{link.url_hash}:{link.timestamp}-{link.updated and link.updated.timestamp()}-wget-output-path'
+    
+    if not nocache:
+        from django.core.cache import cache
+        cached_result = cache.get(cache_key)
+        if cached_result:
+            return cached_result
+
+
     # There's also lots of complexity around how the urlencoding and renaming
     # is done for pages with query and hash fragments, extensions like shtml / htm / php / etc,
     # unicode escape sequences, punycode domain names, unicode double-width characters, extensions longer than
@@ -271,6 +280,8 @@ def wget_output_path(link: Link) -> Optional[str]:
         output_path = None

     if output_path:
+        if not nocache:
+            cache.set(cache_key, output_path)
         return output_path

     # fallback to just the domain dir

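wget_output_path() has to probe the filesystem and guess among several possible wget output filenames, and it was being recomputed for every row render. The change above memoizes the resolved path in Django's cache, keyed on the link's URL hash plus its timestamps so entries naturally stop matching once a snapshot is updated, with a nocache escape hatch. A hedged sketch of the same memoization shape (resolve_output_path() is a hypothetical stand-in for the slow lookup; the key fields mirror the ones used above):

    from django.core.cache import cache

    def cached_output_path(link, nocache: bool = False):
        """Memoize an expensive per-link path lookup, as extractors/wget.py does above."""
        # the key changes whenever the snapshot is updated, so stale entries are simply never read again
        cache_key = f'{link.url_hash}:{link.timestamp}-{link.updated and link.updated.timestamp()}-wget-output-path'

        if not nocache:
            cached_result = cache.get(cache_key)
            if cached_result:
                return cached_result

        output_path = resolve_output_path(link)   # hypothetical: the slow filesystem probing

        if output_path and not nocache:
            cache.set(cache_key, output_path)
        return output_path
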
+ 9 - 1
archivebox/index/html.py

@@ -124,7 +124,15 @@ def snapshot_icons(snapshot) -> str:
         from core.models import ArchiveResult
         # start = datetime.now(timezone.utc)

-        archive_results = snapshot.archiveresult_set.filter(status="succeeded", output__isnull=False)
+        if hasattr(snapshot, '_prefetched_objects_cache') and 'archiveresult_set' in snapshot._prefetched_objects_cache:
+            archive_results = [
+                result
+                for result in snapshot.archiveresult_set.all()
+                if result.status == "succeeded" and result.output
+            ]
+        else:
+            archive_results = snapshot.archiveresult_set.filter(status="succeeded", output__isnull=False)
+
         link = snapshot.as_link()
         link = snapshot.as_link()
         path = link.archive_path
         path = link.archive_path
         canon = link.canonical_outputs()
         canon = link.canonical_outputs()