5 years ago · 104553489f
--- a/archivebox/core/admin.py
+++ b/archivebox/core/admin.py
@@ -13,8 +13,8 @@ from django import forms
 
															 from core.models import Snapshot, Tag
														
 
															 from core.forms import AddLinkForm, TagField
														
 
															-from core.utils import get_icons
														
 
															+from index.html import snapshot_icons
														
 
															 from util import htmldecode, urldecode, ansi_to_html
														
 
															 from logging_util import printable_filesize
														
 
															 from main import add, remove
														
@@ -128,7 +128,7 @@ class SnapshotAdmin(admin.ModelAdmin):
 
															         ) + mark_safe(f' <span class="tags">{tags}</span>')
														
 
															     def files(self, obj):
														
 
															-        return get_icons(obj)
														
 
															+        return snapshot_icons(obj)
														
 
															     def size(self, obj):
														
 
															         archive_size = obj.archive_size
														
--- a/archivebox/core/utils.py
+++ b/archivebox/core/utils.py
@@ -1,54 +0,0 @@
 
															-from django.utils.html import format_html
														
 
															-from collections import defaultdict
														
 
															-
														
 
															-from core.models import Snapshot, EXTRACTORS
														
 
															-from pathlib import Path
														
 
															-
														
 
															-
														
 
															-def get_icons(snapshot: Snapshot) -> str:
														
 
															-    archive_results = snapshot.archiveresult_set.filter(status="succeeded")
														
 
															-    link = snapshot.as_link()
														
 
															-    path = link.archive_path
														
 
															-    canon = link.canonical_outputs()
														
 
															-    output = ""
														
 
															-    output_template = '<a href="/{}/{}" class="exists-{}" title="{}">{} </a>'
														
 
															-    icons = {
														
 
															-        "singlefile": "❶",
														
 
															-        "wget": "🆆",
														
 
															-        "dom": "🅷",
														
 
															-        "pdf": "📄",
														
 
															-        "screenshot": "💻",
														
 
															-        "media": "📼",
														
 
															-        "git": "🅶",
														
 
															-        "archive_org": "🏛",
														
 
															-        "readability": "🆁",
														
 
															-        "mercury": "🅼",
														
 
															-        "warc": "📦"
														
 
															-    }
														
 
															-    exclude = ["favicon", "title", "headers", "archive_org"]
														
 
															-    # Missing specific entry for WARC
														
 
															-
														
 
															-    extractor_items = defaultdict(lambda: None)
														
 
															-    for extractor, _ in EXTRACTORS:
														
 
															-        for result in archive_results:
														
 
															-            if result.extractor == extractor:
														
 
															-                extractor_items[extractor] = result
														
 
															-
														
 
															-    for extractor, _ in EXTRACTORS:
														
 
															-        if extractor not in exclude:
														
 
															-            exists = extractor_items[extractor] is not None
														
 
															-            output += output_template.format(path, canon[f"{extractor}_path"], str(exists),
														
 
															-                                             extractor, icons.get(extractor, "?"))
														
 
															-        if extractor == "wget":
														
 
															-            # warc isn't technically it's own extractor, so we have to add it after wget
														
 
															-            exists = list((Path(path) / canon["warc_path"]).glob("*.warc.gz"))
														
 
															-            output += output_template.format(exists[0] if exists else '#', canon["warc_path"], str(bool(exists)), "warc", icons.get("warc", "?"))
														
 
															-
														
 
															-        if extractor == "archive_org":
														
 
															-            # The check for archive_org is different, so it has to be handled separately
														
 
															-            target_path = Path(path) / "archive.org.txt"
														
 
															-            exists = target_path.exists()
														
 
															-            output += '<a href="{}" class="exists-{}" title="{}">{}</a> '.format(canon["archive_org_path"], str(exists),
														
 
															-                                                                                        "archive_org", icons.get("archive_org", "?"))
														
 
															-
														
 
															-    return format_html(f'<span class="files-icons" style="font-size: 1.1em; opacity: 0.8">{output}<span>')
														
--- a/archivebox/core/views.py
+++ b/archivebox/core/views.py
@@ -12,7 +12,6 @@ from django.views.generic import FormView
 
															 from django.contrib.auth.mixins import UserPassesTestMixin
														
 
															 from core.models import Snapshot
														
 
															-from core.utils import get_icons
														
 
															 from core.forms import AddLinkForm
														
 
															 from ..config import (
														
@@ -25,6 +24,7 @@ from ..config import (
 
															 )
														
 
															 from main import add
														
 
															 from ..util import base_url, ansi_to_html
														
 
															+from ..index.html import snapshot_icons
														
 
															 class MainIndex(View):
														
@@ -108,7 +108,7 @@ class PublicArchiveView(ListView):
 
															         if query:
														
 
															             qs = Snapshot.objects.filter(title__icontains=query)
														
 
															         for snapshot in qs:
														
 
															-            snapshot.icons = get_icons(snapshot) 
														
 
															+            snapshot.icons = snapshot_icons(snapshot)
														
 
															         return qs
														
 
															     def get(self, *args, **kwargs):
														
--- a/archivebox/index/html.py
+++ b/archivebox/index/html.py
@@ -5,8 +5,13 @@ from datetime import datetime
 
															 from typing import List, Optional, Iterator, Mapping
														
 
															 from pathlib import Path
														
 
															+from django.utils.html import format_html
														
 
															+from collections import defaultdict
														
 
															+
														
 
															+from pathlib import Path
														
 
															+
														
 
															 from .schema import Link
														
 
															-from ..system import atomic_write, copy_and_overwrite
														
 
															+from ..system import atomic_write
														
 
															 from ..logging_util import printable_filesize
														
 
															 from ..util import (
														
 
															     enforce_types,
														
@@ -23,9 +28,6 @@ from ..config import (
 
															     FOOTER_INFO,
														
 
															     ARCHIVE_DIR_NAME,
														
 
															     HTML_INDEX_FILENAME,
														
 
															-    STATIC_DIR_NAME,
														
 
															-    ROBOTS_TXT_FILENAME,
														
 
															-    FAVICON_FILENAME,
														
 
															 )
														
 
															 MAIN_INDEX_TEMPLATE = str(Path(TEMPLATES_DIR) / 'main_index.html')
														
@@ -143,3 +145,56 @@ def render_legacy_template(template_path: str, context: Mapping[str, str]) -> st
 
															     with open(template_path, 'r', encoding='utf-8') as template:
														
 
															         template_str = template.read()
														
 
															     return Template(template_str).substitute(**context)
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+def snapshot_icons(snapshot) -> str:
														
 
															+    from core.models import Snapshot, EXTRACTORS
														
 
															+
														
 
															+    archive_results = snapshot.archiveresult_set.filter(status="succeeded")
														
 
															+    link = snapshot.as_link()
														
 
															+    path = link.archive_path
														
 
															+    canon = link.canonical_outputs()
														
 
															+    output = ""
														
 
															+    output_template = '<a href="/{}/{}" class="exists-{}" title="{}">{} </a>'
														
 
															+    icons = {
														
 
															+        "singlefile": "❶",
														
 
															+        "wget": "🆆",
														
 
															+        "dom": "🅷",
														
 
															+        "pdf": "📄",
														
 
															+        "screenshot": "💻",
														
 
															+        "media": "📼",
														
 
															+        "git": "🅶",
														
 
															+        "archive_org": "🏛",
														
 
															+        "readability": "🆁",
														
 
															+        "mercury": "🅼",
														
 
															+        "warc": "📦"
														
 
															+    }
														
 
															+    exclude = ["favicon", "title", "headers", "archive_org"]
														
 
															+    # Missing specific entry for WARC
														
 
															+
														
 
															+    extractor_items = defaultdict(lambda: None)
														
 
															+    for extractor, _ in EXTRACTORS:
														
 
															+        for result in archive_results:
														
 
															+            if result.extractor == extractor:
														
 
															+                extractor_items[extractor] = result
														
 
															+
														
 
															+    for extractor, _ in EXTRACTORS:
														
 
															+        if extractor not in exclude:
														
 
															+            exists = extractor_items[extractor] is not None
														
 
															+            output += output_template.format(path, canon[f"{extractor}_path"], str(exists),
														
 
															+                                             extractor, icons.get(extractor, "?"))
														
 
															+        if extractor == "wget":
														
 
															+            # warc isn't technically its own extractor, so we have to add it after wget
														
 
															+            exists = list((Path(path) / canon["warc_path"]).glob("*.warc.gz"))
														
 
															+            output += output_template.format(exists[0] if exists else '#', canon["warc_path"], str(bool(exists)), "warc", icons.get("warc", "?"))
														
 
															+
														
 
															+        if extractor == "archive_org":
														
 
															+            # The check for archive_org is different, so it has to be handled separately
														
 
															+            target_path = Path(path) / "archive.org.txt"
														
 
															+            exists = target_path.exists()
														
 
															+            output += '<a href="{}" class="exists-{}" title="{}">{}</a> '.format(canon["archive_org_path"], str(exists),
														
 
															+                                                                                        "archive_org", icons.get("archive_org", "?"))
														
 
															+
														
 
															+    return format_html(f'<span class="files-icons" style="font-size: 1.1em; opacity: 0.8">{output}<span>')
														
--- a/archivebox/util.py
+++ b/archivebox/util.py
@@ -246,6 +246,7 @@ def chrome_args(**options) -> List[str]:
 
															     return cmd_args
														
 
															+
														
 
															 def ansi_to_html(text):
														
 
															     """
														
 
															     Based on: https://stackoverflow.com/questions/19212665/python-converting-ansi-color-codes-to-html