|
@@ -5,8 +5,13 @@ from datetime import datetime
|
|
|
from typing import List, Optional, Iterator, Mapping
|
|
from typing import List, Optional, Iterator, Mapping
|
|
|
from pathlib import Path
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
+from django.utils.html import format_html
|
|
|
|
|
+from collections import defaultdict
|
|
|
|
|
+
|
|
|
|
|
+from pathlib import Path
|
|
|
|
|
+
|
|
|
from .schema import Link
|
|
from .schema import Link
|
|
|
-from ..system import atomic_write, copy_and_overwrite
|
|
|
|
|
|
|
+from ..system import atomic_write
|
|
|
from ..logging_util import printable_filesize
|
|
from ..logging_util import printable_filesize
|
|
|
from ..util import (
|
|
from ..util import (
|
|
|
enforce_types,
|
|
enforce_types,
|
|
@@ -23,9 +28,6 @@ from ..config import (
|
|
|
FOOTER_INFO,
|
|
FOOTER_INFO,
|
|
|
ARCHIVE_DIR_NAME,
|
|
ARCHIVE_DIR_NAME,
|
|
|
HTML_INDEX_FILENAME,
|
|
HTML_INDEX_FILENAME,
|
|
|
- STATIC_DIR_NAME,
|
|
|
|
|
- ROBOTS_TXT_FILENAME,
|
|
|
|
|
- FAVICON_FILENAME,
|
|
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
MAIN_INDEX_TEMPLATE = str(Path(TEMPLATES_DIR) / 'main_index.html')
|
|
MAIN_INDEX_TEMPLATE = str(Path(TEMPLATES_DIR) / 'main_index.html')
|
|
@@ -143,3 +145,56 @@ def render_legacy_template(template_path: str, context: Mapping[str, str]) -> st
|
|
|
with open(template_path, 'r', encoding='utf-8') as template:
|
|
with open(template_path, 'r', encoding='utf-8') as template:
|
|
|
template_str = template.read()
|
|
template_str = template.read()
|
|
|
return Template(template_str).substitute(**context)
|
|
return Template(template_str).substitute(**context)
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def snapshot_icons(snapshot) -> str:
    """Render the row of per-extractor output icons for one snapshot.

    One ``<a>`` element is emitted for every extractor listed in
    ``core.models.EXTRACTORS`` that is not in the local ``exclude`` set.
    Its ``exists-True`` / ``exists-False`` CSS class reflects whether the
    snapshot has an archive result with ``status="succeeded"`` for that
    extractor. Two entries are handled specially:

    * ``warc`` is appended right after ``wget``; its existence is decided by
      looking for ``*.warc.gz`` files on disk rather than by an archive result.
    * ``archive_org`` is excluded from the generic loop and rendered with its
      own template, because its href is used verbatim (no ``/<path>/`` prefix)
      and its existence is decided by the presence of ``archive.org.txt``.

    Args:
        snapshot: object providing ``archiveresult_set.filter(...)`` and
            ``as_link()``; the returned link must expose ``archive_path`` and
            ``canonical_outputs()``.

    Returns:
        The icons wrapped in a ``<span class="files-icons">`` element, as an
        HTML-safe string. Every interpolated value is HTML-escaped.
    """
    # Function-scope imports: core.models is imported lazily here (as in the
    # original code), and mark_safe is only needed by this function.
    from django.utils.safestring import mark_safe

    from core.models import EXTRACTORS

    link = snapshot.as_link()
    path = link.archive_path
    canon = link.canonical_outputs()

    link_template = '<a href="/{}/{}" class="exists-{}" title="{}">{} </a>'
    icons = {
        "singlefile": "❶",
        "wget": "🆆",
        "dom": "🅷",
        "pdf": "📄",
        "screenshot": "💻",
        "media": "📼",
        "git": "🅶",
        "archive_org": "🏛",
        "readability": "🆁",
        "mercury": "🅼",
        "warc": "📦",
    }
    # Extractors that get no generic icon. "archive_org" is rendered by its
    # own branch below; "warc" is not an extractor at all and is added after
    # "wget".
    exclude = {"favicon", "title", "headers", "archive_org"}

    # Names of the extractors that have at least one succeeded result.
    succeeded = {
        result.extractor
        for result in snapshot.archiveresult_set.filter(status="succeeded")
    }

    # Each fragment is built with format_html() so paths/URLs are escaped and
    # literal braces inside them cannot break string formatting.
    fragments = []
    for extractor, _ in EXTRACTORS:
        if extractor not in exclude:
            fragments.append(format_html(
                link_template,
                path,
                canon[f"{extractor}_path"],
                str(extractor in succeeded),
                extractor,
                icons.get(extractor, "?"),
            ))

        if extractor == "wget":
            # warc isn't technically its own extractor, so we have to add it
            # after wget. Link to the snapshot's warc output like every other
            # icon; previously the href was the matched file path followed by
            # warc_path again, which never resolved.
            warc_dir = Path(path) / canon["warc_path"]
            has_warc = next(warc_dir.glob("*.warc.gz"), None) is not None
            fragments.append(format_html(
                link_template,
                path,
                canon["warc_path"],
                str(has_warc),
                "warc",
                icons.get("warc", "?"),
            ))

        if extractor == "archive_org":
            # The check for archive_org is different, so it has to be handled
            # separately: existence is judged by the archive.org.txt file.
            has_archive_org = (Path(path) / "archive.org.txt").exists()
            fragments.append(format_html(
                '<a href="{}" class="exists-{}" title="{}">{}</a> ',
                canon["archive_org_path"],
                str(has_archive_org),
                "archive_org",
                icons.get("archive_org", "?"),
            ))

    # All fragments are already escaped SafeStrings, so the joined markup can
    # be marked safe and passed as an argument instead of being interpolated
    # into the format string itself.
    return format_html(
        '<span class="files-icons" style="font-size: 1.1em; opacity: 0.8">{}</span>',
        mark_safe("".join(fragments)),
    )
|