utils.py 2.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  1. from django.utils.html import format_html
  2. from collections import defaultdict
  3. from core.models import Snapshot, EXTRACTORS
  4. from pathlib import Path
  5. def get_icons(snapshot: Snapshot) -> str:
  6. archive_results = snapshot.archiveresult_set.filter(status="succeeded")
  7. link = snapshot.as_link()
  8. path = link.archive_path
  9. canon = link.canonical_outputs()
  10. output = ""
  11. output_template = '<a href="/{}/{}" class="exists-{}" title="{}">{} </a>'
  12. icons = {
  13. "singlefile": "❶",
  14. "wget": "🆆",
  15. "dom": "🅷",
  16. "pdf": "📄",
  17. "screenshot": "💻",
  18. "media": "📼",
  19. "git": "🅶",
  20. "archive_org": "🏛",
  21. "readability": "🆁",
  22. "mercury": "🅼",
  23. "warc": "📦"
  24. }
  25. exclude = ["favicon", "title", "headers", "archive_org"]
  26. # Missing specific entry for WARC
  27. extractor_items = defaultdict(lambda: None)
  28. for extractor, _ in EXTRACTORS:
  29. for result in archive_results:
  30. if result.extractor == extractor:
  31. extractor_items[extractor] = result
  32. for extractor, _ in EXTRACTORS:
  33. if extractor not in exclude:
  34. exists = extractor_items[extractor] is not None
  35. output += output_template.format(path, canon[f"{extractor}_path"], str(exists),
  36. extractor, icons.get(extractor, "?"))
  37. if extractor == "wget":
  38. # warc isn't technically it's own extractor, so we have to add it after wget
  39. exists = list((Path(path) / canon["warc_path"]).glob("*.warc.gz"))
  40. if exists:
  41. output += output_template.format(exists[0], "", str(bool(exists)), "warc", icons.get("warc", "?"))
  42. if extractor == "archive_org":
  43. # The check for archive_org is different, so it has to be handled separately
  44. target_path = Path(path) / "archive.org.txt"
  45. exists = target_path.exists()
  46. output += '<a href="{}" class="exists-{}" title="{}">{} </a>'.format(canon["archive_org_path"], str(exists),
  47. "archive_org", icons.get("archive_org", "?"))
  48. return format_html(f'<span class="files-icons" style="font-size: 1.2em; opacity: 0.8">{output}<span>')