utils.py 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657
from pathlib import Path

from django.utils.html import format_html, format_html_join

from core.models import Snapshot, EXTRACTORS
from core.settings import DEBUG
  5. def get_icons(snapshot: Snapshot) -> str:
  6. archive_results = list(snapshot.archiveresult_set.all())
  7. link = snapshot.as_link()
  8. canon = link.canonical_outputs()
  9. output = ""
  10. output_template = '<a href="/{}/{}" class="exists-True" title="{}">{} </a>'
  11. icons = {
  12. "singlefile": "❶",
  13. "wget": "🆆",
  14. "dom": "🅷",
  15. "pdf": "📄",
  16. "screenshot": "💻",
  17. "media": "📼",
  18. "git": "🅶",
  19. "archive_org": "🏛",
  20. "readability": "🆁",
  21. "mercury": "🅼",
  22. "warc": "📦"
  23. }
  24. exclude = ["favicon", "archive_org"]
  25. # Missing specific entry for WARC
  26. for extractor, _ in EXTRACTORS:
  27. for result in archive_results:
  28. if result.extractor != extractor or result.status != "succeeded":
  29. continue
  30. path = link.archive_path
  31. try:
  32. if extractor not in exclude:
  33. output += output_template.format(path, canon[f"{extractor}_path"],
  34. extractor, icons.get(extractor, "?"))
  35. if extractor == "wget":
  36. # warc isn't technically it's own extractor, so we have to add it after wget
  37. exists = list((Path(path) / canon["warc_path"]).glob("*.warc.gz"))
  38. if exists:
  39. output += output_template.format(exists[0], "",
  40. "warc", icons.get("warc", "?"))
  41. if extractor == "archive_org":
  42. # The check for archive_org is different, so it has to be handled separately
  43. target_path = Path(path) / "archive.org.txt"
  44. exists = target_path.exists()
  45. if exists:
  46. output += '<a href="{}" class="exists-True" title="{}">{} </a>'.format(canon["archive_org_path"],
  47. "archive_org", icons.get("archive_org", "?"))
  48. except Exception as e:
  49. print(e)
  50. return format_html(f'<span class="files-icons" style="font-size: 1.2em; opacity: 0.8">{output}<span>')