utils.py 2.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556
import logging
from pathlib import Path

from django.utils.html import format_html
from django.utils.safestring import mark_safe

from core.models import Snapshot, EXTRACTORS
  4. def get_icons(snapshot: Snapshot) -> str:
  5. archive_results = snapshot.archiveresult_set.filter(status="succeeded")
  6. link = snapshot.as_link()
  7. canon = link.canonical_outputs()
  8. output = ""
  9. output_template = '<a href="/{}/{}" class="exists-True" title="{}">{} </a>'
  10. icons = {
  11. "singlefile": "❶",
  12. "wget": "🆆",
  13. "dom": "🅷",
  14. "pdf": "📄",
  15. "screenshot": "💻",
  16. "media": "📼",
  17. "git": "🅶",
  18. "archive_org": "🏛",
  19. "readability": "🆁",
  20. "mercury": "🅼",
  21. "warc": "📦"
  22. }
  23. exclude = ["favicon", "archive_org"]
  24. # Missing specific entry for WARC
  25. for extractor, _ in EXTRACTORS:
  26. for result in archive_results:
  27. if result.extractor != extractor:
  28. continue
  29. path = link.archive_path
  30. try:
  31. if extractor not in exclude:
  32. output += output_template.format(path, canon[f"{extractor}_path"],
  33. extractor, icons.get(extractor, "?"))
  34. if extractor == "wget":
  35. # warc isn't technically it's own extractor, so we have to add it after wget
  36. exists = list((Path(path) / canon["warc_path"]).glob("*.warc.gz"))
  37. if exists:
  38. output += output_template.format(exists[0], "",
  39. "warc", icons.get("warc", "?"))
  40. if extractor == "archive_org":
  41. # The check for archive_org is different, so it has to be handled separately
  42. target_path = Path(path) / "archive.org.txt"
  43. exists = target_path.exists()
  44. if exists:
  45. output += '<a href="{}" class="exists-True" title="{}">{} </a>'.format(canon["archive_org_path"],
  46. "archive_org", icons.get("archive_org", "?"))
  47. except Exception as e:
  48. print(e)
  49. return format_html(f'<span class="files-icons" style="font-size: 1.2em; opacity: 0.8">{output}<span>')