utils.py 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. from django.utils.html import format_html
  2. from core.models import Snapshot, EXTRACTORS
  3. def get_icons(snapshot: Snapshot) -> str:
  4. archive_results = snapshot.archiveresult_set
  5. link = snapshot.as_link()
  6. canon = link.canonical_outputs()
  7. output = ""
  8. output_template = '<a href="/{}/{}" class="exists-{}" title="{}">{} </a>'
  9. icons = {
  10. "singlefile": "❶",
  11. "wget": "🆆",
  12. "dom": "🅷",
  13. "pdf": "📄",
  14. "screenshot": "💻",
  15. "media": "📼",
  16. "git": "🅶",
  17. "archive_org": "🏛",
  18. "readability": "🆁",
  19. "mercury": "🅼",
  20. "warc": "📦"
  21. }
  22. exclude = ["favicon"]
  23. # Missing specific entry for WARC
  24. for extractor, _ in EXTRACTORS:
  25. result = archive_results.filter(extractor=extractor, status="succeeded")
  26. path, exists = link.archive_path, result.exists()
  27. try:
  28. if extractor not in exclude:
  29. output += output_template.format(path, canon[f"{extractor}_path"],
  30. exists, extractor, icons.get(extractor, "?"))
  31. if extractor == "wget":
  32. # warc isn't technically it's own extractor, so we have to add it after wget
  33. output += output_template.format(path, canon["warc_path"],
  34. exists, "warc", icons.get("warc", "?"))
  35. except Exception as e:
  36. print(e)
  37. return format_html(f'<span class="files-icons" style="font-size: 1.2em; opacity: 0.8">{output}<span>')
  38. #def get_icons(snapshot: Snapshot) -> str:
  39. # link = snapshot.as_link()
  40. # canon = link.canonical_outputs()
  41. # out_dir = Path(link.link_dir)
  42. #
  43. # # slow version: highlights icons based on whether files exist or not for that output
  44. # # link_tuple = lambda link, method: (link.archive_path, canon[method] or '', canon[method] and (out_dir / (canon[method] or 'notdone')).exists())
  45. # # fast version: all icons are highlighted without checking for outputs in filesystem
  46. # link_tuple = lambda link, method: (link.archive_path, canon[method] or '', canon[method] and (out_dir / (canon[method] or 'notdone')).exists())
  47. #
  48. # return format_html(
  49. # '<span class="files-icons" style="font-size: 1.2em; opacity: 0.8">'
  50. # '<a href="/{}/{}" class="exists-{}" title="SingleFile">❶ </a>'
  51. # '<a href="/{}/{}" class="exists-{}" title="Wget clone">🆆 </a> '
  52. # '<a href="/{}/{}" class="exists-{}" title="HTML dump">🅷 </a> '
  53. # '<a href="/{}/{}" class="exists-{}" title="PDF">📄 </a> '
  54. # '<a href="/{}/{}" class="exists-{}" title="Screenshot">💻 </a> '
  55. # '<a href="/{}/{}" class="exists-{}" title="WARC">📦 </a> '
  56. # '<a href="/{}/{}/" class="exists-{}" title="Media files">📼 </a> '
  57. # '<a href="/{}/{}/" class="exists-{}" title="Git repos">🅶 </a> '
  58. # '<a href="{}" class="exists-{}" title="Archive.org snapshot">🏛 </a> '
  59. # '</span>',
  60. # *link_tuple(link, 'singlefile_path'),
  61. # *link_tuple(link, 'wget_path')[:2], any((out_dir / link.domain).glob('*')),
  62. # *link_tuple(link, 'pdf_path'),
  63. # *link_tuple(link, 'screenshot_path'),
  64. # *link_tuple(link, 'dom_path'),
  65. # *link_tuple(link, 'warc_path')[:2], any((out_dir / canon['warc_path']).glob('*.warc.gz')),
  66. # *link_tuple(link, 'media_path')[:2], any((out_dir / canon['media_path']).glob('*')),
  67. # *link_tuple(link, 'git_path')[:2], any((out_dir / canon['git_path']).glob('*')),
  68. # canon['archive_org_path'], (out_dir / 'archive.org.txt').exists(),
  69. # )
  70. #