utils.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475
  1. from django.utils.html import format_html
  2. from core.models import Snapshot, EXTRACTORS
  3. def get_icons(snapshot: Snapshot) -> str:
  4. archive_results = snapshot.archiveresult_set
  5. link = snapshot.as_link()
  6. canon = link.canonical_outputs()
  7. output = ""
  8. output_template = '<a href="/{}/{}" class="exists-{}" title="{}">{} </a>'
  9. icons = {
  10. "singlefile": "❶",
  11. "wget": "🆆",
  12. "dom": "🅷",
  13. "pdf": "📄",
  14. "screenshot": "💻",
  15. "media": "📼",
  16. "git": "🅶",
  17. "archive_org": "🏛",
  18. "readability": "🆁",
  19. "mercury": "🅼",
  20. "warc": "📦"
  21. }
  22. exclude = ["favicon"]
  23. # Missing specific entry for WARC
  24. for extractor in EXTRACTORS:
  25. result = archive_results.filter(extractor=extractor[0], status="succeeded")
  26. try:
  27. if extractor[0] not in exclude:
  28. output += output_template.format(link.archive_path, canon[f"{extractor[0]}_path"],
  29. result.exists(), extractor[0], icons.get(extractor[0], "?"))
  30. if extractor[0] == "wget":
  31. extractor = "warc"
  32. output += output_template.format(link.archive_path, canon[f"{extractor}_path"],
  33. result.exists(), extractor, icons.get(extractor, "?"))
  34. except Exception as e:
  35. print(e)
  36. return format_html(f'<span class="files-icons" style="font-size: 1.2em; opacity: 0.8">{output}<span>')
  37. #def get_icons(snapshot: Snapshot) -> str:
  38. # link = snapshot.as_link()
  39. # canon = link.canonical_outputs()
  40. # out_dir = Path(link.link_dir)
  41. #
  42. # # slow version: highlights icons based on whether files exist or not for that output
  43. # # link_tuple = lambda link, method: (link.archive_path, canon[method] or '', canon[method] and (out_dir / (canon[method] or 'notdone')).exists())
  44. # # fast version: all icons are highlighted without checking for outputs in filesystem
  45. # link_tuple = lambda link, method: (link.archive_path, canon[method] or '', canon[method] and (out_dir / (canon[method] or 'notdone')).exists())
  46. #
  47. # return format_html(
  48. # '<span class="files-icons" style="font-size: 1.2em; opacity: 0.8">'
  49. # '<a href="/{}/{}" class="exists-{}" title="SingleFile">❶ </a>'
  50. # '<a href="/{}/{}" class="exists-{}" title="Wget clone">🆆 </a> '
  51. # '<a href="/{}/{}" class="exists-{}" title="HTML dump">🅷 </a> '
  52. # '<a href="/{}/{}" class="exists-{}" title="PDF">📄 </a> '
  53. # '<a href="/{}/{}" class="exists-{}" title="Screenshot">💻 </a> '
  54. # '<a href="/{}/{}" class="exists-{}" title="WARC">📦 </a> '
  55. # '<a href="/{}/{}/" class="exists-{}" title="Media files">📼 </a> '
  56. # '<a href="/{}/{}/" class="exists-{}" title="Git repos">🅶 </a> '
  57. # '<a href="{}" class="exists-{}" title="Archive.org snapshot">🏛 </a> '
  58. # '</span>',
  59. # *link_tuple(link, 'singlefile_path'),
  60. # *link_tuple(link, 'wget_path')[:2], any((out_dir / link.domain).glob('*')),
  61. # *link_tuple(link, 'pdf_path'),
  62. # *link_tuple(link, 'screenshot_path'),
  63. # *link_tuple(link, 'dom_path'),
  64. # *link_tuple(link, 'warc_path')[:2], any((out_dir / canon['warc_path']).glob('*.warc.gz')),
  65. # *link_tuple(link, 'media_path')[:2], any((out_dir / canon['media_path']).glob('*')),
  66. # *link_tuple(link, 'git_path')[:2], any((out_dir / canon['git_path']).glob('*')),
  67. # canon['archive_org_path'], (out_dir / 'archive.org.txt').exists(),
  68. # )
  69. #