utils.py 3.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071
  1. from django.utils.html import format_html
  2. from core.models import Snapshot, EXTRACTORS
  3. def get_icons(snapshot: Snapshot) -> str:
  4. archive_results = snapshot.archiveresult_set
  5. link = snapshot.as_link()
  6. canon = link.canonical_outputs()
  7. output = ""
  8. output_template = '<a href="/{}/{}" class="exists-{}" title="{}">{} </a>'
  9. icons = {
  10. "singlefile": "❶",
  11. "wget": "🆆",
  12. "dom": "🅷",
  13. "pdf": "📄",
  14. "screenshot": "💻",
  15. "media": "📼",
  16. "git": "🅶",
  17. "archive_org": "🏛",
  18. "readability": "🆁",
  19. "mercury": "🅼",
  20. }
  21. exclude = ["favicon"]
  22. # Missing specific entry for WARC
  23. for extractor in EXTRACTORS:
  24. result = archive_results.filter(extractor=extractor[0])
  25. try:
  26. if extractor[0] not in exclude:
  27. output += output_template.format(link.archive_path, canon[f"{extractor[0]}_path"],
  28. result.exists(), extractor[0], icons.get(extractor[0], "?"))
  29. except Exception as e:
  30. print(e)
  31. return format_html(f'<span class="files-icons" style="font-size: 1.2em; opacity: 0.8">{output}<span>')
  32. #def get_icons(snapshot: Snapshot) -> str:
  33. # link = snapshot.as_link()
  34. # canon = link.canonical_outputs()
  35. # out_dir = Path(link.link_dir)
  36. #
  37. # # slow version: highlights icons based on whether files exist or not for that output
  38. # # link_tuple = lambda link, method: (link.archive_path, canon[method] or '', canon[method] and (out_dir / (canon[method] or 'notdone')).exists())
  39. # # fast version: all icons are highlighted without checking for outputs in filesystem
  40. # link_tuple = lambda link, method: (link.archive_path, canon[method] or '', canon[method] and (out_dir / (canon[method] or 'notdone')).exists())
  41. #
  42. # return format_html(
  43. # '<span class="files-icons" style="font-size: 1.2em; opacity: 0.8">'
  44. # '<a href="/{}/{}" class="exists-{}" title="SingleFile">❶ </a>'
  45. # '<a href="/{}/{}" class="exists-{}" title="Wget clone">🆆 </a> '
  46. # '<a href="/{}/{}" class="exists-{}" title="HTML dump">🅷 </a> '
  47. # '<a href="/{}/{}" class="exists-{}" title="PDF">📄 </a> '
  48. # '<a href="/{}/{}" class="exists-{}" title="Screenshot">💻 </a> '
  49. # '<a href="/{}/{}" class="exists-{}" title="WARC">📦 </a> '
  50. # '<a href="/{}/{}/" class="exists-{}" title="Media files">📼 </a> '
  51. # '<a href="/{}/{}/" class="exists-{}" title="Git repos">🅶 </a> '
  52. # '<a href="{}" class="exists-{}" title="Archive.org snapshot">🏛 </a> '
  53. # '</span>',
  54. # *link_tuple(link, 'singlefile_path'),
  55. # *link_tuple(link, 'wget_path')[:2], any((out_dir / link.domain).glob('*')),
  56. # *link_tuple(link, 'pdf_path'),
  57. # *link_tuple(link, 'screenshot_path'),
  58. # *link_tuple(link, 'dom_path'),
  59. # *link_tuple(link, 'warc_path')[:2], any((out_dir / canon['warc_path']).glob('*.warc.gz')),
  60. # *link_tuple(link, 'media_path')[:2], any((out_dir / canon['media_path']).glob('*')),
  61. # *link_tuple(link, 'git_path')[:2], any((out_dir / canon['git_path']).glob('*')),
  62. # canon['archive_org_path'], (out_dir / 'archive.org.txt').exists(),
  63. # )
  64. #