utils.py 3.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
  1. from pathlib import Path
  2. from django.utils.html import format_html
  3. from core.models import Snapshot, ArchiveResult, EXTRACTORS
  def get_icons(snapshot: Snapshot) -> str:
      """Render the row of per-extractor output icons for a snapshot.

      For every entry in ``EXTRACTORS`` (except the excluded ones) an ``<a>``
      element is produced that points at the extractor's canonical output path
      below the snapshot's archive path. The link's CSS class is
      ``exists-True`` or ``exists-False`` depending on whether the snapshot has
      at least one ``ArchiveResult`` row for that extractor.

      Args:
          snapshot: The snapshot whose outputs should be linked.

      Returns:
          The result of ``format_html``: a ``<span class="files-icons">``
          wrapper containing one link per extractor, with every interpolated
          value HTML-escaped.
      """
      # Function-scope import so this block does not depend on a change to the
      # module-level import list.
      from django.utils.html import format_html_join

      link = snapshot.as_link()
      canon = link.canonical_outputs()

      link_template = '<a href="/{}/{}" class="exists-{}" title="{}">{} </a>'
      icons = {
          "singlefile": "❶",
          "wget": "🆆",
          "dom": "🅷",
          "pdf": "📄",
          "screenshot": "💻",
          "media": "📼",
          "git": "🅶",
          "archive_org": "🏛",
          "readability": "🆁",
          "mercury": "🅼",
      }
      # Missing specific entry for WARC: any extractor without an icon above
      # is rendered with the "?" fallback.
      exclude = {"favicon"}

      # A single query for the names of all extractors that have results,
      # instead of issuing one EXISTS query per extractor inside the loop.
      extractors_with_results = set(
          snapshot.archiveresult_set.values_list("extractor", flat=True)
      )

      rows = []
      for extractor in EXTRACTORS:
          name = extractor[0]
          if name in exclude:
              continue
          try:
              output_path = canon[f"{name}_path"]
          except KeyError:
              # This extractor has no canonical output path, so there is
              # nothing to link to; skip it and keep rendering the others.
              continue
          rows.append((
              link.archive_path,
              output_path,
              name in extractors_with_results,
              name,
              icons.get(name, "?"),
          ))

      # format_html_join escapes every value and returns safe HTML, so paths
      # containing quotes, angle brackets or braces cannot break the markup.
      links = format_html_join("", link_template, rows)
      return format_html(
          '<span class="files-icons" style="font-size: 1.2em; opacity: 0.8">{}</span>',
          links,
      )
  33. #def get_icons(snapshot: Snapshot) -> str:
  34. # link = snapshot.as_link()
  35. # canon = link.canonical_outputs()
  36. # out_dir = Path(link.link_dir)
  37. #
  38. # # slow version: highlights icons based on whether files exist or not for that output
  39. # # link_tuple = lambda link, method: (link.archive_path, canon[method] or '', canon[method] and (out_dir / (canon[method] or 'notdone')).exists())
  40. # # "fast version" (NOTE: the lambda below is identical to the slow one above and still checks the filesystem via .exists())
  41. # link_tuple = lambda link, method: (link.archive_path, canon[method] or '', canon[method] and (out_dir / (canon[method] or 'notdone')).exists())
  42. #
  43. # return format_html(
  44. # '<span class="files-icons" style="font-size: 1.2em; opacity: 0.8">'
  45. # '<a href="/{}/{}" class="exists-{}" title="SingleFile">❶ </a>'
  46. # '<a href="/{}/{}" class="exists-{}" title="Wget clone">🆆 </a> '
  47. # '<a href="/{}/{}" class="exists-{}" title="HTML dump">🅷 </a> '
  48. # '<a href="/{}/{}" class="exists-{}" title="PDF">📄 </a> '
  49. # '<a href="/{}/{}" class="exists-{}" title="Screenshot">💻 </a> '
  50. # '<a href="/{}/{}" class="exists-{}" title="WARC">📦 </a> '
  51. # '<a href="/{}/{}/" class="exists-{}" title="Media files">📼 </a> '
  52. # '<a href="/{}/{}/" class="exists-{}" title="Git repos">🅶 </a> '
  53. # '<a href="{}" class="exists-{}" title="Archive.org snapshot">🏛 </a> '
  54. # '</span>',
  55. # *link_tuple(link, 'singlefile_path'),
  56. # *link_tuple(link, 'wget_path')[:2], any((out_dir / link.domain).glob('*')),
  57. # *link_tuple(link, 'pdf_path'),
  58. # *link_tuple(link, 'screenshot_path'),
  59. # *link_tuple(link, 'dom_path'),
  60. # *link_tuple(link, 'warc_path')[:2], any((out_dir / canon['warc_path']).glob('*.warc.gz')),
  61. # *link_tuple(link, 'media_path')[:2], any((out_dir / canon['media_path']).glob('*')),
  62. # *link_tuple(link, 'git_path')[:2], any((out_dir / canon['git_path']).glob('*')),
  63. # canon['archive_org_path'], (out_dir / 'archive.org.txt').exists(),
  64. # )
  65. #