admin.py 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. __package__ = 'archivebox.core'
  2. from io import StringIO
  3. from contextlib import redirect_stdout
  4. from pathlib import Path
  5. from django.contrib import admin
  6. from django.urls import path
  7. from django.utils.html import format_html
  8. from django.utils.safestring import mark_safe
  9. from django.shortcuts import render, redirect
  10. from django.contrib.auth import get_user_model
  11. from core.models import Snapshot
  12. from core.forms import AddLinkForm
  13. from util import htmldecode, urldecode, ansi_to_html
  14. from logging_util import printable_filesize
  15. from main import add, remove
  16. from config import OUTPUT_DIR
  17. from extractors import archive_links
  18. # TODO: https://stackoverflow.com/questions/40760880/add-custom-button-to-django-admin-panel
  def update_snapshots(modeladmin, request, queryset):
      """Admin action: hand every selected snapshot's link to ``archive_links``.

      The links are built with ``snapshot.as_link()`` and archived into
      ``OUTPUT_DIR``; no overwrite flag or method filter is passed.
      """
      selected_links = [snap.as_link() for snap in queryset]
      archive_links(selected_links, out_dir=OUTPUT_DIR)


  # Label shown for this action in the admin actions selector.
  update_snapshots.short_description = "Archive"
  def update_titles(modeladmin, request, queryset):
      """Admin action: re-run only the ``title`` method for the selected snapshots.

      Calls ``archive_links`` with ``overwrite=True`` and ``methods=('title',)``
      for the links of every snapshot in ``queryset``.
      """
      selected_links = [snap.as_link() for snap in queryset]
      archive_links(
          selected_links,
          overwrite=True,
          methods=('title',),
          out_dir=OUTPUT_DIR,
      )


  # Label shown for this action in the admin actions selector.
  update_titles.short_description = "Pull title"
  def overwrite_snapshots(modeladmin, request, queryset):
      """Admin action: archive the selected snapshots again with ``overwrite=True``.

      Every snapshot in ``queryset`` is converted with ``as_link()`` and the
      resulting list is passed to ``archive_links`` targeting ``OUTPUT_DIR``.
      """
      selected_links = [snap.as_link() for snap in queryset]
      archive_links(selected_links, overwrite=True, out_dir=OUTPUT_DIR)


  # Label shown for this action in the admin actions selector.
  overwrite_snapshots.short_description = "Re-archive (overwrite)"
  def verify_snapshots(modeladmin, request, queryset):
      """Admin action: print a one-line summary of each selected snapshot.

      The line contains the timestamp, URL, ``is_archived`` flag, archive size
      and the number of history entries. Output goes to the process's stdout,
      not to the admin page.
      """
      for snap in queryset:
          summary = (
              snap.timestamp,
              snap.url,
              snap.is_archived,
              snap.archive_size,
              len(snap.history),
          )
          print(*summary)


  # Label shown for this action in the admin actions selector.
  verify_snapshots.short_description = "Check"
  def delete_snapshots(modeladmin, request, queryset):
      """Admin action: pass the selected snapshots' links to ``remove``.

      ``remove`` is called with ``yes=True`` and ``delete=True`` against
      ``OUTPUT_DIR``.
      """
      doomed_links = [snap.as_link() for snap in queryset]
      remove(links=doomed_links, yes=True, delete=True, out_dir=OUTPUT_DIR)


  # Label shown for this action in the admin actions selector.
  delete_snapshots.short_description = "Delete"
  class SnapshotAdmin(admin.ModelAdmin):
      """Admin list/detail configuration for ``Snapshot`` rows.

      The change list shows the added date, a favicon + title link, the
      original URL, a row of per-output icons and the archive size. The
      module-level action functions are exposed through ``actions``.
      """

      list_display = ('added', 'title_str', 'url_str', 'files', 'size')
      # NOTE(review): ``sort_fields`` does not appear to be an option Django's
      # ModelAdmin reads (the documented one is ``sortable_by``); kept as-is,
      # confirm intent before renaming.
      sort_fields = ('title_str', 'url_str', 'added')
      readonly_fields = ('id', 'url', 'timestamp', 'num_outputs', 'is_archived', 'url_hash', 'added', 'updated')
      search_fields = ('url', 'timestamp', 'title', 'tags')
      # Editable fields first, followed by every read-only field.
      fields = ('title', 'tags', *readonly_fields)
      list_filter = ('added', 'updated', 'tags')
      ordering = ['-added']
      actions = [delete_snapshots, overwrite_snapshots, update_snapshots, update_titles, verify_snapshots]
      actions_template = 'admin/actions_as_select.html'

      def id_str(self, obj):
          """Render the first 8 characters of ``obj.url_hash`` in a small ``<code>`` tag."""
          return format_html(
              '<code style="font-size: 10px">{}</code>',
              obj.url_hash[:8],
          )

      def title_str(self, obj):
          """Render the favicon link, the title link and the tag list for a row.

          The title is ``obj.latest_title`` or ``obj.title``, decoded with
          ``htmldecode`` then ``urldecode`` and cut to 128 characters;
          ``'Pending...'`` is shown when that is empty. The ``<b>`` element gets
          class ``status-fetched`` when a title is present, ``status-pending``
          otherwise.
          """
          canon = obj.as_link().canonical_outputs()
          tags = ''.join(
              format_html('<span>{}</span>', tag.strip())
              for tag in obj.tags.split(',')
          ) if obj.tags else ''
          return format_html(
              '<a href="/{}">'
                  '<img src="/{}/{}" class="favicon" onerror="this.remove()">'
              '</a>'
              '<a href="/{}/{}">'
                  '<b class="status-{}">{}</b>'
              '</a>',
              obj.archive_path,
              obj.archive_path, canon['favicon_path'],
              obj.archive_path, canon['wget_path'] or '',
              'fetched' if obj.latest_title or obj.title else 'pending',
              urldecode(htmldecode(obj.latest_title or obj.title or ''))[:128] or 'Pending...'
              # Each tag was already escaped by format_html above, so the joined
              # markup can be marked safe without escaping it a second time.
          ) + mark_safe(f'<span class="tags">{tags}</span>')

      def files(self, obj):
          """Render one icon link per archive output for a row.

          Each ``<a>`` gets a CSS class ``exists-True`` or ``exists-False``:

          * wget / PDF / screenshot / DOM: the canonical output path exists
            under the link directory;
          * WARC: at least one ``*.warc.gz`` file is in the WARC directory;
          * media / git: the directory contains at least one entry;
          * archive.org: ``archive.org.txt`` exists in the link directory.
          """
          link = obj.as_link()
          canon = link.canonical_outputs()
          out_dir = Path(link.link_dir)

          def href_parts(method):
              """Return ``(archive_path, relative_output_path_or_empty)`` for *method*."""
              return link.archive_path, canon[method] or ''

          def link_tuple(method):
              """Return ``href_parts(method)`` plus a strict bool 'output exists' flag."""
              rel_path = canon[method]
              # Coerce to bool so a missing path renders "exists-False" like the
              # other icons, instead of "exists-None" / "exists-".
              exists = bool(rel_path) and (out_dir / rel_path).exists()
              return (*href_parts(method), exists)

          return format_html(
              '<span class="files-icons" style="font-size: 1.2em; opacity: 0.8">'
                  '<a href="/{}/{}/" class="exists-{}" title="Wget clone">🌐 </a> '
                  '<a href="/{}/{}" class="exists-{}" title="PDF">📄</a> '
                  '<a href="/{}/{}" class="exists-{}" title="Screenshot">🖥 </a> '
                  '<a href="/{}/{}" class="exists-{}" title="HTML dump">🅷 </a> '
                  '<a href="/{}/{}/" class="exists-{}" title="WARC">🆆 </a> '
                  '<a href="/{}/{}/" class="exists-{}" title="Media files">📼 </a> '
                  '<a href="/{}/{}/" class="exists-{}" title="Git repos">📦 </a> '
                  '<a href="{}" class="exists-{}" title="Archive.org snapshot">🏛 </a> '
              '</span>',
              *link_tuple('wget_path'),
              *link_tuple('pdf_path'),
              *link_tuple('screenshot_path'),
              *link_tuple('dom_path'),
              # Directory-style outputs: "exists" means the directory has content.
              *href_parts('warc_path'), any((out_dir / canon['warc_path']).glob('*.warc.gz')),
              *href_parts('media_path'), any((out_dir / canon['media_path']).glob('*')),
              *href_parts('git_path'), any((out_dir / canon['git_path']).glob('*')),
              canon['archive_org_path'], (out_dir / 'archive.org.txt').exists(),
          )

      def size(self, obj):
          """Render a link to the archive directory labelled with its size.

          The label is ``printable_filesize(obj.archive_size)``, or ``'pending'``
          when ``obj.archive_size`` is falsy.
          """
          return format_html(
              '<a href="/{}" title="View all files">{}</a>',
              obj.archive_path,
              printable_filesize(obj.archive_size) if obj.archive_size else 'pending',
          )

      def url_str(self, obj):
          """Render a link to the original URL.

          The visible text drops the scheme and a leading ``www.`` and is cut to
          64 characters; the ``href`` is the full ``obj.url``.
          """
          return format_html(
              '<a href="{}">{}</a>',
              obj.url,
              obj.url.split('://www.', 1)[-1].split('://', 1)[-1][:64],
          )

      # Column headers for the computed columns.
      id_str.short_description = 'ID'
      title_str.short_description = 'Title'
      url_str.short_description = 'Original URL'

      # Model fields used when sorting by the computed columns.
      id_str.admin_order_field = 'id'
      title_str.admin_order_field = 'title'
      url_str.admin_order_field = 'url'
  class ArchiveBoxAdmin(admin.AdminSite):
      """Admin site with an extra "Add URLs" page at ``core/snapshot/add/``."""

      site_header = 'ArchiveBox'
      index_title = 'Links'
      site_title = 'Index'

      def get_urls(self):
          """Return the stock admin URLs with the custom add route placed first."""
          add_route = path('core/snapshot/add/', self.add_view, name='Add')
          return [add_route] + super().get_urls()

      def add_view(self, request):
          """Show the add-links form and, on a valid POST, run ``add``.

          * Unauthenticated users are redirected to ``/admin/login/`` with the
            current path as ``next``.
          * GET renders an empty ``AddLinkForm``.
          * A valid POST calls ``add`` with stdout captured, echoes the captured
            text to the real stdout, and renders it (converted by
            ``ansi_to_html``) next to a fresh form.
          * An invalid POST re-renders the bound form.
          * Any other method renders the template without a ``form`` entry.
          """
          if not request.user.is_authenticated:
              return redirect(f'/admin/login/?next={request.path}')

          request.current_app = self.name
          context = dict(self.each_context(request), title='Add URLs')

          if request.method == 'GET':
              context['form'] = AddLinkForm()
          elif request.method == 'POST':
              submitted = AddLinkForm(request.POST)
              if not submitted.is_valid():
                  # Hand the bound form back to the template unchanged.
                  context["form"] = submitted
              else:
                  url = submitted.cleaned_data["url"]
                  print(f'[+] Adding URL: {url}')
                  # Only the literal string "0" selects depth 0; anything else is 1.
                  depth = 0 if submitted.cleaned_data["depth"] == "0" else 1

                  captured = StringIO()
                  with redirect_stdout(captured):
                      add(urls=url, depth=depth, update_all=False, out_dir=OUTPUT_DIR)
                  output = captured.getvalue()
                  print(output)

                  context.update({
                      "stdout": ansi_to_html(output.strip()),
                      "form": AddLinkForm()
                  })

          return render(request=request, template_name='add_links.html', context=context)
  # Swap the default admin site for the ArchiveBox one, then register the
  # models on it. Statement order matters: registration must happen on the
  # replacement site object.
  admin.site = ArchiveBoxAdmin()
  admin.site.register(get_user_model())
  admin.site.register(Snapshot, admin_class=SnapshotAdmin)
  # Remove the site-wide "delete_selected" action from the actions list.
  admin.site.disable_action('delete_selected')