admin_archiveresults.py 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
  1. __package__ = 'archivebox.core'
  2. import os
  3. from pathlib import Path
  4. from django.contrib import admin
  5. from django.utils.html import format_html, mark_safe
  6. from django.core.exceptions import ValidationError
  7. from django.urls import reverse, resolve
  8. from django.utils import timezone
  9. from django_jsonform.forms.fields import JSONFormField
  10. from huey_monitor.admin import TaskModel
  11. import abx
  12. from archivebox.config import DATA_DIR
  13. from archivebox.config.common import SERVER_CONFIG
  14. from archivebox.misc.paginators import AccelleratedPaginator
  15. from abid_utils.admin import ABIDModelAdmin
  16. from core.models import ArchiveResult, Snapshot
  17. def result_url(result: TaskModel) -> str:
  18. url = reverse("admin:huey_monitor_taskmodel_change", args=[str(result.id)])
  19. return format_html('<a href="{url}" class="fade-in-progress-url">See progress...</a>'.format(url=url))
  20. class ArchiveResultInline(admin.TabularInline):
  21. name = 'Archive Results Log'
  22. model = ArchiveResult
  23. parent_model = Snapshot
  24. # fk_name = 'snapshot'
  25. extra = 0
  26. sort_fields = ('end_ts', 'extractor', 'output', 'status', 'cmd_version')
  27. readonly_fields = ('id', 'result_id', 'completed', 'command', 'version')
  28. fields = ('start_ts', 'end_ts', *readonly_fields, 'extractor', 'cmd', 'cmd_version', 'pwd', 'created_by', 'status', 'output')
  29. # exclude = ('id',)
  30. ordering = ('end_ts',)
  31. show_change_link = True
  32. # # classes = ['collapse']
  33. # # list_display_links = ['abid']
  34. def get_parent_object_from_request(self, request):
  35. resolved = resolve(request.path_info)
  36. try:
  37. return self.parent_model.objects.get(pk=resolved.kwargs['object_id'])
  38. except (self.parent_model.DoesNotExist, ValidationError):
  39. return self.parent_model.objects.get(pk=self.parent_model.id_from_abid(resolved.kwargs['object_id']))
  40. @admin.display(
  41. description='Completed',
  42. ordering='end_ts',
  43. )
  44. def completed(self, obj):
  45. return format_html('<p style="white-space: nowrap">{}</p>', obj.end_ts.strftime('%Y-%m-%d %H:%M:%S'))
  46. def result_id(self, obj):
  47. return format_html('<a href="{}"><code style="font-size: 10px">[{}]</code></a>', reverse('admin:core_archiveresult_change', args=(obj.id,)), obj.abid)
  48. def command(self, obj):
  49. return format_html('<small><code>{}</code></small>', " ".join(obj.cmd or []))
  50. def version(self, obj):
  51. return format_html('<small><code>{}</code></small>', obj.cmd_version or '-')
  52. def get_formset(self, request, obj=None, **kwargs):
  53. formset = super().get_formset(request, obj, **kwargs)
  54. snapshot = self.get_parent_object_from_request(request)
  55. # import ipdb; ipdb.set_trace()
  56. # formset.form.base_fields['id'].widget = formset.form.base_fields['id'].hidden_widget()
  57. # default values for new entries
  58. formset.form.base_fields['status'].initial = 'succeeded'
  59. formset.form.base_fields['start_ts'].initial = timezone.now()
  60. formset.form.base_fields['end_ts'].initial = timezone.now()
  61. formset.form.base_fields['cmd_version'].initial = '-'
  62. formset.form.base_fields['pwd'].initial = str(snapshot.link_dir)
  63. formset.form.base_fields['created_by'].initial = request.user
  64. formset.form.base_fields['cmd'] = JSONFormField(initial=['-'])
  65. formset.form.base_fields['output'].initial = 'Manually recorded cmd output...'
  66. if obj is not None:
  67. # hidden values for existing entries and new entries
  68. formset.form.base_fields['start_ts'].widget = formset.form.base_fields['start_ts'].hidden_widget()
  69. formset.form.base_fields['end_ts'].widget = formset.form.base_fields['end_ts'].hidden_widget()
  70. formset.form.base_fields['cmd'].widget = formset.form.base_fields['cmd'].hidden_widget()
  71. formset.form.base_fields['pwd'].widget = formset.form.base_fields['pwd'].hidden_widget()
  72. formset.form.base_fields['created_by'].widget = formset.form.base_fields['created_by'].hidden_widget()
  73. formset.form.base_fields['cmd_version'].widget = formset.form.base_fields['cmd_version'].hidden_widget()
  74. return formset
  75. def get_readonly_fields(self, request, obj=None):
  76. if obj is not None:
  77. return self.readonly_fields
  78. else:
  79. return []
  80. class ArchiveResultAdmin(ABIDModelAdmin):
  81. list_display = ('start_ts', 'snapshot_info', 'tags_str', 'extractor', 'cmd_str', 'status', 'output_str')
  82. sort_fields = ('start_ts', 'extractor', 'status')
  83. readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'abid_info', 'output_summary')
  84. search_fields = ('id', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
  85. fields = ('snapshot', 'extractor', 'status', 'output', 'pwd', 'start_ts', 'end_ts', 'created_by', 'cmd_version', 'cmd', *readonly_fields)
  86. autocomplete_fields = ['snapshot']
  87. list_filter = ('status', 'extractor', 'start_ts', 'cmd_version')
  88. ordering = ['-start_ts']
  89. list_per_page = SERVER_CONFIG.SNAPSHOTS_PER_PAGE
  90. paginator = AccelleratedPaginator
  91. save_on_top = True
  92. actions = ['delete_selected']
  93. class Meta:
  94. verbose_name = 'Archive Result'
  95. verbose_name_plural = 'Archive Results'
  96. def change_view(self, request, object_id, form_url="", extra_context=None):
  97. self.request = request
  98. return super().change_view(request, object_id, form_url, extra_context)
  99. @admin.display(
  100. description='Snapshot Info'
  101. )
  102. def snapshot_info(self, result):
  103. return format_html(
  104. '<a href="/archive/{}/index.html"><b><code>[{}]</code></b> &nbsp; {} &nbsp; {}</a><br/>',
  105. result.snapshot.timestamp,
  106. result.snapshot.abid,
  107. result.snapshot.bookmarked_at.strftime('%Y-%m-%d %H:%M'),
  108. result.snapshot.url[:128],
  109. )
  110. @admin.display(
  111. description='Snapshot Tags'
  112. )
  113. def tags_str(self, result):
  114. return result.snapshot.tags_str()
  115. def cmd_str(self, result):
  116. return format_html(
  117. '<pre>{}</pre>',
  118. ' '.join(result.cmd) if isinstance(result.cmd, list) else str(result.cmd),
  119. )
  120. def output_str(self, result):
  121. return format_html(
  122. '<a href="/archive/{}/{}" class="output-link">↗️</a><pre>{}</pre>',
  123. result.snapshot.timestamp,
  124. result.output if (result.status == 'succeeded') and result.extractor not in ('title', 'archive_org') else 'index.html',
  125. result.output,
  126. )
  127. def output_summary(self, result):
  128. snapshot_dir = Path(DATA_DIR) / str(result.pwd).split('data/', 1)[-1]
  129. output_str = format_html(
  130. '<pre style="display: inline-block">{}</pre><br/>',
  131. result.output,
  132. )
  133. output_str += format_html('<a href="/archive/{}/index.html#all">See result files ...</a><br/><pre><code>', str(result.snapshot.timestamp))
  134. path_from_output_str = (snapshot_dir / result.output)
  135. output_str += format_html('<i style="padding: 1px">{}</i><b style="padding-right: 20px">/</b><i>{}</i><br/><hr/>', str(snapshot_dir), str(result.output))
  136. if os.access(path_from_output_str, os.R_OK):
  137. root_dir = str(path_from_output_str)
  138. else:
  139. root_dir = str(snapshot_dir)
  140. # print(root_dir, str(list(os.walk(root_dir))))
  141. for root, dirs, files in os.walk(root_dir):
  142. depth = root.replace(root_dir, '').count(os.sep) + 1
  143. if depth > 2:
  144. continue
  145. indent = ' ' * 4 * (depth)
  146. output_str += format_html('<b style="padding: 1px">{}{}/</b><br/>', indent, os.path.basename(root))
  147. indentation_str = ' ' * 4 * (depth + 1)
  148. for filename in sorted(files):
  149. is_hidden = filename.startswith('.')
  150. output_str += format_html('<span style="opacity: {}.2">{}{}</span><br/>', int(not is_hidden), indentation_str, filename.strip())
  151. return output_str + format_html('</code></pre>')
  152. @abx.hookimpl
  153. def register_admin(admin_site):
  154. admin_site.register(ArchiveResult, ArchiveResultAdmin)