admin_archiveresults.py 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. __package__ = 'archivebox.core'
  2. import os
  3. from pathlib import Path
  4. from django.contrib import admin
  5. from django.utils.html import format_html, mark_safe
  6. from django.core.exceptions import ValidationError
  7. from django.urls import reverse, resolve
  8. from django.utils import timezone
  9. from huey_monitor.admin import TaskModel
  10. import abx
  11. from archivebox.config import DATA_DIR
  12. from archivebox.config.common import SERVER_CONFIG
  13. from archivebox.misc.paginators import AccelleratedPaginator
  14. from archivebox.base_models.admin import ABIDModelAdmin
  15. from core.models import ArchiveResult, Snapshot
  16. def result_url(result: TaskModel) -> str:
  17. url = reverse("admin:huey_monitor_taskmodel_change", args=[str(result.id)])
  18. return format_html('<a href="{url}" class="fade-in-progress-url">See progress...</a>'.format(url=url))
  19. class ArchiveResultInline(admin.TabularInline):
  20. name = 'Archive Results Log'
  21. model = ArchiveResult
  22. parent_model = Snapshot
  23. # fk_name = 'snapshot'
  24. extra = 0
  25. sort_fields = ('end_ts', 'extractor', 'output', 'status', 'cmd_version')
  26. readonly_fields = ('id', 'result_id', 'completed', 'command', 'version')
  27. fields = ('start_ts', 'end_ts', *readonly_fields, 'extractor', 'cmd', 'cmd_version', 'pwd', 'created_by', 'status', 'retry_at', 'output')
  28. # exclude = ('id',)
  29. ordering = ('end_ts',)
  30. show_change_link = True
  31. # # classes = ['collapse']
  32. # # list_display_links = ['abid']
  33. def get_parent_object_from_request(self, request):
  34. resolved = resolve(request.path_info)
  35. try:
  36. return self.parent_model.objects.get(pk=resolved.kwargs['object_id'])
  37. except (self.parent_model.DoesNotExist, ValidationError):
  38. return self.parent_model.objects.get(pk=self.parent_model.id_from_abid(resolved.kwargs['object_id']))
  39. @admin.display(
  40. description='Completed',
  41. ordering='end_ts',
  42. )
  43. def completed(self, obj):
  44. return format_html('<p style="white-space: nowrap">{}</p>', obj.end_ts.strftime('%Y-%m-%d %H:%M:%S'))
  45. def result_id(self, obj):
  46. return format_html('<a href="{}"><code style="font-size: 10px">[{}]</code></a>', reverse('admin:core_archiveresult_change', args=(obj.id,)), obj.abid)
  47. def command(self, obj):
  48. return format_html('<small><code>{}</code></small>', " ".join(obj.cmd or []))
  49. def version(self, obj):
  50. return format_html('<small><code>{}</code></small>', obj.cmd_version or '-')
  51. def get_formset(self, request, obj=None, **kwargs):
  52. formset = super().get_formset(request, obj, **kwargs)
  53. snapshot = self.get_parent_object_from_request(request)
  54. # import ipdb; ipdb.set_trace()
  55. # formset.form.base_fields['id'].widget = formset.form.base_fields['id'].hidden_widget()
  56. # default values for new entries
  57. formset.form.base_fields['status'].initial = 'succeeded'
  58. formset.form.base_fields['start_ts'].initial = timezone.now()
  59. formset.form.base_fields['end_ts'].initial = timezone.now()
  60. formset.form.base_fields['cmd_version'].initial = '-'
  61. formset.form.base_fields['pwd'].initial = str(snapshot.link_dir)
  62. formset.form.base_fields['created_by'].initial = request.user
  63. formset.form.base_fields['cmd'].initial = '["-"]'
  64. formset.form.base_fields['output'].initial = 'Manually recorded cmd output...'
  65. if obj is not None:
  66. # hidden values for existing entries and new entries
  67. formset.form.base_fields['start_ts'].widget = formset.form.base_fields['start_ts'].hidden_widget()
  68. formset.form.base_fields['end_ts'].widget = formset.form.base_fields['end_ts'].hidden_widget()
  69. formset.form.base_fields['cmd'].widget = formset.form.base_fields['cmd'].hidden_widget()
  70. formset.form.base_fields['pwd'].widget = formset.form.base_fields['pwd'].hidden_widget()
  71. formset.form.base_fields['created_by'].widget = formset.form.base_fields['created_by'].hidden_widget()
  72. formset.form.base_fields['cmd_version'].widget = formset.form.base_fields['cmd_version'].hidden_widget()
  73. return formset
  74. def get_readonly_fields(self, request, obj=None):
  75. if obj is not None:
  76. return self.readonly_fields
  77. else:
  78. return []
  79. class ArchiveResultAdmin(ABIDModelAdmin):
  80. list_display = ('abid', 'created_by', 'created_at', 'snapshot_info', 'tags_str', 'status', 'extractor', 'cmd_str', 'output_str')
  81. sort_fields = ('abid', 'created_by', 'created_at', 'extractor', 'status')
  82. readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'abid_info', 'output_summary')
  83. search_fields = ('id', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
  84. fields = ('snapshot', 'extractor', 'status', 'retry_at', 'start_ts', 'end_ts', 'created_by', 'pwd', 'cmd_version', 'cmd', 'output', *readonly_fields)
  85. autocomplete_fields = ['snapshot']
  86. list_filter = ('status', 'extractor', 'start_ts', 'cmd_version')
  87. ordering = ['-start_ts']
  88. list_per_page = SERVER_CONFIG.SNAPSHOTS_PER_PAGE
  89. paginator = AccelleratedPaginator
  90. save_on_top = True
  91. actions = ['delete_selected']
  92. class Meta:
  93. verbose_name = 'Archive Result'
  94. verbose_name_plural = 'Archive Results'
  95. def change_view(self, request, object_id, form_url="", extra_context=None):
  96. self.request = request
  97. return super().change_view(request, object_id, form_url, extra_context)
  98. @admin.display(
  99. description='Snapshot Info'
  100. )
  101. def snapshot_info(self, result):
  102. return format_html(
  103. '<a href="/archive/{}/index.html"><b><code>[{}]</code></b> &nbsp; {} &nbsp; {}</a><br/>',
  104. result.snapshot.timestamp,
  105. result.snapshot.abid,
  106. result.snapshot.bookmarked_at.strftime('%Y-%m-%d %H:%M'),
  107. result.snapshot.url[:128],
  108. )
  109. @admin.display(
  110. description='Snapshot Tags'
  111. )
  112. def tags_str(self, result):
  113. return result.snapshot.tags_str()
  114. def cmd_str(self, result):
  115. return format_html(
  116. '<pre>{}</pre>',
  117. ' '.join(result.cmd) if isinstance(result.cmd, list) else str(result.cmd),
  118. )
  119. def output_str(self, result):
  120. return format_html(
  121. '<a href="/archive/{}/{}" class="output-link">↗️</a><pre>{}</pre>',
  122. result.snapshot.timestamp,
  123. result.output if (result.status == 'succeeded') and result.extractor not in ('title', 'archive_org') else 'index.html',
  124. result.output,
  125. )
  126. def output_summary(self, result):
  127. snapshot_dir = Path(DATA_DIR) / str(result.pwd).split('data/', 1)[-1]
  128. output_str = format_html(
  129. '<pre style="display: inline-block">{}</pre><br/>',
  130. result.output,
  131. )
  132. output_str += format_html('<a href="/archive/{}/index.html#all">See result files ...</a><br/><pre><code>', str(result.snapshot.timestamp))
  133. path_from_output_str = (snapshot_dir / (result.output or ''))
  134. output_str += format_html('<i style="padding: 1px">{}</i><b style="padding-right: 20px">/</b><i>{}</i><br/><hr/>', str(snapshot_dir), str(result.output))
  135. if os.access(path_from_output_str, os.R_OK):
  136. root_dir = str(path_from_output_str)
  137. else:
  138. root_dir = str(snapshot_dir)
  139. # print(root_dir, str(list(os.walk(root_dir))))
  140. for root, dirs, files in os.walk(root_dir):
  141. depth = root.replace(root_dir, '').count(os.sep) + 1
  142. if depth > 2:
  143. continue
  144. indent = ' ' * 4 * (depth)
  145. output_str += format_html('<b style="padding: 1px">{}{}/</b><br/>', indent, os.path.basename(root))
  146. indentation_str = ' ' * 4 * (depth + 1)
  147. for filename in sorted(files):
  148. is_hidden = filename.startswith('.')
  149. output_str += format_html('<span style="opacity: {}.2">{}{}</span><br/>', int(not is_hidden), indentation_str, filename.strip())
  150. return output_str + format_html('</code></pre>')
  151. @abx.hookimpl
  152. def register_admin(admin_site):
  153. admin_site.register(ArchiveResult, ArchiveResultAdmin)