admin.py 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748
  1. __package__ = 'archivebox.core'
  2. import os
  3. import json
  4. from io import StringIO
  5. from pathlib import Path
  6. from contextlib import redirect_stdout
  7. from datetime import datetime, timezone
  8. from typing import Dict, Any
  9. from django.contrib import admin
  10. from django.db.models import Count, Q, Prefetch
  11. from django.urls import path, reverse, resolve
  12. from django.utils import timezone
  13. from django.utils.functional import cached_property
  14. from django.utils.html import format_html
  15. from django.utils.safestring import mark_safe
  16. from django.shortcuts import render, redirect
  17. from django.contrib.auth import get_user_model
  18. from django.core.paginator import Paginator
  19. from django.core.exceptions import ValidationError
  20. from django.conf import settings
  21. from django import forms
  22. from signal_webhooks.admin import WebhookAdmin
  23. from signal_webhooks.utils import get_webhook_model
  24. # from plugantic.admin import CustomPlugin
  25. from ..util import htmldecode, urldecode, ansi_to_html
  26. from core.models import Snapshot, ArchiveResult, Tag, SnapshotTag
  27. from core.forms import AddLinkForm
  28. from core.mixins import SearchResultsAdminMixin
  29. from api.models import APIToken
  30. from abid_utils.models import get_or_create_system_user_pk
  31. from abid_utils.admin import ABIDModelAdmin
  32. from index.html import snapshot_icons
  33. from logging_util import printable_filesize
  34. from main import add, remove
  35. from extractors import archive_links
  36. CONFIG = settings.CONFIG
  37. GLOBAL_CONTEXT = {'VERSION': CONFIG.VERSION, 'VERSIONS_AVAILABLE': CONFIG.VERSIONS_AVAILABLE, 'CAN_UPGRADE': CONFIG.CAN_UPGRADE}
  38. # Admin URLs
  39. # /admin/
  40. # /admin/login/
  41. # /admin/core/
  42. # /admin/core/snapshot/
  43. # /admin/core/snapshot/:uuid/
  44. # /admin/core/tag/
  45. # /admin/core/tag/:uuid/
  46. # TODO: https://stackoverflow.com/questions/40760880/add-custom-button-to-django-admin-panel
  47. class ArchiveBoxAdmin(admin.AdminSite):
  48. site_header = 'ArchiveBox'
  49. index_title = 'Links'
  50. site_title = 'Index'
  51. namespace = 'admin'
  52. def get_urls(self):
  53. return [
  54. path('core/snapshot/add/', self.add_view, name='Add'),
  55. ] + super().get_urls()
  56. def add_view(self, request):
  57. if not request.user.is_authenticated:
  58. return redirect(f'/admin/login/?next={request.path}')
  59. request.current_app = self.name
  60. context: Dict[str, Any] = {
  61. **self.each_context(request),
  62. 'title': 'Add URLs',
  63. }
  64. if request.method == 'GET':
  65. context['form'] = AddLinkForm()
  66. elif request.method == 'POST':
  67. form = AddLinkForm(request.POST)
  68. if form.is_valid():
  69. url = form.cleaned_data["url"]
  70. print(f'[+] Adding URL: {url}')
  71. depth = 0 if form.cleaned_data["depth"] == "0" else 1
  72. input_kwargs = {
  73. "urls": url,
  74. "depth": depth,
  75. "update_all": False,
  76. "out_dir": CONFIG.OUTPUT_DIR,
  77. }
  78. add_stdout = StringIO()
  79. with redirect_stdout(add_stdout):
  80. add(**input_kwargs)
  81. print(add_stdout.getvalue())
  82. context.update({
  83. "stdout": ansi_to_html(add_stdout.getvalue().strip()),
  84. "form": AddLinkForm(),
  85. })
  86. else:
  87. context["form"] = form
  88. return render(template_name='add.html', request=request, context=context)
  89. archivebox_admin = ArchiveBoxAdmin()
  90. archivebox_admin.register(get_user_model())
  91. archivebox_admin.disable_action('delete_selected')
  92. # archivebox_admin.register(CustomPlugin)
  93. # patch admin with methods to add data views (implemented by admin_data_views package)
  94. # https://github.com/MrThearMan/django-admin-data-views
  95. # https://mrthearman.github.io/django-admin-data-views/setup/
  96. ############### Additional sections are defined in settings.ADMIN_DATA_VIEWS #########
  97. from admin_data_views.admin import get_app_list, admin_data_index_view, get_admin_data_urls, get_urls
  98. archivebox_admin.get_app_list = get_app_list.__get__(archivebox_admin, ArchiveBoxAdmin)
  99. archivebox_admin.admin_data_index_view = admin_data_index_view.__get__(archivebox_admin, ArchiveBoxAdmin) # type: ignore
  100. archivebox_admin.get_admin_data_urls = get_admin_data_urls.__get__(archivebox_admin, ArchiveBoxAdmin) # type: ignore
  101. archivebox_admin.get_urls = get_urls(archivebox_admin.get_urls).__get__(archivebox_admin, ArchiveBoxAdmin)
  102. class AccelleratedPaginator(Paginator):
  103. """
  104. Accellerated Pagniator ignores DISTINCT when counting total number of rows.
  105. Speeds up SELECT Count(*) on Admin views by >20x.
  106. https://hakibenita.com/optimizing-the-django-admin-paginator
  107. """
  108. @cached_property
  109. def count(self):
  110. if self.object_list._has_filters(): # type: ignore
  111. # fallback to normal count method on filtered queryset
  112. return super().count
  113. else:
  114. # otherwise count total rows in a separate fast query
  115. return self.object_list.model.objects.count()
  116. # Alternative approach for PostgreSQL: fallback count takes > 200ms
  117. # from django.db import connection, transaction, OperationalError
  118. # with transaction.atomic(), connection.cursor() as cursor:
  119. # cursor.execute('SET LOCAL statement_timeout TO 200;')
  120. # try:
  121. # return super().count
  122. # except OperationalError:
  123. # return 9999999999999
  124. class ArchiveResultInline(admin.TabularInline):
  125. name = 'Archive Results Log'
  126. model = ArchiveResult
  127. parent_model = Snapshot
  128. # fk_name = 'snapshot'
  129. extra = 0
  130. sort_fields = ('end_ts', 'extractor', 'output', 'status', 'cmd_version')
  131. readonly_fields = ('result_id', 'completed', 'extractor', 'command', 'version')
  132. fields = ('id', 'start_ts', 'end_ts', *readonly_fields, 'cmd', 'cmd_version', 'pwd', 'created_by', 'status', 'output')
  133. # exclude = ('id',)
  134. ordering = ('end_ts',)
  135. show_change_link = True
  136. # # classes = ['collapse']
  137. # # list_display_links = ['abid']
  138. def get_parent_object_from_request(self, request):
  139. resolved = resolve(request.path_info)
  140. return self.parent_model.objects.get(pk=resolved.kwargs['object_id'])
  141. @admin.display(
  142. description='Completed',
  143. ordering='end_ts',
  144. )
  145. def completed(self, obj):
  146. return format_html('<p style="white-space: nowrap">{}</p>', obj.end_ts.strftime('%Y-%m-%d %H:%M:%S'))
  147. def result_id(self, obj):
  148. return format_html('<a href="{}"><code style="font-size: 10px">[{}]</code></a>', reverse('admin:core_archiveresult_change', args=(obj.id,)), obj.abid)
  149. def command(self, obj):
  150. return format_html('<small><code>{}</code></small>', " ".join(obj.cmd or []))
  151. def version(self, obj):
  152. return format_html('<small><code>{}</code></small>', obj.cmd_version or '-')
  153. def get_formset(self, request, obj=None, **kwargs):
  154. formset = super().get_formset(request, obj, **kwargs)
  155. snapshot = self.get_parent_object_from_request(request)
  156. # import ipdb; ipdb.set_trace()
  157. formset.form.base_fields['id'].widget = formset.form.base_fields['id'].hidden_widget()
  158. # default values for new entries
  159. formset.form.base_fields['status'].initial = 'succeeded'
  160. formset.form.base_fields['start_ts'].initial = timezone.now()
  161. formset.form.base_fields['end_ts'].initial = timezone.now()
  162. formset.form.base_fields['cmd_version'].initial = '-'
  163. formset.form.base_fields['pwd'].initial = str(snapshot.link_dir)
  164. formset.form.base_fields['created_by'].initial = request.user
  165. formset.form.base_fields['cmd'] = forms.JSONField(initial=['-'])
  166. formset.form.base_fields['output'].initial = 'Manually recorded cmd output...'
  167. if obj is not None:
  168. # hidden values for existing entries and new entries
  169. formset.form.base_fields['start_ts'].widget = formset.form.base_fields['start_ts'].hidden_widget()
  170. formset.form.base_fields['end_ts'].widget = formset.form.base_fields['end_ts'].hidden_widget()
  171. formset.form.base_fields['cmd'].widget = formset.form.base_fields['cmd'].hidden_widget()
  172. formset.form.base_fields['pwd'].widget = formset.form.base_fields['pwd'].hidden_widget()
  173. formset.form.base_fields['created_by'].widget = formset.form.base_fields['created_by'].hidden_widget()
  174. formset.form.base_fields['cmd_version'].widget = formset.form.base_fields['cmd_version'].hidden_widget()
  175. return formset
  176. def get_readonly_fields(self, request, obj=None):
  177. if obj is not None:
  178. return self.readonly_fields
  179. else:
  180. return []
  181. class TagInline(admin.TabularInline):
  182. model = Tag.snapshot_set.through # type: ignore
  183. # fk_name = 'snapshot'
  184. fields = ('id', 'tag')
  185. extra = 1
  186. # min_num = 1
  187. max_num = 1000
  188. autocomplete_fields = (
  189. 'tag',
  190. )
  191. from django.contrib.admin.helpers import ActionForm
  192. from django.contrib.admin.widgets import FilteredSelectMultiple
  193. # class AutocompleteTags:
  194. # model = Tag
  195. # search_fields = ['name']
  196. # name = 'name'
  197. # # source_field = 'name'
  198. # remote_field = Tag._meta.get_field('name')
  199. # class AutocompleteTagsAdminStub:
  200. # name = 'admin'
  201. class SnapshotActionForm(ActionForm):
  202. tags = forms.ModelMultipleChoiceField(
  203. queryset=Tag.objects.all(),
  204. required=False,
  205. widget=FilteredSelectMultiple(
  206. 'core_tag__name',
  207. False,
  208. ),
  209. )
  210. # TODO: allow selecting actions for specific extractors? is this useful?
  211. # extractor = forms.ChoiceField(
  212. # choices=ArchiveResult.EXTRACTOR_CHOICES,
  213. # required=False,
  214. # widget=forms.MultileChoiceField(attrs={'class': "form-control"})
  215. # )
  216. def get_abid_info(self, obj):
  217. return format_html(
  218. # URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/>
  219. '''
  220. <a href="{}" style="font-size: 16px; font-family: monospace; user-select: all; border-radius: 8px; background-color: #ddf; padding: 3px 5px; border: 1px solid #aaa; margin-bottom: 8px; display: inline-block; vertical-align: top;">{}</a> &nbsp; &nbsp; <a href="{}" style="color: limegreen; font-size: 0.9em; vertical-align: 1px; font-family: monospace;">📖 API DOCS</a>
  221. <br/><hr/>
  222. <div style="opacity: 0.8">
  223. &nbsp; &nbsp; <small style="opacity: 0.8">.abid: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; <code style="font-size: 10px; user-select: all">{}</code></small><br/>
  224. &nbsp; &nbsp; <small style="opacity: 0.8">.abid.uuid: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; <code style="font-size: 10px; user-select: all">{}</code></small><br/>
  225. &nbsp; &nbsp; <small style="opacity: 0.8">.id: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;<code style="font-size: 10px; user-select: all">{}</code></small><br/>
  226. <hr/>
  227. &nbsp; &nbsp; TS: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;<code style="font-size: 10px;"><b style="user-select: all">{}</b> &nbsp; {}</code> &nbsp; &nbsp; &nbsp;&nbsp; {}: <code style="user-select: all">{}</code><br/>
  228. &nbsp; &nbsp; URI: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; <code style="font-size: 10px; "><b style="user-select: all">{}</b> &nbsp; &nbsp; {}</code> &nbsp;&nbsp; &nbsp; &nbsp; &nbsp;&nbsp; <span style="display:inline-block; vertical-align: -4px; width: 290px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">{}: <code style="user-select: all">{}</code></span>
  229. &nbsp; SALT: &nbsp; <code style="font-size: 10px;"><b style="display:inline-block; user-select: all; width: 50px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">{}</b></code><br/>
  230. &nbsp; &nbsp; SUBTYPE: &nbsp; &nbsp; &nbsp; <code style="font-size: 10px;"><b style="user-select: all">{}</b> &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; {}</code> &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; {}: <code style="user-select: all">{}</code><br/>
  231. &nbsp; &nbsp; RAND: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; <code style="font-size: 10px;"><b style="user-select: all">{}</b> &nbsp; &nbsp; &nbsp; {}</code> &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; {}: <code style="user-select: all">{}</code>
  232. <br/><hr/>
  233. &nbsp; &nbsp; <small style="opacity: 0.5">.old_id: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;<code style="font-size: 10px; user-select: all">{}</code></small><br/>
  234. </div>
  235. ''',
  236. obj.api_url, obj.api_url, obj.api_docs_url,
  237. str(obj.abid),
  238. str(obj.ABID.uuid),
  239. str(obj.id),
  240. obj.ABID.ts, str(obj.ABID.uuid)[0:14], obj.abid_ts_src, obj.abid_values['ts'].isoformat() if isinstance(obj.abid_values['ts'], datetime) else obj.abid_values['ts'],
  241. obj.ABID.uri, str(obj.ABID.uuid)[14:26], obj.abid_uri_src, str(obj.abid_values['uri']),
  242. obj.ABID.uri_salt,
  243. obj.ABID.subtype, str(obj.ABID.uuid)[26:28], obj.abid_subtype_src, str(obj.abid_values['subtype']),
  244. obj.ABID.rand, str(obj.ABID.uuid)[28:36], obj.abid_rand_src, str(obj.abid_values['rand'])[-7:],
  245. str(getattr(obj, 'old_id', '')),
  246. )
  247. @admin.register(Snapshot, site=archivebox_admin)
  248. class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
  249. list_display = ('added', 'title_str', 'files', 'size', 'url_str')
  250. sort_fields = ('title_str', 'url_str', 'added', 'files')
  251. readonly_fields = ('tags_str', 'timestamp', 'admin_actions', 'status_info', 'bookmarked', 'added', 'updated', 'created', 'modified', 'API', 'link_dir')
  252. search_fields = ('id', 'url', 'abid', 'old_id', 'timestamp', 'title', 'tags__name')
  253. list_filter = ('added', 'updated', 'archiveresult__status', 'created_by', 'tags__name')
  254. fields = ('url', 'created_by', 'title', *readonly_fields)
  255. ordering = ['-added']
  256. actions = ['add_tags', 'remove_tags', 'update_titles', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots']
  257. inlines = [TagInline, ArchiveResultInline]
  258. list_per_page = min(max(5, CONFIG.SNAPSHOTS_PER_PAGE), 5000)
  259. action_form = SnapshotActionForm
  260. paginator = AccelleratedPaginator
  261. save_on_top = True
  262. show_full_result_count = False
  263. def changelist_view(self, request, extra_context=None):
  264. extra_context = extra_context or {}
  265. try:
  266. return super().changelist_view(request, extra_context | GLOBAL_CONTEXT)
  267. except Exception as e:
  268. self.message_user(request, f'Error occurred while loading the page: {str(e)} {request.GET} {request.POST}')
  269. return super().changelist_view(request, GLOBAL_CONTEXT)
  270. def change_view(self, request, object_id, form_url="", extra_context=None):
  271. snapshot = None
  272. try:
  273. snapshot = snapshot or Snapshot.objects.get(id=object_id)
  274. except (Snapshot.DoesNotExist, Snapshot.MultipleObjectsReturned, ValidationError):
  275. pass
  276. try:
  277. snapshot = snapshot or Snapshot.objects.get(abid=Snapshot.abid_prefix + object_id.split('_', 1)[-1])
  278. except (Snapshot.DoesNotExist, ValidationError):
  279. pass
  280. try:
  281. snapshot = snapshot or Snapshot.objects.get(old_id=object_id)
  282. except (Snapshot.DoesNotExist, Snapshot.MultipleObjectsReturned, ValidationError):
  283. pass
  284. if snapshot:
  285. object_id = str(snapshot.id)
  286. return super().change_view(
  287. request,
  288. object_id,
  289. form_url,
  290. extra_context=extra_context,
  291. )
  292. def get_urls(self):
  293. urls = super().get_urls()
  294. custom_urls = [
  295. path('grid/', self.admin_site.admin_view(self.grid_view), name='grid')
  296. ]
  297. return custom_urls + urls
  298. # def get_queryset(self, request):
  299. # # tags_qs = SnapshotTag.objects.all().select_related('tag')
  300. # # prefetch = Prefetch('snapshottag_set', queryset=tags_qs)
  301. # self.request = request
  302. # return super().get_queryset(request).prefetch_related('archiveresult_set').distinct() # .annotate(archiveresult_count=Count('archiveresult'))
  303. def tag_list(self, obj):
  304. return ', '.join(tag.name for tag in obj.tags.all())
  305. # TODO: figure out a different way to do this, you cant nest forms so this doenst work
  306. # def action(self, obj):
  307. # # csrfmiddlewaretoken: Wa8UcQ4fD3FJibzxqHN3IYrrjLo4VguWynmbzzcPYoebfVUnDovon7GEMYFRgsh0
  308. # # action: update_snapshots
  309. # # select_across: 0
  310. # # _selected_action: 76d29b26-2a88-439e-877c-a7cca1b72bb3
  311. # return format_html(
  312. # '''
  313. # <form action="/admin/core/snapshot/" method="post" onsubmit="e => e.stopPropagation()">
  314. # <input type="hidden" name="csrfmiddlewaretoken" value="{}">
  315. # <input type="hidden" name="_selected_action" value="{}">
  316. # <button name="update_snapshots">Check</button>
  317. # <button name="update_titles">Pull title + favicon</button>
  318. # <button name="update_snapshots">Update</button>
  319. # <button name="overwrite_snapshots">Re-Archive (overwrite)</button>
  320. # <button name="delete_snapshots">Permanently delete</button>
  321. # </form>
  322. # ''',
  323. # csrf.get_token(self.request),
  324. # obj.pk,
  325. # )
  326. def admin_actions(self, obj):
  327. return format_html(
  328. # URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/>
  329. '''
  330. <a class="btn" style="font-size: 18px; display: inline-block; border-radius: 10px; border: 3px solid #eee; padding: 4px 8px" href="/archive/{}">Summary page ➡️</a> &nbsp; &nbsp;
  331. <a class="btn" style="font-size: 18px; display: inline-block; border-radius: 10px; border: 3px solid #eee; padding: 4px 8px" href="/archive/{}/index.html#all">Result files 📑</a> &nbsp; &nbsp;
  332. <a class="btn" style="font-size: 18px; display: inline-block; border-radius: 10px; border: 3px solid #eee; padding: 4px 8px" href="/admin/core/snapshot/?id__exact={}">Admin actions ⚙️</a>
  333. ''',
  334. obj.timestamp,
  335. obj.timestamp,
  336. obj.pk,
  337. )
  338. def status_info(self, obj):
  339. return format_html(
  340. # URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/>
  341. '''
  342. Archived: {} ({} files {}) &nbsp; &nbsp;
  343. Favicon: <img src="{}" style="height: 20px"/> &nbsp; &nbsp;
  344. Status code: {} &nbsp; &nbsp;<br/>
  345. Server: {} &nbsp; &nbsp;
  346. Content type: {} &nbsp; &nbsp;
  347. Extension: {} &nbsp; &nbsp;
  348. ''',
  349. '✅' if obj.is_archived else '❌',
  350. obj.num_outputs,
  351. self.size(obj) or '0kb',
  352. f'/archive/{obj.timestamp}/favicon.ico',
  353. obj.status_code or '-',
  354. obj.headers and obj.headers.get('Server') or '-',
  355. obj.headers and obj.headers.get('Content-Type') or '-',
  356. obj.extension or '-',
  357. )
  358. def API(self, obj):
  359. try:
  360. return get_abid_info(self, obj)
  361. except Exception as e:
  362. return str(e)
  363. @admin.display(
  364. description='Title',
  365. ordering='title',
  366. )
  367. def title_str(self, obj):
  368. tags = ''.join(
  369. format_html('<a href="/admin/core/snapshot/?tags__id__exact={}"><span class="tag">{}</span></a> ', tag.pk, tag.name)
  370. for tag in obj.tags.all()
  371. if str(tag.name).strip()
  372. )
  373. return format_html(
  374. '<a href="/{}">'
  375. '<img src="/{}/favicon.ico" class="favicon" onerror="this.remove()">'
  376. '</a>'
  377. '<a href="/{}/index.html">'
  378. '<b class="status-{}">{}</b>'
  379. '</a>',
  380. obj.archive_path,
  381. obj.archive_path,
  382. obj.archive_path,
  383. 'fetched' if obj.latest_title or obj.title else 'pending',
  384. urldecode(htmldecode(obj.latest_title or obj.title or ''))[:128] or 'Pending...'
  385. ) + mark_safe(f' <span class="tags">{tags}</span>')
  386. @admin.display(
  387. description='Files Saved',
  388. # ordering='archiveresult_count',
  389. )
  390. def files(self, obj):
  391. return snapshot_icons(obj)
  392. @admin.display(
  393. # ordering='archiveresult_count'
  394. )
  395. def size(self, obj):
  396. archive_size = (Path(obj.link_dir) / 'index.html').exists() and obj.archive_size
  397. if archive_size:
  398. size_txt = printable_filesize(archive_size)
  399. if archive_size > 52428800:
  400. size_txt = mark_safe(f'<b>{size_txt}</b>')
  401. else:
  402. size_txt = mark_safe('<span style="opacity: 0.3">...</span>')
  403. return format_html(
  404. '<a href="/{}" title="View all files">{}</a>',
  405. obj.archive_path,
  406. size_txt,
  407. )
  408. @admin.display(
  409. description='Original URL',
  410. ordering='url',
  411. )
  412. def url_str(self, obj):
  413. return format_html(
  414. '<a href="{}"><code style="user-select: all;">{}</code></a>',
  415. obj.url,
  416. obj.url[:128],
  417. )
  418. def grid_view(self, request, extra_context=None):
  419. # cl = self.get_changelist_instance(request)
  420. # Save before monkey patching to restore for changelist list view
  421. saved_change_list_template = self.change_list_template
  422. saved_list_per_page = self.list_per_page
  423. saved_list_max_show_all = self.list_max_show_all
  424. # Monkey patch here plus core_tags.py
  425. self.change_list_template = 'private_index_grid.html'
  426. self.list_per_page = CONFIG.SNAPSHOTS_PER_PAGE
  427. self.list_max_show_all = self.list_per_page
  428. # Call monkey patched view
  429. rendered_response = self.changelist_view(request, extra_context=extra_context)
  430. # Restore values
  431. self.change_list_template = saved_change_list_template
  432. self.list_per_page = saved_list_per_page
  433. self.list_max_show_all = saved_list_max_show_all
  434. return rendered_response
  435. # for debugging, uncomment this to print all requests:
  436. # def changelist_view(self, request, extra_context=None):
  437. # print('[*] Got request', request.method, request.POST)
  438. # return super().changelist_view(request, extra_context=None)
  439. @admin.action(
  440. description="Pull"
  441. )
  442. def update_snapshots(self, request, queryset):
  443. archive_links([
  444. snapshot.as_link()
  445. for snapshot in queryset
  446. ], out_dir=CONFIG.OUTPUT_DIR)
  447. @admin.action(
  448. description="⬇️ Title"
  449. )
  450. def update_titles(self, request, queryset):
  451. archive_links([
  452. snapshot.as_link()
  453. for snapshot in queryset
  454. ], overwrite=True, methods=('title','favicon'), out_dir=CONFIG.OUTPUT_DIR)
  455. @admin.action(
  456. description="Re-Snapshot"
  457. )
  458. def resnapshot_snapshot(self, request, queryset):
  459. for snapshot in queryset:
  460. timestamp = datetime.now(timezone.utc).isoformat('T', 'seconds')
  461. new_url = snapshot.url.split('#')[0] + f'#{timestamp}'
  462. add(new_url, tag=snapshot.tags_str())
  463. @admin.action(
  464. description="Reset"
  465. )
  466. def overwrite_snapshots(self, request, queryset):
  467. archive_links([
  468. snapshot.as_link()
  469. for snapshot in queryset
  470. ], overwrite=True, out_dir=CONFIG.OUTPUT_DIR)
  471. @admin.action(
  472. description="Delete"
  473. )
  474. def delete_snapshots(self, request, queryset):
  475. remove(snapshots=queryset, yes=True, delete=True, out_dir=CONFIG.OUTPUT_DIR)
  476. @admin.action(
  477. description="+"
  478. )
  479. def add_tags(self, request, queryset):
  480. tags = request.POST.getlist('tags')
  481. print('[+] Adding tags', tags, 'to Snapshots', queryset)
  482. for obj in queryset:
  483. obj.tags.add(*tags)
  484. @admin.action(
  485. description="–"
  486. )
  487. def remove_tags(self, request, queryset):
  488. tags = request.POST.getlist('tags')
  489. print('[-] Removing tags', tags, 'to Snapshots', queryset)
  490. for obj in queryset:
  491. obj.tags.remove(*tags)
  492. # @admin.register(SnapshotTag, site=archivebox_admin)
  493. # class SnapshotTagAdmin(ABIDModelAdmin):
  494. # list_display = ('id', 'snapshot', 'tag')
  495. # sort_fields = ('id', 'snapshot', 'tag')
  496. # search_fields = ('id', 'snapshot_id', 'tag_id')
  497. # fields = ('snapshot', 'id')
  498. # actions = ['delete_selected']
  499. # ordering = ['-id']
  500. # def API(self, obj):
  501. # return get_abid_info(self, obj)
  502. @admin.register(Tag, site=archivebox_admin)
  503. class TagAdmin(ABIDModelAdmin):
  504. list_display = ('created', 'created_by', 'abid', 'name', 'num_snapshots', 'snapshots')
  505. sort_fields = ('name', 'slug', 'abid', 'created_by', 'created')
  506. readonly_fields = ('slug', 'abid', 'created', 'modified', 'API', 'num_snapshots', 'snapshots')
  507. search_fields = ('abid', 'name', 'slug')
  508. fields = ('name', 'created_by', *readonly_fields)
  509. actions = ['delete_selected']
  510. ordering = ['-created']
  511. paginator = AccelleratedPaginator
  512. def API(self, obj):
  513. try:
  514. return get_abid_info(self, obj)
  515. except Exception as e:
  516. return str(e)
  517. def num_snapshots(self, tag):
  518. return format_html(
  519. '<a href="/admin/core/snapshot/?tags__id__exact={}">{} total</a>',
  520. tag.id,
  521. tag.snapshot_set.count(),
  522. )
  523. def snapshots(self, tag):
  524. total_count = tag.snapshot_set.count()
  525. return mark_safe('<br/>'.join(
  526. format_html(
  527. '<code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a></code> {}',
  528. snap.pk,
  529. snap.updated.strftime('%Y-%m-%d %H:%M') if snap.updated else 'pending...',
  530. snap.url[:64],
  531. )
  532. for snap in tag.snapshot_set.order_by('-updated')[:10]
  533. ) + (f'<br/><a href="/admin/core/snapshot/?tags__id__exact={tag.id}">and {total_count-10} more...<a>' if tag.snapshot_set.count() > 10 else ''))
  534. @admin.register(ArchiveResult, site=archivebox_admin)
  535. class ArchiveResultAdmin(ABIDModelAdmin):
  536. list_display = ('start_ts', 'snapshot_info', 'tags_str', 'extractor', 'cmd_str', 'status', 'output_str')
  537. sort_fields = ('start_ts', 'extractor', 'status')
  538. readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created', 'modified', 'API', 'output_summary')
  539. search_fields = ('id', 'old_id', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
  540. fields = ('snapshot', 'extractor', 'status', 'output', 'pwd', 'start_ts', 'end_ts', 'created_by', 'cmd_version', 'cmd', *readonly_fields)
  541. autocomplete_fields = ['snapshot']
  542. list_filter = ('status', 'extractor', 'start_ts', 'cmd_version')
  543. ordering = ['-start_ts']
  544. list_per_page = CONFIG.SNAPSHOTS_PER_PAGE
  545. paginator = AccelleratedPaginator
  546. @admin.display(
  547. description='Snapshot Info'
  548. )
  549. def snapshot_info(self, result):
  550. return format_html(
  551. '<a href="/archive/{}/index.html"><b><code>[{}]</code></b> &nbsp; {} &nbsp; {}</a><br/>',
  552. result.snapshot.timestamp,
  553. result.snapshot.abid,
  554. result.snapshot.added.strftime('%Y-%m-%d %H:%M'),
  555. result.snapshot.url[:128],
  556. )
  557. def API(self, obj):
  558. try:
  559. return get_abid_info(self, obj)
  560. except Exception as e:
  561. raise e
  562. return str(e)
  563. @admin.display(
  564. description='Snapshot Tags'
  565. )
  566. def tags_str(self, result):
  567. return result.snapshot.tags_str()
  568. def cmd_str(self, result):
  569. return format_html(
  570. '<pre>{}</pre>',
  571. ' '.join(result.cmd) if isinstance(result.cmd, list) else str(result.cmd),
  572. )
  573. def output_str(self, result):
  574. return format_html(
  575. '<a href="/archive/{}/{}" class="output-link">↗️</a><pre>{}</pre>',
  576. result.snapshot.timestamp,
  577. result.output if (result.status == 'succeeded') and result.extractor not in ('title', 'archive_org') else 'index.html',
  578. result.output,
  579. )
  580. def output_summary(self, result):
  581. snapshot_dir = Path(CONFIG.OUTPUT_DIR) / str(result.pwd).split('data/', 1)[-1]
  582. output_str = format_html(
  583. '<pre style="display: inline-block">{}</pre><br/>',
  584. result.output,
  585. )
  586. output_str += format_html('<a href="/archive/{}/index.html#all">See result files ...</a><br/><pre><code>', str(result.snapshot.timestamp))
  587. path_from_output_str = (snapshot_dir / result.output)
  588. output_str += format_html('<i style="padding: 1px">{}</i><b style="padding-right: 20px">/</b><i>{}</i><br/><hr/>', str(snapshot_dir), str(result.output))
  589. if path_from_output_str.exists():
  590. root_dir = str(path_from_output_str)
  591. else:
  592. root_dir = str(snapshot_dir)
  593. # print(root_dir, str(list(os.walk(root_dir))))
  594. for root, dirs, files in os.walk(root_dir):
  595. depth = root.replace(root_dir, '').count(os.sep) + 1
  596. if depth > 2:
  597. continue
  598. indent = ' ' * 4 * (depth)
  599. output_str += format_html('<b style="padding: 1px">{}{}/</b><br/>', indent, os.path.basename(root))
  600. indentation_str = ' ' * 4 * (depth + 1)
  601. for filename in sorted(files):
  602. is_hidden = filename.startswith('.')
  603. output_str += format_html('<span style="opacity: {}.2">{}{}</span><br/>', int(not is_hidden), indentation_str, filename.strip())
  604. return output_str + format_html('</code></pre>')
  605. @admin.register(APIToken, site=archivebox_admin)
  606. class APITokenAdmin(ABIDModelAdmin):
  607. list_display = ('created', 'abid', 'created_by', 'token_redacted', 'expires')
  608. sort_fields = ('abid', 'created', 'created_by', 'expires')
  609. readonly_fields = ('abid', 'created')
  610. search_fields = ('id', 'abid', 'created_by__username', 'token')
  611. fields = ('created_by', 'token', 'expires', *readonly_fields)
  612. list_filter = ('created_by',)
  613. ordering = ['-created']
  614. list_per_page = 100
  615. @admin.register(get_webhook_model(), site=archivebox_admin)
  616. class CustomWebhookAdmin(WebhookAdmin, ABIDModelAdmin):
  617. list_display = ('created', 'created_by', 'abid', *WebhookAdmin.list_display)
  618. sort_fields = ('created', 'created_by', 'abid', 'referenced_model', 'endpoint', 'last_success', 'last_error')
  619. readonly_fields = ('abid', 'created', *WebhookAdmin.readonly_fields)