admin.py 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788
  1. __package__ = 'archivebox.core'
  2. import os
  3. import json
  4. from io import StringIO
  5. from pathlib import Path
  6. from contextlib import redirect_stdout
  7. from datetime import datetime, timezone
  8. from typing import Dict, Any
  9. from django.contrib import admin
  10. from django.db.models import Count, Q, Prefetch
  11. from django.urls import path, reverse, resolve
  12. from django.utils import timezone
  13. from django.utils.functional import cached_property
  14. from django.utils.html import format_html
  15. from django.utils.safestring import mark_safe
  16. from django.shortcuts import render, redirect
  17. from django.contrib.auth import get_user_model
  18. from django.contrib.auth.admin import UserAdmin
  19. from django.core.paginator import Paginator
  20. from django.core.exceptions import ValidationError
  21. from django.conf import settings
  22. from django import forms
  23. from signal_webhooks.admin import WebhookAdmin
  24. from signal_webhooks.utils import get_webhook_model
  25. # from plugantic.admin import CustomPlugin
  26. from ..util import htmldecode, urldecode, ansi_to_html
  27. from core.models import Snapshot, ArchiveResult, Tag, SnapshotTag
  28. from core.forms import AddLinkForm
  29. from core.mixins import SearchResultsAdminMixin
  30. from api.models import APIToken
  31. from api.auth import get_or_create_api_token
  32. from abid_utils.models import get_or_create_system_user_pk
  33. from abid_utils.admin import ABIDModelAdmin
  34. from index.html import snapshot_icons
  35. from logging_util import printable_filesize
  36. from main import add, remove
  37. from extractors import archive_links
  38. CONFIG = settings.CONFIG
  39. GLOBAL_CONTEXT = {'VERSION': CONFIG.VERSION, 'VERSIONS_AVAILABLE': CONFIG.VERSIONS_AVAILABLE, 'CAN_UPGRADE': CONFIG.CAN_UPGRADE}
  40. # Admin URLs
  41. # /admin/
  42. # /admin/login/
  43. # /admin/core/
  44. # /admin/core/snapshot/
  45. # /admin/core/snapshot/:uuid/
  46. # /admin/core/tag/
  47. # /admin/core/tag/:uuid/
  48. # TODO: https://stackoverflow.com/questions/40760880/add-custom-button-to-django-admin-panel
  49. class ArchiveBoxAdmin(admin.AdminSite):
  50. site_header = 'ArchiveBox'
  51. index_title = 'Links'
  52. site_title = 'Index'
  53. namespace = 'admin'
  54. def get_urls(self):
  55. return [
  56. path('core/snapshot/add/', self.add_view, name='Add'),
  57. ] + super().get_urls()
  58. def add_view(self, request):
  59. if not request.user.is_authenticated:
  60. return redirect(f'/admin/login/?next={request.path}')
  61. request.current_app = self.name
  62. context: Dict[str, Any] = {
  63. **self.each_context(request),
  64. 'title': 'Add URLs',
  65. }
  66. if request.method == 'GET':
  67. context['form'] = AddLinkForm()
  68. elif request.method == 'POST':
  69. form = AddLinkForm(request.POST)
  70. if form.is_valid():
  71. url = form.cleaned_data["url"]
  72. print(f'[+] Adding URL: {url}')
  73. depth = 0 if form.cleaned_data["depth"] == "0" else 1
  74. input_kwargs = {
  75. "urls": url,
  76. "depth": depth,
  77. "update_all": False,
  78. "out_dir": CONFIG.OUTPUT_DIR,
  79. }
  80. add_stdout = StringIO()
  81. with redirect_stdout(add_stdout):
  82. add(**input_kwargs)
  83. print(add_stdout.getvalue())
  84. context.update({
  85. "stdout": ansi_to_html(add_stdout.getvalue().strip()),
  86. "form": AddLinkForm(),
  87. })
  88. else:
  89. context["form"] = form
  90. return render(template_name='add.html', request=request, context=context)
  91. class CustomUserAdmin(UserAdmin):
  92. sort_fields = ['id', 'email', 'username', 'is_superuser', 'last_login', 'date_joined']
  93. list_display = ['username', 'id', 'email', 'is_superuser', 'last_login', 'date_joined']
  94. readonly_fields = ('snapshot_set', 'archiveresult_set', 'tag_set', 'apitoken_set', 'outboundwebhook_set')
  95. fieldsets = [*UserAdmin.fieldsets, ('Data', {'fields': readonly_fields})]
  96. @admin.display(description='Snapshots')
  97. def snapshot_set(self, obj):
  98. total_count = obj.snapshot_set.count()
  99. return mark_safe('<br/>'.join(
  100. format_html(
  101. '<code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a></code> <b>📅 {}</b> {}',
  102. snap.pk,
  103. snap.abid,
  104. snap.updated.strftime('%Y-%m-%d %H:%M') if snap.updated else 'pending...',
  105. snap.url[:64],
  106. )
  107. for snap in obj.snapshot_set.order_by('-modified')[:10]
  108. ) + f'<br/><a href="/admin/core/snapshot/?created_by__id__exact={obj.pk}">{total_count} total records...<a>')
  109. @admin.display(description='Archive Result Logs')
  110. def archiveresult_set(self, obj):
  111. total_count = obj.archiveresult_set.count()
  112. return mark_safe('<br/>'.join(
  113. format_html(
  114. '<code><a href="/admin/core/archiveresult/{}/change"><b>[{}]</b></a></code> <b>📅 {}</b> <b>📄 {}</b> {}',
  115. result.pk,
  116. result.abid,
  117. result.snapshot.updated.strftime('%Y-%m-%d %H:%M') if result.snapshot.updated else 'pending...',
  118. result.extractor,
  119. result.snapshot.url[:64],
  120. )
  121. for result in obj.archiveresult_set.order_by('-modified')[:10]
  122. ) + f'<br/><a href="/admin/core/archiveresult/?created_by__id__exact={obj.pk}">{total_count} total records...<a>')
  123. @admin.display(description='Tags')
  124. def tag_set(self, obj):
  125. total_count = obj.tag_set.count()
  126. return mark_safe(', '.join(
  127. format_html(
  128. '<code><a href="/admin/core/tag/{}/change"><b>{}</b></a></code>',
  129. tag.pk,
  130. tag.name,
  131. )
  132. for tag in obj.tag_set.order_by('-modified')[:10]
  133. ) + f'<br/><a href="/admin/core/tag/?created_by__id__exact={obj.pk}">{total_count} total records...<a>')
  134. @admin.display(description='API Tokens')
  135. def apitoken_set(self, obj):
  136. total_count = obj.apitoken_set.count()
  137. return mark_safe('<br/>'.join(
  138. format_html(
  139. '<code><a href="/admin/api/apitoken/{}/change"><b>[{}]</b></a></code> {} (expires {})',
  140. apitoken.pk,
  141. apitoken.abid,
  142. apitoken.token_redacted[:64],
  143. apitoken.expires,
  144. )
  145. for apitoken in obj.apitoken_set.order_by('-modified')[:10]
  146. ) + f'<br/><a href="/admin/api/apitoken/?created_by__id__exact={obj.pk}">{total_count} total records...<a>')
  147. @admin.display(description='API Outbound Webhooks')
  148. def outboundwebhook_set(self, obj):
  149. total_count = obj.outboundwebhook_set.count()
  150. return mark_safe('<br/>'.join(
  151. format_html(
  152. '<code><a href="/admin/api/outboundwebhook/{}/change"><b>[{}]</b></a></code> {} -> {}',
  153. outboundwebhook.pk,
  154. outboundwebhook.abid,
  155. outboundwebhook.referenced_model,
  156. outboundwebhook.endpoint,
  157. )
  158. for outboundwebhook in obj.outboundwebhook_set.order_by('-modified')[:10]
  159. ) + f'<br/><a href="/admin/api/outboundwebhook/?created_by__id__exact={obj.pk}">{total_count} total records...<a>')
  160. archivebox_admin = ArchiveBoxAdmin()
  161. archivebox_admin.register(get_user_model(), CustomUserAdmin)
  162. archivebox_admin.disable_action('delete_selected')
  163. # archivebox_admin.register(CustomPlugin)
  164. # patch admin with methods to add data views (implemented by admin_data_views package)
  165. # https://github.com/MrThearMan/django-admin-data-views
  166. # https://mrthearman.github.io/django-admin-data-views/setup/
  167. ############### Additional sections are defined in settings.ADMIN_DATA_VIEWS #########
  168. from admin_data_views.admin import get_app_list, admin_data_index_view, get_admin_data_urls, get_urls
  169. archivebox_admin.get_app_list = get_app_list.__get__(archivebox_admin, ArchiveBoxAdmin)
  170. archivebox_admin.admin_data_index_view = admin_data_index_view.__get__(archivebox_admin, ArchiveBoxAdmin) # type: ignore
  171. archivebox_admin.get_admin_data_urls = get_admin_data_urls.__get__(archivebox_admin, ArchiveBoxAdmin) # type: ignore
  172. archivebox_admin.get_urls = get_urls(archivebox_admin.get_urls).__get__(archivebox_admin, ArchiveBoxAdmin)
  173. class AccelleratedPaginator(Paginator):
  174. """
  175. Accellerated Pagniator ignores DISTINCT when counting total number of rows.
  176. Speeds up SELECT Count(*) on Admin views by >20x.
  177. https://hakibenita.com/optimizing-the-django-admin-paginator
  178. """
  179. @cached_property
  180. def count(self):
  181. if self.object_list._has_filters(): # type: ignore
  182. # fallback to normal count method on filtered queryset
  183. return super().count
  184. else:
  185. # otherwise count total rows in a separate fast query
  186. return self.object_list.model.objects.count()
  187. # Alternative approach for PostgreSQL: fallback count takes > 200ms
  188. # from django.db import connection, transaction, OperationalError
  189. # with transaction.atomic(), connection.cursor() as cursor:
  190. # cursor.execute('SET LOCAL statement_timeout TO 200;')
  191. # try:
  192. # return super().count
  193. # except OperationalError:
  194. # return 9999999999999
  195. class ArchiveResultInline(admin.TabularInline):
  196. name = 'Archive Results Log'
  197. model = ArchiveResult
  198. parent_model = Snapshot
  199. # fk_name = 'snapshot'
  200. extra = 0
  201. sort_fields = ('end_ts', 'extractor', 'output', 'status', 'cmd_version')
  202. readonly_fields = ('result_id', 'completed', 'extractor', 'command', 'version')
  203. fields = ('id', 'start_ts', 'end_ts', *readonly_fields, 'cmd', 'cmd_version', 'pwd', 'created_by', 'status', 'output')
  204. # exclude = ('id',)
  205. ordering = ('end_ts',)
  206. show_change_link = True
  207. # # classes = ['collapse']
  208. # # list_display_links = ['abid']
  209. def get_parent_object_from_request(self, request):
  210. resolved = resolve(request.path_info)
  211. return self.parent_model.objects.get(pk=resolved.kwargs['object_id'])
  212. @admin.display(
  213. description='Completed',
  214. ordering='end_ts',
  215. )
  216. def completed(self, obj):
  217. return format_html('<p style="white-space: nowrap">{}</p>', obj.end_ts.strftime('%Y-%m-%d %H:%M:%S'))
  218. def result_id(self, obj):
  219. return format_html('<a href="{}"><code style="font-size: 10px">[{}]</code></a>', reverse('admin:core_archiveresult_change', args=(obj.id,)), obj.abid)
  220. def command(self, obj):
  221. return format_html('<small><code>{}</code></small>', " ".join(obj.cmd or []))
  222. def version(self, obj):
  223. return format_html('<small><code>{}</code></small>', obj.cmd_version or '-')
  224. def get_formset(self, request, obj=None, **kwargs):
  225. formset = super().get_formset(request, obj, **kwargs)
  226. snapshot = self.get_parent_object_from_request(request)
  227. # import ipdb; ipdb.set_trace()
  228. formset.form.base_fields['id'].widget = formset.form.base_fields['id'].hidden_widget()
  229. # default values for new entries
  230. formset.form.base_fields['status'].initial = 'succeeded'
  231. formset.form.base_fields['start_ts'].initial = timezone.now()
  232. formset.form.base_fields['end_ts'].initial = timezone.now()
  233. formset.form.base_fields['cmd_version'].initial = '-'
  234. formset.form.base_fields['pwd'].initial = str(snapshot.link_dir)
  235. formset.form.base_fields['created_by'].initial = request.user
  236. formset.form.base_fields['cmd'] = forms.JSONField(initial=['-'])
  237. formset.form.base_fields['output'].initial = 'Manually recorded cmd output...'
  238. if obj is not None:
  239. # hidden values for existing entries and new entries
  240. formset.form.base_fields['start_ts'].widget = formset.form.base_fields['start_ts'].hidden_widget()
  241. formset.form.base_fields['end_ts'].widget = formset.form.base_fields['end_ts'].hidden_widget()
  242. formset.form.base_fields['cmd'].widget = formset.form.base_fields['cmd'].hidden_widget()
  243. formset.form.base_fields['pwd'].widget = formset.form.base_fields['pwd'].hidden_widget()
  244. formset.form.base_fields['created_by'].widget = formset.form.base_fields['created_by'].hidden_widget()
  245. formset.form.base_fields['cmd_version'].widget = formset.form.base_fields['cmd_version'].hidden_widget()
  246. return formset
  247. def get_readonly_fields(self, request, obj=None):
  248. if obj is not None:
  249. return self.readonly_fields
  250. else:
  251. return []
  252. class TagInline(admin.TabularInline):
  253. model = Tag.snapshot_set.through # type: ignore
  254. # fk_name = 'snapshot'
  255. fields = ('id', 'tag')
  256. extra = 1
  257. # min_num = 1
  258. max_num = 1000
  259. autocomplete_fields = (
  260. 'tag',
  261. )
  262. from django.contrib.admin.helpers import ActionForm
  263. from django.contrib.admin.widgets import FilteredSelectMultiple
  264. # class AutocompleteTags:
  265. # model = Tag
  266. # search_fields = ['name']
  267. # name = 'name'
  268. # # source_field = 'name'
  269. # remote_field = Tag._meta.get_field('name')
  270. # class AutocompleteTagsAdminStub:
  271. # name = 'admin'
  272. class SnapshotActionForm(ActionForm):
  273. tags = forms.ModelMultipleChoiceField(
  274. label='Edit tags',
  275. queryset=Tag.objects.all(),
  276. required=False,
  277. widget=FilteredSelectMultiple(
  278. 'core_tag__name',
  279. False,
  280. ),
  281. )
  282. # TODO: allow selecting actions for specific extractors? is this useful?
  283. # extractor = forms.ChoiceField(
  284. # choices=ArchiveResult.EXTRACTOR_CHOICES,
  285. # required=False,
  286. # widget=forms.MultileChoiceField(attrs={'class': "form-control"})
  287. # )
  288. @admin.register(Snapshot, site=archivebox_admin)
  289. class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
  290. list_display = ('added', 'title_str', 'files', 'size', 'url_str')
  291. sort_fields = ('title_str', 'url_str', 'added')
  292. readonly_fields = ('tags_str', 'timestamp', 'admin_actions', 'status_info', 'bookmarked', 'added', 'updated', 'created', 'modified', 'API', 'link_dir')
  293. search_fields = ('id', 'url', 'abid', 'old_id', 'timestamp', 'title', 'tags__name')
  294. list_filter = ('added', 'updated', 'archiveresult__status', 'created_by', 'tags__name')
  295. fields = ('url', 'created_by', 'title', *readonly_fields)
  296. ordering = ['-added']
  297. actions = ['add_tags', 'remove_tags', 'update_titles', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots']
  298. inlines = [TagInline, ArchiveResultInline]
  299. list_per_page = min(max(5, CONFIG.SNAPSHOTS_PER_PAGE), 5000)
  300. action_form = SnapshotActionForm
  301. paginator = AccelleratedPaginator
  302. save_on_top = True
  303. show_full_result_count = False
  304. def changelist_view(self, request, extra_context=None):
  305. self.request = request
  306. extra_context = extra_context or {}
  307. try:
  308. return super().changelist_view(request, extra_context | GLOBAL_CONTEXT)
  309. except Exception as e:
  310. self.message_user(request, f'Error occurred while loading the page: {str(e)} {request.GET} {request.POST}')
  311. return super().changelist_view(request, GLOBAL_CONTEXT)
  312. def change_view(self, request, object_id, form_url="", extra_context=None):
  313. self.request = request
  314. snapshot = None
  315. try:
  316. snapshot = snapshot or Snapshot.objects.get(id=object_id)
  317. except (Snapshot.DoesNotExist, Snapshot.MultipleObjectsReturned, ValidationError):
  318. pass
  319. try:
  320. snapshot = snapshot or Snapshot.objects.get(abid=Snapshot.abid_prefix + object_id.split('_', 1)[-1])
  321. except (Snapshot.DoesNotExist, ValidationError):
  322. pass
  323. try:
  324. snapshot = snapshot or Snapshot.objects.get(old_id=object_id)
  325. except (Snapshot.DoesNotExist, Snapshot.MultipleObjectsReturned, ValidationError):
  326. pass
  327. if snapshot:
  328. object_id = str(snapshot.id)
  329. return super().change_view(
  330. request,
  331. object_id,
  332. form_url,
  333. extra_context=extra_context,
  334. )
  335. def get_urls(self):
  336. urls = super().get_urls()
  337. custom_urls = [
  338. path('grid/', self.admin_site.admin_view(self.grid_view), name='grid')
  339. ]
  340. return custom_urls + urls
  341. # def get_queryset(self, request):
  342. # # tags_qs = SnapshotTag.objects.all().select_related('tag')
  343. # # prefetch = Prefetch('snapshottag_set', queryset=tags_qs)
  344. # self.request = request
  345. # return super().get_queryset(request).prefetch_related('archiveresult_set').distinct() # .annotate(archiveresult_count=Count('archiveresult'))
  346. def tag_list(self, obj):
  347. return ', '.join(tag.name for tag in obj.tags.all())
  348. # TODO: figure out a different way to do this, you cant nest forms so this doenst work
  349. # def action(self, obj):
  350. # # csrfmiddlewaretoken: Wa8UcQ4fD3FJibzxqHN3IYrrjLo4VguWynmbzzcPYoebfVUnDovon7GEMYFRgsh0
  351. # # action: update_snapshots
  352. # # select_across: 0
  353. # # _selected_action: 76d29b26-2a88-439e-877c-a7cca1b72bb3
  354. # return format_html(
  355. # '''
  356. # <form action="/admin/core/snapshot/" method="post" onsubmit="e => e.stopPropagation()">
  357. # <input type="hidden" name="csrfmiddlewaretoken" value="{}">
  358. # <input type="hidden" name="_selected_action" value="{}">
  359. # <button name="update_snapshots">Check</button>
  360. # <button name="update_titles">Pull title + favicon</button>
  361. # <button name="update_snapshots">Update</button>
  362. # <button name="overwrite_snapshots">Re-Archive (overwrite)</button>
  363. # <button name="delete_snapshots">Permanently delete</button>
  364. # </form>
  365. # ''',
  366. # csrf.get_token(self.request),
  367. # obj.pk,
  368. # )
  369. def admin_actions(self, obj):
  370. return format_html(
  371. # URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/>
  372. '''
  373. <a class="btn" style="font-size: 18px; display: inline-block; border-radius: 10px; border: 3px solid #eee; padding: 4px 8px" href="/archive/{}">Summary page ➡️</a> &nbsp; &nbsp;
  374. <a class="btn" style="font-size: 18px; display: inline-block; border-radius: 10px; border: 3px solid #eee; padding: 4px 8px" href="/archive/{}/index.html#all">Result files 📑</a> &nbsp; &nbsp;
  375. <a class="btn" style="font-size: 18px; display: inline-block; border-radius: 10px; border: 3px solid #eee; padding: 4px 8px" href="/admin/core/snapshot/?id__exact={}">Admin actions ⚙️</a>
  376. ''',
  377. obj.timestamp,
  378. obj.timestamp,
  379. obj.pk,
  380. )
  381. def status_info(self, obj):
  382. return format_html(
  383. # URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/>
  384. '''
  385. Archived: {} ({} files {}) &nbsp; &nbsp;
  386. Favicon: <img src="{}" style="height: 20px"/> &nbsp; &nbsp;
  387. Status code: {} &nbsp; &nbsp;<br/>
  388. Server: {} &nbsp; &nbsp;
  389. Content type: {} &nbsp; &nbsp;
  390. Extension: {} &nbsp; &nbsp;
  391. ''',
  392. '✅' if obj.is_archived else '❌',
  393. obj.num_outputs,
  394. self.size(obj) or '0kb',
  395. f'/archive/{obj.timestamp}/favicon.ico',
  396. obj.status_code or '-',
  397. obj.headers and obj.headers.get('Server') or '-',
  398. obj.headers and obj.headers.get('Content-Type') or '-',
  399. obj.extension or '-',
  400. )
  401. @admin.display(
  402. description='Title',
  403. ordering='title',
  404. )
  405. def title_str(self, obj):
  406. tags = ''.join(
  407. format_html('<a href="/admin/core/snapshot/?tags__id__exact={}"><span class="tag">{}</span></a> ', tag.pk, tag.name)
  408. for tag in obj.tags.all()
  409. if str(tag.name).strip()
  410. )
  411. return format_html(
  412. '<a href="/{}">'
  413. '<img src="/{}/favicon.ico" class="favicon" onerror="this.remove()">'
  414. '</a>'
  415. '<a href="/{}/index.html">'
  416. '<b class="status-{}">{}</b>'
  417. '</a>',
  418. obj.archive_path,
  419. obj.archive_path,
  420. obj.archive_path,
  421. 'fetched' if obj.latest_title or obj.title else 'pending',
  422. urldecode(htmldecode(obj.latest_title or obj.title or ''))[:128] or 'Pending...'
  423. ) + mark_safe(f' <span class="tags">{tags}</span>')
  424. @admin.display(
  425. description='Files Saved',
  426. # ordering='archiveresult_count',
  427. )
  428. def files(self, obj):
  429. # return '-'
  430. return snapshot_icons(obj)
  431. @admin.display(
  432. # ordering='archiveresult_count'
  433. )
  434. def size(self, obj):
  435. archive_size = (Path(obj.link_dir) / 'index.html').exists() and obj.archive_size
  436. if archive_size:
  437. size_txt = printable_filesize(archive_size)
  438. if archive_size > 52428800:
  439. size_txt = mark_safe(f'<b>{size_txt}</b>')
  440. else:
  441. size_txt = mark_safe('<span style="opacity: 0.3">...</span>')
  442. return format_html(
  443. '<a href="/{}" title="View all files">{}</a>',
  444. obj.archive_path,
  445. size_txt,
  446. )
  447. @admin.display(
  448. description='Original URL',
  449. ordering='url',
  450. )
  451. def url_str(self, obj):
  452. return format_html(
  453. '<a href="{}"><code style="user-select: all;">{}</code></a>',
  454. obj.url,
  455. obj.url[:128],
  456. )
  457. def grid_view(self, request, extra_context=None):
  458. # cl = self.get_changelist_instance(request)
  459. # Save before monkey patching to restore for changelist list view
  460. saved_change_list_template = self.change_list_template
  461. saved_list_per_page = self.list_per_page
  462. saved_list_max_show_all = self.list_max_show_all
  463. # Monkey patch here plus core_tags.py
  464. self.change_list_template = 'private_index_grid.html'
  465. self.list_per_page = CONFIG.SNAPSHOTS_PER_PAGE
  466. self.list_max_show_all = self.list_per_page
  467. # Call monkey patched view
  468. rendered_response = self.changelist_view(request, extra_context=extra_context)
  469. # Restore values
  470. self.change_list_template = saved_change_list_template
  471. self.list_per_page = saved_list_per_page
  472. self.list_max_show_all = saved_list_max_show_all
  473. return rendered_response
  474. # for debugging, uncomment this to print all requests:
  475. # def changelist_view(self, request, extra_context=None):
  476. # print('[*] Got request', request.method, request.POST)
  477. # return super().changelist_view(request, extra_context=None)
  478. @admin.action(
  479. description="ℹ️ Get Title"
  480. )
  481. def update_titles(self, request, queryset):
  482. archive_links([
  483. snapshot.as_link()
  484. for snapshot in queryset
  485. ], overwrite=True, methods=('title','favicon'), out_dir=CONFIG.OUTPUT_DIR)
  486. @admin.action(
  487. description="⬇️ Get Missing"
  488. )
  489. def update_snapshots(self, request, queryset):
  490. archive_links([
  491. snapshot.as_link()
  492. for snapshot in queryset
  493. ], out_dir=CONFIG.OUTPUT_DIR)
  494. @admin.action(
  495. description="📑 Archive again"
  496. )
  497. def resnapshot_snapshot(self, request, queryset):
  498. for snapshot in queryset:
  499. timestamp = datetime.now(timezone.utc).isoformat('T', 'seconds')
  500. new_url = snapshot.url.split('#')[0] + f'#{timestamp}'
  501. add(new_url, tag=snapshot.tags_str())
  502. @admin.action(
  503. description="♲ Redo"
  504. )
  505. def overwrite_snapshots(self, request, queryset):
  506. archive_links([
  507. snapshot.as_link()
  508. for snapshot in queryset
  509. ], overwrite=True, out_dir=CONFIG.OUTPUT_DIR)
  510. @admin.action(
  511. description="☠️ Delete"
  512. )
  513. def delete_snapshots(self, request, queryset):
  514. remove(snapshots=queryset, yes=True, delete=True, out_dir=CONFIG.OUTPUT_DIR)
  515. @admin.action(
  516. description="+"
  517. )
  518. def add_tags(self, request, queryset):
  519. tags = request.POST.getlist('tags')
  520. print('[+] Adding tags', tags, 'to Snapshots', queryset)
  521. for obj in queryset:
  522. obj.tags.add(*tags)
  523. @admin.action(
  524. description="–"
  525. )
  526. def remove_tags(self, request, queryset):
  527. tags = request.POST.getlist('tags')
  528. print('[-] Removing tags', tags, 'to Snapshots', queryset)
  529. for obj in queryset:
  530. obj.tags.remove(*tags)
  531. # @admin.register(SnapshotTag, site=archivebox_admin)
  532. # class SnapshotTagAdmin(ABIDModelAdmin):
  533. # list_display = ('id', 'snapshot', 'tag')
  534. # sort_fields = ('id', 'snapshot', 'tag')
  535. # search_fields = ('id', 'snapshot_id', 'tag_id')
  536. # fields = ('snapshot', 'id')
  537. # actions = ['delete_selected']
  538. # ordering = ['-id']
  539. @admin.register(Tag, site=archivebox_admin)
  540. class TagAdmin(ABIDModelAdmin):
  541. list_display = ('created', 'created_by', 'abid', 'name', 'num_snapshots', 'snapshots')
  542. list_filter = ('created', 'created_by')
  543. sort_fields = ('name', 'slug', 'abid', 'created_by', 'created')
  544. readonly_fields = ('slug', 'abid', 'created', 'modified', 'API', 'snapshots')
  545. search_fields = ('abid', 'name', 'slug')
  546. fields = ('name', 'created_by', *readonly_fields)
  547. actions = ['delete_selected']
  548. ordering = ['-created']
  549. paginator = AccelleratedPaginator
  550. def num_snapshots(self, tag):
  551. return format_html(
  552. '<a href="/admin/core/snapshot/?tags__id__exact={}">{} total</a>',
  553. tag.id,
  554. tag.snapshot_set.count(),
  555. )
  556. def snapshots(self, tag):
  557. total_count = tag.snapshot_set.count()
  558. return mark_safe('<br/>'.join(
  559. format_html(
  560. '<code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a></code> {}',
  561. snap.pk,
  562. snap.updated.strftime('%Y-%m-%d %H:%M') if snap.updated else 'pending...',
  563. snap.url[:64],
  564. )
  565. for snap in tag.snapshot_set.order_by('-updated')[:10]
  566. ) + (f'<br/><a href="/admin/core/snapshot/?tags__id__exact={tag.id}">{total_count} total snapshots...<a>'))
  567. @admin.register(ArchiveResult, site=archivebox_admin)
  568. class ArchiveResultAdmin(ABIDModelAdmin):
  569. list_display = ('start_ts', 'snapshot_info', 'tags_str', 'extractor', 'cmd_str', 'status', 'output_str')
  570. sort_fields = ('start_ts', 'extractor', 'status')
  571. readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created', 'modified', 'API', 'output_summary')
  572. search_fields = ('id', 'old_id', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
  573. fields = ('snapshot', 'extractor', 'status', 'output', 'pwd', 'start_ts', 'end_ts', 'created_by', 'cmd_version', 'cmd', *readonly_fields)
  574. autocomplete_fields = ['snapshot']
  575. list_filter = ('status', 'extractor', 'start_ts', 'cmd_version')
  576. ordering = ['-start_ts']
  577. list_per_page = CONFIG.SNAPSHOTS_PER_PAGE
  578. paginator = AccelleratedPaginator
  579. def change_view(self, request, object_id, form_url="", extra_context=None):
  580. self.request = request
  581. return super().change_view(request, object_id, form_url, extra_context)
  582. @admin.display(
  583. description='Snapshot Info'
  584. )
  585. def snapshot_info(self, result):
  586. return format_html(
  587. '<a href="/archive/{}/index.html"><b><code>[{}]</code></b> &nbsp; {} &nbsp; {}</a><br/>',
  588. result.snapshot.timestamp,
  589. result.snapshot.abid,
  590. result.snapshot.added.strftime('%Y-%m-%d %H:%M'),
  591. result.snapshot.url[:128],
  592. )
  593. @admin.display(
  594. description='Snapshot Tags'
  595. )
  596. def tags_str(self, result):
  597. return result.snapshot.tags_str()
  598. def cmd_str(self, result):
  599. return format_html(
  600. '<pre>{}</pre>',
  601. ' '.join(result.cmd) if isinstance(result.cmd, list) else str(result.cmd),
  602. )
  603. def output_str(self, result):
  604. return format_html(
  605. '<a href="/archive/{}/{}" class="output-link">↗️</a><pre>{}</pre>',
  606. result.snapshot.timestamp,
  607. result.output if (result.status == 'succeeded') and result.extractor not in ('title', 'archive_org') else 'index.html',
  608. result.output,
  609. )
  610. def output_summary(self, result):
  611. snapshot_dir = Path(CONFIG.OUTPUT_DIR) / str(result.pwd).split('data/', 1)[-1]
  612. output_str = format_html(
  613. '<pre style="display: inline-block">{}</pre><br/>',
  614. result.output,
  615. )
  616. output_str += format_html('<a href="/archive/{}/index.html#all">See result files ...</a><br/><pre><code>', str(result.snapshot.timestamp))
  617. path_from_output_str = (snapshot_dir / result.output)
  618. output_str += format_html('<i style="padding: 1px">{}</i><b style="padding-right: 20px">/</b><i>{}</i><br/><hr/>', str(snapshot_dir), str(result.output))
  619. if path_from_output_str.exists():
  620. root_dir = str(path_from_output_str)
  621. else:
  622. root_dir = str(snapshot_dir)
  623. # print(root_dir, str(list(os.walk(root_dir))))
  624. for root, dirs, files in os.walk(root_dir):
  625. depth = root.replace(root_dir, '').count(os.sep) + 1
  626. if depth > 2:
  627. continue
  628. indent = ' ' * 4 * (depth)
  629. output_str += format_html('<b style="padding: 1px">{}{}/</b><br/>', indent, os.path.basename(root))
  630. indentation_str = ' ' * 4 * (depth + 1)
  631. for filename in sorted(files):
  632. is_hidden = filename.startswith('.')
  633. output_str += format_html('<span style="opacity: {}.2">{}{}</span><br/>', int(not is_hidden), indentation_str, filename.strip())
  634. return output_str + format_html('</code></pre>')
  635. @admin.register(APIToken, site=archivebox_admin)
  636. class APITokenAdmin(ABIDModelAdmin):
  637. list_display = ('created', 'abid', 'created_by', 'token_redacted', 'expires')
  638. sort_fields = ('abid', 'created', 'created_by', 'expires')
  639. readonly_fields = ('created', 'modified', 'API')
  640. search_fields = ('id', 'abid', 'created_by__username', 'token')
  641. fields = ('created_by', 'token', 'expires', *readonly_fields)
  642. list_filter = ('created_by',)
  643. ordering = ['-created']
  644. list_per_page = 100
  645. @admin.register(get_webhook_model(), site=archivebox_admin)
  646. class CustomWebhookAdmin(WebhookAdmin, ABIDModelAdmin):
  647. list_display = ('created', 'created_by', 'abid', *WebhookAdmin.list_display)
  648. sort_fields = ('created', 'created_by', 'abid', 'referenced_model', 'endpoint', 'last_success', 'last_error')
  649. readonly_fields = ('created', 'modified', 'API', *WebhookAdmin.readonly_fields)