views.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285
  1. __package__ = 'archivebox.core'
  2. from io import StringIO
  3. from contextlib import redirect_stdout
  4. from django.shortcuts import render, redirect
  5. from django.http import HttpResponse, Http404
  6. from django.utils.html import format_html, mark_safe
  7. from django.views import View, static
  8. from django.views.generic.list import ListView
  9. from django.views.generic import FormView
  10. from django.db.models import Q
  11. from django.contrib.auth.mixins import UserPassesTestMixin
  12. from core.models import Snapshot
  13. from core.forms import AddLinkForm
  14. from ..config import (
  15. OUTPUT_DIR,
  16. PUBLIC_INDEX,
  17. PUBLIC_SNAPSHOTS,
  18. PUBLIC_ADD_VIEW,
  19. VERSION,
  20. FOOTER_INFO,
  21. SNAPSHOTS_PER_PAGE,
  22. )
  23. from main import add
  24. from ..util import base_url, ansi_to_html
  25. from ..index.html import snapshot_icons
  26. class HomepageView(View):
  27. def get(self, request):
  28. if request.user.is_authenticated:
  29. return redirect('/admin/core/snapshot/')
  30. if PUBLIC_INDEX:
  31. return redirect('/public')
  32. return redirect(f'/admin/login/?next={request.path}')
  33. class SnapshotView(View):
  34. # render static html index from filesystem archive/<timestamp>/index.html
  35. def get(self, request, path):
  36. if not request.user.is_authenticated and not PUBLIC_SNAPSHOTS:
  37. return redirect(f'/admin/login/?next={request.path}')
  38. try:
  39. slug, archivefile = path.split('/', 1)
  40. except (IndexError, ValueError):
  41. slug, archivefile = path.split('/', 1)[0], 'index.html'
  42. # slug is a timestamp
  43. if slug.replace('.','').isdigit():
  44. # missing trailing slash -> redirect to index
  45. if '/' not in path:
  46. return redirect(f'{path}/index.html')
  47. try:
  48. try:
  49. snapshot = Snapshot.objects.get(timestamp=slug)
  50. except Snapshot.DoesNotExist:
  51. if Snapshot.objects.filter(timestamp__startswith=slug).exists():
  52. raise Snapshot.MultipleObjectsReturned
  53. response = static.serve(request, archivefile, document_root=snapshot.link_dir, show_indexes=True)
  54. response["Link"] = f'<{snapshot.url}>; rel="canonical"'
  55. return response
  56. except Snapshot.DoesNotExist:
  57. # Snapshot does not exist
  58. return HttpResponse(
  59. format_html(
  60. (
  61. '<center><br/><br/><br/>'
  62. 'No Snapshots match the given timestamp: <code>{}</code><br/><br/>'
  63. 'You can <a href="/add/" target="_top">add a new Snapshot</a>, or return to the <a href="/" target="_top">Main Index</a>'
  64. '</center>'
  65. ),
  66. slug,
  67. path,
  68. ),
  69. content_type="text/html",
  70. status=404,
  71. )
  72. except Snapshot.MultipleObjectsReturned:
  73. snapshot_hrefs = mark_safe('<br/>').join(
  74. format_html(
  75. '{} <a href="/archive/{}/index.html"><b><code>{}</code></b></a> {} <b>{}</b>',
  76. snap.added.strftime('%Y-%m-%d %H:%M:%S'),
  77. snap.timestamp,
  78. snap.timestamp,
  79. snap.url,
  80. snap.title or '',
  81. )
  82. for snap in Snapshot.objects.filter(timestamp__startswith=slug).only('url', 'timestamp', 'title', 'added').order_by('-added')
  83. )
  84. return HttpResponse(
  85. format_html(
  86. (
  87. 'Multiple Snapshots match the given timestamp <code>{}</code><br/><pre>'
  88. ),
  89. slug,
  90. ) + snapshot_hrefs + format_html(
  91. (
  92. '</pre><br/>'
  93. 'Choose a Snapshot to proceed or go back to the <a href="/" target="_top">Main Index</a>'
  94. )
  95. ),
  96. content_type="text/html",
  97. status=404,
  98. )
  99. except Http404:
  100. # Snapshot dir exists but file within does not e.g. 124235.324234/screenshot.png
  101. return HttpResponse(
  102. format_html(
  103. (
  104. '<center><br/><br/><br/>'
  105. '<a href="/archive/{}/index.html" target="_top">Snapshot <b><code>{}</code></b></a> exists but no file or folder <b><code>/{}</code></b> exists within.<br/><br/>'
  106. '<small>Maybe this output type is not availabe for this URL,<br/>or the archiving process has not completed for this Snapshot yet?<br/>'
  107. '<pre><code># run this cmd to finish archiving this Snapshot<br/>archivebox update -t timestamp {}</code></pre></small><br/><br/>'
  108. 'You can go back to the <a href="/archive/{}/index.html" target="_top">Snapshot <b><code>{}</code></b></a> detail page, or return to the <a href="/" target="_top">Main Index</a>'
  109. '</center>'
  110. ),
  111. snapshot.timestamp,
  112. snapshot.timestamp,
  113. archivefile,
  114. snapshot.timestamp,
  115. snapshot.timestamp,
  116. snapshot.timestamp,
  117. ),
  118. content_type="text/html",
  119. status=404,
  120. )
  121. # slug is a URL
  122. else:
  123. try:
  124. try:
  125. # try exact match on full url first
  126. snapshot = Snapshot.objects.get(
  127. Q(url='http://' + path) | Q(url='https://' + path)
  128. )
  129. except Snapshot.DoesNotExist:
  130. # fall back to match on exact base_url
  131. try:
  132. snapshot = Snapshot.objects.get(
  133. Q(url='http://' + base_url(path)) | Q(url='https://' + base_url(path))
  134. )
  135. except Snapshot.DoesNotExist:
  136. # fall back to matching base_url as prefix
  137. snapshot = Snapshot.objects.get(
  138. Q(url__startswith='http://' + base_url(path)) | Q(url__startswith='https://' + base_url(path))
  139. )
  140. return redirect(f'/archive/{snapshot.timestamp}/index.html')
  141. except Snapshot.DoesNotExist:
  142. return HttpResponse(
  143. format_html(
  144. (
  145. '<center><br/><br/><br/>'
  146. 'No Snapshots match the given url: <code>{}</code><br/><br/>'
  147. 'You can <a href="/add/?url=https://{}" target="_top">add a new Snapshot</a>, or return to the <a href="/" target="_top">Main Index</a>'
  148. '</center>'
  149. ),
  150. base_url(path),
  151. path,
  152. ),
  153. content_type="text/html",
  154. status=404,
  155. )
  156. except Snapshot.MultipleObjectsReturned:
  157. snapshot_hrefs = mark_safe('<br/>').join(
  158. format_html(
  159. '{} <a href="/archive/{}/index.html"><b><code>{}</code></b></a> {} <b>{}</b>',
  160. snap.added.strftime('%Y-%m-%d %H:%M:%S'),
  161. snap.timestamp,
  162. snap.timestamp,
  163. snap.url,
  164. snap.title or '',
  165. )
  166. for snap in Snapshot.objects.filter(
  167. Q(url__startswith='http://' + base_url(path)) | Q(url__startswith='https://' + base_url(path))
  168. ).only('url', 'timestamp', 'title', 'added').order_by('-added')
  169. )
  170. return HttpResponse(
  171. format_html(
  172. (
  173. 'Multiple Snapshots match the given URL <code>{}</code><br/><pre>'
  174. ),
  175. base_url(path),
  176. ) + snapshot_hrefs + format_html(
  177. (
  178. '</pre><br/>'
  179. 'Choose a Snapshot to proceed or go back to the <a href="/" target="_top">Main Index</a>'
  180. )
  181. ),
  182. content_type="text/html",
  183. status=404,
  184. )
  185. class PublicIndexView(ListView):
  186. template_name = 'public_index.html'
  187. model = Snapshot
  188. paginate_by = SNAPSHOTS_PER_PAGE
  189. ordering = ['title']
  190. def get_context_data(self, **kwargs):
  191. return {
  192. **super().get_context_data(**kwargs),
  193. 'VERSION': VERSION,
  194. 'FOOTER_INFO': FOOTER_INFO,
  195. }
  196. def get_queryset(self, **kwargs):
  197. qs = super().get_queryset(**kwargs)
  198. query = self.request.GET.get('q')
  199. if query:
  200. qs = qs.filter(Q(title__icontains=query) | Q(url__icontains=query) | Q(timestamp__icontains=query) | Q(tags__name__icontains=query))
  201. for snapshot in qs:
  202. # lazy load snapshot icons, otherwise it will load icons for entire index at once
  203. snapshot.icons = lambda: snapshot_icons(snapshot)
  204. return qs
  205. def get(self, *args, **kwargs):
  206. if PUBLIC_INDEX or self.request.user.is_authenticated:
  207. response = super().get(*args, **kwargs)
  208. return response
  209. else:
  210. return redirect(f'/admin/login/?next={self.request.path}')
  211. class AddView(UserPassesTestMixin, FormView):
  212. template_name = "add.html"
  213. form_class = AddLinkForm
  214. def get_initial(self):
  215. """Prefill the AddLinkForm with the 'url' GET parameter"""
  216. if self.request.method == 'GET':
  217. url = self.request.GET.get('url', None)
  218. if url:
  219. return {'url': url}
  220. else:
  221. return super().get_initial()
  222. def test_func(self):
  223. return PUBLIC_ADD_VIEW or self.request.user.is_authenticated
  224. def get_context_data(self, **kwargs):
  225. return {
  226. **super().get_context_data(**kwargs),
  227. 'title': "Add URLs",
  228. # We can't just call request.build_absolute_uri in the template, because it would include query parameters
  229. 'absolute_add_path': self.request.build_absolute_uri(self.request.path),
  230. 'VERSION': VERSION,
  231. 'FOOTER_INFO': FOOTER_INFO,
  232. 'stdout': '',
  233. }
  234. def form_valid(self, form):
  235. url = form.cleaned_data["url"]
  236. print(f'[+] Adding URL: {url}')
  237. depth = 0 if form.cleaned_data["depth"] == "0" else 1
  238. extractors = ','.join(form.cleaned_data["archive_methods"])
  239. input_kwargs = {
  240. "urls": url,
  241. "depth": depth,
  242. "update_all": False,
  243. "out_dir": OUTPUT_DIR,
  244. }
  245. if extractors:
  246. input_kwargs.update({"extractors": extractors})
  247. add_stdout = StringIO()
  248. with redirect_stdout(add_stdout):
  249. add(**input_kwargs)
  250. print(add_stdout.getvalue())
  251. context = self.get_context_data()
  252. context.update({
  253. "stdout": ansi_to_html(add_stdout.getvalue().strip()),
  254. "form": AddLinkForm()
  255. })
  256. return render(template_name=self.template_name, request=self.request, context=context)