views.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
  1. __package__ = 'archivebox.core'
  2. from io import StringIO
  3. from contextlib import redirect_stdout
  4. from django.shortcuts import render, redirect
  5. from django.http import HttpResponse
  6. from django.views import View, static
  7. from django.views.generic.list import ListView
  8. from django.views.generic import FormView
  9. from django.db.models import Q
  10. from django.contrib.auth.mixins import UserPassesTestMixin
  11. from core.models import Snapshot
  12. from core.forms import AddLinkForm
  13. from ..config import (
  14. OUTPUT_DIR,
  15. PUBLIC_INDEX,
  16. PUBLIC_SNAPSHOTS,
  17. PUBLIC_ADD_VIEW,
  18. VERSION,
  19. FOOTER_INFO,
  20. )
  21. from main import add
  22. from ..util import base_url, ansi_to_html
  23. from ..index.html import snapshot_icons
  24. class HomepageView(View):
  25. def get(self, request):
  26. if request.user.is_authenticated:
  27. return redirect('/admin/core/snapshot/')
  28. if PUBLIC_INDEX:
  29. return redirect('/public')
  30. return redirect(f'/admin/login/?next={request.path}')
  31. class SnapshotView(View):
  32. # render static html index from filesystem archive/<timestamp>/index.html
  33. def get(self, request, path):
  34. # missing trailing slash -> redirect to index
  35. if '/' not in path:
  36. return redirect(f'{path}/index.html')
  37. if not request.user.is_authenticated and not PUBLIC_SNAPSHOTS:
  38. return redirect(f'/admin/login/?next={request.path}')
  39. try:
  40. slug, archivefile = path.split('/', 1)
  41. except (IndexError, ValueError):
  42. slug, archivefile = path.split('/', 1)[0], 'index.html'
  43. all_pages = list(Snapshot.objects.all())
  44. # slug is a timestamp
  45. by_ts = {page.timestamp: page for page in all_pages}
  46. try:
  47. # print('SERVING STATICFILE', by_ts[slug].link_dir, request.path, path)
  48. response = static.serve(request, archivefile, document_root=by_ts[slug].link_dir, show_indexes=True)
  49. response["Link"] = f'<{by_ts[slug].url}>; rel="canonical"'
  50. return response
  51. except KeyError:
  52. pass
  53. # slug is a hash
  54. by_hash = {page.url_hash: page for page in all_pages}
  55. try:
  56. timestamp = by_hash[slug].timestamp
  57. return redirect(f'/archive/{timestamp}/{archivefile}')
  58. except KeyError:
  59. pass
  60. # slug is a URL
  61. by_url = {page.base_url: page for page in all_pages}
  62. try:
  63. # TODO: add multiple snapshot support by showing index of all snapshots
  64. # for given url instead of redirecting to timestamp index
  65. timestamp = by_url[base_url(path)].timestamp
  66. return redirect(f'/archive/{timestamp}/index.html')
  67. except KeyError:
  68. pass
  69. return HttpResponse(
  70. 'No archived link matches the given timestamp or hash.',
  71. content_type="text/plain",
  72. status=404,
  73. )
  74. class PublicIndexView(ListView):
  75. template_name = 'public_index.html'
  76. model = Snapshot
  77. paginate_by = 100
  78. ordering = ['title']
  79. def get_context_data(self, **kwargs):
  80. return {
  81. **super().get_context_data(**kwargs),
  82. 'VERSION': VERSION,
  83. 'FOOTER_INFO': FOOTER_INFO,
  84. }
  85. def get_queryset(self, **kwargs):
  86. qs = super().get_queryset(**kwargs)
  87. query = self.request.GET.get('q')
  88. if query:
  89. qs = qs.filter(Q(title__icontains=query) | Q(url__icontains=query) | Q(timestamp__icontains=query) | Q(tags__name__icontains=query))
  90. for snapshot in qs:
  91. snapshot.icons = snapshot_icons(snapshot)
  92. return qs
  93. def get(self, *args, **kwargs):
  94. if PUBLIC_INDEX or self.request.user.is_authenticated:
  95. response = super().get(*args, **kwargs)
  96. return response
  97. else:
  98. return redirect(f'/admin/login/?next={self.request.path}')
  99. class AddView(UserPassesTestMixin, FormView):
  100. template_name = "add.html"
  101. form_class = AddLinkForm
  102. def get_initial(self):
  103. """Prefill the AddLinkForm with the 'url' GET parameter"""
  104. if self.request.method == 'GET':
  105. url = self.request.GET.get('url', None)
  106. if url:
  107. return {'url': url}
  108. else:
  109. return super().get_initial()
  110. def test_func(self):
  111. return PUBLIC_ADD_VIEW or self.request.user.is_authenticated
  112. def get_context_data(self, **kwargs):
  113. return {
  114. **super().get_context_data(**kwargs),
  115. 'title': "Add URLs",
  116. # We can't just call request.build_absolute_uri in the template, because it would include query parameters
  117. 'absolute_add_path': self.request.build_absolute_uri(self.request.path),
  118. 'VERSION': VERSION,
  119. 'FOOTER_INFO': FOOTER_INFO,
  120. }
  121. def form_valid(self, form):
  122. url = form.cleaned_data["url"]
  123. print(f'[+] Adding URL: {url}')
  124. depth = 0 if form.cleaned_data["depth"] == "0" else 1
  125. extractors = ','.join(form.cleaned_data["archive_methods"])
  126. input_kwargs = {
  127. "urls": url,
  128. "depth": depth,
  129. "update_all": False,
  130. "out_dir": OUTPUT_DIR,
  131. }
  132. if extractors:
  133. input_kwargs.update({"extractors": extractors})
  134. add_stdout = StringIO()
  135. with redirect_stdout(add_stdout):
  136. add(**input_kwargs)
  137. print(add_stdout.getvalue())
  138. context = self.get_context_data()
  139. context.update({
  140. "stdout": ansi_to_html(add_stdout.getvalue().strip()),
  141. "form": AddLinkForm()
  142. })
  143. return render(template_name=self.template_name, request=self.request, context=context)