Browse Source

Add selector for archive modes

BlipRanger 5 years ago
parent
commit
d9fd1e3811
2 changed files with 24 additions and 1 deletion
  1. 19 1
      archivebox/core/forms.py
  2. 5 0
      archivebox/core/views.py

+ 19 - 1
archivebox/core/forms.py

@@ -10,10 +10,28 @@ CHOICES = (
     ('1', 'depth = 1 (archive these URLs and all URLs one hop away)'),
 )
 
+ARCHIVE_METHODS = [  # (value, label) choice pairs passed as `choices` to AddLinkForm.archiveMethods; value and label are identical
+    ('title', 'title'),
+    ('favicon', 'favicon'),
+    ('wget', 'wget'),
+    ('warc', 'warc'),
+    ('pdf', 'pdf'),
+    ('screenshot', 'screenshot'),
+    ('dom', 'dom'),
+    ('singlefile', 'singlefile'),
+    ('git', 'git'),
+    ('media', 'media'),
+    ('archive_org', 'archive_org'),
+]
+
+
 class AddLinkForm(forms.Form):  # form fields: URL text, archive depth, and an optional archive-method multi-select
     url = forms.RegexField(label="URLs (one per line)", regex=URL_REGEX, min_length='6', strip=True, widget=forms.Textarea, required=True)
     depth = forms.ChoiceField(label="Archive depth", choices=CHOICES, widget=forms.RadioSelect, initial='0')
-
+    archiveMethods = forms.MultipleChoiceField(  # presumably the field the views.py hunk reads as cleaned_data["archiveMethods"] — confirm
+    required=False,  # submitting with no method selected is valid
+    widget=forms.SelectMultiple,
+    choices=ARCHIVE_METHODS,)  # options come from the module-level ARCHIVE_METHODS list
 
 class TagWidgetMixin:
     def format_value(self, value):

+ 5 - 0
archivebox/core/views.py

@@ -138,12 +138,17 @@ class AddView(UserPassesTestMixin, FormView):
         url = form.cleaned_data["url"]
         print(f'[+] Adding URL: {url}')
         depth = 0 if form.cleaned_data["depth"] == "0" else 1
+        # Comma-join the selected archive methods (empty string if none chosen);
+        # unlike repeated concatenation, join() leaves no trailing comma.
+        extractors = ",".join(form.cleaned_data["archiveMethods"])
         input_kwargs = {
             "urls": url,
             "depth": depth,
             "update_all": False,
             "out_dir": OUTPUT_DIR,
         }
+        if extractors:
+            input_kwargs["extractors"] = extractors  # dict has no append(); set the key
         add_stdout = StringIO()
         with redirect_stdout(add_stdout):
             add(**input_kwargs)