1 年之前 · 09553d8340
--- a/archivebox/core/admin.py
+++ b/archivebox/core/admin.py
@@ -180,12 +180,8 @@ class SnapshotActionForm(ActionForm):
 
				     )
			
 
				 
			
 
				     # TODO: allow selecting actions for specific extractors? is this useful?
			
 
				-    # EXTRACTOR_CHOICES = [
			
 
				-    #     (name, name.title())
			
 
				-    #     for name, _, _ in get_default_archive_methods()
			
 
				-    # ]
			
 
				     # extractor = forms.ChoiceField(
			
 
				-    #     choices=EXTRACTOR_CHOICES,
			
 
				+    #     choices=ArchiveResult.EXTRACTOR_CHOICES,
			
 
				     #     required=False,
			
 
				     #     widget=forms.MultipleChoiceField(attrs={'class': "form-control"})
			
 
				     # )
			
--- a/archivebox/core/migrations/0023_alter_archiveresult_options_archiveresult_abid_and_more.py
+++ b/archivebox/core/migrations/0023_alter_archiveresult_options_archiveresult_abid_and_more.py
@@ -38,6 +38,21 @@ class Migration(migrations.Migration):
 
				         migrations.AlterField(
			
 
				             model_name='archiveresult',
			
 
				             name='extractor',
			
 
				-            field=models.CharField(choices=[('htmltotext', 'htmltotext'), ('git', 'git'), ('singlefile', 'singlefile'), ('media', 'media'), ('archive_org', 'archive_org'), ('readability', 'readability'), ('mercury', 'mercury'), ('favicon', 'favicon'), ('pdf', 'pdf'), ('headers', 'headers'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('title', 'title'), ('wget', 'wget')], max_length=32),
			
 
				+            field=models.CharField(choices=(
			
 
				+                ('htmltotext', 'htmltotext'),
			
 
				+                ('git', 'git'),
			
 
				+                ('singlefile', 'singlefile'),
			
 
				+                ('media', 'media'),
			
 
				+                ('archive_org', 'archive_org'),
			
 
				+                ('readability', 'readability'),
			
 
				+                ('mercury', 'mercury'),
			
 
				+                ('favicon', 'favicon'),
			
 
				+                ('pdf', 'pdf'),
			
 
				+                ('headers', 'headers'),
			
 
				+                ('screenshot', 'screenshot'),
			
 
				+                ('dom', 'dom'),
			
 
				+                ('title', 'title'),
			
 
				+                ('wget', 'wget'),
			
 
				+            ), max_length=32),
			
 
				         ),
			
 
				     ]
			
--- a/archivebox/core/models.py
+++ b/archivebox/core/models.py
@@ -28,13 +28,6 @@ from ..index.html import snapshot_icons
 
				 from ..extractors import ARCHIVE_METHODS_INDEXING_PRECEDENCE, EXTRACTORS
			
 
				 
			
 
				 
			
 
				-EXTRACTOR_CHOICES = [(extractor_name, extractor_name) for extractor_name in EXTRACTORS.keys()]
			
 
				-STATUS_CHOICES = [
			
 
				-    ("succeeded", "succeeded"),
			
 
				-    ("failed", "failed"),
			
 
				-    ("skipped", "skipped")
			
 
				-]
			
 
				-
			
 
				 def rand_int_id():
			
 
				     return random.getrandbits(32)
			
 
				 
			
@@ -376,7 +369,28 @@ class ArchiveResult(ABIDModel):
 
				     abid_uri_src = 'self.snapshot.url'
			
 
				     abid_subtype_src = 'self.extractor'
			
 
				     abid_rand_src = 'self.old_id'
			
 
				-    EXTRACTOR_CHOICES = EXTRACTOR_CHOICES
			
 
				+
			
 
				+    EXTRACTOR_CHOICES = (
			
 
				+        ('htmltotext', 'htmltotext'),
			
 
				+        ('git', 'git'),
			
 
				+        ('singlefile', 'singlefile'),
			
 
				+        ('media', 'media'),
			
 
				+        ('archive_org', 'archive_org'),
			
 
				+        ('readability', 'readability'),
			
 
				+        ('mercury', 'mercury'),
			
 
				+        ('favicon', 'favicon'),
			
 
				+        ('pdf', 'pdf'),
			
 
				+        ('headers', 'headers'),
			
 
				+        ('screenshot', 'screenshot'),
			
 
				+        ('dom', 'dom'),
			
 
				+        ('title', 'title'),
			
 
				+        ('wget', 'wget'),
			
 
				+    )
			
 
				+    STATUS_CHOICES = [
			
 
				+        ("succeeded", "succeeded"),
			
 
				+        ("failed", "failed"),
			
 
				+        ("skipped", "skipped")
			
 
				+    ]
			
 
				 
			
 
				     old_id = models.BigIntegerField(default=rand_int_id, serialize=False, verbose_name='Old ID')
			
 
				 
			
--- a/archivebox/index/html.py
+++ b/archivebox/index/html.py
@@ -121,7 +121,7 @@ def snapshot_icons(snapshot) -> str:
 
				     cache_key = f'{snapshot.pk}-{(snapshot.updated or snapshot.added).timestamp()}-snapshot-icons'
			
 
				     
			
 
				     def calc_snapshot_icons():
			
 
				-        from core.models import EXTRACTOR_CHOICES
			
 
				+        from core.models import ArchiveResult
			
 
				         # start = datetime.now(timezone.utc)
			
 
				 
			
 
				         archive_results = snapshot.archiveresult_set.filter(status="succeeded", output__isnull=False)
			
@@ -147,12 +147,12 @@ def snapshot_icons(snapshot) -> str:
 
				         # Missing specific entry for WARC
			
 
				 
			
 
				         extractor_outputs = defaultdict(lambda: None)
			
 
				-        for extractor, _ in EXTRACTOR_CHOICES:
			
 
				+        for extractor, _ in ArchiveResult.EXTRACTOR_CHOICES:
			
 
				             for result in archive_results:
			
 
				                 if result.extractor == extractor and result:
			
 
				                     extractor_outputs[extractor] = result
			
 
				 
			
 
				-        for extractor, _ in EXTRACTOR_CHOICES:
			
 
				+        for extractor, _ in ArchiveResult.EXTRACTOR_CHOICES:
			
 
				             if extractor not in exclude:
			
 
				                 existing = extractor_outputs[extractor] and extractor_outputs[extractor].status == 'succeeded' and extractor_outputs[extractor].output
			
 
				                 # Check filesystem to see if anything is actually present (too slow, needs optimization/caching)
			
--- a/archivebox/logging_util.py
+++ b/archivebox/logging_util.py
@@ -529,8 +529,8 @@ def log_shell_welcome_msg():
 
				     from .cli import list_subcommands
			
 
				 
			
 
				     print('{green}# ArchiveBox Imports{reset}'.format(**ANSI))
			
 
				-    print('{green}from archivebox.core.models import Snapshot, ArchiveResult, Tag, User{reset}'.format(**ANSI))
			
 
				-    print('{green}from archivebox.cli import *\n    {}{reset}'.format("\n    ".join(list_subcommands().keys()), **ANSI))
			
 
				+    print('{green}from core.models import Snapshot, ArchiveResult, Tag, User{reset}'.format(**ANSI))
			
 
				+    print('{green}from cli import *\n    {}{reset}'.format("\n    ".join(list_subcommands().keys()), **ANSI))
			
 
				     print()
			
 
				     print('[i] Welcome to the ArchiveBox Shell!')
			
 
				     print('    https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Shell-Usage')