瀏覽代碼

hardcode EXTRACTOR_CHOICES to prevent nondeterministic migrations

Nick Sweeting 1 年之前
父節點
當前提交
09553d8340

+ 1 - 5
archivebox/core/admin.py

@@ -180,12 +180,8 @@ class SnapshotActionForm(ActionForm):
     )
 
     # TODO: allow selecting actions for specific extractors? is this useful?
-    # EXTRACTOR_CHOICES = [
-    #     (name, name.title())
-    #     for name, _, _ in get_default_archive_methods()
-    # ]
     # extractor = forms.ChoiceField(
-    #     choices=EXTRACTOR_CHOICES,
+    #     choices=ArchiveResult.EXTRACTOR_CHOICES,
     #     required=False,
     #     widget=forms.MultileChoiceField(attrs={'class': "form-control"})
     # )

+ 16 - 1
archivebox/core/migrations/0023_alter_archiveresult_options_archiveresult_abid_and_more.py

@@ -38,6 +38,21 @@ class Migration(migrations.Migration):
         migrations.AlterField(
             model_name='archiveresult',
             name='extractor',
-            field=models.CharField(choices=[('htmltotext', 'htmltotext'), ('git', 'git'), ('singlefile', 'singlefile'), ('media', 'media'), ('archive_org', 'archive_org'), ('readability', 'readability'), ('mercury', 'mercury'), ('favicon', 'favicon'), ('pdf', 'pdf'), ('headers', 'headers'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('title', 'title'), ('wget', 'wget')], max_length=32),
+            field=models.CharField(choices=(
+                ('htmltotext', 'htmltotext'),
+                ('git', 'git'),
+                ('singlefile', 'singlefile'),
+                ('media', 'media'),
+                ('archive_org', 'archive_org'),
+                ('readability', 'readability'),
+                ('mercury', 'mercury'),
+                ('favicon', 'favicon'),
+                ('pdf', 'pdf'),
+                ('headers', 'headers'),
+                ('screenshot', 'screenshot'),
+                ('dom', 'dom'),
+                ('title', 'title'),
+                ('wget', 'wget'),
+            ), max_length=32),
         ),
     ]

+ 22 - 8
archivebox/core/models.py

@@ -28,13 +28,6 @@ from ..index.html import snapshot_icons
 from ..extractors import ARCHIVE_METHODS_INDEXING_PRECEDENCE, EXTRACTORS
 
 
-EXTRACTOR_CHOICES = [(extractor_name, extractor_name) for extractor_name in EXTRACTORS.keys()]
-STATUS_CHOICES = [
-    ("succeeded", "succeeded"),
-    ("failed", "failed"),
-    ("skipped", "skipped")
-]
-
 def rand_int_id():
     return random.getrandbits(32)
 
@@ -376,7 +369,28 @@ class ArchiveResult(ABIDModel):
     abid_uri_src = 'self.snapshot.url'
     abid_subtype_src = 'self.extractor'
     abid_rand_src = 'self.old_id'
-    EXTRACTOR_CHOICES = EXTRACTOR_CHOICES
+
+    EXTRACTOR_CHOICES = (
+        ('htmltotext', 'htmltotext'),
+        ('git', 'git'),
+        ('singlefile', 'singlefile'),
+        ('media', 'media'),
+        ('archive_org', 'archive_org'),
+        ('readability', 'readability'),
+        ('mercury', 'mercury'),
+        ('favicon', 'favicon'),
+        ('pdf', 'pdf'),
+        ('headers', 'headers'),
+        ('screenshot', 'screenshot'),
+        ('dom', 'dom'),
+        ('title', 'title'),
+        ('wget', 'wget'),
+    )
+    STATUS_CHOICES = [
+        ("succeeded", "succeeded"),
+        ("failed", "failed"),
+        ("skipped", "skipped")
+    ]
 
     old_id = models.BigIntegerField(default=rand_int_id, serialize=False, verbose_name='Old ID')
 

+ 3 - 3
archivebox/index/html.py

@@ -121,7 +121,7 @@ def snapshot_icons(snapshot) -> str:
     cache_key = f'{snapshot.pk}-{(snapshot.updated or snapshot.added).timestamp()}-snapshot-icons'
     
     def calc_snapshot_icons():
-        from core.models import EXTRACTOR_CHOICES
+        from core.models import ArchiveResult
         # start = datetime.now(timezone.utc)
 
         archive_results = snapshot.archiveresult_set.filter(status="succeeded", output__isnull=False)
@@ -147,12 +147,12 @@ def snapshot_icons(snapshot) -> str:
         # Missing specific entry for WARC
 
         extractor_outputs = defaultdict(lambda: None)
-        for extractor, _ in EXTRACTOR_CHOICES:
+        for extractor, _ in ArchiveResult.EXTRACTOR_CHOICES:
             for result in archive_results:
                 if result.extractor == extractor and result:
                     extractor_outputs[extractor] = result
 
-        for extractor, _ in EXTRACTOR_CHOICES:
+        for extractor, _ in ArchiveResult.EXTRACTOR_CHOICES:
             if extractor not in exclude:
                 existing = extractor_outputs[extractor] and extractor_outputs[extractor].status == 'succeeded' and extractor_outputs[extractor].output
                 # Check filesystem to see if anything is actually present (too slow, needs optimization/caching)

+ 2 - 2
archivebox/logging_util.py

@@ -529,8 +529,8 @@ def log_shell_welcome_msg():
     from .cli import list_subcommands
 
     print('{green}# ArchiveBox Imports{reset}'.format(**ANSI))
-    print('{green}from archivebox.core.models import Snapshot, ArchiveResult, Tag, User{reset}'.format(**ANSI))
-    print('{green}from archivebox.cli import *\n    {}{reset}'.format("\n    ".join(list_subcommands().keys()), **ANSI))
+    print('{green}from core.models import Snapshot, ArchiveResult, Tag, User{reset}'.format(**ANSI))
+    print('{green}from cli import *\n    {}{reset}'.format("\n    ".join(list_subcommands().keys()), **ANSI))
     print()
     print('[i] Welcome to the ArchiveBox Shell!')
     print('    https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Shell-Usage')