Sfoglia il codice sorgente

feat: Add extractor field to the database

Cristian 5 anni fa
parent
commit
309a87e8fe

+ 2 - 1
archivebox/core/migrations/0007_archiveresult.py

@@ -1,4 +1,4 @@
-# Generated by Django 3.0.8 on 2020-11-03 14:52
+# Generated by Django 3.0.8 on 2020-11-04 12:25
 
 from django.db import migrations, models
 import django.db.models.deletion
@@ -21,6 +21,7 @@ class Migration(migrations.Migration):
                 ('output', models.CharField(default='', max_length=500)),
                 ('start_ts', models.DateTimeField()),
                 ('end_ts', models.DateTimeField()),
+                ('extractor', models.CharField(choices=[('title', 'title'), ('favicon', 'favicon'), ('wget', 'wget'), ('singlefile', 'singlefile'), ('pdf', 'pdf'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('readability', 'readability'), ('mercury', 'mercury'), ('git', 'git'), ('media', 'media'), ('headers', 'headers'), ('archive_org', 'archive_org')], max_length=20)),
                 ('snapshot', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='core.Snapshot')),
             ],
         ),

+ 5 - 1
archivebox/core/models.py

@@ -8,6 +8,9 @@ from django.utils.text import slugify
 
 from ..util import parse_date
 from ..index.schema import Link
+from ..extractors import get_default_archive_methods
+
+EXTRACTORS = [(extractor[0], extractor[0]) for extractor in get_default_archive_methods()]
 
 
 class Tag(models.Model):
@@ -157,4 +160,5 @@ class ArchiveResult(models.Model):
     cmd_version = models.CharField(max_length=20, default="")
     output = models.CharField(max_length=500, default="")
     start_ts = models.DateTimeField()
-    end_ts = models.DateTimeField()
+    end_ts = models.DateTimeField()
+    extractor = models.CharField(choices=EXTRACTORS, blank=False, max_length=20)