|
|
@@ -1,7 +1,13 @@
|
|
|
# Generated by hand on 2025-12-29
|
|
|
# Creates the Crawl and CrawlSchedule tables using raw SQL, and registers the matching models in Django's migration state (SeparateDatabaseAndState)
|
|
|
|
|
|
-from django.db import migrations
|
|
|
+from django.db import migrations, models
|
|
|
+import django.db.models.deletion
|
|
|
+import django.utils.timezone
|
|
|
+import django.core.validators
|
|
|
+from django.conf import settings
|
|
|
+from archivebox.uuid_compat import uuid7
|
|
|
+from archivebox.base_models.models import get_or_create_system_user_pk
|
|
|
|
|
|
|
|
|
class Migration(migrations.Migration):
|
|
|
@@ -10,12 +16,36 @@ class Migration(migrations.Migration):
|
|
|
|
|
|
dependencies = [
|
|
|
('auth', '0012_alter_user_first_name_max_length'),
|
|
|
+ migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
|
|
]
|
|
|
|
|
|
operations = [
|
|
|
- migrations.RunSQL(
|
|
|
- # Forward SQL
|
|
|
- sql="""
|
|
|
+ migrations.SeparateDatabaseAndState(
|
|
|
+ database_operations=[
|
|
|
+ migrations.RunSQL(
|
|
|
+ sql="""
|
|
|
+ -- Create crawls_crawlschedule table first (circular FK will be added later)
|
|
|
+ CREATE TABLE IF NOT EXISTS crawls_crawlschedule (
|
|
|
+ id TEXT PRIMARY KEY NOT NULL,
|
|
|
+ created_at DATETIME NOT NULL,
|
|
|
+ modified_at DATETIME NOT NULL,
|
|
|
+ num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
|
|
|
+ num_uses_failed INTEGER NOT NULL DEFAULT 0,
|
|
|
+
|
|
|
+ schedule VARCHAR(64) NOT NULL,
|
|
|
+ is_enabled BOOLEAN NOT NULL DEFAULT 1,
|
|
|
+ label VARCHAR(64) NOT NULL DEFAULT '',
|
|
|
+ notes TEXT NOT NULL DEFAULT '',
|
|
|
+
|
|
|
+ template_id TEXT NOT NULL,
|
|
|
+ created_by_id INTEGER NOT NULL,
|
|
|
+
|
|
|
+ FOREIGN KEY (created_by_id) REFERENCES auth_user(id) ON DELETE CASCADE
|
|
|
+ );
|
|
|
+ CREATE INDEX IF NOT EXISTS crawls_crawlschedule_created_at_idx ON crawls_crawlschedule(created_at);
|
|
|
+ CREATE INDEX IF NOT EXISTS crawls_crawlschedule_created_by_id_idx ON crawls_crawlschedule(created_by_id);
|
|
|
+ CREATE INDEX IF NOT EXISTS crawls_crawlschedule_template_id_idx ON crawls_crawlschedule(template_id);
|
|
|
+
|
|
|
-- Create crawls_crawl table
|
|
|
CREATE TABLE IF NOT EXISTS crawls_crawl (
|
|
|
id TEXT PRIMARY KEY NOT NULL,
|
|
|
@@ -45,33 +75,67 @@ class Migration(migrations.Migration):
|
|
|
CREATE INDEX IF NOT EXISTS crawls_crawl_retry_at_idx ON crawls_crawl(retry_at);
|
|
|
CREATE INDEX IF NOT EXISTS crawls_crawl_created_at_idx ON crawls_crawl(created_at);
|
|
|
CREATE INDEX IF NOT EXISTS crawls_crawl_created_by_id_idx ON crawls_crawl(created_by_id);
|
|
|
-
|
|
|
- -- Create crawls_crawlschedule table
|
|
|
- CREATE TABLE IF NOT EXISTS crawls_crawlschedule (
|
|
|
- id TEXT PRIMARY KEY NOT NULL,
|
|
|
- created_at DATETIME NOT NULL,
|
|
|
- modified_at DATETIME NOT NULL,
|
|
|
- num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
|
|
|
- num_uses_failed INTEGER NOT NULL DEFAULT 0,
|
|
|
-
|
|
|
- schedule VARCHAR(64) NOT NULL,
|
|
|
- is_enabled BOOLEAN NOT NULL DEFAULT 1,
|
|
|
- label VARCHAR(64) NOT NULL DEFAULT '',
|
|
|
- notes TEXT NOT NULL DEFAULT '',
|
|
|
-
|
|
|
- template_id TEXT NOT NULL,
|
|
|
- created_by_id INTEGER NOT NULL,
|
|
|
-
|
|
|
- FOREIGN KEY (template_id) REFERENCES crawls_crawl(id) ON DELETE CASCADE,
|
|
|
- FOREIGN KEY (created_by_id) REFERENCES auth_user(id) ON DELETE CASCADE
|
|
|
- );
|
|
|
- CREATE INDEX IF NOT EXISTS crawls_crawlschedule_created_at_idx ON crawls_crawlschedule(created_at);
|
|
|
- CREATE INDEX IF NOT EXISTS crawls_crawlschedule_created_by_id_idx ON crawls_crawlschedule(created_by_id);
|
|
|
- """,
|
|
|
- # Reverse SQL
|
|
|
- reverse_sql="""
|
|
|
+ CREATE INDEX IF NOT EXISTS crawls_crawl_schedule_id_idx ON crawls_crawl(schedule_id);
|
|
|
+ """,
|
|
|
+ reverse_sql="""
|
|
|
DROP TABLE IF EXISTS crawls_crawl;
|
|
|
DROP TABLE IF EXISTS crawls_crawlschedule;
|
|
|
- """
|
|
|
+ """
|
|
|
+ ),
|
|
|
+ ],
|
|
|
+ state_operations=[
|
|
|
+ migrations.CreateModel(
|
|
|
+ name='CrawlSchedule',
|
|
|
+ fields=[
|
|
|
+ ('id', models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True)),
|
|
|
+ ('created_at', models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
|
|
|
+ ('modified_at', models.DateTimeField(auto_now=True)),
|
|
|
+ ('num_uses_succeeded', models.PositiveIntegerField(default=0)),
|
|
|
+ ('num_uses_failed', models.PositiveIntegerField(default=0)),
|
|
|
+ ('schedule', models.CharField(max_length=64)),
|
|
|
+ ('is_enabled', models.BooleanField(default=True)),
|
|
|
+ ('label', models.CharField(blank=True, default='', max_length=64)),
|
|
|
+ ('notes', models.TextField(blank=True, default='')),
|
|
|
+ ('created_by', models.ForeignKey(default=get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
|
|
|
+ ],
|
|
|
+ options={
|
|
|
+ 'verbose_name': 'Scheduled Crawl',
|
|
|
+ 'verbose_name_plural': 'Scheduled Crawls',
|
|
|
+ 'app_label': 'crawls',
|
|
|
+ },
|
|
|
+ ),
|
|
|
+ migrations.CreateModel(
|
|
|
+ name='Crawl',
|
|
|
+ fields=[
|
|
|
+ ('id', models.UUIDField(default=uuid7, editable=False, primary_key=True, serialize=False, unique=True)),
|
|
|
+ ('created_at', models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
|
|
|
+ ('modified_at', models.DateTimeField(auto_now=True)),
|
|
|
+ ('num_uses_succeeded', models.PositiveIntegerField(default=0)),
|
|
|
+ ('num_uses_failed', models.PositiveIntegerField(default=0)),
|
|
|
+ ('urls', models.TextField(help_text='Newline-separated list of URLs to crawl')),
|
|
|
+ ('config', models.JSONField(blank=True, default=dict, null=True)),
|
|
|
+ ('max_depth', models.PositiveSmallIntegerField(default=0, validators=[django.core.validators.MinValueValidator(0), django.core.validators.MaxValueValidator(4)])),
|
|
|
+ ('tags_str', models.CharField(blank=True, default='', max_length=1024)),
|
|
|
+ ('persona_id', models.UUIDField(blank=True, null=True)),
|
|
|
+ ('label', models.CharField(blank=True, default='', max_length=64)),
|
|
|
+ ('notes', models.TextField(blank=True, default='')),
|
|
|
+ ('output_dir', models.CharField(blank=True, default='', max_length=512)),
|
|
|
+ ('status', models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('sealed', 'Sealed')], db_index=True, default='queued', max_length=15)),
|
|
|
+ ('retry_at', models.DateTimeField(blank=True, db_index=True, default=django.utils.timezone.now, null=True)),
|
|
|
+ ('created_by', models.ForeignKey(default=get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
|
|
|
+ ('schedule', models.ForeignKey(blank=True, editable=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='crawls.crawlschedule')),
|
|
|
+ ],
|
|
|
+ options={
|
|
|
+ 'verbose_name': 'Crawl',
|
|
|
+ 'verbose_name_plural': 'Crawls',
|
|
|
+ 'app_label': 'crawls',
|
|
|
+ },
|
|
|
+ ),
|
|
|
+ migrations.AddField(
|
|
|
+ model_name='crawlschedule',
|
|
|
+ name='template',
|
|
|
+ field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='crawls.crawl'),
|
|
|
+ ),
|
|
|
+ ],
|
|
|
),
|
|
|
]
|