| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141 |
- # Generated by hand on 2025-12-29
- # Creates Crawl and CrawlSchedule tables using raw SQL
- from django.db import migrations, models
- import django.db.models.deletion
- import django.utils.timezone
- import django.core.validators
- from django.conf import settings
- from archivebox.uuid_compat import uuid7
- from archivebox.base_models.models import get_or_create_system_user_pk
# Forward SQL executed against the database. Every statement uses
# IF NOT EXISTS, so re-running it against a database that already has these
# tables/indexes is a no-op rather than an error.
#
# NOTE(review): crawls_crawlschedule.template_id is created as a plain indexed
# NOT NULL column; no FOREIGN KEY clause for it appears anywhere in this SQL,
# even though the migration state below declares it as a CASCADE FK to Crawl.
# Presumably a later migration reconciles this -- confirm.
#
# NOTE(review): the SQL references the auth_user table by name, whereas the
# migration state points at settings.AUTH_USER_MODEL -- confirm this is
# intended for deployments with a swapped user model.
_CREATE_CRAWL_TABLES_SQL = """
-- Create crawls_crawlschedule table first (circular FK will be added later)
CREATE TABLE IF NOT EXISTS crawls_crawlschedule (
    id TEXT PRIMARY KEY NOT NULL,
    created_at DATETIME NOT NULL,
    modified_at DATETIME NOT NULL,
    num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
    num_uses_failed INTEGER NOT NULL DEFAULT 0,
    schedule VARCHAR(64) NOT NULL,
    is_enabled BOOLEAN NOT NULL DEFAULT 1,
    label VARCHAR(64) NOT NULL DEFAULT '',
    notes TEXT NOT NULL DEFAULT '',
    template_id TEXT NOT NULL,
    created_by_id INTEGER NOT NULL,
    FOREIGN KEY (created_by_id) REFERENCES auth_user(id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS crawls_crawlschedule_created_at_idx ON crawls_crawlschedule(created_at);
CREATE INDEX IF NOT EXISTS crawls_crawlschedule_created_by_id_idx ON crawls_crawlschedule(created_by_id);
CREATE INDEX IF NOT EXISTS crawls_crawlschedule_template_id_idx ON crawls_crawlschedule(template_id);
-- Create crawls_crawl table
CREATE TABLE IF NOT EXISTS crawls_crawl (
    id TEXT PRIMARY KEY NOT NULL,
    created_at DATETIME NOT NULL,
    modified_at DATETIME NOT NULL,
    num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
    num_uses_failed INTEGER NOT NULL DEFAULT 0,
    urls TEXT NOT NULL,
    config TEXT,
    max_depth INTEGER NOT NULL DEFAULT 0,
    tags_str VARCHAR(1024) NOT NULL DEFAULT '',
    persona_id TEXT,
    label VARCHAR(64) NOT NULL DEFAULT '',
    notes TEXT NOT NULL DEFAULT '',
    output_dir VARCHAR(512) NOT NULL DEFAULT '',
    status VARCHAR(15) NOT NULL DEFAULT 'queued',
    retry_at DATETIME,
    created_by_id INTEGER NOT NULL,
    schedule_id TEXT,
    FOREIGN KEY (created_by_id) REFERENCES auth_user(id) ON DELETE CASCADE,
    FOREIGN KEY (schedule_id) REFERENCES crawls_crawlschedule(id) ON DELETE SET NULL
);
CREATE INDEX IF NOT EXISTS crawls_crawl_status_idx ON crawls_crawl(status);
CREATE INDEX IF NOT EXISTS crawls_crawl_retry_at_idx ON crawls_crawl(retry_at);
CREATE INDEX IF NOT EXISTS crawls_crawl_created_at_idx ON crawls_crawl(created_at);
CREATE INDEX IF NOT EXISTS crawls_crawl_created_by_id_idx ON crawls_crawl(created_by_id);
CREATE INDEX IF NOT EXISTS crawls_crawl_schedule_id_idx ON crawls_crawl(schedule_id);
"""

# Reverse SQL: crawls_crawl is dropped before crawls_crawlschedule, i.e. the
# table holding the schedule_id foreign key goes first.
_DROP_CRAWL_TABLES_SQL = """
DROP TABLE IF EXISTS crawls_crawl;
DROP TABLE IF EXISTS crawls_crawlschedule;
"""


class Migration(migrations.Migration):
    """Initial migration creating the Crawl and CrawlSchedule models.

    The work is split with ``SeparateDatabaseAndState``:

    * the database side runs hand-written SQL that creates the
      ``crawls_crawlschedule`` and ``crawls_crawl`` tables plus their indexes;
    * the state side registers the matching ``CrawlSchedule`` and ``Crawl``
      models with Django's migration state, then adds the
      ``CrawlSchedule.template`` foreign key once ``Crawl`` exists, which
      resolves the circular reference between the two models.
    """

    initial = True

    dependencies = [
        ('auth', '0012_alter_user_first_name_max_length'),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        migrations.SeparateDatabaseAndState(
            # What actually touches the database.
            database_operations=[
                migrations.RunSQL(
                    sql=_CREATE_CRAWL_TABLES_SQL,
                    reverse_sql=_DROP_CRAWL_TABLES_SQL,
                ),
            ],
            # What Django's migration state records; no SQL is generated
            # from these operations.
            state_operations=[
                migrations.CreateModel(
                    name='CrawlSchedule',
                    fields=[
                        ('id', models.UUIDField(
                            default=uuid7,
                            editable=False,
                            primary_key=True,
                            serialize=False,
                            unique=True,
                        )),
                        ('created_at', models.DateTimeField(
                            db_index=True,
                            default=django.utils.timezone.now,
                        )),
                        ('modified_at', models.DateTimeField(auto_now=True)),
                        ('num_uses_succeeded', models.PositiveIntegerField(default=0)),
                        ('num_uses_failed', models.PositiveIntegerField(default=0)),
                        ('schedule', models.CharField(max_length=64)),
                        ('is_enabled', models.BooleanField(default=True)),
                        ('label', models.CharField(blank=True, default='', max_length=64)),
                        ('notes', models.TextField(blank=True, default='')),
                        ('created_by', models.ForeignKey(
                            default=get_or_create_system_user_pk,
                            on_delete=django.db.models.deletion.CASCADE,
                            to=settings.AUTH_USER_MODEL,
                        )),
                    ],
                    options={
                        'verbose_name': 'Scheduled Crawl',
                        'verbose_name_plural': 'Scheduled Crawls',
                        'app_label': 'crawls',
                    },
                ),
                migrations.CreateModel(
                    name='Crawl',
                    fields=[
                        ('id', models.UUIDField(
                            default=uuid7,
                            editable=False,
                            primary_key=True,
                            serialize=False,
                            unique=True,
                        )),
                        ('created_at', models.DateTimeField(
                            db_index=True,
                            default=django.utils.timezone.now,
                        )),
                        ('modified_at', models.DateTimeField(auto_now=True)),
                        ('num_uses_succeeded', models.PositiveIntegerField(default=0)),
                        ('num_uses_failed', models.PositiveIntegerField(default=0)),
                        ('urls', models.TextField(
                            help_text='Newline-separated list of URLs to crawl',
                        )),
                        ('config', models.JSONField(blank=True, default=dict, null=True)),
                        ('max_depth', models.PositiveSmallIntegerField(
                            default=0,
                            validators=[
                                django.core.validators.MinValueValidator(0),
                                django.core.validators.MaxValueValidator(4),
                            ],
                        )),
                        ('tags_str', models.CharField(blank=True, default='', max_length=1024)),
                        ('persona_id', models.UUIDField(blank=True, null=True)),
                        ('label', models.CharField(blank=True, default='', max_length=64)),
                        ('notes', models.TextField(blank=True, default='')),
                        ('output_dir', models.CharField(blank=True, default='', max_length=512)),
                        ('status', models.CharField(
                            choices=[
                                ('queued', 'Queued'),
                                ('started', 'Started'),
                                ('sealed', 'Sealed'),
                            ],
                            db_index=True,
                            default='queued',
                            max_length=15,
                        )),
                        ('retry_at', models.DateTimeField(
                            blank=True,
                            db_index=True,
                            default=django.utils.timezone.now,
                            null=True,
                        )),
                        ('created_by', models.ForeignKey(
                            default=get_or_create_system_user_pk,
                            on_delete=django.db.models.deletion.CASCADE,
                            to=settings.AUTH_USER_MODEL,
                        )),
                        ('schedule', models.ForeignKey(
                            blank=True,
                            editable=True,
                            null=True,
                            on_delete=django.db.models.deletion.SET_NULL,
                            to='crawls.crawlschedule',
                        )),
                    ],
                    options={
                        'verbose_name': 'Crawl',
                        'verbose_name_plural': 'Crawls',
                        'app_label': 'crawls',
                    },
                ),
                # Added after both CreateModel operations because
                # CrawlSchedule.template points at Crawl, while Crawl.schedule
                # points back at CrawlSchedule.
                migrations.AddField(
                    model_name='crawlschedule',
                    name='template',
                    field=models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        to='crawls.crawl',
                    ),
                ),
            ],
        ),
    ]
|