# Generated by hand on 2025-12-29
# Creates a default crawl for v0.7.2 migrated snapshots and makes crawl_id NOT NULL
from django.db import migrations, models
import uuid


def create_default_crawl_and_assign_snapshots(apps, schema_editor):
    """
    Create a default crawl for migrated snapshots and assign every snapshot
    that has no crawl to it.

    Uses raw SQL (sqlite-style ``?`` placeholders — this migration targets
    SQLite, see the table-rebuild in the companion RunSQL) because the app
    registry isn't fully populated during migrations.  ``apps`` and
    ``schema_editor`` are accepted to satisfy the RunPython signature but
    are intentionally unused.
    """
    from django.db import connection
    import uuid as uuid_lib
    from datetime import datetime

    # Context manager guarantees the cursor is closed even if a statement
    # raises (the previous version leaked the cursor on error paths).
    with connection.cursor() as cursor:
        # Fresh installs (or already-migrated DBs) have no orphaned snapshots.
        cursor.execute("SELECT COUNT(*) FROM core_snapshot WHERE crawl_id IS NULL")
        snapshots_without_crawl = cursor.fetchone()[0]
        if snapshots_without_crawl == 0:
            print('✓ Fresh install or all snapshots already have crawls')
            return

        # Ensure the system user (pk=1) exists; it owns the default crawl
        # inserted below (created_by_id = 1).
        cursor.execute("SELECT id FROM auth_user WHERE id = 1")
        if not cursor.fetchone():
            # NOTE(review): naive local time — assumes the DB stores naive
            # timestamps; confirm against the project's USE_TZ setting.
            cursor.execute("""
                INSERT INTO auth_user (id, password, is_superuser, username, first_name, last_name, email, is_staff, is_active, date_joined)
                VALUES (1, '!', 1, 'system', '', '', '', 1, 1, ?)
            """, [datetime.now().isoformat()])

        # Create a default crawl for migrated snapshots.
        # At this point crawls_crawl is guaranteed to have the v0.9.0 schema
        # (crawls/0002 ran first — see Migration.dependencies).
        crawl_id = str(uuid_lib.uuid4())
        now = datetime.now().isoformat()
        cursor.execute("""
            INSERT INTO crawls_crawl (
                id, created_at, modified_at, num_uses_succeeded, num_uses_failed,
                urls, max_depth, tags_str, label, notes, output_dir,
                status, retry_at, created_by_id, schedule_id, config, persona_id
            ) VALUES (?, ?, ?, 0, 0, '', 0, '', 'Migrated from v0.7.2/v0.8.6',
                      'Auto-created crawl for migrated snapshots', '',
                      'sealed', ?, 1, NULL, '{}', NULL)
        """, [crawl_id, now, now, now])

        # Point every orphaned snapshot at the newly created default crawl.
        cursor.execute("UPDATE core_snapshot SET crawl_id = ? WHERE crawl_id IS NULL", [crawl_id])
        print(f'✓ Assigned {snapshots_without_crawl} snapshots to default crawl {crawl_id}')
class Migration(migrations.Migration):
    """
    Backfill step for upgraded installs: after RunPython assigns every
    orphaned snapshot to a default crawl, rebuild core_snapshot with a
    NOT NULL crawl_id column (SQLite cannot ALTER a column to NOT NULL,
    hence the create-copy-drop-rename table rebuild below).
    """

    dependencies = [
        # Ensures core_snapshot and crawls_crawl already have their v0.9.0
        # schemas, and auth_user exists, before this data migration runs.
        ('core', '0023_upgrade_to_0_9_0'),
        ('crawls', '0002_upgrade_from_0_8_6'),
        ('auth', '0012_alter_user_first_name_max_length'),
    ]

    operations = [
        # 1) Data step: create the default crawl and adopt orphaned snapshots.
        #    Irreversible in practice, so reverse is a no-op.
        migrations.RunPython(
            create_default_crawl_and_assign_snapshots,
            reverse_code=migrations.RunPython.noop,
        ),
        # 2) Schema step: the real DB change is raw SQL; the matching Django
        #    model-state change is declared separately so the ORM's view of
        #    the schema stays in sync without emitting its own DDL.
        migrations.SeparateDatabaseAndState(
            database_operations=[
                # Now make crawl_id NOT NULL
                migrations.RunSQL(
                    sql="""
                    -- Rebuild snapshot table with NOT NULL crawl_id
                    CREATE TABLE core_snapshot_final (
                        id TEXT PRIMARY KEY NOT NULL,
                        created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
                        modified_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
                        url TEXT NOT NULL,
                        timestamp VARCHAR(32) NOT NULL UNIQUE,
                        bookmarked_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
                        crawl_id TEXT NOT NULL,
                        parent_snapshot_id TEXT,
                        title VARCHAR(512),
                        downloaded_at DATETIME,
                        depth INTEGER NOT NULL DEFAULT 0,
                        fs_version VARCHAR(10) NOT NULL DEFAULT '0.9.0',
                        config TEXT NOT NULL DEFAULT '{}',
                        notes TEXT NOT NULL DEFAULT '',
                        num_uses_succeeded INTEGER NOT NULL DEFAULT 0,
                        num_uses_failed INTEGER NOT NULL DEFAULT 0,
                        status VARCHAR(15) NOT NULL DEFAULT 'queued',
                        retry_at DATETIME,
                        current_step INTEGER NOT NULL DEFAULT 0,
                        FOREIGN KEY (crawl_id) REFERENCES crawls_crawl(id) ON DELETE CASCADE,
                        FOREIGN KEY (parent_snapshot_id) REFERENCES core_snapshot(id) ON DELETE SET NULL
                    );
                    INSERT INTO core_snapshot_final (
                        id, url, timestamp, title,
                        bookmarked_at, created_at, modified_at,
                        crawl_id, parent_snapshot_id,
                        downloaded_at, depth, fs_version,
                        config, notes,
                        num_uses_succeeded, num_uses_failed,
                        status, retry_at, current_step
                    )
                    SELECT
                        id, url, timestamp, title,
                        bookmarked_at, created_at, modified_at,
                        crawl_id, parent_snapshot_id,
                        downloaded_at, depth, fs_version,
                        COALESCE(config, '{}'), COALESCE(notes, ''),
                        num_uses_succeeded, num_uses_failed,
                        status, retry_at, current_step
                    FROM core_snapshot;
                    DROP TABLE core_snapshot;
                    ALTER TABLE core_snapshot_final RENAME TO core_snapshot;
                    CREATE INDEX core_snapshot_url_idx ON core_snapshot(url);
                    CREATE INDEX core_snapshot_timestamp_idx ON core_snapshot(timestamp);
                    CREATE INDEX core_snapshot_bookmarked_at_idx ON core_snapshot(bookmarked_at);
                    CREATE INDEX core_snapshot_crawl_id_idx ON core_snapshot(crawl_id);
                    CREATE INDEX core_snapshot_status_idx ON core_snapshot(status);
                    CREATE INDEX core_snapshot_retry_at_idx ON core_snapshot(retry_at);
                    CREATE INDEX core_snapshot_created_at_idx ON core_snapshot(created_at);
                    CREATE UNIQUE INDEX core_snapshot_url_crawl_unique ON core_snapshot(url, crawl_id);
                    """,
                    reverse_sql=migrations.RunSQL.noop,
                ),
            ],
            state_operations=[
                # Mirror of the SQL above in Django's model state: Snapshot
                # gains a required FK to Crawl (NOT NULL + ON DELETE CASCADE).
                migrations.AddField(
                    model_name='snapshot',
                    name='crawl',
                    field=models.ForeignKey(
                        on_delete=models.deletion.CASCADE,
                        to='crawls.crawl',
                        help_text='Crawl that created this snapshot'
                    ),
                ),
            ],
        ),
    ]