| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105 |
- # Generated by Django 5.0.6 on 2024-08-18 02:48
- from django.db import migrations
- from datetime import datetime
- from archivebox.base_models.abid import ABID, abid_from_values, DEFAULT_ABID_URI_SALT
def calculate_abid(self):
    """
    Return a freshly derived ABID for a model instance.

    The ABID is assembled from the ``abid_*_src`` attributes set on ``self``.
    Each of them holds a Python expression (e.g. ``'self.added'`` or ``'"01"'``)
    that is evaluated with ``self`` in scope to obtain the ts / uri / subtype /
    rand component. Any component that evaluates to a falsy value is replaced
    by a placeholder and a warning is printed.

    Raises:
        Exception: if ``self.abid_prefix`` is empty or the generic ``'obj_'``.
        AssertionError: if the resulting ABID lacks a ulid, uuid or typeid.
    """
    model_name = self.__class__.__name__

    # Validate the prefix before touching the abid_*_src expressions, so a
    # misconfigured object gets the helpful message below rather than an
    # unrelated AttributeError raised from one of the evals.
    prefix = self.abid_prefix
    if (not prefix) or prefix == 'obj_':
        suggested_abid = model_name[:3].lower()
        raise Exception(f'{model_name}.abid_prefix must be defined to calculate ABIDs (suggested: {suggested_abid})')

    # NOTE(security): these expressions are eval()'d with `self` in scope.
    # Within this file they are hard-coded literals set by the callers; they
    # must never be populated from untrusted input.
    ts = eval(self.abid_ts_src)
    uri = eval(self.abid_uri_src)
    subtype = eval(self.abid_subtype_src)
    rand = eval(self.abid_rand_src)

    if not ts:
        # Fall back to the Unix epoch so an ABID can still be generated.
        ts = datetime.utcfromtimestamp(0)
        print(f'[!] WARNING: Generating ABID with ts=0000000000 placeholder because {self.__class__.__name__}.abid_ts_src={self.abid_ts_src} is unset!', ts.isoformat())

    if not uri:
        uri = str(self)
        print(f'[!] WARNING: Generating ABID with uri=str(self) placeholder because {self.__class__.__name__}.abid_uri_src={self.abid_uri_src} is unset!', uri)

    if not subtype:
        subtype = model_name
        print(f'[!] WARNING: Generating ABID with subtype={subtype} placeholder because {self.__class__.__name__}.abid_subtype_src={self.abid_subtype_src} is unset!', subtype)

    if not rand:
        # First truthy value among uuid, id, pk (pk has no default, so a
        # missing pk attribute raises AttributeError).
        rand = getattr(self, 'uuid', None) or getattr(self, 'id', None) or getattr(self, 'pk')
        print(f'[!] WARNING: Generating ABID with rand=self.id placeholder because {self.__class__.__name__}.abid_rand_src={self.abid_rand_src} is unset!', rand)

    abid = abid_from_values(
        prefix=prefix,
        ts=ts,
        uri=uri,
        subtype=subtype,
        rand=rand,
        salt=DEFAULT_ABID_URI_SALT,
    )

    # Raised explicitly (rather than via `assert`) so the sanity check still
    # runs when Python is started with -O.
    if not (abid.ulid and abid.uuid and abid.typeid):
        raise AssertionError(f'Failed to calculate {prefix}_ABID for {self.__class__.__name__}')
    return abid
def update_snapshot_ids(apps, schema_editor):
    """
    Recalculate the ABID of every Snapshot row and store the ABID-derived uuid.

    For each Snapshot the ABID is rebuilt with prefix ``snp_`` from the row's
    ``added``, ``url`` and ``uuid`` values plus the fixed subtype ``"01"``;
    the ``abid`` and ``uuid`` columns are then saved.

    Args:
        apps: migration app registry used to look up the historical model.
        schema_editor: unused here; part of the RunPython callable signature.
    """
    Snapshot = apps.get_model("core", "Snapshot")
    num_total = Snapshot.objects.all().count()
    print(f' Updating {num_total} Snapshot.id, Snapshot.uuid values in place...')

    # Fetch every column the ABID expressions below read. Requesting only
    # 'abid' leaves added/url/uuid deferred, and each deferred attribute
    # access issues its own extra query for every single row.
    snapshots = Snapshot.objects.all().only('abid', 'added', 'url', 'uuid')

    for idx, snapshot in enumerate(snapshots.iterator(chunk_size=500)):
        assert snapshot.abid

        # Expressions consumed (via eval) by calculate_abid.
        snapshot.abid_prefix = 'snp_'
        snapshot.abid_ts_src = 'self.added'
        snapshot.abid_uri_src = 'self.url'
        snapshot.abid_subtype_src = '"01"'
        snapshot.abid_rand_src = 'self.uuid'

        snapshot.abid = calculate_abid(snapshot)
        snapshot.uuid = snapshot.abid.uuid
        snapshot.save(update_fields=["abid", "uuid"])

        # Sanity check: the stored uuid must match the one encoded in the ABID.
        assert str(ABID.parse(snapshot.abid).uuid) == str(snapshot.uuid)

        if idx % 1000 == 0:
            print(f'Migrated {idx}/{num_total} Snapshot objects...')
def update_archiveresult_ids(apps, schema_editor):
    """
    Recalculate the ABID of every ArchiveResult row and store the derived uuid.

    For each ArchiveResult the ABID is rebuilt with prefix ``res_`` from the
    parent Snapshot's ``added`` and ``url`` values, the result's ``extractor``
    and its ``id``; the ``abid`` and ``uuid`` columns are then saved.

    Args:
        apps: migration app registry used to look up the historical models.
        schema_editor: unused here; part of the RunPython callable signature.
    """
    Snapshot = apps.get_model("core", "Snapshot")
    ArchiveResult = apps.get_model("core", "ArchiveResult")
    num_total = ArchiveResult.objects.all().count()
    print(f' Updating {num_total} ArchiveResult.id, ArchiveResult.uuid values in place... (may take an hour or longer for large collections...)')

    # 'extractor' is read by the subtype expression below; include it here so
    # it is not left deferred (a deferred field costs one extra query per row).
    results = ArchiveResult.objects.all().only('abid', 'snapshot_id', 'extractor')

    for idx, result in enumerate(results.iterator(chunk_size=500)):
        assert result.abid
        result.abid_prefix = 'res_'

        # Copy the parent Snapshot's values onto the result so the eval'd
        # expressions can reach them as plain attributes of `self`.
        result.snapshot = Snapshot.objects.get(pk=result.snapshot_id)
        result.snapshot_added = result.snapshot.added
        result.snapshot_url = result.snapshot.url

        # Expressions consumed (via eval) by calculate_abid.
        result.abid_ts_src = 'self.snapshot_added'
        result.abid_uri_src = 'self.snapshot_url'
        result.abid_subtype_src = 'self.extractor'
        result.abid_rand_src = 'self.id'

        result.abid = calculate_abid(result)
        # A single assignment: the earlier `result.uuid = result.abid.uuid`
        # was a dead store that this line immediately overwrote.
        result.uuid = ABID.parse(result.abid).uuid
        result.save(update_fields=["abid", "uuid"])

        # Sanity check: the stored uuid must match the one encoded in the ABID.
        assert str(ABID.parse(result.abid).uuid) == str(result.uuid)

        if idx % 5000 == 0:
            print(f'Migrated {idx}/{num_total} ArchiveResult objects...')
class Migration(migrations.Migration):
    """
    Data-only migration that regenerates ABIDs and uuids in place.

    Snapshot rows are processed first, then ArchiveResult rows. Reversing the
    migration is a no-op for both steps: the previous values are not restored.
    """

    dependencies = [
        ("core", "0026_archiveresult_created_archiveresult_created_by_and_more"),
    ]

    # One RunPython step per forward function, each paired with a noop reverse.
    operations = [
        migrations.RunPython(forward, reverse_code=migrations.RunPython.noop)
        for forward in (update_snapshot_ids, update_archiveresult_ids)
    ]
|