0027_update_snapshot_ids.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  1. # Generated by Django 5.0.6 on 2024-08-18 02:48
  2. from django.db import migrations
  3. from datetime import datetime
  4. from archivebox.base_models.abid import ABID, abid_from_values, DEFAULT_ABID_URI_SALT
  5. def calculate_abid(self):
  6. """
  7. Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src).
  8. """
  9. prefix = self.abid_prefix
  10. ts = eval(self.abid_ts_src)
  11. uri = eval(self.abid_uri_src)
  12. subtype = eval(self.abid_subtype_src)
  13. rand = eval(self.abid_rand_src)
  14. if (not prefix) or prefix == 'obj_':
  15. suggested_abid = self.__class__.__name__[:3].lower()
  16. raise Exception(f'{self.__class__.__name__}.abid_prefix must be defined to calculate ABIDs (suggested: {suggested_abid})')
  17. if not ts:
  18. ts = datetime.utcfromtimestamp(0)
  19. print(f'[!] WARNING: Generating ABID with ts=0000000000 placeholder because {self.__class__.__name__}.abid_ts_src={self.abid_ts_src} is unset!', ts.isoformat())
  20. if not uri:
  21. uri = str(self)
  22. print(f'[!] WARNING: Generating ABID with uri=str(self) placeholder because {self.__class__.__name__}.abid_uri_src={self.abid_uri_src} is unset!', uri)
  23. if not subtype:
  24. subtype = self.__class__.__name__
  25. print(f'[!] WARNING: Generating ABID with subtype={subtype} placeholder because {self.__class__.__name__}.abid_subtype_src={self.abid_subtype_src} is unset!', subtype)
  26. if not rand:
  27. rand = getattr(self, 'uuid', None) or getattr(self, 'id', None) or getattr(self, 'pk')
  28. print(f'[!] WARNING: Generating ABID with rand=self.id placeholder because {self.__class__.__name__}.abid_rand_src={self.abid_rand_src} is unset!', rand)
  29. abid = abid_from_values(
  30. prefix=prefix,
  31. ts=ts,
  32. uri=uri,
  33. subtype=subtype,
  34. rand=rand,
  35. salt=DEFAULT_ABID_URI_SALT,
  36. )
  37. assert abid.ulid and abid.uuid and abid.typeid, f'Failed to calculate {prefix}_ABID for {self.__class__.__name__}'
  38. return abid
  39. def update_snapshot_ids(apps, schema_editor):
  40. Snapshot = apps.get_model("core", "Snapshot")
  41. num_total = Snapshot.objects.all().count()
  42. print(f' Updating {num_total} Snapshot.id, Snapshot.uuid values in place...')
  43. for idx, snapshot in enumerate(Snapshot.objects.all().only('abid').iterator(chunk_size=500)):
  44. assert snapshot.abid
  45. snapshot.abid_prefix = 'snp_'
  46. snapshot.abid_ts_src = 'self.added'
  47. snapshot.abid_uri_src = 'self.url'
  48. snapshot.abid_subtype_src = '"01"'
  49. snapshot.abid_rand_src = 'self.uuid'
  50. snapshot.abid = calculate_abid(snapshot)
  51. snapshot.uuid = snapshot.abid.uuid
  52. snapshot.save(update_fields=["abid", "uuid"])
  53. assert str(ABID.parse(snapshot.abid).uuid) == str(snapshot.uuid)
  54. if idx % 1000 == 0:
  55. print(f'Migrated {idx}/{num_total} Snapshot objects...')
  56. def update_archiveresult_ids(apps, schema_editor):
  57. Snapshot = apps.get_model("core", "Snapshot")
  58. ArchiveResult = apps.get_model("core", "ArchiveResult")
  59. num_total = ArchiveResult.objects.all().count()
  60. print(f' Updating {num_total} ArchiveResult.id, ArchiveResult.uuid values in place... (may take an hour or longer for large collections...)')
  61. for idx, result in enumerate(ArchiveResult.objects.all().only('abid', 'snapshot_id').iterator(chunk_size=500)):
  62. assert result.abid
  63. result.abid_prefix = 'res_'
  64. result.snapshot = Snapshot.objects.get(pk=result.snapshot_id)
  65. result.snapshot_added = result.snapshot.added
  66. result.snapshot_url = result.snapshot.url
  67. result.abid_ts_src = 'self.snapshot_added'
  68. result.abid_uri_src = 'self.snapshot_url'
  69. result.abid_subtype_src = 'self.extractor'
  70. result.abid_rand_src = 'self.id'
  71. result.abid = calculate_abid(result)
  72. result.uuid = result.abid.uuid
  73. result.uuid = ABID.parse(result.abid).uuid
  74. result.save(update_fields=["abid", "uuid"])
  75. assert str(ABID.parse(result.abid).uuid) == str(result.uuid)
  76. if idx % 5000 == 0:
  77. print(f'Migrated {idx}/{num_total} ArchiveResult objects...')
  78. class Migration(migrations.Migration):
  79. dependencies = [
  80. ('core', '0026_archiveresult_created_archiveresult_created_by_and_more'),
  81. ]
  82. operations = [
  83. migrations.RunPython(update_snapshot_ids, reverse_code=migrations.RunPython.noop),
  84. migrations.RunPython(update_archiveresult_ids, reverse_code=migrations.RunPython.noop),
  85. ]