file_migrations.py 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899
  1. # __package__ = 'archivebox.filestore'
  2. # import re
  3. # from pathlib import Path
  4. # from functools import wraps
  5. # from enum import Enum
  6. # import archivebox
  7. # from archivebox import CONSTANTS
  8. # from core.models import Snapshot
  9. # from .models import File
  10. # class FilestoreVersion(Enum):
  11. # v0_7_2 = 'v0.7.2'
  12. # v0_8_6 = 'v0.8.6'
  13. # v0_9_0 = 'v0.9.0'
  14. # LATEST_VERSION = FilestoreVersion.v0_9_0
  15. # def migration(src_ver: FilestoreVersion, dst_ver: FilestoreVersion, pattern: str, timeout_seconds: int = 600):
  16. # """Decorator for a migration function: it only runs on files whose name matches `pattern` and whose version equals `src_ver`, and marks the file as `dst_ver` when the migration succeeds."""
  17. # def decorator(migration_func):
  18. # @wraps(migration_func)
  19. # def wrapper(file: File) -> None:
  20. # # skip if this migration doesn't apply to this file
  21. # if file.version != src_ver:
  22. # return None
  23. # if not re.match(pattern, file.file.name):
  24. # return None
  25. # # acquire lock, run migration + update version, then unlock
  26. # try:
  27. # file.acquire_lock(timeout_seconds)
  28. # migration_func(file)
  29. # file.version = dst_ver
  30. # except Exception as e:
  31. # # logger.error(f"Failed to migrate file {file.id}: {e}")
  32. # print(f"Failed to migrate file {file.id}: {e}")
  33. # file.version = src_ver # roll back version to original version
  34. # finally:
  35. # file.release_lock()
  36. # file.save()
  37. # wrapper.src_ver = src_ver # type: ignore
  38. # wrapper.dst_ver = dst_ver # type: ignore
  39. # wrapper.pattern = pattern # type: ignore
  40. # wrapper.timeout_seconds = timeout_seconds # type: ignore
  41. # return wrapper
  42. # return decorator
  43. # def detect_archiveresult(path: Path) -> 'ArchiveResult | None':
  44. # # example path: archive/1723423525.0/singlefile.html (path.parts[1] is the snapshot timestamp)
  45. # timestamp = path.parts[1]
  46. # snapshot = Snapshot.objects.filter(timestamp=timestamp).last()
  47. # if not snapshot:
  48. # return
  49. # result = snapshot.archiveresult_set.filter(output=path.name).last()
  50. # if not result:
  51. # return
  52. # return result
  53. # # @hookimpl(hook_name='migrate_file')
  54. # @migration(FilestoreVersion.v0_7_2, FilestoreVersion.v0_8_6, r'archive/([0-9\.]+)/.+', timeout_seconds=600)
  55. # def migrate_v07_to_v08_singlefile(file: File) -> None:
  56. # result = detect_archiveresult(file.relpath)
  57. # new_path = result.OUTPUT_DIR / 'index.html'
  58. # file.move_to(new_path)
  59. # # @hookimpl(hook_name='migrate_file')
  60. # @migration(FilestoreVersion.v0_8_6, FilestoreVersion.v0_9_0, r'archive/([0-9\.]+)/singlefile.html', timeout_seconds=600)
  61. # def migrate_v08_to_v09_singlefile(file: File) -> None:
  62. # result = detect_archiveresult(file.relpath)
  63. # new_path = result.OUTPUT_DIR / 'index.html'
  64. # file.move_to(new_path)
  65. # def migrate_all_files(target=LATEST_VERSION, batch_size: int = 100):
  66. # File.release_expired_locks()
  67. # pending_files = (
  68. # File.objects
  69. # .filter(status='unlocked')
  70. # .exclude(version=target)
  71. # .iterator(chunk_size=batch_size)
  72. # )
  73. # for file in pending_files:
  74. # try:
  75. # archivebox.pm.hook.migrate_file(file=file)
  76. # except Exception as e:
  77. # print(f"Failed to migrate file {file.id}: {e}")