models.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. """Base models using UUIDv7 for all id fields."""
  2. __package__ = 'archivebox.base_models'
  3. import io
  4. import csv
  5. import json
  6. from uuid import UUID
  7. from archivebox.uuid_compat import uuid7
  8. from typing import Any, Iterable, ClassVar
  9. from pathlib import Path
  10. from django.contrib import admin
  11. from django.db import models
  12. from django.db.models import F
  13. from django.utils import timezone
  14. from django.contrib.auth import get_user_model
  15. from django.urls import reverse_lazy
  16. from django.conf import settings
  17. from django_stubs_ext.db.models import TypedModelMeta
  18. from archivebox import DATA_DIR
  19. from archivebox.misc.util import to_json
  20. from archivebox.misc.hashing import get_dir_info
  21. def get_or_create_system_user_pk(username='system'):
  22. User = get_user_model()
  23. # If there's exactly one superuser, use that for all system operations
  24. if User.objects.filter(is_superuser=True).count() == 1:
  25. return User.objects.filter(is_superuser=True).values_list('pk', flat=True)[0]
  26. # Otherwise get or create the system user
  27. user, _ = User.objects.get_or_create(
  28. username=username,
  29. defaults={'is_staff': True, 'is_superuser': True, 'email': '', 'password': '!'}
  30. )
  31. return user.pk
  32. class AutoDateTimeField(models.DateTimeField):
  33. """DateTimeField that automatically updates on save (legacy compatibility)."""
  34. def pre_save(self, model_instance, add):
  35. if add or not getattr(model_instance, self.attname):
  36. value = timezone.now()
  37. setattr(model_instance, self.attname, value)
  38. return value
  39. return super().pre_save(model_instance, add)
  40. class ModelWithUUID(models.Model):
  41. id = models.UUIDField(primary_key=True, default=uuid7, editable=False, unique=True)
  42. created_at = models.DateTimeField(default=timezone.now, db_index=True)
  43. modified_at = models.DateTimeField(auto_now=True)
  44. created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk, null=False, db_index=True)
  45. class Meta(TypedModelMeta):
  46. abstract = True
  47. def __str__(self):
  48. return f'[{self.id}] {self.__class__.__name__}'
  49. @property
  50. def admin_change_url(self) -> str:
  51. return f"/admin/{self._meta.app_label}/{self._meta.model_name}/{self.pk}/change/"
  52. @property
  53. def api_url(self) -> str:
  54. return reverse_lazy('api-1:get_any', args=[self.id])
  55. @property
  56. def api_docs_url(self) -> str:
  57. return f'/api/v1/docs#/{self._meta.app_label.title()}%20Models/api_v1_{self._meta.app_label}_get_{self._meta.db_table}'
  58. def as_json(self, keys: Iterable[str] = ()) -> dict:
  59. default_keys = ('id', 'created_at', 'modified_at')
  60. return {key: getattr(self, key) for key in (keys or default_keys) if hasattr(self, key)}
  61. class ModelWithSerializers(ModelWithUUID):
  62. class Meta(TypedModelMeta):
  63. abstract = True
  64. def as_csv_row(self, keys: Iterable[str] = (), separator: str = ',') -> str:
  65. buffer = io.StringIO()
  66. csv.writer(buffer, delimiter=separator).writerow(str(getattr(self, key, '')) for key in (keys or self.as_json().keys()))
  67. return buffer.getvalue()
  68. def as_jsonl_row(self, keys: Iterable[str] = (), **json_kwargs) -> str:
  69. return json.dumps({key: getattr(self, key, '') for key in (keys or self.as_json().keys())}, sort_keys=True, indent=None, **json_kwargs)
  70. class ModelWithNotes(models.Model):
  71. """Mixin for models with a notes field."""
  72. notes = models.TextField(blank=True, null=False, default='')
  73. class Meta:
  74. abstract = True
  75. class ModelWithHealthStats(models.Model):
  76. """Mixin for models with health tracking fields."""
  77. num_uses_failed = models.PositiveIntegerField(default=0)
  78. num_uses_succeeded = models.PositiveIntegerField(default=0)
  79. class Meta:
  80. abstract = True
  81. @property
  82. def health(self) -> int:
  83. total = max(self.num_uses_failed + self.num_uses_succeeded, 1)
  84. return round((self.num_uses_succeeded / total) * 100)
  85. def increment_health_stats(self, success: bool):
  86. """Atomically increment success or failure counter using F() expression."""
  87. field = 'num_uses_succeeded' if success else 'num_uses_failed'
  88. type(self).objects.filter(pk=self.pk).update(**{field: F(field) + 1})
  89. class ModelWithConfig(models.Model):
  90. """Mixin for models with a JSON config field."""
  91. config = models.JSONField(default=dict, null=True, blank=True, editable=True)
  92. class Meta:
  93. abstract = True
  94. class ModelWithOutputDir(ModelWithSerializers):
  95. class Meta:
  96. abstract = True
  97. def save(self, *args, **kwargs):
  98. super().save(*args, **kwargs)
  99. self.OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
  100. self.save_json_index()
  101. @property
  102. def output_dir_parent(self) -> str:
  103. return f'{self._meta.model_name}s'
  104. @property
  105. def output_dir_name(self) -> str:
  106. return str(self.id)
  107. @property
  108. def output_dir_str(self) -> str:
  109. return f'{self.output_dir_parent}/{self.output_dir_name}'
  110. @property
  111. def OUTPUT_DIR(self) -> Path:
  112. return DATA_DIR / self.output_dir_str
  113. def save_json_index(self):
  114. (self.OUTPUT_DIR / 'index.json').write_text(to_json(self.as_json()))