models.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509
  1. """
  2. This file provides the Django ABIDField and ABIDModel base model to inherit from.
  3. """
  4. from typing import Any, Dict, Union, List, Set, cast
  5. from uuid import uuid4
  6. from functools import partial
  7. from charidfield import CharIDField # type: ignore[import-untyped]
  8. from django.contrib import admin
  9. from django.core.exceptions import ValidationError, NON_FIELD_ERRORS
  10. from django.db import models
  11. from django.utils import timezone
  12. from django.db.utils import OperationalError
  13. from django.contrib.auth import get_user_model
  14. from django.urls import reverse_lazy
  15. from django_stubs_ext.db.models import TypedModelMeta
  16. from .abid import (
  17. ABID,
  18. ABID_LEN,
  19. ABID_RAND_LEN,
  20. ABID_SUFFIX_LEN,
  21. DEFAULT_ABID_PREFIX,
  22. DEFAULT_ABID_URI_SALT,
  23. abid_part_from_prefix,
  24. abid_hashes_from_values,
  25. ts_from_abid,
  26. abid_part_from_ts,
  27. )
  28. ####################################################
  29. # Database Field for typeid/ulid style IDs with a prefix, e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ
  30. ABIDField = partial(
  31. CharIDField,
  32. max_length=ABID_LEN,
  33. help_text="ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)",
  34. default=None,
  35. null=True,
  36. blank=True,
  37. db_index=True,
  38. unique=True,
  39. )
  40. def get_or_create_system_user_pk(username='system'):
  41. """Get or create a system user with is_superuser=True to be the default owner for new DB rows"""
  42. User = get_user_model()
  43. # if only one user exists total, return that user
  44. if User.objects.filter(is_superuser=True).count() == 1:
  45. return User.objects.filter(is_superuser=True).values_list('pk', flat=True)[0]
  46. # otherwise, create a dedicated "system" user
  47. user, _was_created = User.objects.get_or_create(username=username, is_staff=True, is_superuser=True, defaults={'email': '', 'password': ''})
  48. return user.pk
  49. class AutoDateTimeField(models.DateTimeField):
  50. # def pre_save(self, model_instance, add):
  51. # return timezone.now()
  52. pass
  53. class ABIDError(Exception):
  54. pass
  55. class ABIDModel(models.Model):
  56. """
  57. Abstract Base Model for other models to depend on. Provides ArchiveBox ID (ABID) interface.
  58. """
  59. abid_prefix: str = DEFAULT_ABID_PREFIX # e.g. 'tag_'
  60. abid_ts_src = 'self.created_at' # e.g. 'self.created_at'
  61. abid_uri_src = 'None' # e.g. 'self.uri' (MUST BE SET)
  62. abid_subtype_src = 'self.__class__.__name__' # e.g. 'self.extractor'
  63. abid_rand_src = 'self.id' # e.g. 'self.uuid' or 'self.id'
  64. abid_salt: str = DEFAULT_ABID_URI_SALT # combined with self.uri to anonymize hashes on a per-install basis (default is shared globally with all users, means everyone will hash ABC to -> 123 the same around the world, makes it easy to share ABIDs across installs and see if they are for the same URI. Change this if you dont want your hashes to be guessable / in the same hash space as all other users)
  65. abid_drift_allowed: bool = False # set to True to allow abid_field values to change after a fixed ABID has been issued (NOT RECOMMENDED: means values can drift out of sync from original ABID)
  66. # id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
  67. # abid = ABIDField(prefix=abid_prefix)
  68. # created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False)
  69. # created_at = AutoDateTimeField(default=None, null=False, db_index=True)
  70. # modified_at = models.DateTimeField(auto_now=True)
  71. _prefetched_objects_cache: Dict[str, Any]
  72. class Meta(TypedModelMeta):
  73. abstract = True
  74. @admin.display(description='Summary')
  75. def __str__(self) -> str:
  76. return f'[{self.abid or (self.abid_prefix + "NEW")}] {self.__class__.__name__} {eval(self.abid_uri_src)}'
  77. def __init__(self, *args: Any, **kwargs: Any) -> None:
  78. """Overriden __init__ method ensures we have a stable creation timestamp that fields can use within initialization code pre-saving to DB."""
  79. super().__init__(*args, **kwargs)
  80. # pre-compute a stable timestamp of the obj init time (with abid.ts precision limit applied) for use when object is first created,
  81. # some other fields depend on a timestamp at creation time, and it's nice to have one common timestamp they can all share.
  82. # Used as an alternative to auto_now_add=True + auto_now=True which can produce two different times & requires saving to DB to get the TS.
  83. # (ordinarily fields cant depend on other fields until the obj is saved to db and recalled)
  84. self._init_timestamp = ts_from_abid(abid_part_from_ts(timezone.now()))
  85. def clean(self, abid_drift_allowed: bool | None=None) -> None:
  86. if self._state.adding:
  87. # only runs once when a new object is first saved to the DB
  88. # sets self.id, self.pk, self.created_by, self.created_at, self.modified_at
  89. self._previous_abid = None
  90. self.abid = str(self.issue_new_abid())
  91. else:
  92. # otherwise if updating, make sure none of the field changes would invalidate existing ABID
  93. abid_diffs = self.ABID_FRESH_DIFFS
  94. if abid_diffs:
  95. # change has invalidated the existing ABID, raise a nice ValidationError pointing out which fields caused the issue
  96. keys_changed = ', '.join(diff['abid_src'] for diff in abid_diffs.values())
  97. full_summary = (
  98. f"This {self.__class__.__name__}(abid={str(self.ABID)}) was assigned a fixed, unique ID (ABID) based on its contents when it was created. " +
  99. f"\nYou must reduce your changes to not affect these fields [{keys_changed}], or create a new {self.__class__.__name__} object instead."
  100. )
  101. change_error = ValidationError({
  102. **{
  103. # url: ValidationError('Cannot update self.url= https://example.com/old -> https://example.com/new ...')
  104. diff['abid_src'].replace('self.', '')
  105. if (diff['old_val'] != diff['new_val']) and hasattr(self, diff['abid_src'].replace('self.', ''))
  106. else NON_FIELD_ERRORS
  107. : ValidationError(
  108. 'Cannot update %(abid_src)s= "%(old_val)s" -> "%(new_val)s" (would alter %(model)s.ABID.%(key)s=%(old_hash)s to %(new_hash)s)',
  109. code='ABIDConflict',
  110. params=diff,
  111. )
  112. for diff in abid_diffs.values()
  113. },
  114. NON_FIELD_ERRORS: ValidationError(full_summary),
  115. })
  116. allowed_to_invalidate_abid = self.abid_drift_allowed if (abid_drift_allowed is None) else abid_drift_allowed
  117. if allowed_to_invalidate_abid:
  118. # print(f'\n#### WARNING: Change allowed despite it invalidating the ABID of an existing record ({self.__class__.__name__}.abid_drift_allowed={self.abid_drift_allowed})!', self.abid)
  119. # print(change_error)
  120. # print('--------------------------------------------------------------------------------------------------')
  121. pass
  122. else:
  123. print(f'\n#### ERROR: Change blocked because it would invalidate ABID of an existing record ({self.__class__.__name__}.abid_drift_allowed={self.abid_drift_allowed})', self.abid)
  124. print(change_error)
  125. print('--------------------------------------------------------------------------------------------------')
  126. raise change_error
  127. def save(self, *args: Any, abid_drift_allowed: bool | None=None, **kwargs: Any) -> None:
  128. """Overriden save method ensures new ABID is generated while a new object is first saving."""
  129. self.clean(abid_drift_allowed=abid_drift_allowed)
  130. return super().save(*args, **kwargs)
  131. @classmethod
  132. def id_from_abid(cls, abid: str) -> str:
  133. return str(cls.objects.only('pk').get(abid=cls.abid_prefix + str(abid).split('_', 1)[-1]).pk)
  134. @property
  135. def ABID_SOURCES(self) -> Dict[str, str]:
  136. """"Get the dict of fresh ABID component values based on the live object's properties."""
  137. assert self.abid_prefix
  138. return {
  139. 'prefix': 'self.abid_prefix', # defined as static class vars at build time
  140. 'ts': self.abid_ts_src,
  141. 'uri': self.abid_uri_src,
  142. 'subtype': self.abid_subtype_src,
  143. 'rand': self.abid_rand_src,
  144. 'salt': 'self.abid_salt', # defined as static class vars at build time
  145. }
  146. @property
  147. def ABID_FRESH_VALUES(self) -> Dict[str, Any]:
  148. """"Get the dict of fresh ABID component values based on the live object's properties."""
  149. abid_sources = self.ABID_SOURCES
  150. assert all(src != 'None' for src in abid_sources.values())
  151. return {
  152. 'prefix': eval(abid_sources['prefix']),
  153. 'ts': eval(abid_sources['ts']),
  154. 'uri': eval(abid_sources['uri']),
  155. 'subtype': eval(abid_sources['subtype']),
  156. 'rand': eval(abid_sources['rand']),
  157. 'salt': eval(abid_sources['salt']),
  158. }
  159. @property
  160. def ABID_FRESH_HASHES(self) -> Dict[str, str]:
  161. """"Get the dict of fresh ABID component hashes based on the live object's properties."""
  162. abid_values = self.ABID_FRESH_VALUES
  163. assert all(val for val in abid_values.values())
  164. return abid_hashes_from_values(
  165. prefix=abid_values['prefix'],
  166. ts=abid_values['ts'],
  167. uri=abid_values['uri'],
  168. subtype=abid_values['subtype'],
  169. rand=abid_values['rand'],
  170. salt=abid_values['salt'],
  171. )
  172. @property
  173. def ABID_FRESH_DIFFS(self) -> Dict[str, Dict[str, Any]]:
  174. """Get the dict of discrepancies between the existing saved ABID and a new fresh ABID computed based on the live object."""
  175. existing_abid = self.ABID
  176. existing_values = {} if self._state.adding else self.__class__.objects.get(pk=self.pk).ABID_FRESH_VALUES
  177. abid_sources = self.ABID_SOURCES
  178. fresh_values = self.ABID_FRESH_VALUES
  179. fresh_hashes = self.ABID_FRESH_HASHES
  180. return {
  181. key: {
  182. 'key': key,
  183. 'model': self.__class__.__name__,
  184. 'pk': self.pk,
  185. 'abid_src': abid_sources[key],
  186. 'old_val': existing_values.get(key, None),
  187. 'old_hash': getattr(existing_abid, key),
  188. 'new_val': fresh_values[key],
  189. 'new_hash': new_hash,
  190. 'summary': f'{abid_sources[key]}= "{existing_values.get(key, None)}" -> "{fresh_values[key]}" (would alter {self.__class__.__name__.lower()}.ABID.{key}={getattr(existing_abid, key)} to {new_hash})',
  191. }
  192. for key, new_hash in fresh_hashes.items()
  193. if getattr(existing_abid, key) != new_hash
  194. }
  195. def issue_new_abid(self, overwrite=False) -> ABID:
  196. """
  197. Issue a new ABID based on the current object's properties, can only be called once on new objects (before they are saved to DB).
  198. """
  199. if not overwrite:
  200. assert self._state.adding, 'Can only issue new ABID when model._state.adding is True'
  201. assert eval(self.abid_uri_src), f'Can only issue new ABID if self.abid_uri_src is defined ({self.abid_uri_src}={eval(self.abid_uri_src)})'
  202. # Setup Field defaults to be ready for ABID generation
  203. self.abid = None
  204. self.id = self.id or uuid4()
  205. self.pk = self.id
  206. self.created_at = self.created_at or self._init_timestamp # cut off precision to match precision of TS component
  207. self.modified_at = self.modified_at or self.created_at
  208. self.created_by_id = (hasattr(self, 'created_by_id') and self.created_by_id) or get_or_create_system_user_pk()
  209. # Compute fresh ABID values & hashes based on object's live properties
  210. abid_fresh_values = self.ABID_FRESH_VALUES
  211. assert all(abid_fresh_values.values()), f'All ABID_FRESH_VALUES must be set {abid_fresh_values}'
  212. abid_fresh_hashes = self.ABID_FRESH_HASHES
  213. assert all(abid_fresh_hashes.values()), f'All ABID_FRESH_HASHES must be able to be generated {abid_fresh_hashes}'
  214. new_abid = ABID(**abid_fresh_hashes)
  215. assert new_abid.ulid and new_abid.uuid and new_abid.typeid, f'Failed to calculate {abid_fresh_values["prefix"]}_ABID for {self.__class__.__name__}'
  216. return new_abid
  217. @property
  218. def ABID(self) -> ABID:
  219. """
  220. Get the object's existing ABID (from self.abid if it's already saved to DB, otherwise generated fresh)
  221. e.g. -> ABID(ts='01HX9FPYTR', uri='E4A5CCD9', subtype='00', rand='ZYEBQE')
  222. """
  223. if self.abid:
  224. return ABID.parse(cast(str, self.abid))
  225. return self.issue_new_abid()
  226. # These are all example helpers to make it easy to access alternate formats of the ABID.*, only add them if you actually need them
  227. # @property
  228. # def UUID(self) -> UUID:
  229. # """
  230. # Get a uuid.UUID (v4) representation of the object's ABID.
  231. # """
  232. # return self.ABID.uuid
  233. # @property
  234. # def uuid(self) -> str:
  235. # """
  236. # Get a str uuid.UUID (v4) representation of the object's ABID.
  237. # """
  238. # return str(self.ABID.uuid)
  239. # @property
  240. # def ULID(self) -> ULID:
  241. # """
  242. # Get a ulid.ULID representation of the object's ABID.
  243. # """
  244. # return self.ABID.ulid
  245. # @property
  246. # def TypeID(self) -> TypeID:
  247. # """
  248. # Get a typeid.TypeID (stripe-style) representation of the object's ABID.
  249. # """
  250. # return self.ABID.typeid
  251. @property
  252. def api_url(self) -> str:
  253. """
  254. Compute the REST API URL to access this object.
  255. e.g. /api/v1/core/snapshot/snp_01BJQMF54D093DXEAWZ6JYRP
  256. """
  257. return reverse_lazy('api-1:get_any', args=[self.abid]) # + f'?api_key={get_or_create_api_token(request.user)}'
  258. @property
  259. def api_docs_url(self) -> str:
  260. """
  261. Compute the REST API Documentation URL to learn about accessing this object.
  262. e.g. /api/v1/docs#/Core%20Models/api_v1_core_get_snapshots
  263. """
  264. return f'/api/v1/docs#/{self._meta.app_label.title()}%20Models/api_v1_{self._meta.app_label}_get_{self._meta.db_table}'
  265. @property
  266. def admin_change_url(self) -> str:
  267. return f"/admin/{self._meta.app_label}/{self._meta.model_name}/{self.pk}/change/"
  268. def get_absolute_url(self):
  269. return self.api_docs_url
  270. class ModelWithHealthStats(models.Model):
  271. num_uses_failed = models.PositiveIntegerField(default=0)
  272. num_uses_succeeded = models.PositiveIntegerField(default=0)
  273. class Meta:
  274. abstract = True
  275. def record_health_failure(self) -> None:
  276. self.num_uses_failed += 1
  277. self.save()
  278. def record_health_success(self) -> None:
  279. self.num_uses_succeeded += 1
  280. self.save()
  281. def reset_health(self) -> None:
  282. # move all the failures to successes when resetting so we dont lose track of the total count
  283. self.num_uses_succeeded = self.num_uses_failed + self.num_uses_succeeded
  284. self.num_uses_failed = 0
  285. self.save()
  286. @property
  287. def health(self) -> int:
  288. total_uses = max((self.num_uses_failed + self.num_uses_succeeded, 1))
  289. success_pct = (self.num_uses_succeeded / total_uses) * 100
  290. return round(success_pct)
  291. ####################################################
  292. # Django helpers
  293. def find_all_abid_prefixes() -> Dict[str, type[models.Model]]:
  294. """
  295. Return the mapping of all ABID prefixes to their models.
  296. e.g. {'tag_': core.models.Tag, 'snp_': core.models.Snapshot, ...}
  297. """
  298. import django.apps
  299. prefix_map = {}
  300. for model in django.apps.apps.get_models():
  301. abid_prefix = getattr(model, 'abid_prefix', None)
  302. if abid_prefix:
  303. prefix_map[abid_prefix] = model
  304. return prefix_map
  305. def find_prefix_for_abid(abid: ABID) -> str:
  306. """
  307. Find the correct prefix for a given ABID that may have be missing a prefix (slow).
  308. e.g. ABID('obj_01BJQMF54D093DXEAWZ6JYRPAQ') -> 'snp_'
  309. """
  310. # if existing abid prefix is correct, lookup is easy
  311. model = find_model_from_abid(abid)
  312. if model:
  313. assert issubclass(model, ABIDModel)
  314. return model.abid_prefix
  315. # prefix might be obj_ or missing, fuzzy-search to find any object that matches
  316. return find_obj_from_abid_rand(abid)[0].abid_prefix
  317. def find_model_from_abid_prefix(prefix: str) -> type[ABIDModel] | None:
  318. """
  319. Return the Django Model that corresponds to a given ABID prefix.
  320. e.g. 'tag_' -> core.models.Tag
  321. """
  322. prefix = abid_part_from_prefix(prefix)
  323. import django.apps
  324. for model in django.apps.apps.get_models():
  325. if not issubclass(model, ABIDModel): continue # skip non-ABID-enabled models
  326. if not hasattr(model, 'objects'): continue # skip abstract models
  327. if (model.abid_prefix == prefix):
  328. return model
  329. return None
  330. def find_model_from_abid(abid: ABID) -> type[models.Model] | None:
  331. """
  332. Shortcut for find_model_from_abid_prefix(abid.prefix)
  333. """
  334. return find_model_from_abid_prefix(abid.prefix)
  335. def find_obj_from_abid_rand(rand: Union[ABID, str], model=None) -> List[ABIDModel]:
  336. """
  337. Find an object corresponding to an ABID by exhaustively searching using its random suffix (slow).
  338. e.g. 'obj_....................JYRPAQ' -> Snapshot('snp_01BJQMF54D093DXEAWZ6JYRPAQ')
  339. Honestly should only be used for debugging, no reason to expose this ability to users.
  340. """
  341. # convert str to ABID if necessary
  342. if isinstance(rand, ABID):
  343. abid: ABID = rand
  344. else:
  345. rand = str(rand)
  346. if len(rand) < ABID_SUFFIX_LEN:
  347. padding_needed = ABID_SUFFIX_LEN - len(rand)
  348. rand = ('0'*padding_needed) + rand
  349. abid = ABID.parse(rand)
  350. import django.apps
  351. partial_matches: List[ABIDModel] = []
  352. models_to_try = cast(Set[type[models.Model]], set(filter(bool, (
  353. model,
  354. find_model_from_abid(abid),
  355. *django.apps.apps.get_models(),
  356. ))))
  357. # print(abid, abid.rand, abid.uuid, models_to_try)
  358. for model in models_to_try:
  359. if not issubclass(model, ABIDModel): continue # skip Models that arent ABID-enabled
  360. if not hasattr(model, 'objects'): continue # skip abstract Models
  361. assert hasattr(model, 'objects') # force-fix for type hint nit about missing manager https://github.com/typeddjango/django-stubs/issues/1684
  362. # continue on to try fuzzy searching by randomness portion derived from uuid field
  363. try:
  364. qs = []
  365. if hasattr(model, 'abid'):
  366. qs = model.objects.filter(abid__endswith=abid.rand)
  367. elif hasattr(model, 'uuid'):
  368. qs = model.objects.filter(uuid__endswith=str(abid.uuid)[-ABID_RAND_LEN:])
  369. elif hasattr(model, 'id'):
  370. # NOTE: this only works on SQLite where every column is a string
  371. # other DB backends like postgres dont let you do __endswith if this is a BigAutoInteger field
  372. # try to search for uuid=...-2354352
  373. # try to search for id=...2354352
  374. # try to search for id=2354352
  375. qs = model.objects.filter(
  376. models.Q(id__endswith=str(abid.uuid)[-ABID_RAND_LEN:])
  377. | models.Q(id__endswith=abid.rand)
  378. | models.Q(id__startswith=str(int(abid.rand)) if abid.rand.isdigit() else abid.rand)
  379. )
  380. for obj in qs:
  381. if abid in (str(obj.ABID), str(obj.id), str(obj.pk), str(obj.abid)):
  382. # found exact match, no need to keep iterating
  383. return [obj]
  384. partial_matches.append(obj)
  385. except OperationalError as err:
  386. print(f'[!] WARNING: Got error while trying to iterate through QuerySet for {model}:', err, '\n')
  387. return partial_matches
  388. def find_obj_from_abid(abid: ABID, model=None, fuzzy=False) -> Any:
  389. """
  390. Find an object with a given ABID by filtering possible models for a matching abid/uuid/id (fast).
  391. e.g. 'snp_01BJQMF54D093DXEAWZ6JYRPAQ' -> Snapshot('snp_01BJQMF54D093DXEAWZ6JYRPAQ')
  392. """
  393. model = model or find_model_from_abid(abid)
  394. assert model, f'Could not find model that could match this ABID type: {abid}'
  395. try:
  396. if hasattr(model, 'abid'):
  397. return model.objects.get(abid__endswith=abid.suffix)
  398. if hasattr(model, 'uuid'):
  399. return model.objects.get(uuid=abid.uuid)
  400. return model.objects.get(id=abid.uuid)
  401. except model.DoesNotExist:
  402. # if the model has an abid field then it shouldve matched, pointless to fuzzy search in that case
  403. if hasattr(model, 'abid') or (not fuzzy):
  404. raise
  405. # continue on to try fuzzy searching by randomness portion derived from uuid field
  406. match_by_rand = find_obj_from_abid_rand(abid, model=model)
  407. if match_by_rand:
  408. if match_by_rand[0].abid_prefix != abid.prefix:
  409. print(f'[!] WARNING: fetched object {match_by_rand} even though prefix {abid.prefix} doesnt match!', abid, '\n')
  410. return match_by_rand
  411. raise model.DoesNotExist