2
0

models.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344
  1. """
  2. This file provides the Django ABIDField and ABIDModel base model to inherit from.
  3. It implements the ArchiveBox ID (ABID) interfaces including abid_values, generate_abid, .abid, .uuid, .id.
  4. """
  5. from typing import Any, Dict, Union, List, Set, NamedTuple, cast
  6. from ulid import ULID
  7. from uuid import uuid4, UUID
  8. from typeid import TypeID # type: ignore[import-untyped]
  9. from datetime import datetime
  10. from functools import partial
  11. from charidfield import CharIDField # type: ignore[import-untyped]
  12. from django.conf import settings
  13. from django.db import models
  14. from django.utils import timezone
  15. from django.db.utils import OperationalError
  16. from django.contrib.auth import get_user_model
  17. from django_stubs_ext.db.models import TypedModelMeta
  18. from .abid import (
  19. ABID,
  20. ABID_LEN,
  21. ABID_RAND_LEN,
  22. ABID_SUFFIX_LEN,
  23. DEFAULT_ABID_PREFIX,
  24. DEFAULT_ABID_URI_SALT,
  25. abid_part_from_prefix,
  26. abid_from_values
  27. )
  28. ####################################################
  29. # Database Field for typeid/ulid style IDs with a prefix, e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ
  30. ABIDField = partial(
  31. CharIDField,
  32. max_length=ABID_LEN,
  33. help_text="ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)",
  34. default=None,
  35. null=True,
  36. blank=True,
  37. db_index=True,
  38. unique=True,
  39. )
  40. def get_or_create_system_user_pk(username='system'):
  41. """Get or create a system user with is_superuser=True to be the default owner for new DB rows"""
  42. User = get_user_model()
  43. # if only one user exists total, return that user
  44. if User.objects.filter(is_superuser=True).count() == 1:
  45. return User.objects.filter(is_superuser=True).values_list('pk', flat=True)[0]
  46. # otherwise, create a dedicated "system" user
  47. user, created = User.objects.get_or_create(username=username, is_staff=True, is_superuser=True, defaults={'email': '', 'password': ''})
  48. return user.pk
  49. class ABIDModel(models.Model):
  50. """
  51. Abstract Base Model for other models to depend on. Provides ArchiveBox ID (ABID) interface.
  52. """
  53. abid_prefix: str = DEFAULT_ABID_PREFIX # e.g. 'tag_'
  54. abid_ts_src = 'None' # e.g. 'self.created'
  55. abid_uri_src = 'None' # e.g. 'self.uri'
  56. abid_subtype_src = 'None' # e.g. 'self.extractor'
  57. abid_rand_src = 'None' # e.g. 'self.uuid' or 'self.id'
  58. # id = models.UUIDField(primary_key=True, default=uuid4, editable=True)
  59. # uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True)
  60. abid = ABIDField(prefix=abid_prefix)
  61. created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk)
  62. created = models.DateTimeField(auto_now_add=True)
  63. modified = models.DateTimeField(auto_now=True)
  64. class Meta(TypedModelMeta):
  65. abstract = True
  66. def save(self, *args: Any, **kwargs: Any) -> None:
  67. # when first creating a row, self.ABID is the source of truth
  68. # overwrite default prefilled self.id & self.abid with generated self.ABID value
  69. if self._state.adding or not self.id:
  70. self.id = self.ABID.uuid
  71. if self._state.adding or not self.abid:
  72. self.abid = str(self.ABID)
  73. super().save(*args, **kwargs)
  74. assert str(self.id) == str(self.ABID.uuid), f'self.id {self.id} does not match self.ABID {self.ABID.uuid}'
  75. assert str(self.abid) == str(self.ABID), f'self.abid {self.id} does not match self.ABID {self.ABID.uuid}'
  76. @property
  77. def abid_values(self) -> Dict[str, Any]:
  78. return {
  79. 'prefix': self.abid_prefix,
  80. 'ts': eval(self.abid_ts_src),
  81. 'uri': eval(self.abid_uri_src),
  82. 'subtype': eval(self.abid_subtype_src),
  83. 'rand': eval(self.abid_rand_src),
  84. }
  85. def generate_abid(self) -> ABID:
  86. """
  87. Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src).
  88. """
  89. prefix, ts, uri, subtype, rand = self.abid_values.values()
  90. if (not prefix) or prefix == DEFAULT_ABID_PREFIX:
  91. suggested_abid = self.__class__.__name__[:3].lower()
  92. raise Exception(f'{self.__class__.__name__}.abid_prefix must be defined to calculate ABIDs (suggested: {suggested_abid})')
  93. if not ts:
  94. # default to unix epoch with 00:00:00 UTC
  95. ts = datetime.fromtimestamp(0, timezone.utc) # equivalent to: ts = datetime.utcfromtimestamp(0)
  96. print(f'[!] WARNING: Generating ABID with ts=0000000000 placeholder because {self.__class__.__name__}.abid_ts_src={self.abid_ts_src} is unset!', ts.isoformat())
  97. if not uri:
  98. uri = str(self)
  99. print(f'[!] WARNING: Generating ABID with uri=str(self) placeholder because {self.__class__.__name__}.abid_uri_src={self.abid_uri_src} is unset!', uri)
  100. if not subtype:
  101. subtype = self.__class__.__name__
  102. print(f'[!] WARNING: Generating ABID with subtype={subtype} placeholder because {self.__class__.__name__}.abid_subtype_src={self.abid_subtype_src} is unset!', subtype)
  103. if not rand:
  104. rand = getattr(self, 'uuid', None) or getattr(self, 'id', None) or getattr(self, 'pk')
  105. print(f'[!] WARNING: Generating ABID with rand=self.id placeholder because {self.__class__.__name__}.abid_rand_src={self.abid_rand_src} is unset!', rand)
  106. abid = abid_from_values(
  107. prefix=prefix,
  108. ts=ts,
  109. uri=uri,
  110. subtype=subtype,
  111. rand=rand,
  112. salt=DEFAULT_ABID_URI_SALT,
  113. )
  114. assert abid.ulid and abid.uuid and abid.typeid, f'Failed to calculate {prefix}_ABID for {self.__class__.__name__}'
  115. return abid
  116. @property
  117. def ABID(self) -> ABID:
  118. """
  119. ULIDParts(timestamp='01HX9FPYTR', url='E4A5CCD9', subtype='00', randomness='ZYEBQE')
  120. """
  121. # if object is not yet saved to DB, always generate fresh ABID from values
  122. if self._state.adding:
  123. return self.generate_abid()
  124. # otherwise DB is single source of truth, load ABID from existing db pk
  125. abid: ABID | None = None
  126. try:
  127. abid = abid or ABID.parse(self.pk)
  128. except Exception:
  129. pass
  130. try:
  131. abid = abid or ABID.parse(self.id)
  132. except Exception:
  133. pass
  134. try:
  135. abid = abid or ABID.parse(cast(str, self.abid))
  136. except Exception:
  137. pass
  138. abid = abid or self.generate_abid()
  139. return abid
  140. @property
  141. def ULID(self) -> ULID:
  142. """
  143. Get a ulid.ULID representation of the object's ABID.
  144. """
  145. return self.ABID.ulid
  146. @property
  147. def UUID(self) -> UUID:
  148. """
  149. Get a uuid.UUID (v4) representation of the object's ABID.
  150. """
  151. return self.ABID.uuid
  152. @property
  153. def TypeID(self) -> TypeID:
  154. """
  155. Get a typeid.TypeID (stripe-style) representation of the object's ABID.
  156. """
  157. return self.ABID.typeid
  158. ####################################################
  159. # Django helpers
  160. def find_all_abid_prefixes() -> Dict[str, type[models.Model]]:
  161. """
  162. Return the mapping of all ABID prefixes to their models.
  163. e.g. {'tag_': core.models.Tag, 'snp_': core.models.Snapshot, ...}
  164. """
  165. import django.apps
  166. prefix_map = {}
  167. for model in django.apps.apps.get_models():
  168. abid_prefix = getattr(model, 'abid_prefix', None)
  169. if abid_prefix:
  170. prefix_map[abid_prefix] = model
  171. return prefix_map
  172. def find_prefix_for_abid(abid: ABID) -> str:
  173. """
  174. Find the correct prefix for a given ABID that may have be missing a prefix (slow).
  175. e.g. ABID('obj_01BJQMF54D093DXEAWZ6JYRPAQ') -> 'snp_'
  176. """
  177. # if existing abid prefix is correct, lookup is easy
  178. model = find_model_from_abid(abid)
  179. if model:
  180. assert issubclass(model, ABIDModel)
  181. return model.abid_prefix
  182. # prefix might be obj_ or missing, fuzzy-search to find any object that matches
  183. return find_obj_from_abid_rand(abid)[0].abid_prefix
  184. def find_model_from_abid_prefix(prefix: str) -> type[ABIDModel] | None:
  185. """
  186. Return the Django Model that corresponds to a given ABID prefix.
  187. e.g. 'tag_' -> core.models.Tag
  188. """
  189. prefix = abid_part_from_prefix(prefix)
  190. import django.apps
  191. for model in django.apps.apps.get_models():
  192. if not issubclass(model, ABIDModel): continue # skip non-ABID-enabled models
  193. if not hasattr(model, 'objects'): continue # skip abstract models
  194. if (model.abid_prefix == prefix):
  195. return model
  196. return None
  197. def find_model_from_abid(abid: ABID) -> type[models.Model] | None:
  198. """
  199. Shortcut for find_model_from_abid_prefix(abid.prefix)
  200. """
  201. return find_model_from_abid_prefix(abid.prefix)
  202. def find_obj_from_abid_rand(rand: Union[ABID, str], model=None) -> List[ABIDModel]:
  203. """
  204. Find an object corresponding to an ABID by exhaustively searching using its random suffix (slow).
  205. e.g. 'obj_....................JYRPAQ' -> Snapshot('snp_01BJQMF54D093DXEAWZ6JYRPAQ')
  206. """
  207. # convert str to ABID if necessary
  208. if isinstance(rand, ABID):
  209. abid: ABID = rand
  210. else:
  211. rand = str(rand)
  212. if len(rand) < ABID_SUFFIX_LEN:
  213. padding_needed = ABID_SUFFIX_LEN - len(rand)
  214. rand = ('0'*padding_needed) + rand
  215. abid = ABID.parse(rand)
  216. import django.apps
  217. partial_matches: List[ABIDModel] = []
  218. models_to_try = cast(Set[type[models.Model]], set(filter(bool, (
  219. model,
  220. find_model_from_abid(abid),
  221. *django.apps.apps.get_models(),
  222. ))))
  223. # print(abid, abid.rand, abid.uuid, models_to_try)
  224. for model in models_to_try:
  225. if not issubclass(model, ABIDModel): continue # skip Models that arent ABID-enabled
  226. if not hasattr(model, 'objects'): continue # skip abstract Models
  227. assert hasattr(model, 'objects') # force-fix for type hint nit about missing manager https://github.com/typeddjango/django-stubs/issues/1684
  228. # continue on to try fuzzy searching by randomness portion derived from uuid field
  229. try:
  230. qs = []
  231. if hasattr(model, 'abid'):
  232. qs = model.objects.filter(abid__endswith=abid.rand)
  233. elif hasattr(model, 'uuid'):
  234. qs = model.objects.filter(uuid__endswith=str(abid.uuid)[-ABID_RAND_LEN:])
  235. elif hasattr(model, 'id'):
  236. # NOTE: this only works on SQLite where every column is a string
  237. # other DB backends like postgres dont let you do __endswith if this is a BigAutoInteger field
  238. # try to search for uuid=...-2354352
  239. # try to search for id=...2354352
  240. # try to search for id=2354352
  241. qs = model.objects.filter(
  242. models.Q(id__endswith=str(abid.uuid)[-ABID_RAND_LEN:])
  243. | models.Q(id__endswith=abid.rand)
  244. | models.Q(id__startswith=str(int(abid.rand)) if abid.rand.isdigit() else abid.rand)
  245. )
  246. for obj in qs:
  247. if obj.generate_abid() == abid:
  248. # found exact match, no need to keep iterating
  249. return [obj]
  250. partial_matches.append(obj)
  251. except OperationalError as err:
  252. print(f'[!] WARNING: Got error while trying to iterate through QuerySet for {model}:', err, '\n')
  253. return partial_matches
  254. def find_obj_from_abid(abid: ABID, model=None, fuzzy=False) -> Any:
  255. """
  256. Find an object with a given ABID by filtering possible models for a matching abid/uuid/id (fast).
  257. e.g. 'snp_01BJQMF54D093DXEAWZ6JYRPAQ' -> Snapshot('snp_01BJQMF54D093DXEAWZ6JYRPAQ')
  258. """
  259. model = model or find_model_from_abid(abid)
  260. assert model, f'Could not find model that could match this ABID type: {abid}'
  261. try:
  262. if hasattr(model, 'abid'):
  263. return model.objects.get(abid__endswith=abid.suffix)
  264. if hasattr(model, 'uuid'):
  265. return model.objects.get(uuid=abid.uuid)
  266. return model.objects.get(id=abid.uuid)
  267. except model.DoesNotExist:
  268. # if the model has an abid field then it shouldve matched, pointless to fuzzy search in that case
  269. if hasattr(model, 'abid') or (not fuzzy):
  270. raise
  271. # continue on to try fuzzy searching by randomness portion derived from uuid field
  272. match_by_rand = find_obj_from_abid_rand(abid, model=model)
  273. if match_by_rand:
  274. if match_by_rand[0].abid_prefix != abid.prefix:
  275. print(f'[!] WARNING: fetched object {match_by_rand} even though prefix {abid.prefix} doesnt match!', abid, '\n')
  276. return match_by_rand
  277. raise model.DoesNotExist