abid.py 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. from typing import NamedTuple, Any, Union, Optional
  2. import ulid
  3. import uuid6
  4. import hashlib
  5. from urllib.parse import urlparse
  6. from uuid import UUID
  7. from typeid import TypeID # type: ignore[import-untyped]
  8. from datetime import datetime
  9. ABID_PREFIX_LEN = 4
  10. ABID_SUFFIX_LEN = 26
  11. ABID_LEN = 30
  12. ABID_TS_LEN = 10
  13. ABID_URI_LEN = 8
  14. ABID_SUBTYPE_LEN = 2
  15. ABID_RAND_LEN = 6
  16. DEFAULT_ABID_PREFIX = 'obj_'
  17. # allows people to keep their uris secret on a per-instance basis by changing the salt.
  18. # the default means everyone can share the same namespace for URI hashes,
  19. # meaning anyone who has a URI and wants to check if you have it can guess the ABID
  20. DEFAULT_ABID_URI_SALT = '687c2fff14e3a7780faa5a40c237b19b5b51b089'
  21. class ABID(NamedTuple):
  22. """
  23. e.g. ABID('obj_01HX9FPYTRE4A5CCD901ZYEBQE')
  24. """
  25. prefix: str # e.g. obj_
  26. ts: str # e.g. 01HX9FPYTR
  27. uri: str # e.g. E4A5CCD9
  28. subtype: str # e.g. 01
  29. rand: str # e.g. ZYEBQE
  30. # salt: str = DEFAULT_ABID_URI_SALT
  31. def __getattr__(self, attr: str) -> Any:
  32. return getattr(self.ulid, attr)
  33. def __eq__(self, other: Any) -> bool:
  34. try:
  35. return self.ulid == other.ulid
  36. except AttributeError:
  37. return NotImplemented
  38. def __str__(self) -> str:
  39. return self.prefix + self.suffix
  40. def __len__(self) -> int:
  41. return len(self.prefix + self.suffix)
  42. @classmethod
  43. def parse(cls, buffer: Union[str, UUID, ulid.ULID, TypeID, 'ABID'], prefix=DEFAULT_ABID_PREFIX) -> 'ABID':
  44. assert buffer, f'Attempted to create ABID from null value {buffer}'
  45. buffer = str(buffer)
  46. if '_' in buffer:
  47. prefix, suffix = buffer.split('_')
  48. else:
  49. prefix, suffix = prefix.strip('_'), buffer
  50. assert len(prefix) == ABID_PREFIX_LEN - 1 # length without trailing _
  51. assert len(suffix) == ABID_SUFFIX_LEN, f'Suffix {suffix} from {buffer} was not {ABID_SUFFIX_LEN} chars long'
  52. return cls(
  53. prefix=abid_part_from_prefix(prefix),
  54. ts=suffix[0:10].upper(),
  55. uri=suffix[10:18].upper(),
  56. subtype=suffix[18:20].upper(),
  57. rand=suffix[20:26].upper(),
  58. )
  59. @property
  60. def uri_salt(self) -> str:
  61. return DEFAULT_ABID_URI_SALT
  62. @property
  63. def suffix(self):
  64. return ''.join((self.ts, self.uri, self.subtype, self.rand))
  65. @property
  66. def ulid(self) -> ulid.ULID:
  67. return ulid.parse(self.suffix)
  68. @property
  69. def uuid(self) -> UUID:
  70. return self.ulid.uuid
  71. @property
  72. def uuid6(self) -> uuid6.UUID:
  73. return uuid6.UUID(hex=self.uuid.hex)
  74. @property
  75. def typeid(self) -> TypeID:
  76. return TypeID.from_uuid(prefix=self.prefix.strip('_'), suffix=self.uuid6)
  77. @property
  78. def datetime(self) -> datetime:
  79. return self.ulid.timestamp().datetime
  80. ####################################################
  81. def uri_hash(uri: Union[str, bytes], salt: str=DEFAULT_ABID_URI_SALT) -> str:
  82. """
  83. 'E4A5CCD9AF4ED2A6E0954DF19FD274E9CDDB4853051F033FD518BFC90AA1AC25'
  84. """
  85. if isinstance(uri, bytes):
  86. uri_str: str = uri.decode()
  87. else:
  88. uri_str = str(uri)
  89. # only hash the domain part of URLs
  90. if '://' in uri_str:
  91. try:
  92. domain = urlparse(uri_str).netloc
  93. if domain:
  94. uri_str = domain
  95. except AttributeError:
  96. pass
  97. uri_bytes = uri_str.encode('utf-8') + salt.encode('utf-8')
  98. return hashlib.sha256(uri_bytes).hexdigest().upper()
  99. def abid_part_from_prefix(prefix: Optional[str]) -> str:
  100. """
  101. 'snp_'
  102. """
  103. if prefix is None:
  104. return 'obj_'
  105. prefix = prefix.strip('_').lower()
  106. assert len(prefix) == 3
  107. return prefix + '_'
  108. def abid_part_from_uri(uri: str, salt: str=DEFAULT_ABID_URI_SALT) -> str:
  109. """
  110. 'E4A5CCD9' # takes first 8 characters of sha256(url)
  111. """
  112. uri = str(uri)
  113. return uri_hash(uri, salt=salt)[:ABID_URI_LEN]
  114. def abid_part_from_ts(ts: Optional[datetime]) -> str:
  115. """
  116. '01HX9FPYTR' # produces 10 character Timestamp section of ulid based on added date
  117. """
  118. return str(ulid.from_timestamp(ts) if ts else ulid.new())[:ABID_TS_LEN]
  119. def abid_part_from_subtype(subtype: str) -> str:
  120. """
  121. Snapshots have 01 type, other objects have other subtypes like wget/media/etc.
  122. Also allows us to change the ulid spec later by putting special sigil values here.
  123. """
  124. subtype = str(subtype)
  125. if len(subtype) == ABID_SUBTYPE_LEN:
  126. return subtype
  127. return hashlib.sha256(subtype.encode('utf-8')).hexdigest()[:ABID_SUBTYPE_LEN].upper()
  128. def abid_part_from_rand(rand: Union[str, UUID, None, int]) -> str:
  129. """
  130. 'ZYEBQE' # takes last 6 characters of randomness from existing legacy uuid db field
  131. """
  132. if rand is None:
  133. # if it's None we generate a new random 6 character hex string
  134. return str(ulid.new())[-ABID_RAND_LEN:]
  135. elif isinstance(rand, UUID):
  136. # if it's a uuid we take the last 6 characters of the ULID represation of it
  137. return str(ulid.from_uuid(rand))[-ABID_RAND_LEN:]
  138. elif isinstance(rand, int):
  139. # if it's a BigAutoInteger field we convert it from an int to a 0-padded string
  140. rand_str = str(rand)[-ABID_RAND_LEN:]
  141. padding_needed = ABID_RAND_LEN - len(rand_str)
  142. rand_str = ('0'*padding_needed) + rand_str
  143. return rand_str
  144. # otherwise treat it as a string, take the last 6 characters of it verbatim
  145. return str(rand)[-ABID_RAND_LEN:].upper()
  146. def abid_from_values(prefix, ts, uri, subtype, rand, salt=DEFAULT_ABID_URI_SALT) -> ABID:
  147. """
  148. Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src).
  149. """
  150. abid = ABID(
  151. prefix=abid_part_from_prefix(prefix),
  152. ts=abid_part_from_ts(ts),
  153. uri=abid_part_from_uri(uri, salt=salt),
  154. subtype=abid_part_from_subtype(subtype),
  155. rand=abid_part_from_rand(rand),
  156. )
  157. assert abid.ulid and abid.uuid and abid.typeid, f'Failed to calculate {prefix}_ABID for ts={ts} uri={uri} subtyp={subtype} rand={rand}'
  158. return abid