v1_core.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424
  1. __package__ = 'archivebox.api'
  2. import math
  3. from uuid import UUID
  4. from typing import List, Optional, Union, Any
  5. from datetime import datetime
  6. from django.db.models import Q
  7. from django.shortcuts import get_object_or_404
  8. from django.core.exceptions import ValidationError
  9. from django.contrib.auth import get_user_model
  10. from ninja import Router, Schema, FilterSchema, Field, Query
  11. from ninja.pagination import paginate, PaginationBase
  12. from core.models import Snapshot, ArchiveResult, Tag
  13. from abid_utils.abid import ABID
# Router instance that the endpoint functions in this module register on via
# @router.get(...); every route on it is created with the 'Core Models' tag.
router = Router(tags=['Core Models'])
  15. class CustomPagination(PaginationBase):
  16. class Input(Schema):
  17. limit: int = 200
  18. offset: int = 0
  19. page: int = 0
  20. class Output(Schema):
  21. total_items: int
  22. total_pages: int
  23. page: int
  24. limit: int
  25. offset: int
  26. num_items: int
  27. items: List[Any]
  28. def paginate_queryset(self, queryset, pagination: Input, **params):
  29. limit = min(pagination.limit, 500)
  30. offset = pagination.offset or (pagination.page * limit)
  31. total = queryset.count()
  32. total_pages = math.ceil(total / limit)
  33. current_page = math.ceil(offset / (limit + 1))
  34. items = queryset[offset : offset + limit]
  35. return {
  36. 'total_items': total,
  37. 'total_pages': total_pages,
  38. 'page': current_page,
  39. 'limit': limit,
  40. 'offset': offset,
  41. 'num_items': len(items),
  42. 'items': items,
  43. }
  44. ### ArchiveResult #########################################################################
  45. class ArchiveResultSchema(Schema):
  46. TYPE: str = 'core.models.ArchiveResult'
  47. id: UUID
  48. old_id: int
  49. abid: str
  50. modified: datetime
  51. created: datetime
  52. created_by_id: str
  53. created_by_username: str
  54. snapshot_abid: str
  55. snapshot_timestamp: str
  56. snapshot_url: str
  57. snapshot_tags: str
  58. extractor: str
  59. cmd_version: Optional[str]
  60. cmd: List[str]
  61. pwd: str
  62. status: str
  63. output: str
  64. @staticmethod
  65. def resolve_created_by_id(obj):
  66. return str(obj.created_by_id)
  67. @staticmethod
  68. def resolve_created_by_username(obj):
  69. User = get_user_model()
  70. return User.objects.get(id=obj.created_by_id).username
  71. @staticmethod
  72. def resolve_pk(obj):
  73. return str(obj.pk)
  74. @staticmethod
  75. def resolve_uuid(obj):
  76. return str(obj.uuid)
  77. @staticmethod
  78. def resolve_abid(obj):
  79. return str(obj.ABID)
  80. @staticmethod
  81. def resolve_created(obj):
  82. return obj.start_ts
  83. @staticmethod
  84. def resolve_snapshot_timestamp(obj):
  85. return obj.snapshot.timestamp
  86. @staticmethod
  87. def resolve_snapshot_url(obj):
  88. return obj.snapshot.url
  89. @staticmethod
  90. def resolve_snapshot_abid(obj):
  91. return str(obj.snapshot.ABID)
  92. @staticmethod
  93. def resolve_snapshot_tags(obj):
  94. return obj.snapshot.tags_str()
  95. class ArchiveResultFilterSchema(FilterSchema):
  96. id: Optional[str] = Field(None, q=['id__startswith', 'abid__icontains', 'old_id__startswith', 'snapshot__id__startswith', 'snapshot__abid__icontains', 'snapshot__timestamp__startswith'])
  97. search: Optional[str] = Field(None, q=['snapshot__url__icontains', 'snapshot__title__icontains', 'snapshot__tags__name__icontains', 'extractor', 'output__icontains', 'id__startswith', 'abid__icontains', 'old_id__startswith', 'snapshot__id__startswith', 'snapshot__abid__icontains', 'snapshot__timestamp__startswith'])
  98. snapshot_id: Optional[str] = Field(None, q=['snapshot__id__startswith', 'snapshot__abid__icontains', 'snapshot__timestamp__startswith'])
  99. snapshot_url: Optional[str] = Field(None, q='snapshot__url__icontains')
  100. snapshot_tag: Optional[str] = Field(None, q='snapshot__tags__name__icontains')
  101. status: Optional[str] = Field(None, q='status')
  102. output: Optional[str] = Field(None, q='output__icontains')
  103. extractor: Optional[str] = Field(None, q='extractor__icontains')
  104. cmd: Optional[str] = Field(None, q='cmd__0__icontains')
  105. pwd: Optional[str] = Field(None, q='pwd__icontains')
  106. cmd_version: Optional[str] = Field(None, q='cmd_version')
  107. created: Optional[datetime] = Field(None, q='updated')
  108. created__gte: Optional[datetime] = Field(None, q='updated__gte')
  109. created__lt: Optional[datetime] = Field(None, q='updated__lt')
  110. @router.get("/archiveresults", response=List[ArchiveResultSchema], url_name="get_archiveresult")
  111. @paginate(CustomPagination)
  112. def get_archiveresults(request, filters: ArchiveResultFilterSchema = Query(...)):
  113. """List all ArchiveResult entries matching these filters."""
  114. qs = ArchiveResult.objects.all()
  115. results = filters.filter(qs).distinct()
  116. return results
  117. @router.get("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema, url_name="get_archiveresult")
  118. def get_archiveresult(request, archiveresult_id: str):
  119. """Get a specific ArchiveResult by pk, abid, or old_id."""
  120. return ArchiveResult.objects.get(Q(id__icontains=archiveresult_id) | Q(abid__icontains=archiveresult_id) | Q(old_id__icontains=archiveresult_id))
  121. # @router.post("/archiveresult", response=ArchiveResultSchema)
  122. # def create_archiveresult(request, payload: ArchiveResultSchema):
  123. # archiveresult = ArchiveResult.objects.create(**payload.dict())
  124. # return archiveresult
  125. #
  126. # @router.put("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema)
  127. # def update_archiveresult(request, archiveresult_id: str, payload: ArchiveResultSchema):
  128. # archiveresult = get_object_or_404(ArchiveResult, id=archiveresult_id)
  129. #
  130. # for attr, value in payload.dict().items():
  131. # setattr(archiveresult, attr, value)
  132. # archiveresult.save()
  133. #
  134. # return archiveresult
  135. #
  136. # @router.delete("/archiveresult/{archiveresult_id}")
  137. # def delete_archiveresult(request, archiveresult_id: str):
  138. # archiveresult = get_object_or_404(ArchiveResult, id=archiveresult_id)
  139. # archiveresult.delete()
  140. # return {"success": True}
  141. ### Snapshot #########################################################################
  142. class SnapshotSchema(Schema):
  143. TYPE: str = 'core.models.Snapshot'
  144. id: UUID
  145. old_id: UUID
  146. abid: str
  147. modified: datetime
  148. created: datetime
  149. created_by_id: str
  150. created_by_username: str
  151. url: str
  152. tags: str
  153. title: Optional[str]
  154. timestamp: str
  155. archive_path: str
  156. bookmarked: datetime
  157. added: datetime
  158. updated: Optional[datetime]
  159. num_archiveresults: int
  160. archiveresults: List[ArchiveResultSchema]
  161. @staticmethod
  162. def resolve_created_by_id(obj):
  163. return str(obj.created_by_id)
  164. @staticmethod
  165. def resolve_created_by_username(obj):
  166. User = get_user_model()
  167. return User.objects.get(id=obj.created_by_id).username
  168. @staticmethod
  169. def resolve_pk(obj):
  170. return str(obj.pk)
  171. @staticmethod
  172. def resolve_uuid(obj):
  173. return str(obj.uuid)
  174. @staticmethod
  175. def resolve_abid(obj):
  176. return str(obj.ABID)
  177. @staticmethod
  178. def resolve_tags(obj):
  179. return obj.tags_str()
  180. @staticmethod
  181. def resolve_num_archiveresults(obj, context):
  182. return obj.archiveresult_set.all().distinct().count()
  183. @staticmethod
  184. def resolve_archiveresults(obj, context):
  185. if context['request'].with_archiveresults:
  186. return obj.archiveresult_set.all().distinct()
  187. return ArchiveResult.objects.none()
  188. class SnapshotFilterSchema(FilterSchema):
  189. id: Optional[str] = Field(None, q=['id__icontains', 'abid__icontains', 'old_id__icontains', 'timestamp__startswith'])
  190. old_id: Optional[str] = Field(None, q='old_id__icontains')
  191. abid: Optional[str] = Field(None, q='abid__icontains')
  192. created_by_id: str = Field(None, q='created_by_id')
  193. created_by_username: str = Field(None, q='created_by__username__icontains')
  194. created__gte: datetime = Field(None, q='created__gte')
  195. created__lt: datetime = Field(None, q='created__lt')
  196. created: datetime = Field(None, q='created')
  197. modified: datetime = Field(None, q='modified')
  198. modified__gte: datetime = Field(None, q='modified__gte')
  199. modified__lt: datetime = Field(None, q='modified__lt')
  200. search: Optional[str] = Field(None, q=['url__icontains', 'title__icontains', 'tags__name__icontains', 'id__icontains', 'abid__icontains', 'old_id__icontains', 'timestamp__startswith'])
  201. url: Optional[str] = Field(None, q='url')
  202. tag: Optional[str] = Field(None, q='tags__name')
  203. title: Optional[str] = Field(None, q='title__icontains')
  204. timestamp: Optional[str] = Field(None, q='timestamp__startswith')
  205. added__gte: Optional[datetime] = Field(None, q='added__gte')
  206. added__lt: Optional[datetime] = Field(None, q='added__lt')
  207. @router.get("/snapshots", response=List[SnapshotSchema], url_name="get_snapshots")
  208. @paginate(CustomPagination)
  209. def get_snapshots(request, filters: SnapshotFilterSchema = Query(...), with_archiveresults: bool=False):
  210. """List all Snapshot entries matching these filters."""
  211. request.with_archiveresults = with_archiveresults
  212. qs = Snapshot.objects.all()
  213. results = filters.filter(qs).distinct()
  214. return results
  215. @router.get("/snapshot/{snapshot_id}", response=SnapshotSchema, url_name="get_snapshot")
  216. def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
  217. """Get a specific Snapshot by abid, uuid, or pk."""
  218. request.with_archiveresults = with_archiveresults
  219. snapshot = None
  220. try:
  221. snapshot = Snapshot.objects.get(Q(abid__startswith=snapshot_id) | Q(id__startswith=snapshot_id) | Q(old_id__startswith=snapshot_id) | Q(timestamp__startswith=snapshot_id))
  222. except Snapshot.DoesNotExist:
  223. pass
  224. try:
  225. snapshot = snapshot or Snapshot.objects.get(Q(abid__icontains=snapshot_id) | Q(id__icontains=snapshot_id) | Q(old_id__icontains=snapshot_id))
  226. except Snapshot.DoesNotExist:
  227. pass
  228. if not snapshot:
  229. raise Snapshot.DoesNotExist
  230. return snapshot
  231. # @router.post("/snapshot", response=SnapshotSchema)
  232. # def create_snapshot(request, payload: SnapshotSchema):
  233. # snapshot = Snapshot.objects.create(**payload.dict())
  234. # return snapshot
  235. #
  236. # @router.put("/snapshot/{snapshot_id}", response=SnapshotSchema)
  237. # def update_snapshot(request, snapshot_id: str, payload: SnapshotSchema):
  238. # snapshot = get_object_or_404(Snapshot, uuid=snapshot_id)
  239. #
  240. # for attr, value in payload.dict().items():
  241. # setattr(snapshot, attr, value)
  242. # snapshot.save()
  243. #
  244. # return snapshot
  245. #
  246. # @router.delete("/snapshot/{snapshot_id}")
  247. # def delete_snapshot(request, snapshot_id: str):
  248. # snapshot = get_object_or_404(Snapshot, uuid=snapshot_id)
  249. # snapshot.delete()
  250. # return {"success": True}
  251. ### Tag #########################################################################
  252. class TagSchema(Schema):
  253. TYPE: str = 'core.models.Tag'
  254. id: UUID
  255. old_id: str
  256. abid: str
  257. modified: datetime
  258. created: datetime
  259. created_by_id: str
  260. created_by_username: str
  261. name: str
  262. slug: str
  263. num_snapshots: int
  264. snapshots: List[SnapshotSchema]
  265. @staticmethod
  266. def resolve_old_id(obj):
  267. return str(obj.old_id)
  268. @staticmethod
  269. def resolve_created_by_id(obj):
  270. return str(obj.created_by_id)
  271. @staticmethod
  272. def resolve_created_by_username(obj):
  273. User = get_user_model()
  274. return User.objects.get(id=obj.created_by_id).username
  275. @staticmethod
  276. def resolve_num_snapshots(obj, context):
  277. return obj.snapshot_set.all().distinct().count()
  278. @staticmethod
  279. def resolve_snapshots(obj, context):
  280. if context['request'].with_snapshots:
  281. return obj.snapshot_set.all().distinct()
  282. return Snapshot.objects.none()
  283. @router.get("/tags", response=List[TagSchema], url_name="get_tags")
  284. @paginate(CustomPagination)
  285. def get_tags(request):
  286. request.with_snapshots = False
  287. request.with_archiveresults = False
  288. return Tag.objects.all().distinct()
  289. @router.get("/tag/{tag_id}", response=TagSchema, url_name="get_tag")
  290. def get_tag(request, tag_id: str, with_snapshots: bool=True):
  291. request.with_snapshots = with_snapshots
  292. request.with_archiveresults = False
  293. tag = None
  294. try:
  295. tag = tag or Tag.objects.get(old_id__icontains=tag_id)
  296. except (Tag.DoesNotExist, ValidationError, ValueError):
  297. pass
  298. try:
  299. tag = Tag.objects.get(abid__icontains=tag_id)
  300. except (Tag.DoesNotExist, ValidationError):
  301. pass
  302. try:
  303. tag = tag or Tag.objects.get(id__icontains=tag_id)
  304. except (Tag.DoesNotExist, ValidationError):
  305. pass
  306. return tag
  307. @router.get("/any/{abid}", response=Union[SnapshotSchema, ArchiveResultSchema, TagSchema], url_name="get_any")
  308. def get_any(request, abid: str):
  309. request.with_snapshots = False
  310. request.with_archiveresults = False
  311. response = None
  312. try:
  313. response = response or get_snapshot(request, abid)
  314. except Exception:
  315. pass
  316. try:
  317. response = response or get_archiveresult(request, abid)
  318. except Exception:
  319. pass
  320. try:
  321. response = response or get_tag(request, abid)
  322. except Exception:
  323. pass
  324. return response