# v1_core.py
  1. __package__ = 'archivebox.api'
  2. import math
  3. from uuid import UUID
  4. from typing import List, Optional, Union, Any
  5. from datetime import datetime
  6. from django.db.models import Q
  7. from django.shortcuts import get_object_or_404
  8. from django.core.exceptions import ValidationError
  9. from django.contrib.auth import get_user_model
  10. from ninja import Router, Schema, FilterSchema, Field, Query
  11. from ninja.pagination import paginate, PaginationBase
  12. from ninja.errors import HttpError
  13. from core.models import Snapshot, ArchiveResult, Tag
  14. from api.models import APIToken, OutboundWebhook
  15. from abid_utils.abid import ABID
  16. from .auth import API_AUTH_METHODS
# Router that the endpoints in this module register on via @router.get(...);
# grouped under the 'Core Models' tag and using API_AUTH_METHODS for auth.
router = Router(tags=['Core Models'], auth=API_AUTH_METHODS)
  18. class CustomPagination(PaginationBase):
  19. class Input(Schema):
  20. limit: int = 200
  21. offset: int = 0
  22. page: int = 0
  23. class Output(Schema):
  24. total_items: int
  25. total_pages: int
  26. page: int
  27. limit: int
  28. offset: int
  29. num_items: int
  30. items: List[Any]
  31. def paginate_queryset(self, queryset, pagination: Input, **params):
  32. limit = min(pagination.limit, 500)
  33. offset = pagination.offset or (pagination.page * limit)
  34. total = queryset.count()
  35. total_pages = math.ceil(total / limit)
  36. current_page = math.ceil(offset / (limit + 1))
  37. items = queryset[offset : offset + limit]
  38. return {
  39. 'total_items': total,
  40. 'total_pages': total_pages,
  41. 'page': current_page,
  42. 'limit': limit,
  43. 'offset': offset,
  44. 'num_items': len(items),
  45. 'items': items,
  46. }
  47. ### ArchiveResult #########################################################################
  48. class MinimalArchiveResultSchema(Schema):
  49. TYPE: str = 'core.models.ArchiveResult'
  50. id: UUID
  51. abid: str
  52. modified_at: datetime
  53. created_at: datetime
  54. created_by_id: str
  55. created_by_username: str
  56. extractor: str
  57. cmd_version: Optional[str]
  58. cmd: List[str]
  59. pwd: str
  60. status: str
  61. output: str
  62. start_ts: Optional[datetime]
  63. end_ts: Optional[datetime]
  64. @staticmethod
  65. def resolve_created_by_id(obj):
  66. return str(obj.created_by_id)
  67. @staticmethod
  68. def resolve_created_by_username(obj):
  69. User = get_user_model()
  70. return User.objects.get(id=obj.created_by_id).username
  71. @staticmethod
  72. def resolve_abid(obj):
  73. return str(obj.ABID)
  74. @staticmethod
  75. def resolve_created_at(obj):
  76. return obj.start_ts
  77. @staticmethod
  78. def resolve_snapshot_timestamp(obj):
  79. return obj.snapshot.timestamp
  80. @staticmethod
  81. def resolve_snapshot_url(obj):
  82. return obj.snapshot.url
  83. @staticmethod
  84. def resolve_snapshot_id(obj):
  85. return str(obj.snapshot_id)
  86. @staticmethod
  87. def resolve_snapshot_abid(obj):
  88. return str(obj.snapshot.ABID)
  89. @staticmethod
  90. def resolve_snapshot_tags(obj):
  91. return sorted(tag.name for tag in obj.snapshot.tags.all())
class ArchiveResultSchema(MinimalArchiveResultSchema):
    # Full ArchiveResult representation: every field inherited from
    # MinimalArchiveResultSchema plus the snapshot_* fields declared below.
    # The values for these fields come from the resolve_snapshot_* static
    # methods defined on MinimalArchiveResultSchema.
    TYPE: str = 'core.models.ArchiveResult'

    # ... Extends MinimalArchiveResultSchema fields ...

    snapshot_id: UUID
    snapshot_abid: str
    snapshot_timestamp: str
    snapshot_url: str
    snapshot_tags: List[str]
class ArchiveResultFilterSchema(FilterSchema):
    # Query-string filters for the /archiveresults listing. Every field is
    # optional and defaults to None. Each `q` names the ORM lookup(s) the
    # supplied value is matched against.
    # NOTE(review): how multiple lookups in one `q` list are combined (and how
    # None-valued fields are treated) is determined by django-ninja's
    # FilterSchema, not by this file -- see its documentation.

    # Matches the result's own id/abid or its snapshot's id/abid/timestamp.
    id: Optional[str] = Field(None, q=['id__startswith', 'abid__icontains', 'snapshot__id__startswith', 'snapshot__abid__icontains', 'snapshot__timestamp__startswith'])

    # Broad search across snapshot url/title/tags, extractor, output and the identifier lookups.
    # Note that 'extractor' has no lookup suffix here, unlike the `extractor` field below.
    search: Optional[str] = Field(None, q=['snapshot__url__icontains', 'snapshot__title__icontains', 'snapshot__tags__name__icontains', 'extractor', 'output__icontains', 'id__startswith', 'abid__icontains', 'snapshot__id__startswith', 'snapshot__abid__icontains', 'snapshot__timestamp__startswith'])

    # Filters on the related snapshot.
    snapshot_id: Optional[str] = Field(None, q=['snapshot__id__startswith', 'snapshot__abid__icontains', 'snapshot__timestamp__startswith'])
    snapshot_url: Optional[str] = Field(None, q='snapshot__url__icontains')
    snapshot_tag: Optional[str] = Field(None, q='snapshot__tags__name__icontains')

    # Filters on the result's own columns.
    status: Optional[str] = Field(None, q='status')
    output: Optional[str] = Field(None, q='output__icontains')
    extractor: Optional[str] = Field(None, q='extractor__icontains')
    cmd: Optional[str] = Field(None, q='cmd__0__icontains')  # lookup targets index 0 of cmd
    pwd: Optional[str] = Field(None, q='pwd__icontains')
    cmd_version: Optional[str] = Field(None, q='cmd_version')

    # created_at: exact match, or a range via __gte (inclusive) / __lt (exclusive).
    created_at: Optional[datetime] = Field(None, q='created_at')
    created_at__gte: Optional[datetime] = Field(None, q='created_at__gte')
    created_at__lt: Optional[datetime] = Field(None, q='created_at__lt')
  115. @router.get("/archiveresults", response=List[ArchiveResultSchema], url_name="get_archiveresult")
  116. @paginate(CustomPagination)
  117. def get_archiveresults(request, filters: ArchiveResultFilterSchema = Query(...)):
  118. """List all ArchiveResult entries matching these filters."""
  119. qs = ArchiveResult.objects.all()
  120. results = filters.filter(qs).distinct()
  121. return results
  122. @router.get("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema, url_name="get_archiveresult")
  123. def get_archiveresult(request, archiveresult_id: str):
  124. """Get a specific ArchiveResult by id or abid."""
  125. return ArchiveResult.objects.get(Q(id__icontains=archiveresult_id) | Q(abid__icontains=archiveresult_id))
  126. # @router.post("/archiveresult", response=ArchiveResultSchema)
  127. # def create_archiveresult(request, payload: ArchiveResultSchema):
  128. # archiveresult = ArchiveResult.objects.create(**payload.dict())
  129. # return archiveresult
  130. #
  131. # @router.put("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema)
  132. # def update_archiveresult(request, archiveresult_id: str, payload: ArchiveResultSchema):
  133. # archiveresult = get_object_or_404(ArchiveResult, id=archiveresult_id)
  134. #
  135. # for attr, value in payload.dict().items():
  136. # setattr(archiveresult, attr, value)
  137. # archiveresult.save()
  138. #
  139. # return archiveresult
  140. #
  141. # @router.delete("/archiveresult/{archiveresult_id}")
  142. # def delete_archiveresult(request, archiveresult_id: str):
  143. # archiveresult = get_object_or_404(ArchiveResult, id=archiveresult_id)
  144. # archiveresult.delete()
  145. # return {"success": True}
  146. ### Snapshot #########################################################################
  147. class SnapshotSchema(Schema):
  148. TYPE: str = 'core.models.Snapshot'
  149. id: UUID
  150. abid: str
  151. created_by_id: str
  152. created_by_username: str
  153. created_at: datetime
  154. modified_at: datetime
  155. bookmarked_at: datetime
  156. downloaded_at: Optional[datetime]
  157. url: str
  158. tags: List[str]
  159. title: Optional[str]
  160. timestamp: str
  161. archive_path: str
  162. # url_for_admin: str
  163. # url_for_view: str
  164. num_archiveresults: int
  165. archiveresults: List[MinimalArchiveResultSchema]
  166. @staticmethod
  167. def resolve_created_by_id(obj):
  168. return str(obj.created_by_id)
  169. @staticmethod
  170. def resolve_created_by_username(obj):
  171. User = get_user_model()
  172. return User.objects.get(id=obj.created_by_id).username
  173. @staticmethod
  174. def resolve_abid(obj):
  175. return str(obj.ABID)
  176. @staticmethod
  177. def resolve_tags(obj):
  178. return sorted(tag.name for tag in obj.tags.all())
  179. # @staticmethod
  180. # def resolve_url_for_admin(obj):
  181. # return f"/admin/core/snapshot/{obj.id}/change/"
  182. # @staticmethod
  183. # def resolve_url_for_view(obj):
  184. # return f"/{obj.archive_path}"
  185. @staticmethod
  186. def resolve_num_archiveresults(obj, context):
  187. return obj.archiveresult_set.all().distinct().count()
  188. @staticmethod
  189. def resolve_archiveresults(obj, context):
  190. if context['request'].with_archiveresults:
  191. return obj.archiveresult_set.all().distinct()
  192. return ArchiveResult.objects.none()
  193. class SnapshotFilterSchema(FilterSchema):
  194. id: Optional[str] = Field(None, q=['id__icontains', 'abid__icontains', 'timestamp__startswith'])
  195. abid: Optional[str] = Field(None, q='abid__icontains')
  196. created_by_id: str = Field(None, q='created_by_id')
  197. created_by_username: str = Field(None, q='created_by__username__icontains')
  198. created_at__gte: datetime = Field(None, q='created_at__gte')
  199. created_at__lt: datetime = Field(None, q='created_at__lt')
  200. created_at: datetime = Field(None, q='created_at')
  201. modified_at: datetime = Field(None, q='modified_at')
  202. modified_at__gte: datetime = Field(None, q='modified_at__gte')
  203. modified_at__lt: datetime = Field(None, q='modified_at__lt')
  204. search: Optional[str] = Field(None, q=['url__icontains', 'title__icontains', 'tags__name__icontains', 'id__icontains', 'abid__icontains', 'timestamp__startswith'])
  205. url: Optional[str] = Field(None, q='url')
  206. tag: Optional[str] = Field(None, q='tags__name')
  207. title: Optional[str] = Field(None, q='title__icontains')
  208. timestamp: Optional[str] = Field(None, q='timestamp__startswith')
  209. bookmarked_at__gte: Optional[datetime] = Field(None, q='bookmarked_at__gte')
  210. bookmarked_at__lt: Optional[datetime] = Field(None, q='bookmarked_at__lt')
  211. @router.get("/snapshots", response=List[SnapshotSchema], url_name="get_snapshots")
  212. @paginate(CustomPagination)
  213. def get_snapshots(request, filters: SnapshotFilterSchema = Query(...), with_archiveresults: bool=False):
  214. """List all Snapshot entries matching these filters."""
  215. request.with_archiveresults = with_archiveresults
  216. qs = Snapshot.objects.all()
  217. results = filters.filter(qs).distinct()
  218. return results
  219. @router.get("/snapshot/{snapshot_id}", response=SnapshotSchema, url_name="get_snapshot")
  220. def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
  221. """Get a specific Snapshot by abid or id."""
  222. request.with_archiveresults = with_archiveresults
  223. snapshot = None
  224. try:
  225. snapshot = Snapshot.objects.get(Q(abid__startswith=snapshot_id) | Q(id__startswith=snapshot_id) | Q(timestamp__startswith=snapshot_id))
  226. except Snapshot.DoesNotExist:
  227. pass
  228. try:
  229. snapshot = snapshot or Snapshot.objects.get(Q(abid__icontains=snapshot_id) | Q(id__icontains=snapshot_id))
  230. except Snapshot.DoesNotExist:
  231. pass
  232. if not snapshot:
  233. raise Snapshot.DoesNotExist
  234. return snapshot
  235. # @router.post("/snapshot", response=SnapshotSchema)
  236. # def create_snapshot(request, payload: SnapshotSchema):
  237. # snapshot = Snapshot.objects.create(**payload.dict())
  238. # return snapshot
  239. #
  240. # @router.put("/snapshot/{snapshot_id}", response=SnapshotSchema)
  241. # def update_snapshot(request, snapshot_id: str, payload: SnapshotSchema):
  242. # snapshot = get_object_or_404(Snapshot, id=snapshot_id)
  243. #
  244. # for attr, value in payload.dict().items():
  245. # setattr(snapshot, attr, value)
  246. # snapshot.save()
  247. #
  248. # return snapshot
  249. #
  250. # @router.delete("/snapshot/{snapshot_id}")
  251. # def delete_snapshot(request, snapshot_id: str):
  252. # snapshot = get_object_or_404(Snapshot, id=snapshot_id)
  253. # snapshot.delete()
  254. # return {"success": True}
  255. ### Tag #########################################################################
  256. class TagSchema(Schema):
  257. TYPE: str = 'core.models.Tag'
  258. id: UUID
  259. abid: str
  260. modified_at: datetime
  261. created_at: datetime
  262. created_by_id: str
  263. created_by_username: str
  264. name: str
  265. slug: str
  266. num_snapshots: int
  267. snapshots: List[SnapshotSchema]
  268. @staticmethod
  269. def resolve_created_by_id(obj):
  270. return str(obj.created_by_id)
  271. @staticmethod
  272. def resolve_created_by_username(obj):
  273. User = get_user_model()
  274. return User.objects.get(id=obj.created_by_id).username
  275. @staticmethod
  276. def resolve_num_snapshots(obj, context):
  277. return obj.snapshot_set.all().distinct().count()
  278. @staticmethod
  279. def resolve_snapshots(obj, context):
  280. if context['request'].with_snapshots:
  281. return obj.snapshot_set.all().distinct()
  282. return Snapshot.objects.none()
  283. @router.get("/tags", response=List[TagSchema], url_name="get_tags")
  284. @paginate(CustomPagination)
  285. def get_tags(request):
  286. request.with_snapshots = False
  287. request.with_archiveresults = False
  288. return Tag.objects.all().distinct()
  289. @router.get("/tag/{tag_id}", response=TagSchema, url_name="get_tag")
  290. def get_tag(request, tag_id: str, with_snapshots: bool=True):
  291. request.with_snapshots = with_snapshots
  292. request.with_archiveresults = False
  293. tag = None
  294. try:
  295. tag = Tag.objects.get(abid__icontains=tag_id)
  296. except (Tag.DoesNotExist, ValidationError):
  297. pass
  298. try:
  299. tag = tag or Tag.objects.get(id__icontains=tag_id)
  300. except (Tag.DoesNotExist, ValidationError):
  301. pass
  302. return tag
  303. @router.get("/any/{abid}", response=Union[SnapshotSchema, ArchiveResultSchema, TagSchema], url_name="get_any")
  304. def get_any(request, abid: str):
  305. request.with_snapshots = False
  306. request.with_archiveresults = False
  307. response = None
  308. try:
  309. response = response or get_snapshot(request, abid)
  310. except Exception:
  311. pass
  312. try:
  313. response = response or get_archiveresult(request, abid)
  314. except Exception:
  315. pass
  316. try:
  317. response = response or get_tag(request, abid)
  318. except Exception:
  319. pass
  320. if abid.startswith(APIToken.abid_prefix):
  321. raise HttpError(403, 'APIToken objects are not accessible via REST API')
  322. if abid.startswith(OutboundWebhook.abid_prefix):
  323. raise HttpError(403, 'OutboundWebhook objects are not accessible via REST API')
  324. raise HttpError(404, 'Object with given ABID not found')