v1_core.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426
# Explicit package name for this module; the relative import of `.auth`
# further down is resolved against it.
__package__ = 'archivebox.api'
  2. import math
  3. from uuid import UUID
  4. from typing import List, Optional, Union, Any
  5. from datetime import datetime
  6. from django.db.models import Q
  7. from django.core.exceptions import ValidationError
  8. from django.contrib.auth import get_user_model
  9. from ninja import Router, Schema, FilterSchema, Field, Query
  10. from ninja.pagination import paginate, PaginationBase
  11. from ninja.errors import HttpError
  12. from core.models import Snapshot, ArchiveResult, Tag
  13. from api.models import APIToken, OutboundWebhook
  14. from abid_utils.abid import ABID
  15. from .auth import API_AUTH_METHODS
# Router that the endpoint functions in this module register on. Every route
# is tagged 'Core Models' and uses API_AUTH_METHODS for authentication.
router = Router(tags=['Core Models'], auth=API_AUTH_METHODS)
  17. class CustomPagination(PaginationBase):
  18. class Input(Schema):
  19. limit: int = 200
  20. offset: int = 0
  21. page: int = 0
  22. class Output(Schema):
  23. total_items: int
  24. total_pages: int
  25. page: int
  26. limit: int
  27. offset: int
  28. num_items: int
  29. items: List[Any]
  30. def paginate_queryset(self, queryset, pagination: Input, **params):
  31. limit = min(pagination.limit, 500)
  32. offset = pagination.offset or (pagination.page * limit)
  33. total = queryset.count()
  34. total_pages = math.ceil(total / limit)
  35. current_page = math.ceil(offset / (limit + 1))
  36. items = queryset[offset : offset + limit]
  37. return {
  38. 'total_items': total,
  39. 'total_pages': total_pages,
  40. 'page': current_page,
  41. 'limit': limit,
  42. 'offset': offset,
  43. 'num_items': len(items),
  44. 'items': items,
  45. }
  46. ### ArchiveResult #########################################################################
  47. class MinimalArchiveResultSchema(Schema):
  48. TYPE: str = 'core.models.ArchiveResult'
  49. id: UUID
  50. abid: str
  51. modified_at: datetime
  52. created_at: datetime
  53. created_by_id: str
  54. created_by_username: str
  55. extractor: str
  56. cmd_version: Optional[str]
  57. cmd: List[str]
  58. pwd: str
  59. status: str
  60. output: str
  61. start_ts: Optional[datetime]
  62. end_ts: Optional[datetime]
  63. @staticmethod
  64. def resolve_created_by_id(obj):
  65. return str(obj.created_by_id)
  66. @staticmethod
  67. def resolve_created_by_username(obj):
  68. User = get_user_model()
  69. return User.objects.get(id=obj.created_by_id).username
  70. @staticmethod
  71. def resolve_abid(obj):
  72. return str(obj.ABID)
  73. @staticmethod
  74. def resolve_created_at(obj):
  75. return obj.start_ts
  76. @staticmethod
  77. def resolve_snapshot_timestamp(obj):
  78. return obj.snapshot.timestamp
  79. @staticmethod
  80. def resolve_snapshot_url(obj):
  81. return obj.snapshot.url
  82. @staticmethod
  83. def resolve_snapshot_id(obj):
  84. return str(obj.snapshot_id)
  85. @staticmethod
  86. def resolve_snapshot_abid(obj):
  87. return str(obj.snapshot.ABID)
  88. @staticmethod
  89. def resolve_snapshot_tags(obj):
  90. return sorted(tag.name for tag in obj.snapshot.tags.all())
# Full ArchiveResult response schema: everything in MinimalArchiveResultSchema
# plus details of the parent snapshot. The values for the snapshot_* fields
# come from the resolve_snapshot_* static methods inherited from the base class.
class ArchiveResultSchema(MinimalArchiveResultSchema):
    TYPE: str = 'core.models.ArchiveResult'

    # ... Extends MinimalArchiveResultSchema fields ...

    snapshot_id: UUID
    snapshot_abid: str
    snapshot_timestamp: str
    snapshot_url: str
    snapshot_tags: List[str]
# Query-string filters accepted by the ArchiveResult list endpoint.
# Each field's `q` names the ORM lookup(s) its value is matched against; when
# `q` is a list, a match on any one of the lookups is enough (django-ninja's
# default FilterSchema behaviour -- confirm if a custom connector is configured).
class ArchiveResultFilterSchema(FilterSchema):
    # Identifier lookups: match against this result's own id/abid or against
    # the parent snapshot's id/abid/timestamp.
    id: Optional[str] = Field(None, q=['id__startswith', 'abid__icontains', 'snapshot__id__startswith', 'snapshot__abid__icontains', 'snapshot__timestamp__startswith'])

    # Free-text search across snapshot url/title/tags, extractor, output and
    # the identifier lookups above.
    search: Optional[str] = Field(None, q=['snapshot__url__icontains', 'snapshot__title__icontains', 'snapshot__tags__name__icontains', 'extractor', 'output__icontains', 'id__startswith', 'abid__icontains', 'snapshot__id__startswith', 'snapshot__abid__icontains', 'snapshot__timestamp__startswith'])

    # Filters on the parent snapshot.
    snapshot_id: Optional[str] = Field(None, q=['snapshot__id__startswith', 'snapshot__abid__icontains', 'snapshot__timestamp__startswith'])
    snapshot_url: Optional[str] = Field(None, q='snapshot__url__icontains')
    snapshot_tag: Optional[str] = Field(None, q='snapshot__tags__name__icontains')

    # Filters on the ArchiveResult's own columns.
    status: Optional[str] = Field(None, q='status')
    output: Optional[str] = Field(None, q='output__icontains')
    extractor: Optional[str] = Field(None, q='extractor__icontains')
    # NOTE(review): `cmd__0` presumably targets the first element of the cmd list -- confirm.
    cmd: Optional[str] = Field(None, q='cmd__0__icontains')
    pwd: Optional[str] = Field(None, q='pwd__icontains')
    cmd_version: Optional[str] = Field(None, q='cmd_version')

    # Exact match, or half-open range [created_at__gte, created_at__lt).
    created_at: Optional[datetime] = Field(None, q='created_at')
    created_at__gte: Optional[datetime] = Field(None, q='created_at__gte')
    created_at__lt: Optional[datetime] = Field(None, q='created_at__lt')
  114. @router.get("/archiveresults", response=List[ArchiveResultSchema], url_name="get_archiveresult")
  115. @paginate(CustomPagination)
  116. def get_archiveresults(request, filters: ArchiveResultFilterSchema = Query(...)):
  117. """List all ArchiveResult entries matching these filters."""
  118. qs = ArchiveResult.objects.all()
  119. results = filters.filter(qs).distinct()
  120. return results
  121. @router.get("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema, url_name="get_archiveresult")
  122. def get_archiveresult(request, archiveresult_id: str):
  123. """Get a specific ArchiveResult by id or abid."""
  124. return ArchiveResult.objects.get(Q(id__icontains=archiveresult_id) | Q(abid__icontains=archiveresult_id))
  125. # @router.post("/archiveresult", response=ArchiveResultSchema)
  126. # def create_archiveresult(request, payload: ArchiveResultSchema):
  127. # archiveresult = ArchiveResult.objects.create(**payload.dict())
  128. # return archiveresult
  129. #
  130. # @router.put("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema)
  131. # def update_archiveresult(request, archiveresult_id: str, payload: ArchiveResultSchema):
  132. # archiveresult = get_object_or_404(ArchiveResult, id=archiveresult_id)
  133. #
  134. # for attr, value in payload.dict().items():
  135. # setattr(archiveresult, attr, value)
  136. # archiveresult.save()
  137. #
  138. # return archiveresult
  139. #
  140. # @router.delete("/archiveresult/{archiveresult_id}")
  141. # def delete_archiveresult(request, archiveresult_id: str):
  142. # archiveresult = get_object_or_404(ArchiveResult, id=archiveresult_id)
  143. # archiveresult.delete()
  144. # return {"success": True}
  145. ### Snapshot #########################################################################
  146. class SnapshotSchema(Schema):
  147. TYPE: str = 'core.models.Snapshot'
  148. id: UUID
  149. abid: str
  150. created_by_id: str
  151. created_by_username: str
  152. created_at: datetime
  153. modified_at: datetime
  154. bookmarked_at: datetime
  155. downloaded_at: Optional[datetime]
  156. url: str
  157. tags: List[str]
  158. title: Optional[str]
  159. timestamp: str
  160. archive_path: str
  161. # url_for_admin: str
  162. # url_for_view: str
  163. num_archiveresults: int
  164. archiveresults: List[MinimalArchiveResultSchema]
  165. @staticmethod
  166. def resolve_created_by_id(obj):
  167. return str(obj.created_by_id)
  168. @staticmethod
  169. def resolve_created_by_username(obj):
  170. User = get_user_model()
  171. return User.objects.get(id=obj.created_by_id).username
  172. @staticmethod
  173. def resolve_abid(obj):
  174. return str(obj.ABID)
  175. @staticmethod
  176. def resolve_tags(obj):
  177. return sorted(tag.name for tag in obj.tags.all())
  178. # @staticmethod
  179. # def resolve_url_for_admin(obj):
  180. # return f"/admin/core/snapshot/{obj.id}/change/"
  181. # @staticmethod
  182. # def resolve_url_for_view(obj):
  183. # return f"/{obj.archive_path}"
  184. @staticmethod
  185. def resolve_num_archiveresults(obj, context):
  186. return obj.archiveresult_set.all().distinct().count()
  187. @staticmethod
  188. def resolve_archiveresults(obj, context):
  189. if context['request'].with_archiveresults:
  190. return obj.archiveresult_set.all().distinct()
  191. return ArchiveResult.objects.none()
  192. class SnapshotFilterSchema(FilterSchema):
  193. id: Optional[str] = Field(None, q=['id__icontains', 'abid__icontains', 'timestamp__startswith'])
  194. abid: Optional[str] = Field(None, q='abid__icontains')
  195. created_by_id: str = Field(None, q='created_by_id')
  196. created_by_username: str = Field(None, q='created_by__username__icontains')
  197. created_at__gte: datetime = Field(None, q='created_at__gte')
  198. created_at__lt: datetime = Field(None, q='created_at__lt')
  199. created_at: datetime = Field(None, q='created_at')
  200. modified_at: datetime = Field(None, q='modified_at')
  201. modified_at__gte: datetime = Field(None, q='modified_at__gte')
  202. modified_at__lt: datetime = Field(None, q='modified_at__lt')
  203. search: Optional[str] = Field(None, q=['url__icontains', 'title__icontains', 'tags__name__icontains', 'id__icontains', 'abid__icontains', 'timestamp__startswith'])
  204. url: Optional[str] = Field(None, q='url')
  205. tag: Optional[str] = Field(None, q='tags__name')
  206. title: Optional[str] = Field(None, q='title__icontains')
  207. timestamp: Optional[str] = Field(None, q='timestamp__startswith')
  208. bookmarked_at__gte: Optional[datetime] = Field(None, q='bookmarked_at__gte')
  209. bookmarked_at__lt: Optional[datetime] = Field(None, q='bookmarked_at__lt')
  210. @router.get("/snapshots", response=List[SnapshotSchema], url_name="get_snapshots")
  211. @paginate(CustomPagination)
  212. def get_snapshots(request, filters: SnapshotFilterSchema = Query(...), with_archiveresults: bool=False):
  213. """List all Snapshot entries matching these filters."""
  214. request.with_archiveresults = with_archiveresults
  215. qs = Snapshot.objects.all()
  216. results = filters.filter(qs).distinct()
  217. return results
  218. @router.get("/snapshot/{snapshot_id}", response=SnapshotSchema, url_name="get_snapshot")
  219. def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
  220. """Get a specific Snapshot by abid or id."""
  221. request.with_archiveresults = with_archiveresults
  222. snapshot = None
  223. try:
  224. snapshot = Snapshot.objects.get(Q(abid__startswith=snapshot_id) | Q(id__startswith=snapshot_id) | Q(timestamp__startswith=snapshot_id))
  225. except Snapshot.DoesNotExist:
  226. pass
  227. try:
  228. snapshot = snapshot or Snapshot.objects.get(Q(abid__icontains=snapshot_id) | Q(id__icontains=snapshot_id))
  229. except Snapshot.DoesNotExist:
  230. pass
  231. if not snapshot:
  232. raise Snapshot.DoesNotExist
  233. return snapshot
  234. # @router.post("/snapshot", response=SnapshotSchema)
  235. # def create_snapshot(request, payload: SnapshotSchema):
  236. # snapshot = Snapshot.objects.create(**payload.dict())
  237. # return snapshot
  238. #
  239. # @router.put("/snapshot/{snapshot_id}", response=SnapshotSchema)
  240. # def update_snapshot(request, snapshot_id: str, payload: SnapshotSchema):
  241. # snapshot = get_object_or_404(Snapshot, id=snapshot_id)
  242. #
  243. # for attr, value in payload.dict().items():
  244. # setattr(snapshot, attr, value)
  245. # snapshot.save()
  246. #
  247. # return snapshot
  248. #
  249. # @router.delete("/snapshot/{snapshot_id}")
  250. # def delete_snapshot(request, snapshot_id: str):
  251. # snapshot = get_object_or_404(Snapshot, id=snapshot_id)
  252. # snapshot.delete()
  253. # return {"success": True}
  254. ### Tag #########################################################################
  255. class TagSchema(Schema):
  256. TYPE: str = 'core.models.Tag'
  257. id: UUID
  258. abid: str
  259. modified_at: datetime
  260. created_at: datetime
  261. created_by_id: str
  262. created_by_username: str
  263. name: str
  264. slug: str
  265. num_snapshots: int
  266. snapshots: List[SnapshotSchema]
  267. @staticmethod
  268. def resolve_created_by_id(obj):
  269. return str(obj.created_by_id)
  270. @staticmethod
  271. def resolve_created_by_username(obj):
  272. User = get_user_model()
  273. return User.objects.get(id=obj.created_by_id).username
  274. @staticmethod
  275. def resolve_num_snapshots(obj, context):
  276. return obj.snapshot_set.all().distinct().count()
  277. @staticmethod
  278. def resolve_snapshots(obj, context):
  279. if context['request'].with_snapshots:
  280. return obj.snapshot_set.all().distinct()
  281. return Snapshot.objects.none()
  282. @router.get("/tags", response=List[TagSchema], url_name="get_tags")
  283. @paginate(CustomPagination)
  284. def get_tags(request):
  285. request.with_snapshots = False
  286. request.with_archiveresults = False
  287. return Tag.objects.all().distinct()
  288. @router.get("/tag/{tag_id}", response=TagSchema, url_name="get_tag")
  289. def get_tag(request, tag_id: str, with_snapshots: bool=True):
  290. request.with_snapshots = with_snapshots
  291. request.with_archiveresults = False
  292. tag = None
  293. try:
  294. tag = Tag.objects.get(abid__icontains=tag_id)
  295. except (Tag.DoesNotExist, ValidationError):
  296. pass
  297. try:
  298. tag = tag or Tag.objects.get(id__icontains=tag_id)
  299. except (Tag.DoesNotExist, ValidationError):
  300. pass
  301. return tag
  302. @router.get("/any/{abid}", response=Union[SnapshotSchema, ArchiveResultSchema, TagSchema], url_name="get_any")
  303. def get_any(request, abid: str):
  304. request.with_snapshots = False
  305. request.with_archiveresults = False
  306. response = None
  307. try:
  308. response = response or get_snapshot(request, abid)
  309. except Exception:
  310. pass
  311. try:
  312. response = response or get_archiveresult(request, abid)
  313. except Exception:
  314. pass
  315. try:
  316. response = response or get_tag(request, abid)
  317. except Exception:
  318. pass
  319. if abid.startswith(APIToken.abid_prefix):
  320. raise HttpError(403, 'APIToken objects are not accessible via REST API')
  321. if abid.startswith(OutboundWebhook.abid_prefix):
  322. raise HttpError(403, 'OutboundWebhook objects are not accessible via REST API')
  323. raise HttpError(404, 'Object with given ABID not found')