v1_cli.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
# Pin this module's package name explicitly; Python consults __package__
# when resolving relative imports made from this module.
__package__ = 'archivebox.api'
  2. import json
  3. from typing import List, Dict, Any, Optional
  4. from enum import Enum
  5. from ninja import Router, Schema
  6. from archivebox.misc.util import ansi_to_html
  7. from archivebox.config.common import ARCHIVING_CONFIG
  8. # from .auth import API_AUTH_METHODS
# Router for the part of the API that exposes archivebox CLI subcommands as REST endpoints.
# Every handler below registers itself on this object via @router.post(...).
router = Router(tags=['ArchiveBox CLI Sub-Commands'])

# Schemas

# Any value shape a command may hand back in the `result` field of
# CLICommandResponseSchema: a list, a string-keyed dict, a scalar, or None.
JSONType = List[Any] | Dict[str, Any] | bool | int | str | None
  13. class CLICommandResponseSchema(Schema):
  14. success: bool
  15. errors: List[str]
  16. result: JSONType
  17. result_format: str = 'str'
  18. stdout: str
  19. stderr: str
  20. class FilterTypeChoices(str, Enum):
  21. exact = 'exact'
  22. substring = 'substring'
  23. regex = 'regex'
  24. domain = 'domain'
  25. tag = 'tag'
  26. timestamp = 'timestamp'
  27. class StatusChoices(str, Enum):
  28. indexed = 'indexed'
  29. archived = 'archived'
  30. unarchived = 'unarchived'
  31. present = 'present'
  32. valid = 'valid'
  33. invalid = 'invalid'
  34. duplicate = 'duplicate'
  35. orphaned = 'orphaned'
  36. corrupted = 'corrupted'
  37. unrecognized = 'unrecognized'
  38. class AddCommandSchema(Schema):
  39. urls: List[str]
  40. tag: str = ""
  41. depth: int = 0
  42. parser: str = "auto"
  43. plugins: str = ""
  44. update: bool = not ARCHIVING_CONFIG.ONLY_NEW # Default to the opposite of ARCHIVING_CONFIG.ONLY_NEW
  45. overwrite: bool = False
  46. index_only: bool = False
  47. class UpdateCommandSchema(Schema):
  48. resume: Optional[float] = 0
  49. only_new: bool = ARCHIVING_CONFIG.ONLY_NEW
  50. index_only: bool = False
  51. overwrite: bool = False
  52. after: Optional[float] = 0
  53. before: Optional[float] = 999999999999999
  54. status: Optional[StatusChoices] = StatusChoices.unarchived
  55. filter_type: Optional[str] = FilterTypeChoices.substring
  56. filter_patterns: Optional[List[str]] = ['https://example.com']
  57. plugins: Optional[str] = ""
  58. class ScheduleCommandSchema(Schema):
  59. import_path: Optional[str] = None
  60. add: bool = False
  61. every: Optional[str] = None
  62. tag: str = ''
  63. depth: int = 0
  64. overwrite: bool = False
  65. update: bool = not ARCHIVING_CONFIG.ONLY_NEW
  66. clear: bool = False
  67. class ListCommandSchema(Schema):
  68. filter_patterns: Optional[List[str]] = ['https://example.com']
  69. filter_type: str = FilterTypeChoices.substring
  70. status: StatusChoices = StatusChoices.indexed
  71. after: Optional[float] = 0
  72. before: Optional[float] = 999999999999999
  73. sort: str = 'bookmarked_at'
  74. as_json: bool = True
  75. as_html: bool = False
  76. as_csv: str | None = 'timestamp,url'
  77. with_headers: bool = False
  78. class RemoveCommandSchema(Schema):
  79. delete: bool = True
  80. after: Optional[float] = 0
  81. before: Optional[float] = 999999999999999
  82. filter_type: str = FilterTypeChoices.exact
  83. filter_patterns: Optional[List[str]] = ['https://example.com']
  84. @router.post("/add", response=CLICommandResponseSchema, summary='archivebox add [args] [urls]')
  85. def cli_add(request, args: AddCommandSchema):
  86. from archivebox.cli.archivebox_add import add
  87. result = add(
  88. urls=args.urls,
  89. tag=args.tag,
  90. depth=args.depth,
  91. update=args.update,
  92. index_only=args.index_only,
  93. overwrite=args.overwrite,
  94. plugins=args.plugins,
  95. parser=args.parser,
  96. bg=True, # Always run in background for API calls
  97. created_by_id=request.user.pk,
  98. )
  99. return {
  100. "success": True,
  101. "errors": [],
  102. "result": result,
  103. "stdout": ansi_to_html(request.stdout.getvalue().strip()),
  104. "stderr": ansi_to_html(request.stderr.getvalue().strip()),
  105. }
  106. @router.post("/update", response=CLICommandResponseSchema, summary='archivebox update [args] [filter_patterns]')
  107. def cli_update(request, args: UpdateCommandSchema):
  108. from archivebox.cli.archivebox_update import update
  109. result = update(
  110. resume=args.resume,
  111. only_new=args.only_new,
  112. index_only=args.index_only,
  113. overwrite=args.overwrite,
  114. before=args.before,
  115. after=args.after,
  116. status=args.status,
  117. filter_type=args.filter_type,
  118. filter_patterns=args.filter_patterns,
  119. plugins=args.plugins,
  120. )
  121. return {
  122. "success": True,
  123. "errors": [],
  124. "result": result,
  125. "stdout": ansi_to_html(request.stdout.getvalue().strip()),
  126. "stderr": ansi_to_html(request.stderr.getvalue().strip()),
  127. }
  128. @router.post("/schedule", response=CLICommandResponseSchema, summary='archivebox schedule [args] [import_path]')
  129. def cli_schedule(request, args: ScheduleCommandSchema):
  130. from archivebox.cli.archivebox_schedule import schedule
  131. result = schedule(
  132. import_path=args.import_path,
  133. add=args.add,
  134. show=args.show,
  135. clear=args.clear,
  136. every=args.every,
  137. tag=args.tag,
  138. depth=args.depth,
  139. overwrite=args.overwrite,
  140. update=args.update,
  141. )
  142. return {
  143. "success": True,
  144. "errors": [],
  145. "result": result,
  146. "stdout": ansi_to_html(request.stdout.getvalue().strip()),
  147. "stderr": ansi_to_html(request.stderr.getvalue().strip()),
  148. }
  149. @router.post("/search", response=CLICommandResponseSchema, summary='archivebox search [args] [filter_patterns]')
  150. def cli_search(request, args: ListCommandSchema):
  151. from archivebox.cli.archivebox_search import search
  152. result = search(
  153. filter_patterns=args.filter_patterns,
  154. filter_type=args.filter_type,
  155. status=args.status,
  156. after=args.after,
  157. before=args.before,
  158. sort=args.sort,
  159. csv=args.as_csv,
  160. json=args.as_json,
  161. html=args.as_html,
  162. with_headers=args.with_headers,
  163. )
  164. result_format = 'txt'
  165. if args.as_json:
  166. result_format = "json"
  167. result = json.loads(result)
  168. elif args.as_html:
  169. result_format = "html"
  170. elif args.as_csv:
  171. result_format = "csv"
  172. return {
  173. "success": True,
  174. "errors": [],
  175. "result": result,
  176. "result_format": result_format,
  177. "stdout": ansi_to_html(request.stdout.getvalue().strip()),
  178. "stderr": ansi_to_html(request.stderr.getvalue().strip()),
  179. }
  180. @router.post("/remove", response=CLICommandResponseSchema, summary='archivebox remove [args] [filter_patterns]')
  181. def cli_remove(request, args: RemoveCommandSchema):
  182. from archivebox.cli.archivebox_remove import remove
  183. result = remove(
  184. yes=True, # no way to interactively ask for confirmation via API, so we force yes
  185. delete=args.delete,
  186. before=args.before,
  187. after=args.after,
  188. filter_type=args.filter_type,
  189. filter_patterns=args.filter_patterns,
  190. )
  191. return {
  192. "success": True,
  193. "errors": [],
  194. "result": result,
  195. "stdout": ansi_to_html(request.stdout.getvalue().strip()),
  196. "stderr": ansi_to_html(request.stderr.getvalue().strip()),
  197. }