v1_cli.py 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236
  1. __package__ = 'archivebox.api'
  2. from typing import List, Dict, Any, Optional
  3. from enum import Enum
  4. from ninja import Router, Schema
  5. from ..main import (
  6. add,
  7. remove,
  8. update,
  9. list_all,
  10. schedule,
  11. )
  12. from archivebox.misc.util import ansi_to_html
  13. from archivebox.config.common import ARCHIVING_CONFIG
  14. from .auth import API_AUTH_METHODS
  # Router that exposes the archivebox CLI subcommands as REST endpoints.
  # The auth methods are passed at router level rather than per endpoint.
  router = Router(
      tags=['ArchiveBox CLI Sub-Commands'],
      auth=API_AUTH_METHODS,
  )

  # Schemas

  # Type of the `result` field in command responses: a JSON-style value
  # (array, object, boolean, integer, string or null).
  JSONType = (
      List[Any]
      | Dict[str, Any]
      | bool
      | int
      | str
      | None
  )
  class CLICommandResponseSchema(Schema):
      """Response envelope used by the CLI-command endpoints of this router.

      Each handler returns a dict with these keys; the handlers in this
      file always set ``success`` to True and ``errors`` to an empty list,
      and fill ``stdout`` / ``stderr`` with HTML-converted captured output.
      """

      success: bool
      errors: List[str]
      result: JSONType
      # The list endpoint reports how `result` is encoded ("json", "html",
      # "csv" or "txt"). The field is declared here so that value has a
      # place in the response; it is optional so the other endpoints, which
      # do not set it, keep validating.
      # NOTE(review): keys that are not declared on the response schema are
      # presumably dropped during response validation -- confirm against
      # the django-ninja / pydantic versions in use.
      result_format: Optional[str] = None
      stdout: str
      stderr: str
  class FilterTypeChoices(str, Enum):
      """Names of the pattern-matching modes used as `filter_type` defaults.

      Members are also plain strings (``str`` mixin), so they compare equal
      to their values and can be assigned to fields annotated as ``str``.
      """

      exact = "exact"
      substring = "substring"
      regex = "regex"
      domain = "domain"
      tag = "tag"
      timestamp = "timestamp"
  class StatusChoices(str, Enum):
      """Status names accepted by the `status` field of the update/list schemas.

      Members are also plain strings (``str`` mixin), so each one compares
      equal to its own value.
      """

      indexed = "indexed"
      archived = "archived"
      unarchived = "unarchived"
      present = "present"
      valid = "valid"
      invalid = "invalid"
      duplicate = "duplicate"
      orphaned = "orphaned"
      corrupted = "corrupted"
      unrecognized = "unrecognized"
  class AddCommandSchema(Schema):
      """Request body of the `/add` endpoint.

      Every field is forwarded under the same name as a keyword argument to
      ``add()`` by ``cli_add``. Only ``urls`` is required.
      """

      urls: List[str]
      tag: str = ''
      depth: int = 0
      # Defaults to the opposite of ARCHIVING_CONFIG.ONLY_NEW. The expression
      # is evaluated once, when this class body runs at import time.
      update: bool = not ARCHIVING_CONFIG.ONLY_NEW
      update_all: bool = False
      index_only: bool = False
      overwrite: bool = False
      init: bool = False
      extractors: str = ''
      parser: str = 'auto'
  class UpdateCommandSchema(Schema):
      """Request body of the `/update` endpoint.

      Every field is forwarded under the same name as a keyword argument to
      ``update()`` by ``cli_update``. All fields have defaults.
      """

      resume: Optional[float] = 0
      # Read from ARCHIVING_CONFIG once, when this class body runs at import time.
      only_new: bool = ARCHIVING_CONFIG.ONLY_NEW
      index_only: bool = False
      overwrite: bool = False

      # Default numeric window: 0 up to a very large upper bound.
      after: Optional[float] = 0
      before: Optional[float] = 999_999_999_999_999

      status: Optional[StatusChoices] = StatusChoices.unarchived
      # Annotated as a plain string; the default is a str-valued enum member.
      filter_type: Optional[str] = FilterTypeChoices.substring
      filter_patterns: Optional[List[str]] = ["https://example.com"]
      extractors: Optional[str] = ''
  class ScheduleCommandSchema(Schema):
      """Request body of the `/schedule` endpoint.

      Every field is forwarded under the same name as a keyword argument to
      ``schedule()`` by ``cli_schedule``. All fields have defaults.
      """

      import_path: Optional[str] = None
      add: bool = False
      # cli_schedule reads `args.show`; without this field that attribute
      # access fails on every request. Defaults to False so existing clients
      # that never send it keep working.
      show: bool = False
      every: Optional[str] = None
      tag: str = ''
      depth: int = 0
      overwrite: bool = False
      # Defaults to the opposite of ARCHIVING_CONFIG.ONLY_NEW. The expression
      # is evaluated once, when this class body runs at import time.
      update: bool = not ARCHIVING_CONFIG.ONLY_NEW
      clear: bool = False
  class ListCommandSchema(Schema):
      """Request body of the `/list` endpoint.

      ``cli_list`` forwards these fields to ``list_all()``; the ``as_json``,
      ``as_html`` and ``as_csv`` fields are passed as ``json``, ``html`` and
      ``csv`` respectively. All fields have defaults.
      """

      filter_patterns: Optional[List[str]] = ["https://example.com"]
      # Annotated as a plain string; the default is a str-valued enum member.
      filter_type: str = FilterTypeChoices.substring
      status: Optional[StatusChoices] = StatusChoices.indexed

      # Default numeric window: 0 up to a very large upper bound.
      after: Optional[float] = 0
      before: Optional[float] = 999_999_999_999_999

      sort: str = "bookmarked_at"

      # Output-format switches. Several are truthy by default; cli_list
      # checks them in the order json, html, csv when labelling the result.
      as_json: bool = True
      as_html: bool = False
      as_csv: str | bool = "timestamp,url"
      with_headers: bool = False
  class RemoveCommandSchema(Schema):
      """Request body of the `/remove` endpoint.

      Every field is forwarded under the same name as a keyword argument to
      ``remove()`` by ``cli_remove``. All fields have defaults; note that
      ``delete`` defaults to True.
      """

      delete: bool = True

      # Default numeric window: 0 up to a very large upper bound.
      after: Optional[float] = 0
      before: Optional[float] = 999_999_999_999_999

      # Annotated as a plain string; the default is a str-valued enum member.
      filter_type: str = FilterTypeChoices.exact
      filter_patterns: Optional[List[str]] = ["https://example.com"]
  @router.post(
      "/add",
      response=CLICommandResponseSchema,
      summary='archivebox add [args] [urls]',
  )
  def cli_add(request, args: AddCommandSchema):
      """Run the `add` command with the options given in the request body.

      Returns a dict shaped like CLICommandResponseSchema. No exception is
      caught here, so a response produced by this function always reports
      success with an empty error list.
      """
      add_kwargs = {
          'urls': args.urls,
          'tag': args.tag,
          'depth': args.depth,
          'update': args.update,
          'update_all': args.update_all,
          'index_only': args.index_only,
          'overwrite': args.overwrite,
          'init': args.init,
          'extractors': args.extractors,
          'parser': args.parser,
      }
      outcome = add(**add_kwargs)

      # request.stdout / request.stderr are read via .getvalue(); presumably
      # in-memory buffers attached to the request elsewhere -- TODO confirm.
      stdout_html = ansi_to_html(request.stdout.getvalue().strip())
      stderr_html = ansi_to_html(request.stderr.getvalue().strip())

      return {
          "success": True,
          "errors": [],
          "result": outcome,
          "stdout": stdout_html,
          "stderr": stderr_html,
      }
  @router.post(
      "/update",
      response=CLICommandResponseSchema,
      summary='archivebox update [args] [filter_patterns]',
  )
  def cli_update(request, args: UpdateCommandSchema):
      """Run the `update` command with the options given in the request body.

      Returns a dict shaped like CLICommandResponseSchema. No exception is
      caught here, so a response produced by this function always reports
      success with an empty error list.
      """
      update_kwargs = {
          'resume': args.resume,
          'only_new': args.only_new,
          'index_only': args.index_only,
          'overwrite': args.overwrite,
          'before': args.before,
          'after': args.after,
          'status': args.status,
          'filter_type': args.filter_type,
          'filter_patterns': args.filter_patterns,
          'extractors': args.extractors,
      }
      outcome = update(**update_kwargs)

      # request.stdout / request.stderr are read via .getvalue(); presumably
      # in-memory buffers attached to the request elsewhere -- TODO confirm.
      stdout_html = ansi_to_html(request.stdout.getvalue().strip())
      stderr_html = ansi_to_html(request.stderr.getvalue().strip())

      return {
          "success": True,
          "errors": [],
          "result": outcome,
          "stdout": stdout_html,
          "stderr": stderr_html,
      }
  @router.post("/schedule", response=CLICommandResponseSchema, summary='archivebox schedule [args] [import_path]')
  def cli_schedule(request, args: ScheduleCommandSchema):
      """Run the `schedule` command with the options given in the request body.

      Returns a dict shaped like CLICommandResponseSchema. No exception is
      caught here, so a response produced by this function always reports
      success with an empty error list.
      """
      result = schedule(
          import_path=args.import_path,
          add=args.add,
          # The request schema does not necessarily declare a `show` field;
          # a plain `args.show` then raises AttributeError on every request.
          # Fall back to False when the attribute is absent.
          show=getattr(args, 'show', False),
          clear=args.clear,
          every=args.every,
          tag=args.tag,
          depth=args.depth,
          overwrite=args.overwrite,
          update=args.update,
      )

      # request.stdout / request.stderr are read via .getvalue(); presumably
      # in-memory buffers attached to the request elsewhere -- TODO confirm.
      return {
          "success": True,
          "errors": [],
          "result": result,
          "stdout": ansi_to_html(request.stdout.getvalue().strip()),
          "stderr": ansi_to_html(request.stderr.getvalue().strip()),
      }
  @router.post(
      "/list",
      response=CLICommandResponseSchema,
      summary='archivebox list [args] [filter_patterns]',
  )
  def cli_list(request, args: ListCommandSchema):
      """Run the `list` command with the options given in the request body.

      Returns a dict with the CLICommandResponseSchema keys plus a
      "result_format" label. No exception is caught here, so a response
      produced by this function always reports success with an empty error
      list.
      """
      list_kwargs = {
          'filter_patterns': args.filter_patterns,
          'filter_type': args.filter_type,
          'status': args.status,
          'after': args.after,
          'before': args.before,
          'sort': args.sort,
          'csv': args.as_csv,
          'json': args.as_json,
          'html': args.as_html,
          'with_headers': args.with_headers,
      }
      listing = list_all(**list_kwargs)

      # Label the output by the first truthy format switch, checked in the
      # order json, html, csv; 'txt' when none of them is set.
      result_format = (
          "json" if args.as_json
          else "html" if args.as_html
          else "csv" if args.as_csv
          else 'txt'
      )

      # request.stdout / request.stderr are read via .getvalue(); presumably
      # in-memory buffers attached to the request elsewhere -- TODO confirm.
      stdout_html = ansi_to_html(request.stdout.getvalue().strip())
      stderr_html = ansi_to_html(request.stderr.getvalue().strip())

      # NOTE(review): "result_format" presumably only reaches the client if
      # the response schema declares a field of that name -- confirm.
      return {
          "success": True,
          "errors": [],
          "result": listing,
          "result_format": result_format,
          "stdout": stdout_html,
          "stderr": stderr_html,
      }
  @router.post(
      "/remove",
      response=CLICommandResponseSchema,
      summary='archivebox remove [args] [filter_patterns]',
  )
  def cli_remove(request, args: RemoveCommandSchema):
      """Run the `remove` command with the options given in the request body.

      Returns a dict shaped like CLICommandResponseSchema. No exception is
      caught here, so a response produced by this function always reports
      success with an empty error list.
      """
      remove_kwargs = {
          # There is no way to ask for confirmation interactively through
          # the API, so `yes` is always forced to True.
          'yes': True,
          'delete': args.delete,
          'before': args.before,
          'after': args.after,
          'filter_type': args.filter_type,
          'filter_patterns': args.filter_patterns,
      }
      outcome = remove(**remove_kwargs)

      # request.stdout / request.stderr are read via .getvalue(); presumably
      # in-memory buffers attached to the request elsewhere -- TODO confirm.
      stdout_html = ansi_to_html(request.stdout.getvalue().strip())
      stderr_html = ansi_to_html(request.stderr.getvalue().strip())

      return {
          "success": True,
          "errors": [],
          "result": outcome,
          "stdout": stdout_html,
          "stderr": stderr_html,
      }