v1_cli.py 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. __package__ = 'archivebox.api'
  2. from typing import List, Dict, Any, Optional
  3. from enum import Enum
  4. from ninja import Router, Schema
  5. from ..main import (
  6. add,
  7. remove,
  8. update,
  9. list_all,
  10. schedule,
  11. )
  12. from ..util import ansi_to_html
  13. from ..config import ONLY_NEW
# Router for the API that exposes archivebox CLI subcommands as REST endpoints.
# The endpoint functions in this module register themselves on it via @router.post(...).
router = Router(tags=['ArchiveBox CLI Sub-Commands'])

# Schemas

# Union of JSON-like values (list, dict, bool, int, str or None); used as the
# type of the free-form `result` field of CLICommandResponseSchema.
JSONType = List[Any] | Dict[str, Any] | bool | int | str | None
  18. class CLICommandResponseSchema(Schema):
  19. success: bool
  20. errors: List[str]
  21. result: JSONType
  22. stdout: str
  23. stderr: str
  24. class FilterTypeChoices(str, Enum):
  25. exact = 'exact'
  26. substring = 'substring'
  27. regex = 'regex'
  28. domain = 'domain'
  29. tag = 'tag'
  30. timestamp = 'timestamp'
  31. class StatusChoices(str, Enum):
  32. indexed = 'indexed'
  33. archived = 'archived'
  34. unarchived = 'unarchived'
  35. present = 'present'
  36. valid = 'valid'
  37. invalid = 'invalid'
  38. duplicate = 'duplicate'
  39. orphaned = 'orphaned'
  40. corrupted = 'corrupted'
  41. unrecognized = 'unrecognized'
  42. class AddCommandSchema(Schema):
  43. urls: List[str]
  44. tag: str = ""
  45. depth: int = 0
  46. update: bool = not ONLY_NEW # Default to the opposite of ONLY_NEW
  47. update_all: bool = False
  48. index_only: bool = False
  49. overwrite: bool = False
  50. init: bool = False
  51. extractors: str = ""
  52. parser: str = "auto"
  53. class UpdateCommandSchema(Schema):
  54. resume: Optional[float] = 0
  55. only_new: bool = ONLY_NEW
  56. index_only: bool = False
  57. overwrite: bool = False
  58. after: Optional[float] = 0
  59. before: Optional[float] = 999999999999999
  60. status: Optional[StatusChoices] = StatusChoices.unarchived
  61. filter_type: Optional[str] = FilterTypeChoices.substring
  62. filter_patterns: Optional[List[str]] = ['https://example.com']
  63. extractors: Optional[str] = ""
  64. class ScheduleCommandSchema(Schema):
  65. import_path: Optional[str] = None
  66. add: bool = False
  67. every: Optional[str] = None
  68. tag: str = ''
  69. depth: int = 0
  70. overwrite: bool = False
  71. update: bool = not ONLY_NEW
  72. clear: bool = False
  73. class ListCommandSchema(Schema):
  74. filter_patterns: Optional[List[str]] = ['https://example.com']
  75. filter_type: str = FilterTypeChoices.substring
  76. status: Optional[StatusChoices] = StatusChoices.indexed
  77. after: Optional[float] = 0
  78. before: Optional[float] = 999999999999999
  79. sort: str = 'added'
  80. as_json: bool = True
  81. as_html: bool = False
  82. as_csv: str | bool = 'timestamp,url'
  83. with_headers: bool = False
  84. class RemoveCommandSchema(Schema):
  85. delete: bool = True
  86. after: Optional[float] = 0
  87. before: Optional[float] = 999999999999999
  88. filter_type: str = FilterTypeChoices.exact
  89. filter_patterns: Optional[List[str]] = ['https://example.com']
  90. @router.post("/add", response=CLICommandResponseSchema, summary='archivebox add [args] [urls]')
  91. def cli_add(request, args: AddCommandSchema):
  92. result = add(
  93. urls=args.urls,
  94. tag=args.tag,
  95. depth=args.depth,
  96. update=args.update,
  97. update_all=args.update_all,
  98. index_only=args.index_only,
  99. overwrite=args.overwrite,
  100. init=args.init,
  101. extractors=args.extractors,
  102. parser=args.parser,
  103. )
  104. return {
  105. "success": True,
  106. "errors": [],
  107. "result": result,
  108. "stdout": ansi_to_html(request.stdout.getvalue().strip()),
  109. "stderr": ansi_to_html(request.stderr.getvalue().strip()),
  110. }
  111. @router.post("/update", response=CLICommandResponseSchema, summary='archivebox update [args] [filter_patterns]')
  112. def cli_update(request, args: UpdateCommandSchema):
  113. result = update(
  114. resume=args.resume,
  115. only_new=args.only_new,
  116. index_only=args.index_only,
  117. overwrite=args.overwrite,
  118. before=args.before,
  119. after=args.after,
  120. status=args.status,
  121. filter_type=args.filter_type,
  122. filter_patterns=args.filter_patterns,
  123. extractors=args.extractors,
  124. )
  125. return {
  126. "success": True,
  127. "errors": [],
  128. "result": result,
  129. "stdout": ansi_to_html(request.stdout.getvalue().strip()),
  130. "stderr": ansi_to_html(request.stderr.getvalue().strip()),
  131. }
  132. @router.post("/schedule", response=CLICommandResponseSchema, summary='archivebox schedule [args] [import_path]')
  133. def cli_schedule(request, args: ScheduleCommandSchema):
  134. result = schedule(
  135. import_path=args.import_path,
  136. add=args.add,
  137. show=args.show,
  138. clear=args.clear,
  139. every=args.every,
  140. tag=args.tag,
  141. depth=args.depth,
  142. overwrite=args.overwrite,
  143. update=args.update,
  144. )
  145. return {
  146. "success": True,
  147. "errors": [],
  148. "result": result,
  149. "stdout": ansi_to_html(request.stdout.getvalue().strip()),
  150. "stderr": ansi_to_html(request.stderr.getvalue().strip()),
  151. }
  152. @router.post("/list", response=CLICommandResponseSchema, summary='archivebox list [args] [filter_patterns]')
  153. def cli_list(request, args: ListCommandSchema):
  154. result = list_all(
  155. filter_patterns=args.filter_patterns,
  156. filter_type=args.filter_type,
  157. status=args.status,
  158. after=args.after,
  159. before=args.before,
  160. sort=args.sort,
  161. csv=args.as_csv,
  162. json=args.as_json,
  163. html=args.as_html,
  164. with_headers=args.with_headers,
  165. )
  166. result_format = 'txt'
  167. if args.as_json:
  168. result_format = "json"
  169. elif args.as_html:
  170. result_format = "html"
  171. elif args.as_csv:
  172. result_format = "csv"
  173. return {
  174. "success": True,
  175. "errors": [],
  176. "result": result,
  177. "result_format": result_format,
  178. "stdout": ansi_to_html(request.stdout.getvalue().strip()),
  179. "stderr": ansi_to_html(request.stderr.getvalue().strip()),
  180. }
  181. @router.post("/remove", response=CLICommandResponseSchema, summary='archivebox remove [args] [filter_patterns]')
  182. def cli_remove(request, args: RemoveCommandSchema):
  183. result = remove(
  184. yes=True, # no way to interactively ask for confirmation via API, so we force yes
  185. delete=args.delete,
  186. before=args.before,
  187. after=args.after,
  188. filter_type=args.filter_type,
  189. filter_patterns=args.filter_patterns,
  190. )
  191. return {
  192. "success": True,
  193. "errors": [],
  194. "result": result,
  195. "stdout": ansi_to_html(request.stdout.getvalue().strip()),
  196. "stderr": ansi_to_html(request.stderr.getvalue().strip()),
  197. }