__init__.py 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264
  1. __package__ = 'archivebox.cli'
  2. __command__ = 'archivebox'
  3. import os
  4. import sys
  5. import argparse
  6. import threading
  7. from time import sleep
  8. from collections.abc import Mapping
  9. from rich import print
  10. from typing import Optional, List, IO, Union, Iterable
  11. from pathlib import Path
  12. from importlib import import_module
  13. BUILTIN_LIST = list
  14. CLI_DIR = Path(__file__).resolve().parent
  15. # rewrite setup -> install for backwards compatibility
  16. if len(sys.argv) > 1 and sys.argv[1] == 'setup':
  17. from rich import print
  18. print(':warning: [bold red]DEPRECATED[/bold red] `archivebox setup` is deprecated, use `archivebox install` instead')
  19. sys.argv[1] = 'install'
  20. if '--debug' in sys.argv:
  21. os.environ['DEBUG'] = 'True'
  22. sys.argv.remove('--debug')
  23. # def list_subcommands() -> Dict[str, str]:
  24. # """find and import all valid archivebox_<subcommand>.py files in CLI_DIR"""
  25. # COMMANDS = []
  26. # for filename in os.listdir(CLI_DIR):
  27. # if is_cli_module(filename):
  28. # subcommand = filename.replace('archivebox_', '').replace('.py', '')
  29. # module = import_module('.archivebox_{}'.format(subcommand), __package__)
  30. # assert is_valid_cli_module(module, subcommand)
  31. # COMMANDS.append((subcommand, module.main.__doc__))
  32. # globals()[subcommand] = module.main
  33. # display_order = lambda cmd: (
  34. # display_first.index(cmd[0])
  35. # if cmd[0] in display_first else
  36. # 100 + len(cmd[0])
  37. # )
  38. # return dict(sorted(COMMANDS, key=display_order))
  39. # just define it statically, it's much faster:
  40. SUBCOMMAND_MODULES = {
  41. 'help': 'archivebox_help',
  42. 'version': 'archivebox_version' ,
  43. 'init': 'archivebox_init',
  44. 'install': 'archivebox_install',
  45. ##############################################
  46. 'config': 'archivebox_config',
  47. 'add': 'archivebox_add',
  48. 'remove': 'archivebox_remove',
  49. 'update': 'archivebox_update',
  50. 'list': 'archivebox_list',
  51. 'status': 'archivebox_status',
  52. 'schedule': 'archivebox_schedule',
  53. 'server': 'archivebox_server',
  54. 'shell': 'archivebox_shell',
  55. 'manage': 'archivebox_manage',
  56. # 'oneshot': 'archivebox_oneshot',
  57. }
  58. # every imported command module must have these properties in order to be valid
  59. required_attrs = ('__package__', '__command__', 'main')
  60. # basic checks to make sure imported files are valid subcommands
  61. is_cli_module = lambda fname: fname.startswith('archivebox_') and fname.endswith('.py')
  62. is_valid_cli_module = lambda module, subcommand: (
  63. all(hasattr(module, attr) for attr in required_attrs)
  64. and module.__command__.split(' ')[-1] == subcommand
  65. )
  66. class LazySubcommands(Mapping):
  67. def keys(self):
  68. return SUBCOMMAND_MODULES.keys()
  69. def values(self):
  70. return [self[key] for key in self.keys()]
  71. def items(self):
  72. return [(key, self[key]) for key in self.keys()]
  73. def __getitem__(self, key):
  74. module = import_module(f'.{SUBCOMMAND_MODULES[key]}', __package__)
  75. assert is_valid_cli_module(module, key)
  76. return module.main
  77. def __iter__(self):
  78. return iter(SUBCOMMAND_MODULES.keys())
  79. def __len__(self):
  80. return len(SUBCOMMAND_MODULES)
  81. CLI_SUBCOMMANDS = LazySubcommands()
  82. # these common commands will appear sorted before any others for ease-of-use
  83. meta_cmds = ('help', 'version') # dont require valid data folder at all
  84. setup_cmds = ('init', 'setup', 'install') # require valid data folder, but dont require DB present in it yet
  85. archive_cmds = ('add', 'remove', 'update', 'list', 'status', 'schedule', 'server', 'shell', 'manage') # require valid data folder + existing db present
  86. fake_db = ("oneshot",) # use fake in-memory db
  87. display_first = (*meta_cmds, *setup_cmds, *archive_cmds)
  88. IGNORED_BG_THREADS = ('MainThread', 'ThreadPoolExecutor', 'IPythonHistorySavingThread', 'Scheduler') # threads we dont have to wait for before exiting
  89. def wait_for_bg_threads_to_exit(thread_names: Iterable[str]=(), ignore_names: Iterable[str]=IGNORED_BG_THREADS, timeout: int=60) -> int:
  90. """
  91. Block until the specified threads exit. e.g. pass thread_names=('default_hook_handler',) to wait for webhooks.
  92. Useful for waiting for signal handlers, webhooks, etc. to finish running after a mgmt command completes.
  93. """
  94. wait_for_all: bool = thread_names == ()
  95. thread_matches = lambda thread, ptns: any(ptn in repr(thread) for ptn in ptns)
  96. should_wait = lambda thread: (
  97. not thread_matches(thread, ignore_names)
  98. and (wait_for_all or thread_matches(thread, thread_names)))
  99. for tries in range(timeout):
  100. all_threads = [*threading.enumerate()]
  101. blocking_threads = [*filter(should_wait, all_threads)]
  102. threads_summary = ', '.join(repr(t) for t in blocking_threads)
  103. if blocking_threads:
  104. sleep(1)
  105. if tries == 5: # only show stderr message if we need to wait more than 5s
  106. print(
  107. f'[…] Waiting up to {timeout}s for background jobs (e.g. webhooks) to finish...',
  108. threads_summary,
  109. file=sys.stderr,
  110. )
  111. else:
  112. return tries
  113. raise Exception(f'Background threads failed to exit after {tries}s: {threads_summary}')
  114. def run_subcommand(subcommand: str,
  115. subcommand_args: List[str] | None = None,
  116. stdin: Optional[IO]=None,
  117. pwd: Union[Path, str, None]=None) -> None:
  118. """Run a given ArchiveBox subcommand with the given list of args"""
  119. subcommand_args = subcommand_args or []
  120. from archivebox.misc.checks import check_migrations
  121. from archivebox.config.django import setup_django
  122. # print('DATA_DIR is', DATA_DIR)
  123. # print('pwd is', os.getcwd())
  124. cmd_requires_db = (subcommand in archive_cmds)
  125. init_pending = '--init' in subcommand_args or '--quick-init' in subcommand_args
  126. check_db = cmd_requires_db and not init_pending
  127. setup_django(in_memory_db=subcommand in fake_db, check_db=check_db)
  128. for ignore_pattern in ('help', '-h', '--help', 'version', '--version'):
  129. if ignore_pattern in sys.argv[:4]:
  130. cmd_requires_db = False
  131. break
  132. if subcommand in archive_cmds:
  133. if cmd_requires_db:
  134. check_migrations()
  135. module = import_module('.archivebox_{}'.format(subcommand), __package__)
  136. module.main(args=subcommand_args, stdin=stdin, pwd=pwd) # type: ignore
  137. # wait for webhooks, signals, and other background jobs to finish before exit
  138. wait_for_bg_threads_to_exit(timeout=60)
  139. class NotProvided:
  140. def __len__(self):
  141. return 0
  142. def __bool__(self):
  143. return False
  144. def __repr__(self):
  145. return '<not provided>'
  146. Omitted = Union[None, NotProvided]
  147. OMITTED = NotProvided()
  148. def main(args: List[str] | Omitted=OMITTED, stdin: IO | Omitted=OMITTED, pwd: str | None=None) -> None:
  149. # print('STARTING CLI MAIN ENTRYPOINT')
  150. args = sys.argv[1:] if args is OMITTED else args
  151. stdin = sys.stdin if stdin is OMITTED else stdin
  152. parser = argparse.ArgumentParser(
  153. prog=__command__,
  154. description='ArchiveBox: The self-hosted internet archive',
  155. add_help=False,
  156. )
  157. group = parser.add_mutually_exclusive_group()
  158. group.add_argument(
  159. '--help', '-h',
  160. action='store_true',
  161. help=CLI_SUBCOMMANDS['help'].__doc__,
  162. )
  163. group.add_argument(
  164. '--version',
  165. action='store_true',
  166. help=CLI_SUBCOMMANDS['version'].__doc__,
  167. )
  168. group.add_argument(
  169. "subcommand",
  170. type=str,
  171. help= "The name of the subcommand to run",
  172. nargs='?',
  173. choices=CLI_SUBCOMMANDS.keys(),
  174. default=None,
  175. )
  176. parser.add_argument(
  177. "subcommand_args",
  178. help="Arguments for the subcommand",
  179. nargs=argparse.REMAINDER,
  180. )
  181. command = parser.parse_args(args or ())
  182. if command.version:
  183. command.subcommand = 'version'
  184. elif command.help or command.subcommand is None:
  185. command.subcommand = 'help'
  186. if command.subcommand not in ('version',):
  187. from ..logging_util import log_cli_command
  188. log_cli_command(
  189. subcommand=command.subcommand,
  190. subcommand_args=command.subcommand_args,
  191. stdin=stdin or None,
  192. )
  193. try:
  194. run_subcommand(
  195. subcommand=command.subcommand,
  196. subcommand_args=command.subcommand_args,
  197. stdin=stdin or None,
  198. )
  199. except KeyboardInterrupt:
  200. print('\n\n[red][X] Got CTRL+C. Exiting...[/red]')