__init__.py 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
# Package name passed as the anchor to the relative import_module() calls in this file.
__package__ = 'archivebox.cli'
# Program name handed to argparse as `prog` in main().
__command__ = 'archivebox'
  3. import os
  4. import sys
  5. import argparse
  6. import threading
  7. from time import sleep
  8. from typing import Optional, Dict, List, IO, Union, Iterable
  9. from pathlib import Path
  10. from ..config import OUTPUT_DIR, check_data_folder, check_migrations, stderr
  11. from importlib import import_module
  12. BUILTIN_LIST = list
  13. CLI_DIR = Path(__file__).resolve().parent
  14. # these common commands will appear sorted before any others for ease-of-use
  15. meta_cmds = ('help', 'version') # dont require valid data folder at all
  16. main_cmds = ('init', 'config', 'setup') # dont require existing db present
  17. archive_cmds = ('add', 'remove', 'update', 'list', 'status') # require existing db present
  18. fake_db = ("oneshot",) # use fake in-memory db
  19. display_first = (*meta_cmds, *main_cmds, *archive_cmds)
  20. # every imported command module must have these properties in order to be valid
  21. required_attrs = ('__package__', '__command__', 'main')
  22. # basic checks to make sure imported files are valid subcommands
  23. is_cli_module = lambda fname: fname.startswith('archivebox_') and fname.endswith('.py')
  24. is_valid_cli_module = lambda module, subcommand: (
  25. all(hasattr(module, attr) for attr in required_attrs)
  26. and module.__command__.split(' ')[-1] == subcommand
  27. )
  28. IGNORED_BG_THREADS = ('MainThread', 'ThreadPoolExecutor', 'IPythonHistorySavingThread') # threads we dont have to wait for before exiting
  29. def wait_for_bg_threads_to_exit(thread_names: Iterable[str]=(), ignore_names: Iterable[str]=IGNORED_BG_THREADS, timeout: int=60) -> int:
  30. """
  31. Block until the specified threads exit. e.g. pass thread_names=('default_hook_handler',) to wait for webhooks.
  32. Useful for waiting for signal handlers, webhooks, etc. to finish running after a mgmt command completes.
  33. """
  34. wait_for_all: bool = thread_names == ()
  35. thread_matches = lambda thread, ptns: any(ptn in repr(thread) for ptn in ptns)
  36. should_wait = lambda thread: (
  37. not thread_matches(thread, ignore_names)
  38. and (wait_for_all or thread_matches(thread, thread_names)))
  39. for tries in range(timeout):
  40. all_threads = [*threading.enumerate()]
  41. blocking_threads = [*filter(should_wait, all_threads)]
  42. threads_summary = ', '.join(repr(t) for t in blocking_threads)
  43. if blocking_threads:
  44. sleep(1)
  45. if tries == 5: # only show stderr message if we need to wait more than 5s
  46. stderr(
  47. f'[…] Waiting up to {timeout}s for background jobs (e.g. webhooks) to finish...',
  48. threads_summary,
  49. )
  50. else:
  51. return tries
  52. raise Exception('Background threads failed to exit after {tries}s: {threads_summary}')
  53. def list_subcommands() -> Dict[str, str]:
  54. """find and import all valid archivebox_<subcommand>.py files in CLI_DIR"""
  55. COMMANDS = []
  56. for filename in os.listdir(CLI_DIR):
  57. if is_cli_module(filename):
  58. subcommand = filename.replace('archivebox_', '').replace('.py', '')
  59. module = import_module('.archivebox_{}'.format(subcommand), __package__)
  60. assert is_valid_cli_module(module, subcommand)
  61. COMMANDS.append((subcommand, module.main.__doc__))
  62. globals()[subcommand] = module.main
  63. display_order = lambda cmd: (
  64. display_first.index(cmd[0])
  65. if cmd[0] in display_first else
  66. 100 + len(cmd[0])
  67. )
  68. return dict(sorted(COMMANDS, key=display_order))
  69. def run_subcommand(subcommand: str,
  70. subcommand_args: List[str]=None,
  71. stdin: Optional[IO]=None,
  72. pwd: Union[Path, str, None]=None) -> None:
  73. """Run a given ArchiveBox subcommand with the given list of args"""
  74. subcommand_args = subcommand_args or []
  75. if subcommand not in meta_cmds:
  76. from ..config import setup_django
  77. cmd_requires_db = subcommand in archive_cmds
  78. init_pending = '--init' in subcommand_args or '--quick-init' in subcommand_args
  79. if cmd_requires_db:
  80. check_data_folder(pwd)
  81. setup_django(in_memory_db=subcommand in fake_db, check_db=cmd_requires_db and not init_pending)
  82. if cmd_requires_db:
  83. check_migrations()
  84. module = import_module('.archivebox_{}'.format(subcommand), __package__)
  85. module.main(args=subcommand_args, stdin=stdin, pwd=pwd) # type: ignore
  86. # wait for webhooks, signals, and other background jobs to finish before exit
  87. wait_for_bg_threads_to_exit(timeout=60)
# Built once at import time by scanning CLI_DIR: maps subcommand name -> main() docstring.
SUBCOMMANDS = list_subcommands()
  89. class NotProvided:
  90. pass
  91. def main(args: Optional[List[str]]=NotProvided, stdin: Optional[IO]=NotProvided, pwd: Optional[str]=None) -> None:
  92. args = sys.argv[1:] if args is NotProvided else args
  93. stdin = sys.stdin if stdin is NotProvided else stdin
  94. subcommands = list_subcommands()
  95. parser = argparse.ArgumentParser(
  96. prog=__command__,
  97. description='ArchiveBox: The self-hosted internet archive',
  98. add_help=False,
  99. )
  100. group = parser.add_mutually_exclusive_group()
  101. group.add_argument(
  102. '--help', '-h',
  103. action='store_true',
  104. help=subcommands['help'],
  105. )
  106. group.add_argument(
  107. '--version',
  108. action='store_true',
  109. help=subcommands['version'],
  110. )
  111. group.add_argument(
  112. "subcommand",
  113. type=str,
  114. help= "The name of the subcommand to run",
  115. nargs='?',
  116. choices=subcommands.keys(),
  117. default=None,
  118. )
  119. parser.add_argument(
  120. "subcommand_args",
  121. help="Arguments for the subcommand",
  122. nargs=argparse.REMAINDER,
  123. )
  124. command = parser.parse_args(args or ())
  125. if command.version:
  126. command.subcommand = 'version'
  127. elif command.help or command.subcommand is None:
  128. command.subcommand = 'help'
  129. if command.subcommand not in ('help', 'version', 'status'):
  130. from ..logging_util import log_cli_command
  131. log_cli_command(
  132. subcommand=command.subcommand,
  133. subcommand_args=command.subcommand_args,
  134. stdin=stdin,
  135. pwd=pwd or OUTPUT_DIR
  136. )
  137. run_subcommand(
  138. subcommand=command.subcommand,
  139. subcommand_args=command.subcommand_args,
  140. stdin=stdin,
  141. pwd=pwd or OUTPUT_DIR,
  142. )
  143. __all__ = (
  144. 'SUBCOMMANDS',
  145. 'list_subcommands',
  146. 'run_subcommand',
  147. *SUBCOMMANDS.keys(),
  148. )