archivebox_pluginmap.py 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366
  1. #!/usr/bin/env python3
  2. __package__ = 'archivebox.cli'
  3. from typing import Optional
  4. from pathlib import Path
  5. import rich_click as click
  6. from archivebox.misc.util import docstring, enforce_types
  7. # State Machine ASCII Art Diagrams
# Box-drawing diagram of the CrawlMachine states (QUEUED, STARTED, SEALED) and
# the tick() conditions between them. pluginmap() attaches it to the 'Crawl'
# entry of model_events and prints it inside a rich Panel.
# NOTE(review): the interior spacing of this diagram looks collapsed in this
# copy of the file -- confirm alignment against the canonical source.
CRAWL_MACHINE_DIAGRAM = """
┌─────────────────────────────────────────────────────────────────────────────┐
│ CrawlMachine │
├─────────────────────────────────────────────────────────────────────────────┤
│ │
│ ┌─────────────┐ │
│ │ QUEUED │◄────────────────┐ │
│ │ (initial) │ │ │
│ └──────┬──────┘ │ │
│ │ │ tick() unless can_start() │
│ │ tick() when │ │
│ │ can_start() │ │
│ ▼ │ │
│ ┌─────────────┐ │ │
│ │ STARTED │─────────────────┘ │
│ │ │◄────────────────┐ │
│ │ enter: │ │ │
│ │ crawl.run()│ │ tick() unless is_finished() │
│ │ (discover │ │ │
│ │ Crawl │─────────────────┘ │
│ │ hooks) │ │
│ └──────┬──────┘ │
│ │ │
│ │ tick() when is_finished() │
│ ▼ │
│ ┌─────────────┐ │
│ │ SEALED │ │
│ │ (final) │ │
│ │ │ │
│ │ enter: │ │
│ │ cleanup() │ │
│ └─────────────┘ │
│ │
│ Hooks triggered: on_Crawl__* (during STARTED.enter via crawl.run()) │
│ on_CrawlEnd__* (during SEALED.enter via cleanup()) │
└─────────────────────────────────────────────────────────────────────────────┘
"""
# Box-drawing diagram of the SnapshotMachine states (QUEUED, STARTED, SEALED)
# and the tick() conditions between them. pluginmap() attaches it to the
# 'Snapshot' entry of model_events and prints it inside a rich Panel.
# NOTE(review): the interior spacing of this diagram looks collapsed in this
# copy of the file -- confirm alignment against the canonical source.
SNAPSHOT_MACHINE_DIAGRAM = """
┌─────────────────────────────────────────────────────────────────────────────┐
│ SnapshotMachine │
├─────────────────────────────────────────────────────────────────────────────┤
│ │
│ ┌─────────────┐ │
│ │ QUEUED │◄────────────────┐ │
│ │ (initial) │ │ │
│ └──────┬──────┘ │ │
│ │ │ tick() unless can_start() │
│ │ tick() when │ │
│ │ can_start() │ │
│ ▼ │ │
│ ┌─────────────┐ │ │
│ │ STARTED │─────────────────┘ │
│ │ │◄────────────────┐ │
│ │ enter: │ │ │
│ │ snapshot │ │ tick() unless is_finished() │
│ │ .run() │ │ │
│ │ (discover │─────────────────┘ │
│ │ Snapshot │ │
│ │ hooks, │ │
│ │ create │ │
│ │ pending │ │
│ │ results) │ │
│ └──────┬──────┘ │
│ │ │
│ │ tick() when is_finished() │
│ ▼ │
│ ┌─────────────┐ │
│ │ SEALED │ │
│ │ (final) │ │
│ │ │ │
│ │ enter: │ │
│ │ cleanup() │ │
│ └─────────────┘ │
│ │
│ Hooks triggered: on_Snapshot__* (creates ArchiveResults in STARTED.enter) │
└─────────────────────────────────────────────────────────────────────────────┘
"""
# Box-drawing diagram of the ArchiveResultMachine states (QUEUED, STARTED,
# SUCCEEDED, FAILED, SKIPPED, BACKOFF). Unlike the other diagrams it is not
# tied to a model_events entry: pluginmap() prints it once, before the
# per-event loop, whenever quiet mode is off.
# NOTE(review): the interior spacing of this diagram looks collapsed in this
# copy of the file -- confirm alignment against the canonical source.
ARCHIVERESULT_MACHINE_DIAGRAM = """
┌─────────────────────────────────────────────────────────────────────────────┐
│ ArchiveResultMachine │
├─────────────────────────────────────────────────────────────────────────────┤
│ │
│ ┌─────────────┐ │
│ │ QUEUED │◄─────────────────┐ │
│ │ (initial) │ │ │
│ └──┬───────┬──┘ │ │
│ │ │ │ tick() unless can_start() │
│ │ │ exceeded_max_ │ │
│ │ │ attempts │ │
│ │ ▼ │ │
│ │ ┌──────────┐ │ │
│ │ │ SKIPPED │ │ │
│ │ │ (final) │ │ │
│ │ └──────────┘ │ │
│ │ tick() when │ │
│ │ can_start() │ │
│ ▼ │ │
│ ┌─────────────┐ │ │
│ │ STARTED │──────────────────┘ │
│ │ │◄─────────────────────────────────────────────────┐ │
│ │ enter: │ │ │ │
│ │ result.run()│ tick() unless │ │ │
│ │ (execute │ is_finished() │ │ │
│ │ hook via │──────────────────────┘ │ │
│ │ run_hook())│ │ │
│ └──────┬──────┘ │ │
│ │ │ │
│ │ tick() checks status set by hook output │ │
│ ├─────────────┬─────────────┬─────────────┐ │ │
│ ▼ ▼ ▼ ▼ │ │
│ ┌───────────┐ ┌───────────┐ ┌───────────┐ ┌───────────┐ │ │
│ │ SUCCEEDED │ │ FAILED │ │ SKIPPED │ │ BACKOFF │ │ │
│ │ (final) │ │ (final) │ │ (final) │ │ │ │ │
│ └───────────┘ └───────────┘ └───────────┘ └──┬──────┬─┘ │ │
│ │ │ │ │
│ exceeded_max_ │ │ can_start()│ │
│ attempts │ │ loops back │ │
│ ▼ │ └────────────┘ │
│ ┌──────────┐ │ │
│ │ SKIPPED │◄─┘ │
│ │ (final) │ │
│ └──────────┘ │
│ │
│ Each ArchiveResult runs ONE specific hook (stored in .hook_name field) │
└─────────────────────────────────────────────────────────────────────────────┘
"""
# Box-drawing diagram of the BinaryMachine states (QUEUED, INSTALLED) and the
# install transition between them. pluginmap() attaches it to the 'Binary'
# entry of model_events and prints it inside a rich Panel.
# NOTE(review): the interior spacing of this diagram looks collapsed in this
# copy of the file -- confirm alignment against the canonical source.
BINARY_MACHINE_DIAGRAM = """
┌─────────────────────────────────────────────────────────────────────────────┐
│ BinaryMachine │
├─────────────────────────────────────────────────────────────────────────────┤
│ │
│ ┌─────────────┐ │
│ │ QUEUED │◄────────────────┐ │
│ │ (initial) │ │ │
│ └──────┬──────┘ │ │
│ │ │ tick() unless can_install() │
│ │ │ (stays queued if failed) │
│ │ tick() when │ │
│ │ can_install() │ │
│ │ │ │
│ │ on_install() runs │ │
│ │ during transition: │ │
│ │ • binary.run() │ │
│ │ (discover Binary │ │
│ │ hooks, try each │ │
│ │ provider until │ │
│ │ one succeeds) │ │
│ │ • Sets abspath, │ │
│ │ version, sha256 │ │
│ │ │ │
│ │ If install fails: │ │
│ │ raises exception──────┘ │
│ │ (retry_at bumped) │
│ │ │
│ ▼ │
│ ┌─────────────┐ │
│ │ INSTALLED │ │
│ │ (final) │ │
│ │ │ │
│ │ Binary is │ │
│ │ ready to │ │
│ │ use │ │
│ └─────────────┘ │
│ │
│ Hooks triggered: on_Binary__* (provider hooks during transition) │
│ Providers tried in sequence until one succeeds: apt, brew, pip, npm, etc. │
│ Installation is synchronous - no intermediate STARTED state │
└─────────────────────────────────────────────────────────────────────────────┘
"""
  177. @enforce_types
  178. def pluginmap(
  179. show_disabled: bool = False,
  180. model: Optional[str] = None,
  181. quiet: bool = False,
  182. ) -> dict:
  183. """
  184. Show a map of all state machines and their associated plugin hooks.
  185. Displays ASCII art diagrams of the core model state machines (Crawl, Snapshot,
  186. ArchiveResult, Binary) and lists all auto-detected on_Modelname_xyz hooks
  187. that will run for each model's transitions.
  188. """
  189. from rich.console import Console
  190. from rich.table import Table
  191. from rich.panel import Panel
  192. from rich import box
  193. from archivebox.hooks import (
  194. discover_hooks,
  195. is_background_hook,
  196. BUILTIN_PLUGINS_DIR,
  197. USER_PLUGINS_DIR,
  198. )
  199. console = Console()
  200. prnt = console.print
  201. # Model event types that can have hooks
  202. model_events = {
  203. 'Crawl': {
  204. 'description': 'Hooks run when a Crawl starts (QUEUED→STARTED)',
  205. 'machine': 'CrawlMachine',
  206. 'diagram': CRAWL_MACHINE_DIAGRAM,
  207. },
  208. 'CrawlEnd': {
  209. 'description': 'Hooks run when a Crawl finishes (STARTED→SEALED)',
  210. 'machine': 'CrawlMachine',
  211. 'diagram': None, # Part of CrawlMachine
  212. },
  213. 'Snapshot': {
  214. 'description': 'Hooks run for each Snapshot (creates ArchiveResults)',
  215. 'machine': 'SnapshotMachine',
  216. 'diagram': SNAPSHOT_MACHINE_DIAGRAM,
  217. },
  218. 'Binary': {
  219. 'description': 'Hooks for installing binary dependencies (providers)',
  220. 'machine': 'BinaryMachine',
  221. 'diagram': BINARY_MACHINE_DIAGRAM,
  222. },
  223. }
  224. # Filter to specific model if requested
  225. if model:
  226. model = model.title()
  227. if model not in model_events:
  228. prnt(f'[red]Error: Unknown model "{model}". Available: {", ".join(model_events.keys())}[/red]')
  229. return {}
  230. model_events = {model: model_events[model]}
  231. result = {
  232. 'models': {},
  233. 'plugins_dir': str(BUILTIN_PLUGINS_DIR),
  234. 'user_plugins_dir': str(USER_PLUGINS_DIR),
  235. }
  236. if not quiet:
  237. prnt()
  238. prnt('[bold cyan]ArchiveBox Plugin Map[/bold cyan]')
  239. prnt(f'[dim]Built-in plugins: {BUILTIN_PLUGINS_DIR}[/dim]')
  240. prnt(f'[dim]User plugins: {USER_PLUGINS_DIR}[/dim]')
  241. prnt()
  242. # Show diagrams first (unless quiet mode)
  243. if not quiet:
  244. # Show ArchiveResult diagram separately since it's different
  245. prnt(Panel(
  246. ARCHIVERESULT_MACHINE_DIAGRAM,
  247. title='[bold green]ArchiveResultMachine[/bold green]',
  248. border_style='green',
  249. expand=False,
  250. ))
  251. prnt()
  252. for event_name, info in model_events.items():
  253. # Discover hooks for this event
  254. hooks = discover_hooks(event_name, filter_disabled=not show_disabled)
  255. # Build hook info list
  256. hook_infos = []
  257. for hook_path in hooks:
  258. # Get plugin name from parent directory (e.g., 'wget' from 'plugins/wget/on_Snapshot__06_wget.bg.py')
  259. plugin_name = hook_path.parent.name
  260. is_bg = is_background_hook(hook_path.name)
  261. hook_infos.append({
  262. 'path': str(hook_path),
  263. 'name': hook_path.name,
  264. 'plugin': plugin_name,
  265. 'is_background': is_bg,
  266. 'extension': hook_path.suffix,
  267. })
  268. result['models'][event_name] = {
  269. 'description': info['description'],
  270. 'machine': info['machine'],
  271. 'hooks': hook_infos,
  272. 'hook_count': len(hook_infos),
  273. }
  274. if not quiet:
  275. # Show diagram if this model has one
  276. if info.get('diagram'):
  277. prnt(Panel(
  278. info['diagram'],
  279. title=f'[bold green]{info["machine"]}[/bold green]',
  280. border_style='green',
  281. expand=False,
  282. ))
  283. prnt()
  284. # Create hooks table
  285. table = Table(
  286. title=f'[bold yellow]on_{event_name}__* Hooks[/bold yellow] ({len(hooks)} found)',
  287. box=box.ROUNDED,
  288. show_header=True,
  289. header_style='bold magenta',
  290. )
  291. table.add_column('Plugin', style='cyan', width=20)
  292. table.add_column('Hook Name', style='green')
  293. table.add_column('BG', justify='center', width=4)
  294. table.add_column('Type', justify='center', width=5)
  295. # Sort lexicographically by hook name
  296. sorted_hooks = sorted(hook_infos, key=lambda h: h['name'])
  297. for hook in sorted_hooks:
  298. bg_marker = '[yellow]bg[/yellow]' if hook['is_background'] else ''
  299. ext = hook['extension'].lstrip('.')
  300. table.add_row(
  301. hook['plugin'],
  302. hook['name'],
  303. bg_marker,
  304. ext,
  305. )
  306. prnt(table)
  307. prnt()
  308. prnt(f'[dim]{info["description"]}[/dim]')
  309. prnt()
  310. # Summary
  311. if not quiet:
  312. total_hooks = sum(m['hook_count'] for m in result['models'].values())
  313. prnt(f'[bold]Total hooks discovered: {total_hooks}[/bold]')
  314. prnt()
  315. prnt('[dim]Hook naming convention: on_{Model}__{XX}_{description}[.bg].{ext}[/dim]')
  316. prnt('[dim] - XX: Two-digit lexicographic order (00-99)[/dim]')
  317. prnt('[dim] - .bg: Background hook (non-blocking)[/dim]')
  318. prnt('[dim] - ext: py, sh, or js[/dim]')
  319. prnt()
  320. return result
  321. @click.command()
  322. @click.option('--show-disabled', '-a', is_flag=True, help='Show hooks from disabled plugins too')
  323. @click.option('--model', '-m', type=str, default=None, help='Filter to specific model (Crawl, Snapshot, Binary, CrawlEnd)')
  324. @click.option('--quiet', '-q', is_flag=True, help='Output JSON only, no ASCII diagrams')
  325. @docstring(pluginmap.__doc__)
  326. def main(**kwargs):
  327. import json
  328. result = pluginmap(**kwargs)
  329. if kwargs.get('quiet'):
  330. print(json.dumps(result, indent=2))
# Invoke the CLI command when this file is executed directly as a script.
if __name__ == '__main__':
    main()