archivebox_process.py 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  1. #!/usr/bin/env python3
"""
archivebox process <action> [--filters]

Manage Process records (system-managed, mostly read-only).

Process records track executions of binaries during extraction.
They are created automatically by the system and are primarily for debugging.

Actions:
    list - List Processes as JSONL (with optional filters)

Examples:
    # List all processes
    archivebox process list

    # List processes by binary
    archivebox process list --binary-name=chrome

    # List recent processes
    archivebox process list --limit=10
"""
# Package this module belongs to, pinned explicitly.
# NOTE(review): presumably so package-relative imports resolve when the file is run directly - confirm.
__package__ = 'archivebox.cli'
# CLI invocation string for this module.
# NOTE(review): not read anywhere in this file; presumably consumed by the CLI loader - verify.
__command__ = 'archivebox process'
  19. import sys
  20. from typing import Optional
  21. import rich_click as click
  22. from rich import print as rprint
  23. from archivebox.cli.cli_utils import apply_filters
  24. # =============================================================================
  25. # LIST
  26. # =============================================================================
  27. def list_processes(
  28. binary_name: Optional[str] = None,
  29. machine_id: Optional[str] = None,
  30. limit: Optional[int] = None,
  31. ) -> int:
  32. """
  33. List Processes as JSONL with optional filters.
  34. Exit codes:
  35. 0: Success (even if no results)
  36. """
  37. from archivebox.misc.jsonl import write_record
  38. from archivebox.machine.models import Process
  39. is_tty = sys.stdout.isatty()
  40. queryset = Process.objects.all().select_related('binary', 'machine').order_by('-start_ts')
  41. # Apply filters
  42. filter_kwargs = {}
  43. if binary_name:
  44. filter_kwargs['binary__name'] = binary_name
  45. if machine_id:
  46. filter_kwargs['machine_id'] = machine_id
  47. queryset = apply_filters(queryset, filter_kwargs, limit=limit)
  48. count = 0
  49. for process in queryset:
  50. if is_tty:
  51. binary_name_str = process.binary.name if process.binary else 'unknown'
  52. exit_code = process.returncode if process.returncode is not None else '?'
  53. status_color = 'green' if process.returncode == 0 else 'red' if process.returncode else 'yellow'
  54. rprint(f'[{status_color}]exit={exit_code:3}[/{status_color}] [cyan]{binary_name_str:15}[/cyan] [dim]{process.id}[/dim]')
  55. else:
  56. write_record(process.to_json())
  57. count += 1
  58. rprint(f'[dim]Listed {count} processes[/dim]', file=sys.stderr)
  59. return 0
  60. # =============================================================================
  61. # CLI Commands
  62. # =============================================================================
  63. @click.group()
  64. def main():
  65. """Manage Process records (read-only, system-managed)."""
  66. pass
  67. @main.command('list')
  68. @click.option('--binary-name', '-b', help='Filter by binary name')
  69. @click.option('--machine-id', '-m', help='Filter by machine ID')
  70. @click.option('--limit', '-n', type=int, help='Limit number of results')
  71. def list_cmd(binary_name: Optional[str], machine_id: Optional[str], limit: Optional[int]):
  72. """List Processes as JSONL."""
  73. sys.exit(list_processes(
  74. binary_name=binary_name,
  75. machine_id=machine_id,
  76. limit=limit,
  77. ))
# Run the click group only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()