archivebox_install.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. #!/usr/bin/env python3
  2. __package__ = 'archivebox.cli'
  3. import os
  4. import sys
  5. import shutil
  6. import rich_click as click
  7. from rich import print
  8. from archivebox.misc.util import docstring, enforce_types
  @enforce_types
  def install(dry_run: bool=False) -> None:
      """Detect and install ArchiveBox dependencies by running a dependency-check crawl"""
      # NOTE: the docstring above is passed to @docstring(...) on main() below, so it is
      # kept to a single line; further documentation lives in comments.
      #
      # Steps:
      #   1. Run init() if ARCHIVE_DIR is not a readable directory.
      #   2. Warn when running as root.
      #   3. With dry_run=True: print what would happen and return before touching Django.
      #   4. Create (or re-queue) the 'archivebox://install' crawl and run the orchestrator
      #      until it is idle.
      #   5. Remind the user to create an admin account if there is no superuser other than
      #      'system', then run `archivebox version`.

      # subprocess is only needed for the final status call; imported locally like the
      # other function-scope imports here.
      import subprocess

      from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
      from archivebox.config.paths import ARCHIVE_DIR
      from archivebox.misc.logging import stderr
      from archivebox.cli.archivebox_init import init

      if not (os.access(ARCHIVE_DIR, os.R_OK) and ARCHIVE_DIR.is_dir()):
          init()  # must init full index because we need a db to store Binary entries in

      print('\n[green][+] Detecting ArchiveBox dependencies...[/green]')

      if IS_ROOT:
          euid = os.geteuid()
          print()
          print(f'[yellow]:warning: Running as UID=[blue]{euid}[/blue].[/yellow]')
          print(f' DATA_DIR will be owned by [blue]{ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/blue].')
          print()

      if dry_run:
          print('[dim]Dry run - would create a crawl to detect dependencies[/dim]')
          return

      # Set up Django (must happen before any model import below)
      from archivebox.config.django import setup_django
      setup_django()

      from django.utils import timezone
      from archivebox.crawls.models import Crawl
      from archivebox.base_models.models import get_or_create_system_user_pk

      # Create a crawl for dependency detection
      # Using a minimal crawl that will trigger on_Crawl hooks
      created_by_id = get_or_create_system_user_pk()
      crawl, created = Crawl.objects.get_or_create(
          urls='archivebox://install',
          defaults={
              'label': 'Dependency detection',
              'created_by_id': created_by_id,
              'max_depth': 0,
              'status': 'queued',
          },
      )

      # If crawl already existed, reset it to queued state so it can be processed again
      if not created:
          crawl.status = 'queued'
          crawl.retry_at = timezone.now()
          crawl.save()

      print(f'[+] Created dependency detection crawl: {crawl.id}')
      print(f'[+] Crawl status: {crawl.status}, retry_at: {crawl.retry_at}')

      # Verify the crawl is in the queue. The queryset is evaluated once into a list so
      # the count and the per-crawl listing come from the same single query.
      queued_crawls = list(
          Crawl.objects.filter(
              retry_at__lte=timezone.now(),
          ).exclude(
              status__in=Crawl.FINAL_STATES,
          )
      )
      print(f'[+] Crawls in queue: {len(queued_crawls)}')
      for c in queued_crawls:
          print(f' - Crawl {c.id}: status={c.status}, retry_at={c.retry_at}')

      print('[+] Running crawl to detect binaries via on_Crawl hooks...')
      print()

      # Run the crawl synchronously (this triggers on_Crawl hooks)
      from archivebox.workers.orchestrator import Orchestrator
      orchestrator = Orchestrator(exit_on_idle=True)
      orchestrator.runloop()

      print()

      # Check for superuser
      from django.contrib.auth import get_user_model
      User = get_user_model()

      if not User.objects.filter(is_superuser=True).exclude(username='system').exists():
          stderr('\n[+] Don\'t forget to create a new admin user for the Web UI...', color='green')
          stderr(' archivebox manage createsuperuser')

      print()

      # Run version to show full status.
      # The command is built as an argv list (no shell) so that interpreter/executable
      # paths containing spaces or shell metacharacters are passed through intact.
      archivebox_path = shutil.which('archivebox') or sys.executable
      if 'python' in archivebox_path:
          version_cmd = [sys.executable, '-m', 'archivebox', 'version']
      else:
          version_cmd = [archivebox_path, 'version']

      try:
          # The exit status is deliberately ignored: this is an informational step only.
          subprocess.run(version_cmd, check=False)
      except OSError as err:
          # Keep this step best-effort: a missing/non-executable binary must not crash install.
          stderr(f'[!] Could not run `archivebox version`: {err}')
  @click.command()
  @click.option(
      '--dry-run',
      '-d',
      is_flag=True,
      default=False,
      help='Show what would happen without actually running',
  )
  @docstring(install.__doc__)
  def main(**kwargs) -> None:
      # CLI entry point: click parses the options declared above and they are
      # forwarded unchanged to install(). The @docstring decorator is given
      # install's docstring (presumably to reuse it as this command's help text).
      install(**kwargs)


  # Allow the module to be executed directly as a script.
  if __name__ == '__main__':
      main()