# archivebox_extract.py
  1. #!/usr/bin/env python3
  2. __package__ = 'archivebox.cli'
  3. __command__ = 'archivebox extract'
  4. import sys
  5. from typing import TYPE_CHECKING, Generator
  6. import rich_click as click
  7. from django.db.models import Q
  8. from archivebox.misc.util import enforce_types, docstring
  9. if TYPE_CHECKING:
  10. from core.models import ArchiveResult
  11. ORCHESTRATOR = None
  12. @enforce_types
  13. def extract(archiveresult_id: str) -> Generator['ArchiveResult', None, None]:
  14. archiveresult = ArchiveResult.objects.get(Q(id=archiveresult_id) | Q(abid=archiveresult_id))
  15. if not archiveresult:
  16. raise Exception(f'ArchiveResult {archiveresult_id} not found')
  17. return archiveresult.EXTRACTOR.extract()
  18. # <user>@<machine_id>#<datetime>/absolute/path/to/binary
  19. # 2014.24.01
  20. @click.command()
  21. @click.argument('archiveresult_ids', nargs=-1, type=str)
  22. @docstring(extract.__doc__)
  23. def main(archiveresult_ids: list[str]):
  24. """Add a new URL or list of URLs to your archive"""
  25. for archiveresult_id in (archiveresult_ids or sys.stdin):
  26. print(f'Extracting {archiveresult_id}...')
  27. archiveresult = extract(str(archiveresult_id))
  28. print(archiveresult.as_json())
# Script entry point: invoke the click command when this module is executed directly.
if __name__ == '__main__':
    main()