test_cli_extract.py 1.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566
  1. #!/usr/bin/env python3
  2. """
  3. Tests for archivebox extract command.
  4. Verify extract re-runs extractors on existing snapshots.
  5. """
  6. import os
  7. import subprocess
  8. import sqlite3
  9. from .fixtures import *
  def test_extract_runs_on_existing_snapshots(tmp_path, process, disable_extractors_dict):
      """Check that `archivebox extract` finishes when a snapshot is already indexed.

      The test works inside ``tmp_path``, adds one index-only snapshot for
      https://example.com, then invokes ``archivebox extract`` with the
      environment supplied by ``disable_extractors_dict``. Exit codes 0 and 1
      are both accepted; the extract call is bounded by a 30 second timeout.
      """
      os.chdir(tmp_path)

      # Seed the collection with a single snapshot (index only, no crawling).
      seed_cmd = ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com']
      subprocess.run(seed_cmd, capture_output=True, env=disable_extractors_dict)

      # Invoke extract against whatever is currently in the index.
      outcome = subprocess.run(
          ['archivebox', 'extract'],
          env=disable_extractors_dict,
          capture_output=True,
          timeout=30,
      )

      # The command only has to terminate with one of the tolerated statuses.
      assert outcome.returncode in (0, 1)
  def _count_snapshots(db_path="index.sqlite3"):
      """Return the number of rows in the ``core_snapshot`` table of *db_path*.

      The connection is always closed, even when the query raises (for
      example because the table does not exist), so a failing test does not
      leave an open handle on the database file.
      """
      conn = sqlite3.connect(db_path)
      try:
          return conn.execute("SELECT COUNT(*) FROM core_snapshot").fetchone()[0]
      finally:
          # sqlite3 connections are not closed by garbage collection in a
          # predictable way, so release the handle explicitly.
          conn.close()


  def test_extract_preserves_snapshot_count(tmp_path, process, disable_extractors_dict):
      """Test that extract doesn't change snapshot count.

      Works inside ``tmp_path``: adds one index-only snapshot, records the
      row count of ``core_snapshot``, runs ``archivebox extract --overwrite``
      (bounded by a 30 second timeout) and asserts the row count is unchanged.
      """
      os.chdir(tmp_path)

      # Add snapshot
      subprocess.run(
          ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
          capture_output=True,
          env=disable_extractors_dict,
      )

      # "index.sqlite3" is resolved relative to tmp_path because of the chdir above.
      count_before = _count_snapshots()

      # Run extract
      subprocess.run(
          ['archivebox', 'extract', '--overwrite'],
          capture_output=True,
          env=disable_extractors_dict,
          timeout=30,
      )

      count_after = _count_snapshots()

      assert count_after == count_before