test_archivedotorg.py 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
  1. """
  2. Integration tests for archivedotorg plugin
  3. Tests verify standalone archive.org extractor execution.
  4. """
  5. import json
  6. import subprocess
  7. import sys
  8. import tempfile
  9. from pathlib import Path
  10. import pytest
  11. PLUGIN_DIR = Path(__file__).parent.parent
  12. ARCHIVEDOTORG_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_archivedotorg.*'), None)
  13. TEST_URL = 'https://example.com'
  14. def test_hook_script_exists():
  15. assert ARCHIVEDOTORG_HOOK.exists()
  16. def test_submits_to_archivedotorg():
  17. with tempfile.TemporaryDirectory() as tmpdir:
  18. result = subprocess.run(
  19. [sys.executable, str(ARCHIVEDOTORG_HOOK), '--url', TEST_URL, '--snapshot-id', 'test789'],
  20. cwd=tmpdir, capture_output=True, text=True, timeout=60
  21. )
  22. assert result.returncode in (0, 1)
  23. # Parse clean JSONL output
  24. result_json = None
  25. for line in result.stdout.strip().split('\n'):
  26. line = line.strip()
  27. if line.startswith('{'):
  28. try:
  29. record = json.loads(line)
  30. if record.get('type') == 'ArchiveResult':
  31. result_json = record
  32. break
  33. except json.JSONDecodeError:
  34. pass
  35. if result.returncode == 0:
  36. # Success - should have ArchiveResult
  37. assert result_json, "Should have ArchiveResult JSONL output on success"
  38. assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
  39. else:
  40. # Transient error - no JSONL output, just stderr
  41. assert not result_json, "Should NOT emit JSONL on transient error"
  42. assert result.stderr, "Should have error message in stderr"
  43. def test_config_save_archivedotorg_false_skips():
  44. with tempfile.TemporaryDirectory() as tmpdir:
  45. import os
  46. env = os.environ.copy()
  47. env['ARCHIVEDOTORG_ENABLED'] = 'False'
  48. result = subprocess.run(
  49. [sys.executable, str(ARCHIVEDOTORG_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999'],
  50. cwd=tmpdir, capture_output=True, text=True, env=env, timeout=30
  51. )
  52. assert result.returncode == 0, f"Should exit 0 when feature disabled: {result.stderr}"
  53. # Feature disabled - temporary failure, should NOT emit JSONL
  54. assert 'Skipping' in result.stderr or 'False' in result.stderr, "Should log skip reason to stderr"
  55. # Should NOT emit any JSONL
  56. jsonl_lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
  57. assert len(jsonl_lines) == 0, f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
  58. def test_handles_timeout():
  59. with tempfile.TemporaryDirectory() as tmpdir:
  60. import os
  61. env = os.environ.copy()
  62. env['TIMEOUT'] = '1'
  63. result = subprocess.run(
  64. [sys.executable, str(ARCHIVEDOTORG_HOOK), '--url', TEST_URL, '--snapshot-id', 'testtimeout'],
  65. cwd=tmpdir, capture_output=True, text=True, env=env, timeout=30
  66. )
  67. # Timeout is a transient error - should exit 1 with no JSONL
  68. assert result.returncode in (0, 1), "Should complete without hanging"
  69. # If it timed out (exit 1), should have no JSONL output
  70. if result.returncode == 1:
  71. jsonl_lines = [line for line in result.stdout.strip().split('\n')
  72. if line.strip().startswith('{')]
  73. assert len(jsonl_lines) == 0, "Should not emit JSONL on timeout (transient error)"
  74. if __name__ == '__main__':
  75. pytest.main([__file__, '-v'])