test_git.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. """
Integration tests for the git plugin.

Tests verify:
1. The validate hook checks for the git binary
2. Dependencies are verified with abx-pkg
3. Standalone git extractor execution
  8. """
  9. import json
  10. import shutil
  11. import subprocess
  12. import sys
  13. import tempfile
  14. import time
  15. from pathlib import Path
  16. import pytest
# Parent of the directory that contains this test file (presumably the plugin root).
PLUGIN_DIR = Path(__file__).parent.parent
# First entry in PLUGIN_DIR matching the git snapshot-hook filename pattern,
# or None when nothing matches.
GIT_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_git.*'), None)
# Repository URL passed to the hook by test_reports_missing_git.
TEST_URL = 'https://github.com/ArchiveBox/abx-pkg.git'
  20. def test_hook_script_exists():
  21. assert GIT_HOOK.exists()
  22. def test_verify_deps_with_abx_pkg():
  23. """Verify git is available via abx-pkg."""
  24. from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider, BinProviderOverrides
  25. git_binary = Binary(name='git', binproviders=[AptProvider(), BrewProvider(), EnvProvider()])
  26. git_loaded = git_binary.load()
  27. assert git_loaded and git_loaded.abspath, "git is required for git plugin tests"
  28. def test_reports_missing_git():
  29. with tempfile.TemporaryDirectory() as tmpdir:
  30. env = {'PATH': '/nonexistent'}
  31. result = subprocess.run(
  32. [sys.executable, str(GIT_HOOK), '--url', TEST_URL, '--snapshot-id', 'test123'],
  33. cwd=tmpdir, capture_output=True, text=True, env=env
  34. )
  35. if result.returncode != 0:
  36. combined = result.stdout + result.stderr
  37. assert 'DEPENDENCY_NEEDED' in combined or 'git' in combined.lower() or 'ERROR=' in combined
  38. def test_handles_non_git_url():
  39. assert shutil.which('git'), "git binary not available"
  40. with tempfile.TemporaryDirectory() as tmpdir:
  41. result = subprocess.run(
  42. [sys.executable, str(GIT_HOOK), '--url', 'https://example.com', '--snapshot-id', 'test789'],
  43. cwd=tmpdir, capture_output=True, text=True, timeout=30
  44. )
  45. # Should fail or skip for non-git URL
  46. assert result.returncode in (0, 1)
  47. # Parse clean JSONL output
  48. result_json = None
  49. for line in result.stdout.strip().split('\n'):
  50. line = line.strip()
  51. if line.startswith('{'):
  52. pass
  53. try:
  54. record = json.loads(line)
  55. if record.get('type') == 'ArchiveResult':
  56. result_json = record
  57. break
  58. except json.JSONDecodeError:
  59. pass
  60. if result_json:
  61. # Should report failure or skip for non-git URL
  62. assert result_json['status'] in ['failed', 'skipped'], f"Should fail or skip: {result_json}"
  63. def test_real_git_repo():
  64. """Test that git can clone a real GitHub repository."""
  65. import os
  66. assert shutil.which('git'), "git binary not available"
  67. with tempfile.TemporaryDirectory() as tmpdir:
  68. tmpdir = Path(tmpdir)
  69. # Use a real but small GitHub repository
  70. git_url = 'https://github.com/ArchiveBox/abx-pkg'
  71. env = os.environ.copy()
  72. env['GIT_TIMEOUT'] = '120' # Give it time to clone
  73. start_time = time.time()
  74. result = subprocess.run(
  75. [sys.executable, str(GIT_HOOK), '--url', git_url, '--snapshot-id', 'testgit'],
  76. cwd=tmpdir,
  77. capture_output=True,
  78. text=True,
  79. env=env,
  80. timeout=180
  81. )
  82. elapsed_time = time.time() - start_time
  83. # Should succeed
  84. assert result.returncode == 0, f"Should clone repository successfully: {result.stderr}"
  85. # Parse JSONL output
  86. result_json = None
  87. for line in result.stdout.strip().split('\n'):
  88. line = line.strip()
  89. if line.startswith('{'):
  90. try:
  91. record = json.loads(line)
  92. if record.get('type') == 'ArchiveResult':
  93. result_json = record
  94. break
  95. except json.JSONDecodeError:
  96. pass
  97. assert result_json, f"Should have ArchiveResult JSONL output. stdout: {result.stdout}"
  98. assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
  99. # Check that the git repo was cloned
  100. git_dirs = list(tmpdir.glob('**/.git'))
  101. assert len(git_dirs) > 0, f"Should have cloned a git repository. Contents: {list(tmpdir.rglob('*'))}"
  102. print(f"Successfully cloned repository in {elapsed_time:.2f}s")
# When this file is executed directly as a script, run its tests through
# pytest in verbose mode.
if __name__ == '__main__':
    pytest.main([__file__, '-v'])