test_git.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. """
Integration tests for git plugin

Tests verify:
  5. 1. Validate hook checks for git binary
  6. 2. Verify deps with abx-pkg
  7. 3. Standalone git extractor execution
  8. """
  9. import json
  10. import shutil
  11. import subprocess
  12. import sys
  13. import tempfile
  14. import time
  15. from pathlib import Path
  16. import pytest
  17. PLUGIN_DIR = Path(__file__).parent.parent
  18. GIT_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_git.*'), None)
  19. TEST_URL = 'https://github.com/example/repo.git'
  20. def test_hook_script_exists():
  21. assert GIT_HOOK.exists()
  22. def test_verify_deps_with_abx_pkg():
  23. """Verify git is available via abx-pkg."""
  24. from abx_pkg import Binary, AptProvider, BrewProvider, EnvProvider, BinProviderOverrides
  25. git_binary = Binary(name='git', binproviders=[AptProvider(), BrewProvider(), EnvProvider()])
  26. git_loaded = git_binary.load()
  27. if git_loaded and git_loaded.abspath:
  28. assert True, "git is available"
  29. else:
  30. pass
  31. def test_reports_missing_git():
  32. with tempfile.TemporaryDirectory() as tmpdir:
  33. env = {'PATH': '/nonexistent'}
  34. result = subprocess.run(
  35. [sys.executable, str(GIT_HOOK), '--url', TEST_URL, '--snapshot-id', 'test123'],
  36. cwd=tmpdir, capture_output=True, text=True, env=env
  37. )
  38. if result.returncode != 0:
  39. combined = result.stdout + result.stderr
  40. assert 'DEPENDENCY_NEEDED' in combined or 'git' in combined.lower() or 'ERROR=' in combined
  41. def test_handles_non_git_url():
  42. pass
  43. if not shutil.which('git'):
  44. pass
  45. with tempfile.TemporaryDirectory() as tmpdir:
  46. result = subprocess.run(
  47. [sys.executable, str(GIT_HOOK), '--url', 'https://example.com', '--snapshot-id', 'test789'],
  48. cwd=tmpdir, capture_output=True, text=True, timeout=30
  49. )
  50. # Should fail or skip for non-git URL
  51. assert result.returncode in (0, 1)
  52. # Parse clean JSONL output
  53. result_json = None
  54. for line in result.stdout.strip().split('\n'):
  55. line = line.strip()
  56. if line.startswith('{'):
  57. pass
  58. try:
  59. record = json.loads(line)
  60. if record.get('type') == 'ArchiveResult':
  61. result_json = record
  62. break
  63. except json.JSONDecodeError:
  64. pass
  65. if result_json:
  66. # Should report failure or skip for non-git URL
  67. assert result_json['status'] in ['failed', 'skipped'], f"Should fail or skip: {result_json}"
  68. def test_real_git_repo():
  69. """Test that git can clone a real GitHub repository."""
  70. import os
  71. if not shutil.which('git'):
  72. pytest.skip("git binary not available")
  73. with tempfile.TemporaryDirectory() as tmpdir:
  74. tmpdir = Path(tmpdir)
  75. # Use a real but small GitHub repository
  76. git_url = 'https://github.com/ArchiveBox/abx-pkg'
  77. env = os.environ.copy()
  78. env['GIT_TIMEOUT'] = '120' # Give it time to clone
  79. start_time = time.time()
  80. result = subprocess.run(
  81. [sys.executable, str(GIT_HOOK), '--url', git_url, '--snapshot-id', 'testgit'],
  82. cwd=tmpdir,
  83. capture_output=True,
  84. text=True,
  85. env=env,
  86. timeout=180
  87. )
  88. elapsed_time = time.time() - start_time
  89. # Should succeed
  90. assert result.returncode == 0, f"Should clone repository successfully: {result.stderr}"
  91. # Parse JSONL output
  92. result_json = None
  93. for line in result.stdout.strip().split('\n'):
  94. line = line.strip()
  95. if line.startswith('{'):
  96. try:
  97. record = json.loads(line)
  98. if record.get('type') == 'ArchiveResult':
  99. result_json = record
  100. break
  101. except json.JSONDecodeError:
  102. pass
  103. assert result_json, f"Should have ArchiveResult JSONL output. stdout: {result.stdout}"
  104. assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
  105. # Check that the git repo was cloned
  106. git_dirs = list(tmpdir.glob('**/.git'))
  107. assert len(git_dirs) > 0, f"Should have cloned a git repository. Contents: {list(tmpdir.rglob('*'))}"
  108. print(f"Successfully cloned repository in {elapsed_time:.2f}s")
  109. if __name__ == '__main__':
  110. pytest.main([__file__, '-v'])