on_Snapshot__62_git.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146
  1. #!/usr/bin/env python3
  2. """
  3. Clone a git repository from a URL.
  4. Usage: on_Snapshot__git.py --url=<url> --snapshot-id=<uuid>
  5. Output: Clones repository into the current working directory ($PWD)
  6. Environment variables:
  7. GIT_BINARY: Path to git binary
  8. GIT_TIMEOUT: Timeout in seconds (default: 120)
  9. GIT_ARGS: Default git arguments (JSON array, default: ["clone", "--depth=1", "--recursive"])
  10. GIT_ARGS_EXTRA: Extra arguments to append (JSON array, default: [])
  11. # Fallback to ARCHIVING_CONFIG values if GIT_* not set:
  12. TIMEOUT: Fallback timeout
  13. """
  14. import json
  15. import os
  16. import subprocess
  17. import sys
  18. from pathlib import Path
  19. import rich_click as click
# Extractor metadata
# NOTE(review): PLUGIN_NAME, BIN_NAME and BIN_PROVIDERS are not referenced
# anywhere else in this file; presumably they are read by the plugin
# framework that discovers this hook -- confirm against the loader.
PLUGIN_NAME = 'git'
BIN_NAME = 'git'
BIN_PROVIDERS = 'apt,brew,env'
# Destination argument handed to `git clone` by clone_git(); '.' means the
# process's current working directory. Also returned as the output path on
# success.
OUTPUT_DIR = '.'
  25. def get_env(name: str, default: str = '') -> str:
  26. return os.environ.get(name, default).strip()
  27. def get_env_int(name: str, default: int = 0) -> int:
  28. try:
  29. return int(get_env(name, str(default)))
  30. except ValueError:
  31. return default
  32. def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
  33. """Parse a JSON array from environment variable."""
  34. val = get_env(name, '')
  35. if not val:
  36. return default if default is not None else []
  37. try:
  38. result = json.loads(val)
  39. if isinstance(result, list):
  40. return [str(item) for item in result]
  41. return default if default is not None else []
  42. except json.JSONDecodeError:
  43. return default if default is not None else []
  44. def is_git_url(url: str) -> bool:
  45. """Check if URL looks like a git repository."""
  46. git_patterns = [
  47. '.git',
  48. 'github.com',
  49. 'gitlab.com',
  50. 'bitbucket.org',
  51. 'git://',
  52. 'ssh://git@',
  53. ]
  54. return any(p in url.lower() for p in git_patterns)
  55. def clone_git(url: str, binary: str) -> tuple[bool, str | None, str]:
  56. """
  57. Clone git repository.
  58. Returns: (success, output_path, error_message)
  59. """
  60. timeout = get_env_int('GIT_TIMEOUT') or get_env_int('TIMEOUT', 120)
  61. git_args = get_env_array('GIT_ARGS', ["clone", "--depth=1", "--recursive"])
  62. git_args_extra = get_env_array('GIT_ARGS_EXTRA', [])
  63. cmd = [binary, *git_args, *git_args_extra, url, OUTPUT_DIR]
  64. try:
  65. result = subprocess.run(cmd, capture_output=True, timeout=timeout)
  66. if result.returncode == 0 and Path(OUTPUT_DIR).is_dir():
  67. return True, OUTPUT_DIR, ''
  68. else:
  69. stderr = result.stderr.decode('utf-8', errors='replace')
  70. return False, None, f'git clone failed: {stderr[:200]}'
  71. except subprocess.TimeoutExpired:
  72. return False, None, f'Timed out after {timeout} seconds'
  73. except Exception as e:
  74. return False, None, f'{type(e).__name__}: {e}'
  75. @click.command()
  76. @click.option('--url', required=True, help='Git repository URL')
  77. @click.option('--snapshot-id', required=True, help='Snapshot UUID')
  78. def main(url: str, snapshot_id: str):
  79. """Clone a git repository from a URL."""
  80. output = None
  81. status = 'failed'
  82. error = ''
  83. try:
  84. # Check if URL looks like a git repo
  85. if not is_git_url(url):
  86. print(f'Skipping git clone for non-git URL: {url}', file=sys.stderr)
  87. print(json.dumps({
  88. 'type': 'ArchiveResult',
  89. 'status': 'skipped',
  90. 'output_str': 'Not a git URL',
  91. }))
  92. sys.exit(0)
  93. # Get binary from environment
  94. binary = get_env('GIT_BINARY', 'git')
  95. # Run extraction
  96. success, output, error = clone_git(url, binary)
  97. status = 'succeeded' if success else 'failed'
  98. except Exception as e:
  99. error = f'{type(e).__name__}: {e}'
  100. status = 'failed'
  101. if error:
  102. print(f'ERROR: {error}', file=sys.stderr)
  103. # Output clean JSONL (no RESULT_JSON= prefix)
  104. result = {
  105. 'type': 'ArchiveResult',
  106. 'status': status,
  107. 'output_str': output or error or '',
  108. }
  109. print(json.dumps(result))
  110. sys.exit(0 if status == 'succeeded' else 1)
# Script entry point: invoke the click command only when this file is
# executed directly, not when it is imported.
if __name__ == '__main__':
    main()