on_Snapshot__05_git.bg.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. #!/usr/bin/env python3
  2. """
  3. Clone a git repository from a URL.
  4. Usage: on_Snapshot__05_git.bg.py --url=<url> --snapshot-id=<uuid>
  5. Output: Clones repository into the current working directory ($PWD)
  6. Environment variables:
  7. GIT_BINARY: Path to git binary
  8. GIT_TIMEOUT: Timeout in seconds (default: 120)
  9. GIT_ARGS: Default git arguments (JSON array, default: ["clone", "--depth=1", "--recursive"])
  10. GIT_ARGS_EXTRA: Extra arguments to append (JSON array, default: [])
  11. # Fallback to ARCHIVING_CONFIG values if GIT_* not set:
  12. TIMEOUT: Fallback timeout
  13. """
  14. import json
  15. import os
  16. import subprocess
  17. import sys
  18. from pathlib import Path
  19. import rich_click as click
  # Extractor metadata
  # NOTE(review): PLUGIN_NAME, BIN_NAME and BIN_PROVIDERS are not referenced
  # anywhere else in the visible file; presumably they are read by whatever
  # loads this plugin -- confirm before renaming or removing them.
  PLUGIN_NAME = 'git'
  BIN_NAME = 'git'
  BIN_PROVIDERS = 'apt,brew,env'
  # Clone destination: clone_git() passes this to git as the final argument and
  # returns it as the output path on success. '.' is the current working directory.
  OUTPUT_DIR = '.'
  def get_env(name: str, default: str = '') -> str:
      """Return environment variable *name* with surrounding whitespace removed.

      When the variable is unset, *default* is used instead (and is stripped
      the same way).
      """
      raw_value = os.getenv(name, default)
      return raw_value.strip()
  def get_env_int(name: str, default: int = 0) -> int:
      """Return environment variable *name* parsed as an integer.

      *default* is returned when the variable is unset or its value is not a
      valid integer literal (including an empty string).
      """
      try:
          # Lookup and conversion both stay inside the try so that any
          # ValueError raised along the way falls back to the default.
          parsed = int(get_env(name, str(default)))
      except ValueError:
          parsed = default
      return parsed
  def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
      """Parse a JSON array from environment variable.

      Every element of the decoded array is converted with ``str()``. The
      fallback (*default* if given, otherwise an empty list) is returned when
      the variable is unset or empty, is not valid JSON, or decodes to
      something other than a list.
      """
      fallback = default if default is not None else []

      raw = get_env(name, '')
      if not raw:
          return fallback

      try:
          decoded = json.loads(raw)
      except json.JSONDecodeError:
          return fallback

      if not isinstance(decoded, list):
          return fallback
      return list(map(str, decoded))
  def is_git_url(url: str) -> bool:
      """Check if URL looks like a git repository.

      This is a case-insensitive substring heuristic: the URL matches as soon
      as it contains any of the known markers anywhere in the string.
      """
      lowered = url.lower()
      markers = (
          '.git',
          'github.com',
          'gitlab.com',
          'bitbucket.org',
          'git://',
          'ssh://git@',
      )
      for marker in markers:
          if marker in lowered:
              return True
      return False
  def clone_git(url: str, binary: str) -> tuple[bool, str | None, str]:
      """
      Clone git repository.

      Runs ``<binary> <GIT_ARGS...> <GIT_ARGS_EXTRA...> -- <url> <OUTPUT_DIR>``.
      The timeout comes from GIT_TIMEOUT, falling back to TIMEOUT and then to
      120 seconds. Git's stdout/stderr are not captured; they go to this
      process's own streams.

      Args:
          url: Repository URL to clone (treated as untrusted input).
          binary: Path or name of the git executable.

      Returns: (success, output_path, error_message)
          ``output_path`` is OUTPUT_DIR on success and None on failure;
          ``error_message`` is '' on success. Failures (non-zero exit, timeout,
          or any other exception) are reported through the tuple, not raised.
      """
      timeout = get_env_int('GIT_TIMEOUT') or get_env_int('TIMEOUT', 120)
      git_args = get_env_array('GIT_ARGS', ["clone", "--depth=1", "--recursive"])
      git_args_extra = get_env_array('GIT_ARGS_EXTRA', [])

      options = [*git_args, *git_args_extra]
      # End option parsing before the positionals so a URL beginning with '-'
      # cannot be interpreted by git as a command-line option (argument
      # injection). Skipped if the configured args already supply the separator.
      if '--' not in options:
          options.append('--')
      cmd = [binary, *options, url, OUTPUT_DIR]

      try:
          result = subprocess.run(cmd, timeout=timeout)
          if result.returncode == 0 and Path(OUTPUT_DIR).is_dir():
              return True, OUTPUT_DIR, ''
          return False, None, f'git clone failed (exit={result.returncode})'
      except subprocess.TimeoutExpired:
          return False, None, f'Timed out after {timeout} seconds'
      except Exception as e:
          # Best-effort boundary: e.g. a missing binary (FileNotFoundError) is
          # reported to the caller as a failed result instead of crashing.
          return False, None, f'{type(e).__name__}: {e}'
  @click.command()
  @click.option('--url', required=True, help='Git repository URL')
  @click.option('--snapshot-id', required=True, help='Snapshot UUID')
  def main(url: str, snapshot_id: str):
      """Clone a git repository from a URL."""
      # NOTE: the docstring above doubles as the CLI help text, so the
      # details live in comments instead.
      #
      # Emits exactly one JSON line on stdout describing the outcome:
      #   - non-git URL   -> status 'skipped',   exit code 0
      #   - clone success -> status 'succeeded', exit code 0
      #   - anything else -> status 'failed',    exit code 1
      # Diagnostics go to stderr. snapshot_id is accepted but not used here.
      cloned_path = None
      failure = ''
      succeeded = False

      try:
          if is_git_url(url):
              # Binary location comes from the environment, defaulting to 'git'.
              git_binary = get_env('GIT_BINARY', 'git')
              succeeded, cloned_path, failure = clone_git(url, git_binary)
          else:
              print(f'Skipping git clone for non-git URL: {url}', file=sys.stderr)
              skipped_record = {
                  'type': 'ArchiveResult',
                  'status': 'skipped',
                  'output_str': 'Not a git URL',
              }
              print(json.dumps(skipped_record))
              # SystemExit is not an Exception subclass, so it passes through
              # the handler below and ends the process immediately.
              sys.exit(0)
      except Exception as exc:
          failure = f'{type(exc).__name__}: {exc}'
          succeeded = False

      if failure:
          print(f'ERROR: {failure}', file=sys.stderr)

      # Output clean JSONL (no RESULT_JSON= prefix)
      record = {
          'type': 'ArchiveResult',
          'status': 'succeeded' if succeeded else 'failed',
          'output_str': cloned_path or failure or '',
      }
      print(json.dumps(record))
      sys.exit(0 if succeeded else 1)


  if __name__ == '__main__':
      main()