on_Crawl__00_npm_install.py 1.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
#!/usr/bin/env python3
"""
Emit node/npm Binary dependencies for the crawl.

This hook runs early in the Crawl lifecycle so node/npm are installed
before any npm-based extractors (e.g., puppeteer) run.
"""
import json
import os
import sys
  10. def get_env(name: str, default: str = '') -> str:
  11. return os.environ.get(name, default).strip()
  12. def output_binary(name: str, binproviders: str, overrides: dict | None = None) -> None:
  13. machine_id = os.environ.get('MACHINE_ID', '')
  14. record = {
  15. 'type': 'Binary',
  16. 'name': name,
  17. 'binproviders': binproviders,
  18. 'machine_id': machine_id,
  19. }
  20. if overrides:
  21. record['overrides'] = overrides
  22. print(json.dumps(record))
  23. def main() -> None:
  24. output_binary(
  25. name='node',
  26. binproviders='apt,brew,env',
  27. overrides={'apt': {'packages': ['nodejs']}},
  28. )
  29. output_binary(
  30. name='npm',
  31. binproviders='apt,brew,env',
  32. overrides={
  33. 'apt': {'packages': ['nodejs', 'npm']},
  34. 'brew': {'packages': ['node']},
  35. },
  36. )
  37. sys.exit(0)
  38. if __name__ == '__main__':
  39. main()