| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253 |
- #!/usr/bin/env python3
- """
- Emit readability-extractor Binary dependency for the crawl.
- """
- import json
- import os
- import sys
def get_env(name: str, default: str = '') -> str:
    """Return the environment variable *name* with surrounding whitespace removed.

    When the variable is unset, *default* is used instead; the fallback is
    stripped in the same way before being returned.
    """
    raw_value = os.getenv(name, default)
    return raw_value.strip()
def get_env_bool(name: str, default: bool = False) -> bool:
    """Interpret the environment variable *name* as a boolean flag.

    The value is compared case-insensitively: 'true', '1', 'yes' and 'on'
    give True; 'false', '0', 'no' and 'off' give False. Any other value
    (including an unset or empty variable) yields *default*.
    """
    recognised = {
        'true': True, '1': True, 'yes': True, 'on': True,
        'false': False, '0': False, 'no': False, 'off': False,
    }
    text = get_env(name, '').lower()
    return recognised.get(text, default)
def output_binary(name: str, binproviders: str, overrides: 'dict | None' = None) -> None:
    """Print a Binary JSONL record for a dependency to stdout.

    Args:
        name: Value emitted in the record's ``name`` field.
        binproviders: Value emitted unchanged in the ``binproviders`` field
            (the caller in this file passes a comma-separated string).
        overrides: Optional mapping emitted in the ``overrides`` field. When
            omitted, the npm override pointing at the ArchiveBox
            readability-extractor repository is used, which is what this
            function always emitted before the parameter existed.

    The record is serialised with ``json.dumps`` and written as a single line.
    """
    if overrides is None:
        # Default kept for backward compatibility with existing callers.
        overrides = {
            'npm': {
                'packages': ['https://github.com/ArchiveBox/readability-extractor'],
            },
        }
    record = {
        'type': 'Binary',
        'name': name,
        'binproviders': binproviders,
        'overrides': overrides,
        # Read raw from the environment; an empty string when MACHINE_ID is unset.
        'machine_id': os.environ.get('MACHINE_ID', ''),
    }
    print(json.dumps(record))
def main():
    """Emit the readability-extractor Binary record unless the feature is disabled.

    READABILITY_ENABLED is read as a boolean (defaulting to enabled). The
    process always exits with status 0, whether or not a record was printed.
    """
    if get_env_bool('READABILITY_ENABLED', True):
        output_binary(name='readability-extractor', binproviders='npm,env')
    sys.exit(0)
# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()
|