binproviders.py 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  # Explicitly declare the package this module belongs to (Python consults
  # __package__ when resolving relative imports).
  # NOTE(review): presumably needed because of how the plugin loader imports
  # this file — confirm before removing.
  1. __package__ = 'plugins_pkg.puppeteer'
  2. import os
  3. import platform
  4. from pathlib import Path
  5. from typing import List, Optional, Dict, ClassVar
  6. from pydantic import Field
  7. from pydantic_pkgr import (
  8. BinName,
  9. BinProviderName,
  10. BinProviderOverrides,
  11. InstallArgs,
  12. PATHStr,
  13. HostBinPath,
  14. )
  15. from archivebox.config import CONSTANTS
  16. from archivebox.config.permissions import ARCHIVEBOX_USER
  17. from abx.archivebox.base_binary import BaseBinProvider
  18. from plugins_pkg.npm.binproviders import SYS_NPM_BINPROVIDER
  19. class PuppeteerBinProvider(BaseBinProvider):
  20. name: BinProviderName = "puppeteer"
  21. INSTALLER_BIN: BinName = "npx"
  22. PATH: PATHStr = str(CONSTANTS.DEFAULT_LIB_DIR / 'bin')
  23. euid: Optional[int] = ARCHIVEBOX_USER
  24. puppeteer_browsers_dir: Path = CONSTANTS.DEFAULT_LIB_DIR / 'browsers'
  25. puppeteer_install_args: List[str] = ['--yes', "@puppeteer/browsers", "install"]
  26. packages_handler: BinProviderOverrides = Field(default={
  27. "chrome": lambda:
  28. ['chrome@stable'],
  29. }, exclude=True)
  30. _browser_abspaths: ClassVar[Dict[str, HostBinPath]] = {}
  31. def setup(self) -> None:
  32. # update paths from config
  33. from archivebox.config.common import STORAGE_CONFIG
  34. self.puppeteer_browsers_dir = STORAGE_CONFIG.LIB_DIR / 'browsers'
  35. self.PATH = str(STORAGE_CONFIG.LIB_DIR / 'bin')
  36. assert SYS_NPM_BINPROVIDER.INSTALLER_BIN_ABSPATH, "NPM bin provider not initialized"
  37. if self.puppeteer_browsers_dir:
  38. self.puppeteer_browsers_dir.mkdir(parents=True, exist_ok=True)
  39. def installed_browser_bins(self, browser_name: str='*') -> List[Path]:
  40. # if on macOS, browser binary is inside a .app, otherwise it's just a plain binary
  41. if platform.system().lower() == 'darwin':
  42. # /data/lib/browsers/chrome/mac_arm-129.0.6668.58/chrome-mac-arm64/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing
  43. return sorted(self.puppeteer_browsers_dir.glob(f'{browser_name}/mac*/chrome*/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing'))
  44. # /data/lib/browsers/chrome/linux-131.0.6730.0/chrome-linux64/chrome
  45. # /data/lib/aarch64-linux/browsers/chrome/linux-129.0.6668.100/chrome-linux64/chrome
  46. return sorted(self.puppeteer_browsers_dir.glob(f"{browser_name}/linux*/chrome*/chrome"))
  47. def default_abspath_handler(self, bin_name: BinName, **context) -> Optional[HostBinPath]:
  48. assert bin_name == 'chrome', 'Only chrome is supported using the @puppeteer/browsers install method currently.'
  49. # already loaded, return abspath from cache
  50. if bin_name in self._browser_abspaths:
  51. return self._browser_abspaths[bin_name]
  52. # first time loading, find browser in self.puppeteer_browsers_dir by searching filesystem for installed binaries
  53. matching_bins = [abspath for abspath in self.installed_browser_bins() if bin_name in str(abspath)]
  54. if matching_bins:
  55. newest_bin = matching_bins[-1] # already sorted alphabetically, last should theoretically be highest version number
  56. self._browser_abspaths[bin_name] = newest_bin
  57. return newest_bin
  58. return None
  59. def default_install_handler(self, bin_name: str, packages: Optional[InstallArgs] = None, **context) -> str:
  60. """npx @puppeteer/browsers install chrome@stable"""
  61. self.setup()
  62. assert bin_name == 'chrome', 'Only chrome is supported using the @puppeteer/browsers install method currently.'
  63. if not self.INSTALLER_BIN_ABSPATH:
  64. raise Exception(
  65. f"{self.__class__.__name__} install method is not available on this host ({self.INSTALLER_BIN} not found in $PATH)"
  66. )
  67. packages = packages or self.get_packages(bin_name)
  68. assert packages, f"No packages specified for installation of {bin_name}"
  69. # print(f'[*] {self.__class__.__name__}: Installing {bin_name}: {self.INSTALLER_BIN_ABSPATH} install {packages}')
  70. install_args = [*self.puppeteer_install_args, "--path", str(self.puppeteer_browsers_dir)]
  71. proc = self.exec(bin_name=self.INSTALLER_BIN_ABSPATH, cmd=[*install_args, *packages])
  72. if proc.returncode != 0:
  73. print(proc.stdout.strip())
  74. print(proc.stderr.strip())
  75. raise Exception(f"{self.__class__.__name__}: install got returncode {proc.returncode} while installing {packages}: {packages}")
  76. # [email protected] /tmp/test3/lib/x86_64-linux/browsers/chrome/linux-129.0.6668.91/chrome-linux64/chrome
  77. # [email protected] /data/lib/browsers/chrome/mac_arm-129.0.6668.58/chrome-mac-arm64/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing
  78. # /data/lib/aarch64-linux/browsers/chrome/linux-129.0.6668.100/chrome-linux64/chrome
  79. relpath = proc.stdout.strip().split(str(self.puppeteer_browsers_dir))[-1].split('\n', 1)[0]
  80. abspath = self.puppeteer_browsers_dir / relpath
  81. if os.path.isfile(abspath) and os.access(abspath, os.X_OK):
  82. self._browser_abspaths[bin_name] = abspath
  83. return abspath
  84. return (proc.stderr.strip() + "\n" + proc.stdout.strip()).strip()
  # Module-level provider instance, constructed once when this module is imported.
  85. PUPPETEER_BINPROVIDER = PuppeteerBinProvider()
  86. # ALTERNATIVE INSTALL METHOD using Ansible:
  87. # install_playbook = self.plugin_dir / 'install_puppeteer.yml'
  88. # chrome_bin = run_playbook(install_playbook, data_dir=DATA_DIR, quiet=quiet).BINARIES.chrome
  89. # return self.__class__.model_validate(
  90. # {
  91. # **self.model_dump(),
  92. # "loaded_abspath": chrome_bin.symlink,
  93. # "loaded_version": chrome_bin.version,
  94. # "loaded_binprovider": env,
  95. # "binproviders_supported": self.binproviders_supported,
  96. # }
  97. # )