binproviders.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. __package__ = 'plugins_pkg.playwright'
  2. import os
  3. import platform
  4. from pathlib import Path
  5. from typing import List, Optional, Dict, ClassVar
  6. from pydantic import computed_field, Field
  7. from pydantic_pkgr import (
  8. BinName,
  9. BinProviderName,
  10. BinProviderOverrides,
  11. InstallArgs,
  12. PATHStr,
  13. HostBinPath,
  14. bin_abspath,
  15. OPERATING_SYSTEM,
  16. DEFAULT_ENV_PATH,
  17. )
  18. from archivebox.config import CONSTANTS
  19. from abx.archivebox.base_binary import BaseBinProvider, env
  20. from plugins_pkg.pip.binproviders import SYS_PIP_BINPROVIDER
  21. from .binaries import PLAYWRIGHT_BINARY
  22. MACOS_PLAYWRIGHT_CACHE_DIR: Path = Path("~/Library/Caches/ms-playwright")
  23. LINUX_PLAYWRIGHT_CACHE_DIR: Path = Path("~/.cache/ms-playwright")
  24. class PlaywrightBinProvider(BaseBinProvider):
  25. name: BinProviderName = "playwright"
  26. INSTALLER_BIN: BinName = PLAYWRIGHT_BINARY.name
  27. PATH: PATHStr = f"{CONSTANTS.DEFAULT_LIB_DIR / 'bin'}:{DEFAULT_ENV_PATH}"
  28. playwright_browsers_dir: Path = (
  29. MACOS_PLAYWRIGHT_CACHE_DIR.expanduser()
  30. if OPERATING_SYSTEM == "darwin" else
  31. LINUX_PLAYWRIGHT_CACHE_DIR.expanduser()
  32. )
  33. playwright_install_args: List[str] = ["install"]
  34. packages_handler: BinProviderOverrides = Field(default={
  35. "chrome": ["chromium"],
  36. }, exclude=True)
  37. _browser_abspaths: ClassVar[Dict[str, HostBinPath]] = {}
  38. @computed_field
  39. @property
  40. def INSTALLER_BIN_ABSPATH(self) -> HostBinPath | None:
  41. return PLAYWRIGHT_BINARY.load().abspath
  42. def setup(self) -> None:
  43. # update paths from config if they arent the default
  44. from archivebox.config.common import STORAGE_CONFIG
  45. if STORAGE_CONFIG.LIB_DIR != CONSTANTS.DEFAULT_LIB_DIR:
  46. self.PATH = f"{STORAGE_CONFIG.LIB_DIR / 'bin'}:{DEFAULT_ENV_PATH}"
  47. assert SYS_PIP_BINPROVIDER.INSTALLER_BIN_ABSPATH, "Pip bin provider not initialized"
  48. if self.playwright_browsers_dir:
  49. self.playwright_browsers_dir.mkdir(parents=True, exist_ok=True)
  50. def installed_browser_bins(self, browser_name: str = "*") -> List[Path]:
  51. if browser_name == 'chrome':
  52. browser_name = 'chromium'
  53. # if on macOS, browser binary is inside a .app, otherwise it's just a plain binary
  54. if platform.system().lower() == "darwin":
  55. # ~/Library/caches/ms-playwright/chromium-1097/chrome-mac/Chromium.app/Contents/MacOS/Chromium
  56. return sorted(
  57. self.playwright_browsers_dir.glob(
  58. f"{browser_name}-*/*-mac*/*.app/Contents/MacOS/*"
  59. )
  60. )
  61. # ~/Library/caches/ms-playwright/chromium-1097/chrome-linux/chromium
  62. paths = []
  63. for path in sorted(self.playwright_browsers_dir.glob(f"{browser_name}-*/*-linux/*")):
  64. if 'xdg-settings' in str(path):
  65. continue
  66. if 'ffmpeg' in str(path):
  67. continue
  68. if '/chrom' in str(path) and 'chrom' in path.name.lower():
  69. paths.append(path)
  70. return paths
  71. def default_abspath_handler(self, bin_name: BinName, **context) -> Optional[HostBinPath]:
  72. assert bin_name == "chrome", "Only chrome is supported using the @puppeteer/browsers install method currently."
  73. # already loaded, return abspath from cache
  74. if bin_name in self._browser_abspaths:
  75. return self._browser_abspaths[bin_name]
  76. # first time loading, find browser in self.playwright_browsers_dir by searching filesystem for installed binaries
  77. matching_bins = [abspath for abspath in self.installed_browser_bins() if bin_name in str(abspath)]
  78. if matching_bins:
  79. newest_bin = matching_bins[-1] # already sorted alphabetically, last should theoretically be highest version number
  80. self._browser_abspaths[bin_name] = newest_bin
  81. return self._browser_abspaths[bin_name]
  82. # playwright sometimes installs google-chrome-stable via apt into system $PATH, check there as well
  83. abspath = bin_abspath('google-chrome-stable', PATH=env.PATH)
  84. if abspath:
  85. self._browser_abspaths[bin_name] = abspath
  86. return self._browser_abspaths[bin_name]
  87. return None
  88. def default_install_handler(self, bin_name: str, packages: Optional[InstallArgs] = None, **context) -> str:
  89. """playwright install chrome"""
  90. self.setup()
  91. assert bin_name == "chrome", "Only chrome is supported using the playwright install method currently."
  92. if not self.INSTALLER_BIN_ABSPATH:
  93. raise Exception(
  94. f"{self.__class__.__name__} install method is not available on this host ({self.INSTALLER_BIN} not found in $PATH)"
  95. )
  96. packages = packages or self.get_packages(bin_name)
  97. # print(f'[*] {self.__class__.__name__}: Installing {bin_name}: {self.INSTALLER_BIN_ABSPATH} install {packages}')
  98. # playwright install-deps (to install system dependencies like fonts, graphics libraries, etc.)
  99. if platform.system().lower() != 'darwin':
  100. # libglib2.0-0, libnss3, libnspr4, libdbus-1-3, libatk1.0-0, libatk-bridge2.0-0, libcups2, libdrm2, libxcb1, libxkbcommon0, libatspi2.0-0, libx11-6, libxcomposite1, libxdamage1, libxext6, libxfixes3, libxrandr2, libgbm1, libcairo2, libasound2
  101. proc = self.exec(bin_name=self.INSTALLER_BIN_ABSPATH, cmd=['install-deps'])
  102. if proc.returncode != 0:
  103. print(proc.stdout.strip())
  104. print(proc.stderr.strip())
  105. proc = self.exec(bin_name=self.INSTALLER_BIN_ABSPATH, cmd=['install', *packages])
  106. if proc.returncode != 0:
  107. print(proc.stdout.strip())
  108. print(proc.stderr.strip())
  109. raise Exception(f"{self.__class__.__name__}: install got returncode {proc.returncode} while installing {packages}: {packages} PACKAGES={packages}")
  110. # [email protected] /data/lib/browsers/chrome/mac_arm-129.0.6668.58/chrome-mac-arm64/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing
  111. # playwright build v1010 downloaded to /home/squash/.cache/ms-playwright/ffmpeg-1010
  112. output_lines = [
  113. line for line in proc.stdout.strip().split('\n')
  114. if '/chrom' in line
  115. and 'chrom' in line.rsplit('/', 1)[-1].lower() # if final path segment (filename) contains chrome or chromium
  116. and 'xdg-settings' not in line
  117. and 'ffmpeg' not in line
  118. ]
  119. if output_lines:
  120. relpath = output_lines[0].split(str(self.playwright_browsers_dir))[-1]
  121. abspath = self.playwright_browsers_dir / relpath
  122. if os.path.isfile(abspath) and os.access(abspath, os.X_OK):
  123. self._browser_abspaths[bin_name] = abspath
  124. return (proc.stderr.strip() + "\n" + proc.stdout.strip()).strip()
  125. PLAYWRIGHT_BINPROVIDER = PlaywrightBinProvider()