Преглед изворни кода

split puppeteer plugin into Puppeteer, Playwright, and Chrome

Nick Sweeting пре 1 година
родитељ
комит
541cd6c5a1

+ 0 - 0
archivebox/builtin_plugins/chrome/__init__.py


+ 132 - 0
archivebox/builtin_plugins/chrome/apps.py

@@ -0,0 +1,132 @@
+import platform
+from pathlib import Path
+from typing import List, Optional, Dict
+
+from django.conf import settings
+
+# Depends on other PyPI/vendor packages:
+from pydantic import InstanceOf, Field
+from pydantic_pkgr import (
+    BinProvider,
+    BinName,
+    BinProviderName,
+    ProviderLookupDict,
+    bin_abspath,
+)
+
+# Depends on other Django apps:
+from plugantic.base_plugin import BasePlugin
+from plugantic.base_configset import BaseConfigSet, ConfigSectionName
+from plugantic.base_binary import BaseBinary, env
+# from plugantic.base_extractor import BaseExtractor
+# from plugantic.base_queue import BaseQueue
+from plugantic.base_hook import BaseHook
+
+# Depends on Other Plugins:
+from builtin_plugins.puppeteer.apps import PUPPETEER_BINPROVIDER
+from builtin_plugins.playwright.apps import PLAYWRIGHT_BINPROVIDER
+
+
+CHROMIUM_BINARY_NAMES = [
+    "chromium",
+    "chromium-browser",
+    "chromium-browser-beta",
+    "chromium-browser-unstable",
+    "chromium-browser-canary",
+    "chromium-browser-dev",
+    "/Applications/Chromium.app/Contents/MacOS/Chromium",
+]
+CHROME_BINARY_NAMES = [
+    "google-chrome",
+    "google-chrome-stable",
+    "google-chrome-beta",
+    "google-chrome-canary",
+    "google-chrome-unstable",
+    "google-chrome-dev",
+    # 'chrome',
+    "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
+    "/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary",
+]
+
+
+def autodetect_system_chrome_install(PATH=None) -> Optional[Path]:
+    for bin_name in CHROME_BINARY_NAMES + CHROMIUM_BINARY_NAMES:
+        abspath = bin_abspath(bin_name, PATH=env.PATH)
+        if abspath:
+            return abspath
+    return None
+
+###################### Config ##########################
+
+
+class ChromeDependencyConfigs(BaseConfigSet):
+    section: ConfigSectionName = 'DEPENDENCY_CONFIG'
+
+    CHROME_BINARY: str = Field(default='wget')
+    CHROME_ARGS: Optional[List[str]] = Field(default=None)
+    CHROME_EXTRA_ARGS: List[str] = []
+    CHROME_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
+
+class ChromeConfigs(ChromeDependencyConfigs):
+    # section: ConfigSectionName = 'ALL_CONFIGS'
+    pass
+
+DEFAULT_GLOBAL_CONFIG = {
+}
+
+CHROME_CONFIG = ChromeConfigs(**DEFAULT_GLOBAL_CONFIG)
+
+
+class ChromeBinary(BaseBinary):
+    name: BinName = 'chrome'
+    binproviders_supported: List[InstanceOf[BinProvider]] = [PUPPETEER_BINPROVIDER, env, PLAYWRIGHT_BINPROVIDER]
+    
+    provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
+        env.name: {
+            'abspath': lambda:
+                autodetect_system_chrome_install(PATH=env.PATH),
+        },
+        PUPPETEER_BINPROVIDER.name: {
+            'packages': lambda:
+                ['chrome@stable'],
+        },
+        PLAYWRIGHT_BINPROVIDER.name: {
+            'packages': lambda:
+                ['chromium'],
+        },
+    }
+
+    @staticmethod
+    def symlink_to_lib(binary, bin_dir=settings.CONFIG.BIN_DIR) -> None:
+        if not (binary.abspath and binary.abspath.exists()):
+            return
+        bin_dir.mkdir(parents=True, exist_ok=True)
+        symlink = bin_dir / binary.name
+        
+        if platform.system().lower() == 'darwin':
+            # if on macOS, browser binary is inside a .app, so we need to create a tiny bash script instead of a symlink
+            symlink.write_text(f"""#!/usr/bin/env bash\nexec '{binary.abspath}' "$@"\n""")
+            symlink.chmod(0o777)   # make sure its executable by everyone
+        else:
+            # otherwise on linux we can symlink directly to binary executable
+            symlink.symlink_to(binary.abspath)
+
+
+CHROME_BINARY = ChromeBinary()
+
+PLUGIN_BINARIES = [CHROME_BINARY]
+
+class ChromePlugin(BasePlugin):
+    app_label: str ='puppeteer'
+    verbose_name: str = 'Chrome & Playwright'
+
+    hooks: List[InstanceOf[BaseHook]] = [
+        CHROME_CONFIG,
+        CHROME_BINARY,
+    ]
+
+
+
+PLUGIN = ChromePlugin()
+PLUGIN.register(settings)
+DJANGO_APP = PLUGIN.AppConfig

+ 10 - 9
archivebox/builtin_plugins/npm/apps.py

@@ -4,12 +4,12 @@ from pathlib import Path
 from typing import List, Optional
 
 from django.conf import settings
-from pydantic import InstanceOf, Field
+from pydantic import InstanceOf
 
 from pydantic_pkgr import BinProvider, NpmProvider, BinName, PATHStr, BinProviderName
 
 from plugantic.base_plugin import BasePlugin
-from plugantic.base_configset import BaseConfigSet, ConfigSectionName
+from plugantic.base_configset import BaseConfigSet
 from plugantic.base_binary import BaseBinary, BaseBinProvider, env, apt, brew
 from plugantic.base_hook import BaseHook
 
@@ -20,13 +20,14 @@ from ...config import CONFIG
 
 
 class NpmDependencyConfigs(BaseConfigSet):
-    section: ConfigSectionName = 'DEPENDENCY_CONFIG'
+    # section: ConfigSectionName = 'DEPENDENCY_CONFIG'
 
-    USE_NPM: bool = True
-    NPM_BINARY: str = Field(default='npm')
-    NPM_ARGS: Optional[List[str]] = Field(default=None)
-    NPM_EXTRA_ARGS: List[str] = []
-    NPM_DEFAULT_ARGS: List[str] = []
+    # USE_NPM: bool = True
+    # NPM_BINARY: str = Field(default='npm')
+    # NPM_ARGS: Optional[List[str]] = Field(default=None)
+    # NPM_EXTRA_ARGS: List[str] = []
+    # NPM_DEFAULT_ARGS: List[str] = []
+    pass
 
 
 DEFAULT_GLOBAL_CONFIG = {
@@ -35,7 +36,7 @@ NPM_CONFIG = NpmDependencyConfigs(**DEFAULT_GLOBAL_CONFIG)
 
 
 class SystemNpmProvider(NpmProvider, BaseBinProvider):
-    name: BinProviderName = "npm"
+    name: BinProviderName = "sys_npm"
     PATH: PATHStr = str(CONFIG.NODE_BIN_PATH)
     
     npm_prefix: Optional[Path] = None

+ 36 - 17
archivebox/builtin_plugins/pip/apps.py

@@ -30,6 +30,7 @@ class PipDependencyConfigs(BaseConfigSet):
     PIP_ARGS: Optional[List[str]] = Field(default=None)
     PIP_EXTRA_ARGS: List[str] = []
     PIP_DEFAULT_ARGS: List[str] = []
+    
 
 
 DEFAULT_GLOBAL_CONFIG = {
@@ -37,15 +38,27 @@ DEFAULT_GLOBAL_CONFIG = {
 PIP_CONFIG = PipDependencyConfigs(**DEFAULT_GLOBAL_CONFIG)
 
 class SystemPipBinProvider(PipProvider, BaseBinProvider):
-    name: BinProviderName = "pip"
+    name: BinProviderName = "sys_pip"
     INSTALLER_BIN: BinName = "pip"
     
     pip_venv: Optional[Path] = None        # global pip scope
     
+    def on_install(self, bin_name: str, **kwargs):
+        # never modify system pip packages
+        return 'refusing to install packages globally with system pip, use a venv instead'
 
 class SystemPipxBinProvider(PipProvider, BaseBinProvider):
     name: BinProviderName = "pipx"
     INSTALLER_BIN: BinName = "pipx"
+    
+    pip_venv: Optional[Path] = None        # global pipx scope
+
+
+class VenvPipBinProvider(PipProvider, BaseBinProvider):
+    name: BinProviderName = "venv_pip"
+    INSTALLER_BIN: BinName = "pip"
+
+    pip_venv: Optional[Path] = Path(os.environ.get("VIRTUAL_ENV", None) or '/tmp/NotInsideAVenv')
 
 
 class LibPipBinProvider(PipProvider, BaseBinProvider):
@@ -55,7 +68,8 @@ class LibPipBinProvider(PipProvider, BaseBinProvider):
     pip_venv: Optional[Path] = settings.CONFIG.OUTPUT_DIR / 'lib' / 'pip' / 'venv'
 
 SYS_PIP_BINPROVIDER = SystemPipBinProvider()
-SYS_PIPX_BINPROVIDER = SystemPipxBinProvider()
+PIPX_PIP_BINPROVIDER = SystemPipxBinProvider()
+VENV_PIP_BINPROVIDER = VenvPipBinProvider()
 LIB_PIP_BINPROVIDER = LibPipBinProvider()
 pip = LIB_PIP_BINPROVIDER
 
@@ -64,7 +78,7 @@ pip = LIB_PIP_BINPROVIDER
 class PythonBinary(BaseBinary):
     name: BinName = 'python'
 
-    binproviders_supported: List[InstanceOf[BinProvider]] = [SYS_PIP_BINPROVIDER, apt, brew, env]
+    binproviders_supported: List[InstanceOf[BinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]
     provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
         SYS_PIP_BINPROVIDER.name: {
             'abspath': lambda:
@@ -78,13 +92,15 @@ PYTHON_BINARY = PythonBinary()
 
 class SqliteBinary(BaseBinary):
     name: BinName = 'sqlite'
-    binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[SYS_PIP_BINPROVIDER])
-    provider_overrides:  Dict[BinProviderName, ProviderLookupDict] = {
+    binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER])
+    provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
+        VENV_PIP_BINPROVIDER.name: {
+            "abspath": lambda: Path(inspect.getfile(django_sqlite3)),
+            "version": lambda: SemVer(django_sqlite3.version),
+        },
         SYS_PIP_BINPROVIDER.name: {
-            'abspath': lambda:
-                Path(inspect.getfile(django_sqlite3)),
-            'version': lambda:
-                SemVer(django_sqlite3.version),
+            "abspath": lambda: Path(inspect.getfile(django_sqlite3)),
+            "version": lambda: SemVer(django_sqlite3.version),
         },
     }
 
@@ -94,13 +110,15 @@ SQLITE_BINARY = SqliteBinary()
 class DjangoBinary(BaseBinary):
     name: BinName = 'django'
 
-    binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[SYS_PIP_BINPROVIDER])
-    provider_overrides:  Dict[BinProviderName, ProviderLookupDict] = {
+    binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER])
+    provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
+        VENV_PIP_BINPROVIDER.name: {
+            "abspath": lambda: inspect.getfile(django),
+            "version": lambda: django.VERSION[:3],
+        },
         SYS_PIP_BINPROVIDER.name: {
-            'abspath': lambda:
-                inspect.getfile(django),
-            'version': lambda:
-                django.VERSION[:3],
+            "abspath": lambda: inspect.getfile(django),
+            "version": lambda: django.VERSION[:3],
         },
     }
 
@@ -108,7 +126,7 @@ DJANGO_BINARY = DjangoBinary()
 
 class PipBinary(BaseBinary):
     name: BinName = "pip"
-    binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]
+    binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]
 
 
 PIP_BINARY = PipBinary()
@@ -164,7 +182,8 @@ class PipPlugin(BasePlugin):
     hooks: List[InstanceOf[BaseHook]] = [
         PIP_CONFIG,
         SYS_PIP_BINPROVIDER,
-        SYS_PIPX_BINPROVIDER,
+        PIPX_PIP_BINPROVIDER,
+        VENV_PIP_BINPROVIDER,
         LIB_PIP_BINPROVIDER,
         PIP_BINARY,
         PYTHON_BINARY,

+ 0 - 0
archivebox/builtin_plugins/playwright/__init__.py


+ 182 - 0
archivebox/builtin_plugins/playwright/apps.py

@@ -0,0 +1,182 @@
+import platform
+from pathlib import Path
+from typing import List, Optional, Dict, ClassVar
+
+from django.conf import settings
+
+# Depends on other PyPI/vendor packages:
+from pydantic import InstanceOf, computed_field, Field
+from pydantic_pkgr import (
+    BinName,
+    BinProvider,
+    BinProviderName,
+    ProviderLookupDict,
+    InstallArgs,
+    PATHStr,
+    HostBinPath,
+    bin_abspath,
+    OPERATING_SYSTEM,
+    DEFAULT_ENV_PATH,
+)
+
+# Depends on other Django apps:
+from plugantic.base_plugin import BasePlugin
+from plugantic.base_configset import BaseConfigSet
+from plugantic.base_binary import BaseBinary, BaseBinProvider, env
+# from plugantic.base_extractor import BaseExtractor
+# from plugantic.base_queue import BaseQueue
+from plugantic.base_hook import BaseHook
+
+# Depends on Other Plugins:
+from builtin_plugins.pip.apps import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER
+
+
+###################### Config ##########################
+
+
+class PlaywrightConfigs(BaseConfigSet):
+    # section: ConfigSectionName = 'DEPENDENCY_CONFIG'
+
+    # PLAYWRIGHT_BINARY: str = Field(default='wget')
+    # PLAYWRIGHT_ARGS: Optional[List[str]] = Field(default=None)
+    # PLAYWRIGHT_EXTRA_ARGS: List[str] = []
+    # PLAYWRIGHT_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
+    pass
+
+DEFAULT_GLOBAL_CONFIG = {
+}
+
+PLAYWRIGHT_CONFIG = PlaywrightConfigs(**DEFAULT_GLOBAL_CONFIG)
+
+LIB_DIR_BROWSERS = settings.CONFIG.OUTPUT_DIR / "lib" / "browsers"
+
+
+
+class PlaywrightBinary(BaseBinary):
+    name: BinName = "playwright"
+
+    binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, env]
+    
+
+
+PLAYWRIGHT_BINARY = PlaywrightBinary()
+
+
+class PlaywrightBinProvider(BaseBinProvider):
+    name: BinProviderName = "playwright"
+    INSTALLER_BIN: BinName = PLAYWRIGHT_BINARY.name
+
+    PATH: PATHStr = f"{settings.CONFIG.BIN_DIR}:{DEFAULT_ENV_PATH}"
+
+    puppeteer_browsers_dir: Optional[Path] = (
+        Path("~/Library/Caches/ms-playwright").expanduser()
+        if OPERATING_SYSTEM == "darwin" else
+        Path("~/.cache/ms-playwright").expanduser()
+    )
+    puppeteer_install_args: List[str] = ["install"]  # --with-deps
+
+    packages_handler: ProviderLookupDict = Field(default={
+        "chrome": lambda: ["chromium"],
+    }, exclude=True)
+
+    _browser_abspaths: ClassVar[Dict[str, HostBinPath]] = {}
+
+    @computed_field
+    @property
+    def INSTALLER_BIN_ABSPATH(self) -> HostBinPath | None:
+        return PLAYWRIGHT_BINARY.load().abspath
+
+    def setup(self) -> None:
+        assert SYS_PIP_BINPROVIDER.INSTALLER_BIN_ABSPATH, "Pip bin provider not initialized"
+
+        if self.puppeteer_browsers_dir:
+            self.puppeteer_browsers_dir.mkdir(parents=True, exist_ok=True)
+
+    def installed_browser_bins(self, browser_name: str = "*") -> List[Path]:
+        if browser_name == 'chrome':
+            browser_name = 'chromium'
+        
+        # if on macOS, browser binary is inside a .app, otherwise it's just a plain binary
+        if platform.system().lower() == "darwin":
+            # ~/Library/caches/ms-playwright/chromium-1097/chrome-mac/Chromium.app/Contents/MacOS/Chromium
+            return sorted(
+                self.puppeteer_browsers_dir.glob(
+                    f"{browser_name}-*/*-mac*/*.app/Contents/MacOS/*"
+                )
+            )
+
+        # ~/Library/caches/ms-playwright/chromium-1097/chrome-linux/chromium
+        return sorted(self.puppeteer_browsers_dir.glob(f"{browser_name}-*/*-linux/*"))
+
+    def on_get_abspath(self, bin_name: BinName, **context) -> Optional[HostBinPath]:
+        assert bin_name == "chrome", "Only chrome is supported using the @puppeteer/browsers install method currently."
+
+        # already loaded, return abspath from cache
+        if bin_name in self._browser_abspaths:
+            return self._browser_abspaths[bin_name]
+
+        # first time loading, find browser in self.puppeteer_browsers_dir by searching filesystem for installed binaries
+        matching_bins = [abspath for abspath in self.installed_browser_bins() if bin_name in str(abspath)]
+        if matching_bins:
+            newest_bin = matching_bins[-1]  # already sorted alphabetically, last should theoretically be highest version number
+            self._browser_abspaths[bin_name] = newest_bin
+            return self._browser_abspaths[bin_name]
+        
+        # playwright sometimes installs google-chrome-stable via apt into system $PATH, check there as well
+        abspath = bin_abspath('google-chrome-stable', PATH=env.PATH)
+        if abspath:
+            self._browser_abspaths[bin_name] = abspath
+            return self._browser_abspaths[bin_name]
+
+        return None
+
+    def on_install(self, bin_name: str, packages: Optional[InstallArgs] = None, **context) -> str:
+        """playwright install chrome"""
+        self.setup()
+        assert bin_name == "chrome", "Only chrome is supported using the playwright install method currently."
+
+        if not self.INSTALLER_BIN_ABSPATH:
+            raise Exception(
+                f"{self.__class__.__name__} install method is not available on this host ({self.INSTALLER_BIN} not found in $PATH)"
+            )
+        packages = packages or self.on_get_packages(bin_name)
+
+        # print(f'[*] {self.__class__.__name__}: Installing {bin_name}: {self.INSTALLER_BIN_ABSPATH} install {packages}')
+
+        install_args = [*self.puppeteer_install_args]
+
+        proc = self.exec(bin_name=self.INSTALLER_BIN_ABSPATH, cmd=[*install_args, *packages])
+
+        if proc.returncode != 0:
+            print(proc.stdout.strip())
+            print(proc.stderr.strip())
+            raise Exception(f"{self.__class__.__name__}: install got returncode {proc.returncode} while installing {packages}: {packages}")
+
+        # chrome@129.0.6668.58 /data/lib/browsers/chrome/mac_arm-129.0.6668.58/chrome-mac-arm64/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing
+        output_info = proc.stdout.strip().split("\n")[-1]
+        browser_abspath = output_info.split(" ", 1)[-1]
+        # browser_version = output_info.split('@', 1)[-1].split(' ', 1)[0]
+
+        self._browser_abspaths[bin_name] = Path(browser_abspath)
+
+        return proc.stderr.strip() + "\n" + proc.stdout.strip()
+
+PLAYWRIGHT_BINPROVIDER = PlaywrightBinProvider()
+
+
+
+class PlaywrightPlugin(BasePlugin):
+    app_label: str = 'playwright'
+    verbose_name: str = 'Playwright'
+
+    hooks: List[InstanceOf[BaseHook]] = [
+        PLAYWRIGHT_CONFIG,
+        PLAYWRIGHT_BINPROVIDER,
+        PLAYWRIGHT_BINARY,
+    ]
+
+
+
+PLUGIN = PlaywrightPlugin()
+PLUGIN.register(settings)
+DJANGO_APP = PLUGIN.AppConfig

+ 50 - 94
archivebox/builtin_plugins/puppeteer/apps.py

@@ -6,33 +6,38 @@ from django.conf import settings
 
 # Depends on other PyPI/vendor packages:
 from pydantic import InstanceOf, Field
-from pydantic_pkgr import BinProvider, BinName, BinProviderName, ProviderLookupDict, InstallArgs, HostBinPath, bin_abspath
+from pydantic_pkgr import (
+    BinProvider,
+    BinName,
+    BinProviderName,
+    ProviderLookupDict,
+    InstallArgs,
+    PATHStr,
+    HostBinPath,
+)
 
 # Depends on other Django apps:
 from plugantic.base_plugin import BasePlugin
-from plugantic.base_configset import BaseConfigSet, ConfigSectionName
+from plugantic.base_configset import BaseConfigSet
 from plugantic.base_binary import BaseBinary, BaseBinProvider, env
 # from plugantic.base_extractor import BaseExtractor
 # from plugantic.base_queue import BaseQueue
 from plugantic.base_hook import BaseHook
 
 # Depends on Other Plugins:
-from builtin_plugins.npm.apps import SYS_NPM_BINPROVIDER
+from builtin_plugins.npm.apps import LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER
 
 
 ###################### Config ##########################
 
 
-class PuppeteerDependencyConfigs(BaseConfigSet):
-    section: ConfigSectionName = 'DEPENDENCY_CONFIG'
+class PuppeteerConfigs(BaseConfigSet):
+    # section: ConfigSectionName = 'DEPENDENCY_CONFIG'
 
-    PUPPETEER_BINARY: str = Field(default='wget')
-    PUPPETEER_ARGS: Optional[List[str]] = Field(default=None)
-    PUPPETEER_EXTRA_ARGS: List[str] = []
-    PUPPETEER_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
-
-class PuppeteerConfigs(PuppeteerDependencyConfigs):
-    # section: ConfigSectionName = 'ALL_CONFIGS'
+    # PUPPETEER_BINARY: str = Field(default='wget')
+    # PUPPETEER_ARGS: Optional[List[str]] = Field(default=None)
+    # PUPPETEER_EXTRA_ARGS: List[str] = []
+    # PUPPETEER_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
     pass
 
 DEFAULT_GLOBAL_CONFIG = {
@@ -42,17 +47,29 @@ PUPPETEER_CONFIG = PuppeteerConfigs(**DEFAULT_GLOBAL_CONFIG)
 
 LIB_DIR_BROWSERS = settings.CONFIG.OUTPUT_DIR / "lib" / "browsers"
 
+
+class PuppeteerBinary(BaseBinary):
+    name: BinName = "puppeteer"
+
+    binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER, env]
+
+
+PUPPETEER_BINARY = PuppeteerBinary()
+
+
 class PuppeteerBinProvider(BaseBinProvider):
     name: BinProviderName = "puppeteer"
     INSTALLER_BIN: BinName = "npx"
+    
+    PATH: PATHStr = str(settings.CONFIG.BIN_DIR)
 
     puppeteer_browsers_dir: Optional[Path] = LIB_DIR_BROWSERS
     puppeteer_install_args: List[str] = ["@puppeteer/browsers", "install", "--path", str(LIB_DIR_BROWSERS)]
 
-    # packages_handler: ProviderLookupDict = {
-    #     "chrome": lambda:
-    #         ['chrome@stable'],
-    # }
+    packages_handler: ProviderLookupDict = Field(default={
+        "chrome": lambda:
+            ['chrome@stable'],
+    }, exclude=True)
     
     _browser_abspaths: ClassVar[Dict[str, HostBinPath]] = {}
     
@@ -61,6 +78,15 @@ class PuppeteerBinProvider(BaseBinProvider):
         
         if self.puppeteer_browsers_dir:
             self.puppeteer_browsers_dir.mkdir(parents=True, exist_ok=True)
+    
+    def installed_browser_bins(self, browser_name: str='*') -> List[Path]:
+        # if on macOS, browser binary is inside a .app, otherwise it's just a plain binary
+        if platform.system().lower() == 'darwin':
+            # /data/lib/browsers/chrome/mac_arm-129.0.6668.58/chrome-mac-arm64/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing
+            return sorted(self.puppeteer_browsers_dir.glob(f'{browser_name}/mac*/chrome*/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing'))
+
+        # /data/lib/browsers/chrome/linux-131.0.6730.0/chrome-linux64/chrome
+        return sorted(self.puppeteer_browsers_dir.glob(f"{browser_name}/linux*/chrome*/chrome"))
 
     def on_get_abspath(self, bin_name: BinName, **context) -> Optional[HostBinPath]:
         assert bin_name == 'chrome', 'Only chrome is supported using the @puppeteer/browsers install method currently.'
@@ -70,21 +96,13 @@ class PuppeteerBinProvider(BaseBinProvider):
             return self._browser_abspaths[bin_name]
         
         # first time loading, find browser in self.puppeteer_browsers_dir by searching filesystem for installed binaries
-        browsers_present = [d.name for d in self.puppeteer_browsers_dir.glob("*")]
-        if bin_name in browsers_present:
-            candidates = []
-            # if on macOS, browser binary is inside a .app, otherwise it's just a plain binary
-            if platform.system().lower() == 'darwin':
-                # /data/lib/browsers/chrome/mac_arm-129.0.6668.58/chrome-mac-arm64/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing
-                candidates = sorted(self.puppeteer_browsers_dir.glob(f'/{bin_name}/mac*/chrome*/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing'))
-            else:
-                # /data/lib/browsers/chrome/linux-131.0.6730.0/chrome-linux64/chrome
-                candidates = sorted(self.puppeteer_browsers_dir.glob(f'/{bin_name}/linux*/chrome*/chrome'))
-            if candidates:
-                self._browser_abspaths[bin_name] = candidates[-1]
-                return self._browser_abspaths[bin_name]
+        matching_bins = [abspath for abspath in self.installed_browser_bins() if bin_name in str(abspath)]
+        if matching_bins:
+            newest_bin = matching_bins[-1]  # already sorted alphabetically, last should theoretically be highest version number
+            self._browser_abspaths[bin_name] = newest_bin
+            return self._browser_abspaths[bin_name]
         
-        return super().on_get_abspath(bin_name, **context)
+        return None
 
     def on_install(self, bin_name: str, packages: Optional[InstallArgs] = None, **context) -> str:
         """npx @puppeteer/browsers install chrome@stable"""
@@ -119,64 +137,6 @@ class PuppeteerBinProvider(BaseBinProvider):
 
 PUPPETEER_BINPROVIDER = PuppeteerBinProvider()
 
-CHROMIUM_BINARY_NAMES = [
-    'chromium',
-    'chromium-browser',
-    'chromium-browser-beta',
-    'chromium-browser-unstable',
-    'chromium-browser-canary',
-    'chromium-browser-dev'   
-    '/Applications/Chromium.app/Contents/MacOS/Chromium',
-]
-CHROME_BINARY_NAMES = [
-    'google-chrome',
-    'google-chrome-stable',
-    'google-chrome-beta',
-    'google-chrome-canary',
-    'google-chrome-unstable',
-    'google-chrome-dev',
-    # 'chrome',
-    '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
-    '/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary',
-]
-
-def autodetect_system_chrome_install(PATH=None):
-    for bin_name in CHROME_BINARY_NAMES + CHROMIUM_BINARY_NAMES:
-        abspath = bin_abspath(bin_name, PATH=env.PATH)
-        if abspath:
-            return abspath
-    return None
-
-class ChromeBinary(BaseBinary):
-    name: BinName = 'chrome'
-    binproviders_supported: List[InstanceOf[BinProvider]] = [PUPPETEER_BINPROVIDER, env]
-    
-    provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
-        env.name: {
-            'abspath': lambda:
-                autodetect_system_chrome_install(PATH=env.PATH),
-        },
-        PUPPETEER_BINPROVIDER.name: {
-            'packages': lambda:
-                ['chrome@stable'],
-        }
-    }
-
-    @staticmethod
-    def symlink_to_lib(binary, bin_dir=settings.CONFIG.BIN_DIR) -> None:
-        if not (binary.abspath and binary.abspath.exists()):
-            return
-        bin_dir.mkdir(parents=True, exist_ok=True)
-        symlink = bin_dir / binary.name
-        
-        if platform.system().lower() == 'darwin':
-            # if on macOS, browser binary is inside a .app, so we need to create a tiny bash script instead of a symlink
-            symlink.write_text(f"""#!/usr/bin/env bash\nexec '{binary.abspath}' "$@"\n""")
-            symlink.chmod(0o777)   # make sure its executable by everyone
-        else:
-            # otherwise on linux we can symlink directly to binary executable
-            symlink.symlink_to(binary.abspath)
-
 
 # ALTERNATIVE INSTALL METHOD using Ansible:
 # install_playbook = self.plugin_dir / 'install_puppeteer.yml'
@@ -192,18 +152,14 @@ class ChromeBinary(BaseBinary):
 # )
 
 
-CHROME_BINARY = ChromeBinary()
-
-PLUGIN_BINARIES = [CHROME_BINARY]
-
 class PuppeteerPlugin(BasePlugin):
     app_label: str ='puppeteer'
-    verbose_name: str = 'SingleFile'
+    verbose_name: str = 'Puppeteer & Playwright'
 
     hooks: List[InstanceOf[BaseHook]] = [
         PUPPETEER_CONFIG,
         PUPPETEER_BINPROVIDER,
-        CHROME_BINARY,
+        PUPPETEER_BINARY,
     ]
 
 

+ 1 - 1
archivebox/plugantic/base_hook.py

@@ -4,7 +4,7 @@ import inspect
 from huey.api import TaskWrapper
 
 from pathlib import Path
-from typing import List, Literal
+from typing import List, Literal, ClassVar
 from pydantic import BaseModel, ConfigDict, Field, computed_field
 
 

+ 2 - 2
archivebox/plugantic/views.py

@@ -70,7 +70,7 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
         "Provided By": [],
         "Found Abspath": [],
         "Related Configuration": [],
-        "Overrides": [],
+        # "Overrides": [],
         # "Description": [],
     }
 
@@ -109,7 +109,7 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
             )))
             # if not binary.provider_overrides:
                 # import ipdb; ipdb.set_trace()
-            rows['Overrides'].append(str(obj_to_yaml(binary.provider_overrides) or str(binary.provider_overrides))[:200])
+            # rows['Overrides'].append(str(obj_to_yaml(binary.provider_overrides) or str(binary.provider_overrides))[:200])
             # rows['Description'].append(binary.description)
 
     return TableContext(

+ 1 - 1
pyproject.toml

@@ -77,7 +77,7 @@ dependencies = [
     ############# VENDORED LIBS ######################
     # these can be safely omitted when installation subsystem does not provide these as packages (e.g. apt/debian)
     # archivebox will automatically load fallback vendored copies bundled via archivebox/vendor/__init__.py
-    "pydantic-pkgr>=0.3.0",
+    "pydantic-pkgr>=0.3.2",
     "atomicwrites==1.4.1",
     "pocket@git+https://github.com/tapanpandita/pocket.git@v0.3.7",
     "django-taggit==1.3.0",