binaries.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. __package__ = 'plugins_extractor.chrome'
  2. import os
  3. import platform
  4. from pathlib import Path
  5. from typing import List, Optional
  6. from pydantic import InstanceOf
  7. from pydantic_pkgr import (
  8. BinProvider,
  9. BinName,
  10. BinaryOverrides,
  11. bin_abspath,
  12. )
  13. from abx.archivebox.base_binary import BaseBinary, env, apt, brew
  14. # Depends on Other Plugins:
  15. from archivebox.config import CONSTANTS
  16. from archivebox.config.common import SHELL_CONFIG
  17. from plugins_pkg.puppeteer.binproviders import PUPPETEER_BINPROVIDER
  18. from plugins_pkg.playwright.binproviders import PLAYWRIGHT_BINPROVIDER
  19. from .config import CHROME_CONFIG
  20. CHROMIUM_BINARY_NAMES_LINUX = [
  21. "chromium",
  22. "chromium-browser",
  23. "chromium-browser-beta",
  24. "chromium-browser-unstable",
  25. "chromium-browser-canary",
  26. "chromium-browser-dev",
  27. ]
  28. CHROMIUM_BINARY_NAMES_MACOS = ["/Applications/Chromium.app/Contents/MacOS/Chromium"]
  29. CHROMIUM_BINARY_NAMES = CHROMIUM_BINARY_NAMES_LINUX + CHROMIUM_BINARY_NAMES_MACOS
  30. CHROME_BINARY_NAMES_LINUX = [
  31. "google-chrome",
  32. "google-chrome-stable",
  33. "google-chrome-beta",
  34. "google-chrome-canary",
  35. "google-chrome-unstable",
  36. "google-chrome-dev",
  37. "chrome"
  38. ]
  39. CHROME_BINARY_NAMES_MACOS = [
  40. "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
  41. "/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary",
  42. ]
  43. CHROME_BINARY_NAMES = CHROME_BINARY_NAMES_LINUX + CHROME_BINARY_NAMES_MACOS
  44. APT_DEPENDENCIES = [
  45. 'apt-transport-https', 'at-spi2-common', 'chromium-browser',
  46. 'fontconfig', 'fonts-freefont-ttf', 'fonts-ipafont-gothic', 'fonts-kacst', 'fonts-khmeros', 'fonts-liberation', 'fonts-noto', 'fonts-noto-color-emoji', 'fonts-symbola', 'fonts-thai-tlwg', 'fonts-tlwg-loma-otf', 'fonts-unifont', 'fonts-wqy-zenhei',
  47. 'libasound2', 'libatk-bridge2.0-0', 'libatk1.0-0', 'libatspi2.0-0', 'libavahi-client3', 'libavahi-common-data', 'libavahi-common3', 'libcairo2', 'libcups2',
  48. 'libdbus-1-3', 'libdrm2', 'libfontenc1', 'libgbm1', 'libglib2.0-0', 'libice6', 'libnspr4', 'libnss3', 'libsm6', 'libunwind8', 'libx11-6', 'libxaw7', 'libxcb1',
  49. 'libxcomposite1', 'libxdamage1', 'libxext6', 'libxfixes3', 'libxfont2', 'libxkbcommon0', 'libxkbfile1', 'libxmu6', 'libxpm4', 'libxrandr2', 'libxt6', 'x11-utils', 'x11-xkb-utils', 'xfonts-encodings',
  50. ]
  51. def autodetect_system_chrome_install(PATH=None) -> Optional[Path]:
  52. for bin_name in CHROME_BINARY_NAMES + CHROMIUM_BINARY_NAMES:
  53. abspath = bin_abspath(bin_name, PATH=env.PATH)
  54. if abspath:
  55. return abspath
  56. return None
  57. def create_macos_app_symlink(target: Path, shortcut: Path):
  58. """
  59. on macOS, some binaries are inside of .app, so we need to
  60. create a tiny bash script instead of a symlink
  61. (so that ../ parent relationships are relative to original .app instead of callsite dir)
  62. """
  63. # TODO: should we enforce this? is it useful in any other situation?
  64. # if platform.system().lower() != 'darwin':
  65. # raise Exception(...)
  66. shortcut.unlink(missing_ok=True)
  67. shortcut.write_text(f"""#!/usr/bin/env bash\nexec '{target}' "$@"\n""")
  68. shortcut.chmod(0o777) # make sure its executable by everyone
  69. ###################### Config ##########################
  70. class ChromeBinary(BaseBinary):
  71. name: BinName = CHROME_CONFIG.CHROME_BINARY
  72. binproviders_supported: List[InstanceOf[BinProvider]] = [PUPPETEER_BINPROVIDER, env, PLAYWRIGHT_BINPROVIDER, apt, brew]
  73. overrides: BinaryOverrides = {
  74. env.name: {
  75. 'abspath': lambda: autodetect_system_chrome_install(PATH=env.PATH), # /usr/bin/google-chrome-stable
  76. },
  77. PUPPETEER_BINPROVIDER.name: {
  78. 'packages': ['chrome@stable'], # npx @puppeteer/browsers install chrome@stable
  79. },
  80. PLAYWRIGHT_BINPROVIDER.name: {
  81. 'packages': ['chromium'], # playwright install chromium
  82. },
  83. apt.name: {
  84. 'packages': APT_DEPENDENCIES,
  85. },
  86. brew.name: {
  87. 'packages': ['--cask', 'chromium'],
  88. },
  89. }
  90. @staticmethod
  91. def symlink_to_lib(binary, bin_dir=None) -> None:
  92. from archivebox.config.common import STORAGE_CONFIG
  93. bin_dir = bin_dir or STORAGE_CONFIG.LIB_DIR / 'bin'
  94. if not (binary.abspath and os.access(binary.abspath, os.F_OK)):
  95. return
  96. bin_dir.mkdir(parents=True, exist_ok=True)
  97. symlink = bin_dir / binary.name
  98. try:
  99. if platform.system().lower() == 'darwin':
  100. # if on macOS, browser binary is inside a .app, so we need to create a tiny bash script instead of a symlink
  101. create_macos_app_symlink(binary.abspath, symlink)
  102. else:
  103. # otherwise on linux we can symlink directly to binary executable
  104. symlink.unlink(missing_ok=True)
  105. symlink.symlink_to(binary.abspath)
  106. except Exception as err:
  107. # print(f'[red]:warning: Failed to symlink {symlink} -> {binary.abspath}[/red] {err}')
  108. # not actually needed, we can just run without it
  109. pass
  110. @staticmethod
  111. def chrome_cleanup_lockfile():
  112. """
  113. Cleans up any state or runtime files that chrome leaves behind when killed by
  114. a timeout or other error
  115. """
  116. lock_file = Path("~/.config/chromium/SingletonLock").expanduser()
  117. if SHELL_CONFIG.IN_DOCKER and os.access(lock_file, os.F_OK):
  118. lock_file.unlink()
  119. if CHROME_CONFIG.CHROME_USER_DATA_DIR:
  120. if os.access(CHROME_CONFIG.CHROME_USER_DATA_DIR / 'SingletonLock', os.F_OK):
  121. lock_file.unlink()
  122. CHROME_BINARY = ChromeBinary()