apps.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. import platform
  2. from pathlib import Path
  3. from typing import List, Optional, Dict, ClassVar
  4. from django.conf import settings
  5. # Depends on other PyPI/vendor packages:
  6. from pydantic import InstanceOf, Field
  7. from pydantic_pkgr import (
  8. BinProvider,
  9. BinName,
  10. BinProviderName,
  11. ProviderLookupDict,
  12. bin_abspath,
  13. )
  14. # Depends on other Django apps:
  15. from plugantic.base_plugin import BasePlugin
  16. from plugantic.base_configset import BaseConfigSet, ConfigSectionName
  17. from plugantic.base_binary import BaseBinary, env
  18. # from plugantic.base_extractor import BaseExtractor
  19. # from plugantic.base_queue import BaseQueue
  20. from plugantic.base_hook import BaseHook
  21. # Depends on Other Plugins:
  22. from pkg_plugins.puppeteer.apps import PUPPETEER_BINPROVIDER
  23. from pkg_plugins.playwright.apps import PLAYWRIGHT_BINPROVIDER
  24. CHROMIUM_BINARY_NAMES_LINUX = [
  25. "chromium",
  26. "chromium-browser",
  27. "chromium-browser-beta",
  28. "chromium-browser-unstable",
  29. "chromium-browser-canary",
  30. "chromium-browser-dev",
  31. ]
  32. CHROMIUM_BINARY_NAMES_MACOS = ["/Applications/Chromium.app/Contents/MacOS/Chromium"]
  33. CHROMIUM_BINARY_NAMES = CHROMIUM_BINARY_NAMES_LINUX + CHROMIUM_BINARY_NAMES_MACOS
  34. CHROME_BINARY_NAMES_LINUX = [
  35. "google-chrome",
  36. "google-chrome-stable",
  37. "google-chrome-beta",
  38. "google-chrome-canary",
  39. "google-chrome-unstable",
  40. "google-chrome-dev",
  41. "chrome"
  42. ]
  43. CHROME_BINARY_NAMES_MACOS = [
  44. "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
  45. "/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary",
  46. ]
  47. CHROME_BINARY_NAMES = CHROME_BINARY_NAMES_LINUX + CHROME_BINARY_NAMES_MACOS
  48. def autodetect_system_chrome_install(PATH=None) -> Optional[Path]:
  49. for bin_name in CHROME_BINARY_NAMES + CHROMIUM_BINARY_NAMES:
  50. abspath = bin_abspath(bin_name, PATH=env.PATH)
  51. if abspath:
  52. return abspath
  53. return None
  54. def create_macos_app_symlink(target: Path, shortcut: Path):
  55. """
  56. on macOS, some binaries are inside of .app, so we need to
  57. create a tiny bash script instead of a symlink
  58. (so that ../ parent relationships are relative to original .app instead of callsite dir)
  59. """
  60. # TODO: should we enforce this? is it useful in any other situation?
  61. # if platform.system().lower() != 'darwin':
  62. # raise Exception(...)
  63. shortcut.write_text(f"""#!/usr/bin/env bash\nexec '{target}' "$@"\n""")
  64. shortcut.chmod(0o777) # make sure its executable by everyone
  65. ###################### Config ##########################
  66. class ChromeDependencyConfigs(BaseConfigSet):
  67. section: ClassVar[ConfigSectionName] = "DEPENDENCY_CONFIG"
  68. CHROME_BINARY: str = Field(default='chrome')
  69. CHROME_ARGS: Optional[List[str]] = Field(default=None)
  70. CHROME_EXTRA_ARGS: List[str] = []
  71. CHROME_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
  72. # def load(self) -> Self:
  73. # # for each field in the model, load its value
  74. # # load from each source in order of precedence (lowest to highest):
  75. # # - schema default
  76. # # - ArchiveBox.conf INI file
  77. # # - environment variables
  78. # # - command-line arguments
  79. # LOADED_VALUES: Dict[str, Any] = {}
  80. # for field_name, field in self.__fields__.items():
  81. # def_value = field.default_factory() if field.default_factory else field.default
  82. # ini_value = settings.INI_CONFIG.get_value(field_name)
  83. # env_value = settings.ENV_CONFIG.get_value(field_name)
  84. # cli_value = settings.CLI_CONFIG.get_value(field_name)
  85. # run_value = settings.RUN_CONFIG.get_value(field_name)
  86. # value = run_value or cli_value or env_value or ini_value or def_value
  87. class ChromeConfigs(ChromeDependencyConfigs):
  88. # section: ConfigSectionName = 'ALL_CONFIGS'
  89. pass
  90. DEFAULT_GLOBAL_CONFIG = {
  91. }
  92. CHROME_CONFIG = ChromeConfigs(**DEFAULT_GLOBAL_CONFIG)
  93. class ChromeBinary(BaseBinary):
  94. name: BinName = CHROME_CONFIG.CHROME_BINARY
  95. binproviders_supported: List[InstanceOf[BinProvider]] = [PUPPETEER_BINPROVIDER, env, PLAYWRIGHT_BINPROVIDER]
  96. provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
  97. env.name: {
  98. 'abspath': lambda: autodetect_system_chrome_install(PATH=env.PATH), # /usr/bin/google-chrome-stable
  99. },
  100. PUPPETEER_BINPROVIDER.name: {
  101. 'packages': lambda: ['chrome@stable'], # npx @puppeteer/browsers install chrome@stable
  102. },
  103. PLAYWRIGHT_BINPROVIDER.name: {
  104. 'packages': lambda: ['chromium'], # playwright install chromium
  105. },
  106. }
  107. @staticmethod
  108. def symlink_to_lib(binary, bin_dir=settings.CONFIG.BIN_DIR) -> None:
  109. if not (binary.abspath and binary.abspath.exists()):
  110. return
  111. bin_dir.mkdir(parents=True, exist_ok=True)
  112. symlink = bin_dir / binary.name
  113. if platform.system().lower() == 'darwin':
  114. # if on macOS, browser binary is inside a .app, so we need to create a tiny bash script instead of a symlink
  115. create_macos_app_symlink(binary.abspath, symlink)
  116. else:
  117. # otherwise on linux we can symlink directly to binary executable
  118. symlink.symlink_to(binary.abspath)
  119. CHROME_BINARY = ChromeBinary()
  120. PLUGIN_BINARIES = [CHROME_BINARY]
  121. class ChromePlugin(BasePlugin):
  122. app_label: str = 'chrome'
  123. verbose_name: str = 'Chrome Browser'
  124. hooks: List[InstanceOf[BaseHook]] = [
  125. CHROME_CONFIG,
  126. CHROME_BINARY,
  127. ]
  128. PLUGIN = ChromePlugin()
  129. PLUGIN.register(settings)
  130. DJANGO_APP = PLUGIN.AppConfig