base_hook.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124
  1. __package__ = 'archivebox.plugantic'
import inspect
from pathlib import Path
from typing import List, Literal, ClassVar, get_args

from huey.api import TaskWrapper
from pydantic import BaseModel, ConfigDict
  7. HookType = Literal['CONFIG', 'BINPROVIDER', 'BINARY', 'EXTRACTOR', 'REPLAYER', 'CHECK', 'ADMINDATAVIEW', 'QUEUE']
  8. hook_type_names: List[HookType] = ['CONFIG', 'BINPROVIDER', 'BINARY', 'EXTRACTOR', 'REPLAYER', 'CHECK', 'ADMINDATAVIEW', 'QUEUE']
  9. class BaseHook(BaseModel):
  10. """
  11. A Plugin consists of a list of Hooks, applied to django.conf.settings when AppConfig.read() -> Plugin.register() is called.
  12. Plugin.register() then calls each Hook.register() on the provided settings.
  13. each Hook.regsiter() function (ideally pure) takes a django.conf.settings as input and returns a new one back.
  14. or
  15. it modifies django.conf.settings in-place to add changes corresponding to its HookType.
  16. e.g. for a HookType.CONFIG, the Hook.register() function places the hook in settings.CONFIG (and settings.HOOKS)
  17. An example of an impure Hook would be a CHECK that modifies settings but also calls django.core.checks.register(check).
  18. In practice any object that subclasses BaseHook and provides a .register() function can behave as a Hook.
  19. setup_django() -> imports all settings.INSTALLED_APPS...
  20. # django imports AppConfig, models, migrations, admins, etc. for all installed apps
  21. # django then calls AppConfig.ready() on each installed app...
  22. pkg_plugins.npm.NpmPlugin().AppConfig.ready() # called by django
  23. pkg_plugins.npm.NpmPlugin().register(settings) ->
  24. pkg_plugins.npm.NpmConfigSet().register(settings)
  25. plugantic.base_configset.BaseConfigSet().register(settings)
  26. plugantic.base_hook.BaseHook().register(settings, parent_plugin=pkg_plugins.npm.NpmPlugin())
  27. ...
  28. ...
  29. Both core ArchiveBox code and plugin code depend on python >= 3.10 and django >= 5.0 w/ sqlite and a filesystem.
  30. Core ArchiveBox code can depend only on python and the pip libraries it ships with, and can never depend on plugin code / node / other binaries.
  31. Plugin code can depend on archivebox core, other django apps, other pip libraries, and other plugins.
  32. Plugins can provide BinProviders + Binaries which can depend on arbitrary other binaries / package managers like curl / wget / yt-dlp / etc.
  33. The execution interface between plugins is simply calling builtinplugins.npm.... functions directly, django handles
  34. importing all plugin code. There is no need to manually register methods/classes, only register to call
  35. impure setup functions or provide runtime state.
  36. settings.CONFIGS / settings.BINPROVIDERS / settings.BINARIES /... etc. are reserved for dynamic runtime state only.
  37. This state is exposed to the broader system in a flat namespace, e.g. CONFIG.IS_DOCKER=True, or BINARIES = [
  38. ..., Binary('node', abspath='/usr/local/bin/node', version='22.2.0'), ...
  39. ]
  40. """
  41. model_config = ConfigDict(
  42. extra="allow",
  43. arbitrary_types_allowed=True,
  44. from_attributes=True,
  45. populate_by_name=True,
  46. validate_defaults=True,
  47. validate_assignment=False,
  48. revalidate_instances="subclass-instances",
  49. ignored_types=(TaskWrapper, ),
  50. )
  51. hook_type: ClassVar[HookType] # e.g. = 'CONFIG'
  52. # verbose_name: str = Field()
  53. _is_registered: bool = False
  54. _is_ready: bool = False
  55. @property
  56. def id(self) -> str:
  57. return self.__class__.__name__
  58. @property
  59. def hook_module(self) -> str:
  60. """e.g. extractor_plugins.singlefile.apps.SinglefileConfigSet"""
  61. return f'{self.__module__}.{self.__class__.__name__}'
  62. @property
  63. def hook_file(self) -> Path:
  64. """e.g. extractor_plugins.singlefile.apps.SinglefileConfigSet"""
  65. return Path(inspect.getfile(self.__class__))
  66. @property
  67. def plugin_module(self) -> str:
  68. """e.g. extractor_plugins.singlefile"""
  69. return f"{self.__module__}.{self.__class__.__name__}".split("archivebox.", 1)[-1].rsplit(".apps.", 1)[0]
  70. @property
  71. def plugin_dir(self) -> Path:
  72. return Path(inspect.getfile(self.__class__)).parent.resolve()
  73. @property
  74. def admin_url(self) -> str:
  75. # e.g. /admin/environment/config/LdapConfig/
  76. return f"/admin/environment/{self.hook_type.lower()}/{self.id}/"
  77. def register(self, settings, parent_plugin=None):
  78. """Load a record of an installed hook into global Django settings.HOOKS at runtime."""
  79. self._plugin = parent_plugin # for debugging only, never rely on this!
  80. # assert json.dumps(self.model_json_schema(), indent=4), f"Hook {self.hook_module} has invalid JSON schema."
  81. # record installed hook in settings.HOOKS
  82. settings.HOOKS[self.id] = self
  83. if settings.HOOKS[self.id]._is_registered:
  84. raise Exception(f"Tried to run {self.hook_module}.register() but its already been called!")
  85. settings.HOOKS[self.id]._is_registered = True
  86. # print("REGISTERED HOOK:", self.hook_module)
  87. def ready(self, settings):
  88. """Runs any runtime code needed when AppConfig.ready() is called (after all models are imported)."""
  89. assert self.id in settings.HOOKS, f"Tried to ready hook {self.hook_module} but it is not registered in settings.HOOKS."
  90. if settings.HOOKS[self.id]._is_ready:
  91. raise Exception(f"Tried to run {self.hook_module}.ready() but its already been called!")
  92. settings.HOOKS[self.id]._is_ready = True