plugins.py 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. __package__ = 'archivebox.plugantic'
  2. from typing import List
  3. from typing_extensions import Self
  4. from pydantic import (
  5. BaseModel,
  6. ConfigDict,
  7. Field,
  8. model_validator,
  9. validate_call,
  10. SerializeAsAny,
  11. )
  12. from .binaries import (
  13. Binary,
  14. WgetBinary,
  15. YtdlpBinary,
  16. )
  17. from .extractors import (
  18. Extractor,
  19. YtdlpExtractor,
  20. WgetExtractor,
  21. WarcExtractor,
  22. )
  23. from .replayers import (
  24. Replayer,
  25. MEDIA_REPLAYER,
  26. )
  27. from .configs import (
  28. ConfigSet,
  29. WGET_CONFIG,
  30. )
  31. class Plugin(BaseModel):
  32. model_config = ConfigDict(arbitrary_types_allowed=True, extra='ignore', populate_by_name=True)
  33. name: str = Field(default='baseplugin') # e.g. media
  34. description: str = Field(default='') # e.g. get media using yt-dlp
  35. configs: List[SerializeAsAny[ConfigSet]] = Field(default=[])
  36. binaries: List[SerializeAsAny[Binary]] = Field(default=[]) # e.g. [Binary(name='yt-dlp')]
  37. extractors: List[SerializeAsAny[Extractor]] = Field(default=[])
  38. replayers: List[SerializeAsAny[Replayer]] = Field(default=[])
  39. @model_validator(mode='after')
  40. def validate(self):
  41. self.description = self.description or self.name
  42. @validate_call
  43. def install(self) -> Self:
  44. new_binaries = []
  45. for idx, binary in enumerate(self.binaries):
  46. new_binaries.append(binary.install() or binary)
  47. return self.model_copy(update={
  48. 'binaries': new_binaries,
  49. })
  50. @validate_call
  51. def load(self, cache=True) -> Self:
  52. new_binaries = []
  53. for idx, binary in enumerate(self.binaries):
  54. new_binaries.append(binary.load(cache=cache) or binary)
  55. return self.model_copy(update={
  56. 'binaries': new_binaries,
  57. })
  58. @validate_call
  59. def load_or_install(self, cache=True) -> Self:
  60. new_binaries = []
  61. for idx, binary in enumerate(self.binaries):
  62. new_binaries.append(binary.load_or_install(cache=cache) or binary)
  63. return self.model_copy(update={
  64. 'binaries': new_binaries,
  65. })
  66. class YtdlpPlugin(Plugin):
  67. name: str = 'ytdlp'
  68. configs: List[SerializeAsAny[ConfigSet]] = []
  69. binaries: List[SerializeAsAny[Binary]] = [YtdlpBinary()]
  70. extractors: List[SerializeAsAny[Extractor]] = [YtdlpExtractor()]
  71. replayers: List[SerializeAsAny[Replayer]] = [MEDIA_REPLAYER]
  72. class WgetPlugin(Plugin):
  73. name: str = 'wget'
  74. configs: List[SerializeAsAny[ConfigSet]] = [*WGET_CONFIG]
  75. binaries: List[SerializeAsAny[Binary]] = [WgetBinary()]
  76. extractors: List[SerializeAsAny[Extractor]] = [WgetExtractor(), WarcExtractor()]
  77. YTDLP_PLUGIN = YtdlpPlugin()
  78. WGET_PLUGIN = WgetPlugin()
  79. PLUGINS = [
  80. YTDLP_PLUGIN,
  81. WGET_PLUGIN,
  82. ]
  83. LOADED_PLUGINS = PLUGINS
  84. import json
  85. for plugin in PLUGINS:
  86. try:
  87. json.dumps(plugin.model_json_schema(), indent=4)
  88. # print(json.dumps(plugin.model_json_schema(), indent=4))
  89. except Exception as err:
  90. print(f'Failed to generate JSON schema for {plugin.name}')
  91. raise
  92. # print('-------------------------------------BEFORE INSTALL---------------------------------')
  93. # for plugin in PLUGINS:
  94. # print(plugin.model_dump_json(indent=4))
  95. # print('-------------------------------------DURING LOAD/INSTALL---------------------------------')
  96. # for plugin in PLUGINS:
  97. # LOADED_PLUGINS.append(plugin.install())
  98. # print('-------------------------------------AFTER INSTALL---------------------------------')
  99. # for plugin in LOADED_PLUGINS:
  100. # print(plugin.model_dump_json(indent=4))