apps.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. __package__ = 'archivebox.plugins_extractor.singlefile'
  2. from pathlib import Path
  3. from typing import List, Dict, Optional, ClassVar
  4. # from typing_extensions import Self
  5. # Depends on other PyPI/vendor packages:
  6. from pydantic import InstanceOf, Field, validate_call
  7. from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName, bin_abspath, ShallowBinary
  8. # Depends on other Django apps:
  9. from abx.archivebox.base_plugin import BasePlugin
  10. from abx.archivebox.base_configset import BaseConfigSet
  11. from abx.archivebox.base_binary import BaseBinary, env
  12. from abx.archivebox.base_extractor import BaseExtractor
  13. from abx.archivebox.base_queue import BaseQueue
  14. from abx.archivebox.base_hook import BaseHook
  15. # Depends on Other Plugins:
  16. from archivebox.config import ARCHIVING_CONFIG
  17. from plugins_pkg.npm.apps import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
  18. ###################### Config ##########################
  19. class SinglefileConfig(BaseConfigSet):
  20. SAVE_SINGLEFILE: bool = True
  21. SINGLEFILE_USER_AGENT: str = Field(default=lambda: ARCHIVING_CONFIG.USER_AGENT)
  22. SINGLEFILE_TIMEOUT: int = Field(default=lambda: ARCHIVING_CONFIG.TIMEOUT)
  23. SINGLEFILE_CHECK_SSL_VALIDITY: bool = Field(default=lambda: ARCHIVING_CONFIG.CHECK_SSL_VALIDITY)
  24. SINGLEFILE_COOKIES_FILE: Optional[Path] = Field(default=lambda: ARCHIVING_CONFIG.COOKIES_FILE)
  25. SINGLEFILE_BINARY: str = Field(default='single-file')
  26. SINGLEFILE_EXTRA_ARGS: List[str] = []
  27. SINGLEFILE_CONFIG = SinglefileConfig()
  28. SINGLEFILE_MIN_VERSION = '1.1.54'
  29. SINGLEFILE_MAX_VERSION = '1.1.60'
  30. class SinglefileBinary(BaseBinary):
  31. name: BinName = SINGLEFILE_CONFIG.SINGLEFILE_BINARY
  32. binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER, env]
  33. provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
  34. env.name: {
  35. 'abspath': lambda:
  36. bin_abspath(SINGLEFILE_CONFIG.SINGLEFILE_BINARY, PATH=env.PATH)
  37. or bin_abspath('single-file', PATH=env.PATH)
  38. or bin_abspath('single-file-node.js', PATH=env.PATH),
  39. },
  40. LIB_NPM_BINPROVIDER.name: {
  41. "abspath": lambda:
  42. bin_abspath(SINGLEFILE_CONFIG.SINGLEFILE_BINARY, PATH=env.PATH)
  43. or bin_abspath("single-file", PATH=LIB_NPM_BINPROVIDER.PATH)
  44. or bin_abspath("single-file-node.js", PATH=LIB_NPM_BINPROVIDER.PATH),
  45. "packages": lambda:
  46. [f"single-file-cli@>={SINGLEFILE_MIN_VERSION} <{SINGLEFILE_MAX_VERSION}"],
  47. },
  48. SYS_NPM_BINPROVIDER.name: {
  49. "packages": lambda:
  50. [], # prevent modifying system global npm packages
  51. },
  52. }
  53. def install(self, binprovider_name: Optional[BinProviderName]=None, **kwargs) -> ShallowBinary:
  54. # force install to only use lib/npm provider, we never want to modify global NPM packages
  55. return BaseBinary.install(self, binprovider_name=binprovider_name or LIB_NPM_BINPROVIDER.name, **kwargs)
  56. def load_or_install(self, binprovider_name: Optional[BinProviderName]=None, fresh=False, **kwargs) -> ShallowBinary:
  57. try:
  58. return self.load(fresh=fresh)
  59. except Exception:
  60. # force install to only use lib/npm provider, we never want to modify global NPM packages
  61. return BaseBinary.install(self, binprovider_name=binprovider_name or LIB_NPM_BINPROVIDER.name, **kwargs)
  62. SINGLEFILE_BINARY = SinglefileBinary()
  63. PLUGIN_BINARIES = [SINGLEFILE_BINARY]
  64. class SinglefileExtractor(BaseExtractor):
  65. name: str = 'singlefile'
  66. binary: BinName = SINGLEFILE_BINARY.name
  67. def get_output_path(self, snapshot) -> Path:
  68. return Path(snapshot.link_dir) / 'singlefile.html'
  69. SINGLEFILE_BINARY = SinglefileBinary()
  70. SINGLEFILE_EXTRACTOR = SinglefileExtractor()
  71. class SinglefileQueue(BaseQueue):
  72. name: str = 'singlefile'
  73. binaries: List[InstanceOf[BaseBinary]] = [SINGLEFILE_BINARY]
  74. SINGLEFILE_QUEUE = SinglefileQueue()
  75. class SinglefilePlugin(BasePlugin):
  76. app_label: str ='singlefile'
  77. verbose_name: str = 'SingleFile'
  78. hooks: List[InstanceOf[BaseHook]] = [
  79. SINGLEFILE_CONFIG,
  80. SINGLEFILE_BINARY,
  81. SINGLEFILE_EXTRACTOR,
  82. SINGLEFILE_QUEUE,
  83. ]
  84. PLUGIN = SinglefilePlugin()
  85. # PLUGIN.register(settings)
  86. DJANGO_APP = PLUGIN.AppConfig