Browse Source

add new pydantic_settings based loader for ConfigSets

Nick Sweeting 1 năm trước cách đây
mục cha
commit
b6cfeb8d40

+ 2 - 3
archivebox/builtin_plugins/chrome/apps.py

@@ -1,7 +1,6 @@
 import platform
 from pathlib import Path
-from typing import List, Optional, Dict, Any
-from typing_extensions import Self
+from typing import List, Optional, Dict, ClassVar
 
 from django.conf import settings
 
@@ -79,7 +78,7 @@ def create_macos_app_symlink(target: Path, shortcut: Path):
 
 
 class ChromeDependencyConfigs(BaseConfigSet):
-    section: ConfigSectionName = 'DEPENDENCY_CONFIG'
+    section: ClassVar[ConfigSectionName] = "DEPENDENCY_CONFIG"
 
     CHROME_BINARY: str = Field(default='chrome')
     CHROME_ARGS: Optional[List[str]] = Field(default=None)

+ 2 - 2
archivebox/builtin_plugins/pip/apps.py

@@ -2,7 +2,7 @@ import os
 import sys
 import inspect
 from pathlib import Path
-from typing import List, Dict, Optional
+from typing import List, Dict, Optional, ClassVar
 from pydantic import InstanceOf, Field
 
 import django
@@ -23,7 +23,7 @@ from plugantic.base_hook import BaseHook
 
 
 class PipDependencyConfigs(BaseConfigSet):
-    section: ConfigSectionName = 'DEPENDENCY_CONFIG'
+    section: ClassVar[ConfigSectionName] = "DEPENDENCY_CONFIG"
 
     USE_PIP: bool = True
     PIP_BINARY: str = Field(default='pip')

+ 5 - 5
archivebox/builtin_plugins/singlefile/apps.py

@@ -1,7 +1,7 @@
 __package__ = 'archivebox.builtin_plugins.singlefile'
 
 from pathlib import Path
-from typing import List, Dict, Optional
+from typing import List, Dict, Optional, ClassVar
 from typing_extensions import Self
 
 from django.conf import settings
@@ -25,13 +25,13 @@ from builtin_plugins.npm.apps import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
 ###################### Config ##########################
 
 class SinglefileToggleConfigs(BaseConfigSet):
-    section: ConfigSectionName = 'ARCHIVE_METHOD_TOGGLES'
+    section: ClassVar[ConfigSectionName] = 'ARCHIVE_METHOD_TOGGLES'
 
     SAVE_SINGLEFILE: bool = True
 
 
 class SinglefileOptionsConfigs(BaseConfigSet):
-    section: ConfigSectionName = 'ARCHIVE_METHOD_OPTIONS'
+    section: ClassVar[ConfigSectionName] = 'ARCHIVE_METHOD_OPTIONS'
 
     # loaded from shared config
     SINGLEFILE_USER_AGENT: str = Field(default='', alias='USER_AGENT')
@@ -42,7 +42,7 @@ class SinglefileOptionsConfigs(BaseConfigSet):
 
 
 class SinglefileDependencyConfigs(BaseConfigSet):
-    section: ConfigSectionName = 'DEPENDENCY_CONFIG'
+    section: ClassVar[ConfigSectionName] = 'DEPENDENCY_CONFIG'
 
     SINGLEFILE_BINARY: str = Field(default='wget')
     SINGLEFILE_ARGS: Optional[List[str]] = Field(default=None)
@@ -50,7 +50,7 @@ class SinglefileDependencyConfigs(BaseConfigSet):
     SINGLEFILE_DEFAULT_ARGS: List[str] = ['--timeout={TIMEOUT-10}']
 
 class SinglefileConfigs(SinglefileToggleConfigs, SinglefileOptionsConfigs, SinglefileDependencyConfigs):
-    # section: ConfigSectionName = 'ALL_CONFIGS'
+    # section: ClassVar[ConfigSectionName] = 'ALL_CONFIGS'
     pass
 
 DEFAULT_GLOBAL_CONFIG = {

+ 2 - 2
archivebox/builtin_plugins/ytdlp/apps.py

@@ -1,4 +1,4 @@
-from typing import List, Dict
+from typing import List, Dict, ClassVar
 from subprocess import run, PIPE
 from pydantic import InstanceOf, Field
 
@@ -16,7 +16,7 @@ from builtin_plugins.pip.apps import pip
 
 
 class YtdlpDependencyConfigs(BaseConfigSet):
-    section: ConfigSectionName = 'DEPENDENCY_CONFIG'
+    section: ClassVar[ConfigSectionName] = "DEPENDENCY_CONFIG"
 
     USE_YTDLP: bool = True
 

+ 160 - 10
archivebox/plugantic/base_configset.py

@@ -1,36 +1,186 @@
 __package__ = 'archivebox.plugantic'
 
 
-from typing import List, Literal
+from pathlib import Path
+from typing import List, Literal, Type, Tuple, Callable, ClassVar
 
-from .base_hook import BaseHook, HookType
-from ..config_stubs import AttrDict
+from benedict import benedict
+from pydantic import model_validator, TypeAdapter
+from pydantic_settings import BaseSettings, SettingsConfigDict, PydanticBaseSettingsSource
+from pydantic_settings.sources import TomlConfigSettingsSource
+
+from django.conf import settings
 
+from .base_hook import BaseHook, HookType
+from . import ini_to_toml
 
 ConfigSectionName = Literal[
+    'SHELL_CONFIG',
     'GENERAL_CONFIG',
+    'SERVER_CONFIG',
     'ARCHIVE_METHOD_TOGGLES',
     'ARCHIVE_METHOD_OPTIONS',
+    'SEARCH_BACKEND_CONFIG',
     'DEPENDENCY_CONFIG',
 ]
 ConfigSectionNames: List[ConfigSectionName] = [
+    'SHELL_CONFIG',
     'GENERAL_CONFIG',
+    'SERVER_CONFIG',
     'ARCHIVE_METHOD_TOGGLES',
     'ARCHIVE_METHOD_OPTIONS',
+    'SEARCH_BACKEND_CONFIG',
     'DEPENDENCY_CONFIG',
 ]
 
-
-class BaseConfigSet(BaseHook):
-    hook_type: HookType = 'CONFIG'
-
-    section: ConfigSectionName = 'GENERAL_CONFIG'
+class FlatTomlConfigSettingsSource(TomlConfigSettingsSource):
+    """
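+    A settings source that loads variables from a TOML file, flattening known [SECTION] tables into top-level keys and dropping keys not defined on settings_cls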
+    """
+
+    def __init__(
+        self,
+        settings_cls: type[BaseSettings],
+        toml_file: Path | None=None,
+    ):
+        self.toml_file_path = toml_file or settings_cls.model_config.get("toml_file")
+        
+        self.nested_toml_data = self._read_files(self.toml_file_path)
+        self.toml_data = {}
+        for section_name, section in self.nested_toml_data.items():
+            if section_name in ConfigSectionNames and isinstance(section, dict):
+                # value is nested, flatten it
+                for key, value in section.items():
+                    self.toml_data[key] = value
+            else:
+                # value is already flat, just set it as-is
+                self.toml_data[section_name] = section
+                
+        # filter toml_data to only include keys that are defined on the settings_cls
+        self.toml_data = {
+            key: value
+            for key, value in self.toml_data.items()
+            if key in settings_cls.model_fields
+        }
+            
+        super(TomlConfigSettingsSource, self).__init__(settings_cls, self.toml_data)
+
+
+class ArchiveBoxBaseConfig(BaseSettings):
+    """
+    This is the base class for an ArchiveBox ConfigSet.
+    It handles loading values from schema defaults, ArchiveBox.conf TOML config, and environment variables.
+
+    class WgetConfig(ArchiveBoxBaseConfig):
+        WGET_BINARY: str = Field(default='wget', alias='WGET_BINARY_PATH')
+
+    c = WgetConfig()
+    print(c.WGET_BINARY)                    # outputs: wget
+
+    # you can mutate a process environment variable and reload the config by calling .__init__() again
+    os.environ['WGET_BINARY_PATH'] = 'wget2'
+    c.__init__()
+
+    print(c.WGET_BINARY)                    # outputs: wget2
+
+    """
+    
+    # these pydantic config options are all VERY carefully chosen, make sure to test thoroughly before changing!!!
+    model_config = SettingsConfigDict(
+        validate_default=False,
+        case_sensitive=True,
+        extra="ignore",
+        arbitrary_types_allowed=False,
+        populate_by_name=True,
+        from_attributes=True,
+        loc_by_alias=False,
+        validate_assignment=True,
+        validate_return=True,
+        revalidate_instances="always",
+    )
+
+    @classmethod
+    def settings_customise_sources(
+        cls,
+        settings_cls: Type[BaseSettings],
+        init_settings: PydanticBaseSettingsSource,
+        env_settings: PydanticBaseSettingsSource,
+        dotenv_settings: PydanticBaseSettingsSource,
+        file_secret_settings: PydanticBaseSettingsSource,
+    ) -> Tuple[PydanticBaseSettingsSource, ...]:
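+        """Defines the config source precedence. NOTE(review): pydantic-settings gives earlier entries in the returned tuple higher priority, so as written: init kwargs -> ArchiveBox.conf (TOML) -> Environment variables -> Schema defaults (confirm TOML is meant to override env)"""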
+        
+        ARCHIVEBOX_CONFIG_FILE = settings.DATA_DIR / "ArchiveBox.conf"
+        ARCHIVEBOX_CONFIG_FILE_BAK = ARCHIVEBOX_CONFIG_FILE.parent / ".ArchiveBox.conf.bak"
+        
+        # import ipdb; ipdb.set_trace()
+        
+        # if ArchiveBox.conf does not exist yet, return defaults -> env order
+        if not ARCHIVEBOX_CONFIG_FILE.is_file():
+            return (
+                init_settings,
+                env_settings,
+            )
+        
+        # if ArchiveBox.conf exists and is in TOML format, return default -> TOML -> env order
+        try:
+            return (
+                init_settings,
+                FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE),
+                env_settings,
+            )
+        except Exception as err:
+            if err.__class__.__name__ != "TOMLDecodeError":
+                raise
+            # if ArchiveBox.conf exists and is in INI format, convert it then return default -> TOML -> env order
+
+            # Convert ArchiveBox.conf in INI format to TOML and save original to .ArchiveBox.conf.bak
+            original_ini = ARCHIVEBOX_CONFIG_FILE.read_text()
+            ARCHIVEBOX_CONFIG_FILE_BAK.write_text(original_ini)
+            new_toml = ini_to_toml.convert(original_ini)
+            ARCHIVEBOX_CONFIG_FILE.write_text(new_toml)
+
+            return (
+                init_settings,
+                FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE),
+                env_settings,
+            )
+
+    @model_validator(mode="after")
+    def fill_defaults(self):
+        """Replace any value that is still a callable with the result of calling the field's default on the config loaded so far"""
+
+        for key, field in self.model_fields.items():
+            config_so_far = self.model_dump()
+            value = getattr(self, key)
+            if isinstance(value, Callable):
+                # if value is a function, execute it to get the actual value, passing existing config as a dict arg
+                fallback_value = field.default(config_so_far)
+
+                # check to make sure default factory return value matches type annotation
+                TypeAdapter(field.annotation).validate_python(fallback_value)
+
+                # set generated default value as final validated value
+                setattr(self, key, fallback_value)
+        return self
+
+class BaseConfigSet(ArchiveBoxBaseConfig, BaseHook):      # type: ignore[type-arg]
+    hook_type: ClassVar[HookType] = 'CONFIG'
+
+    section: ClassVar[ConfigSectionName] = 'GENERAL_CONFIG'
 
     def register(self, settings, parent_plugin=None):
         # self._plugin = parent_plugin                                      # for debugging only, never rely on this!
 
-        settings.CONFIGS = getattr(settings, "CONFIGS", None) or AttrDict({})
-        settings.CONFIGS[self.id] = self
+        settings.FLAT_CONFIG = getattr(settings, "FLAT_CONFIG", None) or benedict({})
+        settings.CONFIGS = getattr(settings, "CONFIGS", None) or benedict({})
+        
+        # pass FLAT_CONFIG so far into our config model to load it
+        loaded_config = self.__class__(**settings.FLAT_CONFIG)
+        # then dump our parsed config back into FLAT_CONFIG for the next plugin to use
+        settings.FLAT_CONFIG.merge(loaded_config.model_dump())
+        
+        settings.CONFIGS[self.id] = loaded_config
 
         super().register(settings, parent_plugin=parent_plugin)
 

+ 29 - 1
pdm.lock

@@ -5,7 +5,7 @@
 groups = ["default", "all", "ldap", "sonic"]
 strategy = ["inherit_metadata"]
 lock_version = "4.5.0"
-content_hash = "sha256:d7c9e7a40b0a794986eb3f6a3774d5003c9b39985411f63c1aa387dda9986ada"
+content_hash = "sha256:6b062624538c5dfe6b1bd5be32546fef02b70ee73c4a1710a8eea9764bdd21d8"
 
 [[metadata.targets]]
 requires_python = "==3.11.*"
@@ -1147,6 +1147,22 @@ files = [
     {file = "pydantic_pkgr-0.3.5.tar.gz", hash = "sha256:36444778d53d5cbdc261086fda0d65fb519a072105f5d1c7d88e224bd197dd1d"},
 ]
 
+[[package]]
+name = "pydantic-settings"
+version = "2.5.2"
+requires_python = ">=3.8"
+summary = "Settings management using Pydantic"
+groups = ["default"]
+marker = "python_version == \"3.11\""
+dependencies = [
+    "pydantic>=2.7.0",
+    "python-dotenv>=0.21.0",
+]
+files = [
+    {file = "pydantic_settings-2.5.2-py3-none-any.whl", hash = "sha256:2c912e55fd5794a59bf8c832b9de832dcfdf4778d79ff79b708744eed499a907"},
+    {file = "pydantic_settings-2.5.2.tar.gz", hash = "sha256:f90b139682bee4d2065273d5185d71d37ea46cfe57e1b5ae184fc6a0b2484ca0"},
+]
+
 [[package]]
 name = "pygments"
 version = "2.18.0"
@@ -1277,6 +1293,18 @@ files = [
     {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"},
 ]
 
+[[package]]
+name = "python-dotenv"
+version = "1.0.1"
+requires_python = ">=3.8"
+summary = "Read key-value pairs from a .env file and set them as environment variables"
+groups = ["default"]
+marker = "python_version == \"3.11\""
+files = [
+    {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"},
+    {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"},
+]
+
 [[package]]
 name = "python-fsutil"
 version = "0.14.1"

+ 1 - 0
pyproject.toml

@@ -84,6 +84,7 @@ dependencies = [
     "base32-crockford==0.3.0",
     ############# Extractor Dependencies #############
     "yt-dlp>=2024.8.6",               # for: media
+    "pydantic-settings>=2.5.2",
 ]
 
 # pdm lock --group=':all' 

+ 2 - 0
requirements.txt

@@ -76,6 +76,7 @@ pycryptodomex==3.20.0; python_version == "3.11"
 pydantic==2.9.2; python_version == "3.11"
 pydantic-core==2.23.4; python_version == "3.11"
 pydantic-pkgr==0.3.5; python_version == "3.11"
+pydantic-settings==2.5.2; python_version == "3.11"
 pygments==2.18.0; python_version == "3.11"
 pyopenssl==24.2.1; python_version == "3.11"
 python-benedict[html,toml,xls,xml,yaml]==0.33.2; python_version == "3.11"
@@ -83,6 +84,7 @@ python-benedict[io,parse]==0.33.2; python_version == "3.11"
 python-benedict[xml]==0.33.2; python_version == "3.11"
 python-crontab==3.2.0; python_version == "3.11"
 python-dateutil==2.9.0.post0; python_version == "3.11"
+python-dotenv==1.0.1; python_version == "3.11"
 python-fsutil==0.14.1; python_version == "3.11"
 python-ldap==3.4.4; python_version == "3.11"
 python-slugify==8.0.4; python_version == "3.11"