
fix LIB_DIR and TMP_DIR loading when primary option isn't available

Nick Sweeting 1 year ago
commit a211461ffc

+ 8 - 4
archivebox/abx/archivebox/base_binary.py

@@ -37,7 +37,8 @@ class BaseBinary(Binary):
 
     @staticmethod
     def symlink_to_lib(binary, bin_dir=None) -> None:
-        bin_dir = bin_dir or CONSTANTS.LIB_BIN_DIR
+        from archivebox.config.common import STORAGE_CONFIG
+        bin_dir = bin_dir or STORAGE_CONFIG.LIB_DIR / 'bin'
         
         if not (binary.abspath and os.access(binary.abspath, os.R_OK)):
             return
@@ -55,9 +56,10 @@ class BaseBinary(Binary):
         
     @validate_call
     def load(self, fresh=False, **kwargs) -> Self:
+        from archivebox.config.common import STORAGE_CONFIG
         if fresh:
             binary = super().load(**kwargs)
-            self.symlink_to_lib(binary=binary, bin_dir=CONSTANTS.LIB_BIN_DIR)
+            self.symlink_to_lib(binary=binary, bin_dir=STORAGE_CONFIG.LIB_DIR / 'bin')
         else:
             # get cached binary from db
             try:
@@ -72,16 +74,18 @@ class BaseBinary(Binary):
     
     @validate_call
     def install(self, **kwargs) -> Self:
+        from archivebox.config.common import STORAGE_CONFIG
         binary = super().install(**kwargs)
-        self.symlink_to_lib(binary=binary, bin_dir=CONSTANTS.LIB_BIN_DIR)
+        self.symlink_to_lib(binary=binary, bin_dir=STORAGE_CONFIG.LIB_DIR / 'bin')
         return binary
     
     @validate_call
     def load_or_install(self, fresh=False, **kwargs) -> Self:
+        from archivebox.config.common import STORAGE_CONFIG
         try:
             binary = self.load(fresh=fresh)
             if binary and binary.version:
-                self.symlink_to_lib(binary=binary, bin_dir=CONSTANTS.LIB_BIN_DIR)
+                self.symlink_to_lib(binary=binary, bin_dir=STORAGE_CONFIG.LIB_DIR / 'bin')
                 return binary
         except Exception:
             pass
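
The key to this file's change is that `STORAGE_CONFIG` is imported inside each method rather than at module load, so the bin dir is read at call time, after any startup autofixing of LIB_DIR has run. A minimal sketch of that deferred-import pattern (the `resolve_bin_dir` helper name is hypothetical, for illustration only):

```python
from pathlib import Path

def resolve_bin_dir(bin_dir: Path | None = None) -> Path:
    # Import lazily, at call time, so we pick up a LIB_DIR that may have
    # been autofixed at startup; a module-level import would freeze the
    # value before the startup checks get a chance to run.
    from archivebox.config.common import STORAGE_CONFIG
    return bin_dir or STORAGE_CONFIG.LIB_DIR / 'bin'
```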

+ 128 - 31
archivebox/abx/archivebox/base_configset.py

@@ -1,8 +1,13 @@
 __package__ = 'abx.archivebox'
 
 import os
+import sys
+import re
 from pathlib import Path
-from typing import Type, Tuple, Callable, ClassVar
+from typing import Type, Tuple, Callable, ClassVar, Dict, Any
+
+import toml
+from rich import print
 
 from benedict import benedict
 from pydantic import model_validator, TypeAdapter
@@ -18,6 +23,11 @@ from . import toml_util
 PACKAGE_DIR = Path(__file__).resolve().parent.parent
 DATA_DIR = Path(os.getcwd()).resolve()
 
+ARCHIVEBOX_CONFIG_FILE = DATA_DIR / "ArchiveBox.conf"
+ARCHIVEBOX_CONFIG_FILE_BAK = ARCHIVEBOX_CONFIG_FILE.parent / ".ArchiveBox.conf.bak"
+
+AUTOFIXES_HEADER = "[AUTOFIXES]"
+AUTOFIXES_SUBHEADER = "# The following config was added automatically to fix problems detected at startup:"
 
 
 class FlatTomlConfigSettingsSource(TomlConfigSettingsSource):
@@ -53,7 +63,7 @@ class FlatTomlConfigSettingsSource(TomlConfigSettingsSource):
         super(TomlConfigSettingsSource, self).__init__(settings_cls, self.toml_data)
 
 
-class ArchiveBoxBaseConfig(BaseSettings):
+class BaseConfigSet(BaseSettings):
     """
     This is the base class for an ArchiveBox ConfigSet.
     It handles loading values from schema defaults, ArchiveBox.conf TOML config, and environment variables.
@@ -83,7 +93,7 @@ class ArchiveBoxBaseConfig(BaseSettings):
         loc_by_alias=False,
         validate_assignment=True,
         validate_return=True,
-        revalidate_instances="always",
+        revalidate_instances="subclass-instances",
     )
     
     load_from_defaults: ClassVar[bool] = True
@@ -101,9 +111,6 @@ class ArchiveBoxBaseConfig(BaseSettings):
     ) -> Tuple[PydanticBaseSettingsSource, ...]:
         """Defines the config precedence order: Schema defaults -> ArchiveBox.conf (TOML) -> Environment variables"""
         
-        ARCHIVEBOX_CONFIG_FILE = DATA_DIR / "ArchiveBox.conf"
-        ARCHIVEBOX_CONFIG_FILE_BAK = ARCHIVEBOX_CONFIG_FILE.parent / ".ArchiveBox.conf.bak"
-        
         # import ipdb; ipdb.set_trace()
         
         precedence_order = {}
@@ -152,27 +159,36 @@ class ArchiveBoxBaseConfig(BaseSettings):
     def fill_defaults(self):
         """Populate any unset values using function provided as their default"""
 
-        for key, field in self.model_fields.items():
-            value = getattr(self, key)
-            
-            if isinstance(value, Callable):
-                # if value is a function, execute it to get the actual value, passing existing config as a dict arg if expected
-                if func_takes_args_or_kwargs(value):
-                    # assemble dict of existing field values to pass to default factory functions
-                    config_so_far = benedict(self.model_dump(include=set(self.model_fields.keys()), warnings=False))
-                    computed_default = field.default(config_so_far)
-                else:
-                    # otherwise it's a pure function with no args, just call it
-                    computed_default = field.default()
+        for key in self.model_fields.keys():
+            if isinstance(getattr(self, key), Callable):
+                if self.load_from_defaults:
+                    computed_default = self.get_default_value(key)
+                    # set generated default value as final validated value
+                    setattr(self, key, computed_default)
+        return self
+    
+    def get_default_value(self, key):
+        """Get the default value for a given config key"""
+        field = self.model_fields[key]
+        value = getattr(self, key)
+    
+        if isinstance(value, Callable):
+            # if value is a function, execute it to get the actual value, passing existing config as a dict arg if expected
+            if func_takes_args_or_kwargs(value):
+                # assemble dict of existing field values to pass to default factory functions
+                config_so_far = benedict(self.model_dump(include=set(self.model_fields.keys()), warnings=False))
+                computed_default = field.default(config_so_far)
+            else:
+                # otherwise it's a pure function with no args, just call it
+                computed_default = field.default()
 
-                # coerce/check to make sure default factory return value matches type annotation
-                TypeAdapter(field.annotation).validate_python(computed_default)
+            # coerce/check to make sure default factory return value matches type annotation
+            TypeAdapter(field.annotation).validate_python(computed_default)
 
-                # set generated default value as final validated value
-                setattr(self, key, computed_default)
-        return self
+            return computed_default
+        return value
     
-    def update_in_place(self, warn=True, **kwargs):
+    def update_in_place(self, warn=True, persist=False, hint='', **kwargs):
         """
         Update the config with new values. Use this sparingly! We should almost never be updating config at runtime.
         Sets them in the environment so they propagate to spawned subprocesses / across future re-__init__()s and reload from environment
@@ -180,25 +196,106 @@ class ArchiveBoxBaseConfig(BaseSettings):
         Example acceptable use case: user config says SEARCH_BACKEND_ENGINE=sonic but sonic_client pip library is not installed so we cannot use it.
         SEARCH_BACKEND_CONFIG.update_in_place(SEARCH_BACKEND_ENGINE='ripgrep') can be used to reset it back to ripgrep so we can continue.
         """
+        from archivebox.misc.toml_util import CustomTOMLEncoder
+        
         if warn:
-            print('[!] WARNING: Some of the provided user config values cannot be used, temporarily ignoring them:')
+            fix_scope = 'in ArchiveBox.conf' if persist else 'just for current run'
+            print(f'[yellow]:warning:  WARNING: Some config cannot be used as-is, fixing automatically {fix_scope}:[/yellow] {hint}', file=sys.stderr)
+        
+        # set the new values in the environment
         for key, value in kwargs.items():
             os.environ[key] = str(value)
             original_value = getattr(self, key)
             if warn:
                 print(f'    {key}={original_value} -> {value}')
+        
+        # if persist=True, write config changes to data/ArchiveBox.conf [AUTOFIXES] section
+        try:
+            if persist and ARCHIVEBOX_CONFIG_FILE.is_file():
+                autofixes_to_add = benedict(kwargs).to_toml(encoder=CustomTOMLEncoder())
+                
+                existing_config = ARCHIVEBOX_CONFIG_FILE.read_text().split(AUTOFIXES_HEADER, 1)[0].strip()
+                if AUTOFIXES_HEADER in existing_config:
+                    existing_autofixes = existing_config.split(AUTOFIXES_HEADER, 1)[-1].strip().replace(AUTOFIXES_SUBHEADER, '').replace(AUTOFIXES_HEADER, '').strip()
+                else:
+                    existing_autofixes = ''
+                
+                new_config = '\n'.join(line for line in [
+                    existing_config,
+                    '\n' + AUTOFIXES_HEADER,
+                    AUTOFIXES_SUBHEADER,
+                    existing_autofixes,
+                    autofixes_to_add,
+                ] if line.strip()).strip() + '\n'
+                ARCHIVEBOX_CONFIG_FILE.write_text(new_config)
+        except Exception:
+            pass
         self.__init__()
+        if warn:
+            print(file=sys.stderr)
+            
         return self
     
-    def as_legacy_config_schema(self):
+    @property
+    def toml_section_header(self):
+        """Convert the class name to a TOML section header e.g. ShellConfig -> SHELL_CONFIG"""
+        class_name = self.__class__.__name__
+        return re.sub('([A-Z]+)', r'_\1', class_name).upper().strip('_')
+    
+    
+    def from_defaults(self) -> Dict[str, Any]:
+        """Get the dictionary of {key: value} config loaded from the default values"""
+        class OnlyDefaultsConfig(self.__class__):
+            load_from_defaults = True
+            load_from_configfile = False
+            load_from_environment = False
+        return benedict(OnlyDefaultsConfig().model_dump(exclude_unset=False, exclude_defaults=False, exclude=set(self.model_computed_fields.keys())))
+    
+    def from_configfile(self) -> Dict[str, Any]:
+        """Get the dictionary of {key: value} config loaded from the configfile ArchiveBox.conf"""
+        class OnlyConfigFileConfig(self.__class__):
+            load_from_defaults = False
+            load_from_configfile = True
+            load_from_environment = False
+        return benedict(OnlyConfigFileConfig().model_dump(exclude_unset=True, exclude_defaults=False, exclude=set(self.model_computed_fields.keys())))
+    
+    def from_environment(self) -> Dict[str, Any]:
+        """Get the dictionary of {key: value} config loaded from the environment variables"""
+        class OnlyEnvironmentConfig(self.__class__):
+            load_from_defaults = False
+            load_from_configfile = False
+            load_from_environment = True
+        return benedict(OnlyEnvironmentConfig().model_dump(exclude_unset=True, exclude_defaults=False, exclude=set(self.model_computed_fields.keys())))
+    
+    def from_computed(self) -> Dict[str, Any]:
+        """Get the dictionary of {key: value} config loaded from the computed fields"""
+        return benedict(self.model_dump(include=set(self.model_computed_fields.keys())))
+    
+
+    def to_toml_dict(self, defaults=False) -> Dict[str, Any]:
+        """Get the current config as a TOML-ready dict"""
+        config_dict = {}
+        for key, value in benedict(self).items():
+            if defaults or value != self.get_default_value(key):
+                config_dict[key] = value
+        
+        return benedict({self.toml_section_header: config_dict})
+    
+    def to_toml_str(self, defaults=False) -> str:
+        """Get the current config as a TOML string"""
+        from archivebox.misc.toml_util import CustomTOMLEncoder
+        
+        toml_dict = self.to_toml_dict(defaults=defaults)
+        if not toml_dict[self.toml_section_header]:
+            # if the section is empty, don't write it
+            toml_dict.pop(self.toml_section_header)
+        
+        return toml.dumps(toml_dict, encoder=CustomTOMLEncoder())
+    
+    def as_legacy_config_schema(self) -> Dict[str, Any]:
         # shim for backwards compatibility with old config schema style
         model_values = self.model_dump()
         return benedict({
             key: {'type': field.annotation, 'default': model_values[key]}
             for key, field in self.model_fields.items()
         })
-
-
-class BaseConfigSet(ArchiveBoxBaseConfig):      # type: ignore[type-arg]
-
-    pass
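
The new `toml_section_header` property derives the TOML section name from the class name with a small regex. A standalone sketch of just that conversion (pure stdlib, no ArchiveBox imports):

```python
import re

def toml_section_header(class_name: str) -> str:
    # 'ShellConfig' -> '_Shell_Config' -> 'SHELL_CONFIG'
    return re.sub('([A-Z]+)', r'_\1', class_name).upper().strip('_')

assert toml_section_header('ShellConfig') == 'SHELL_CONFIG'
assert toml_section_header('StorageConfig') == 'STORAGE_CONFIG'
```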

+ 11 - 6
archivebox/abx/archivebox/hookspec.py

@@ -18,13 +18,7 @@ def get_PLUGIN() -> Dict[str, Dict[str, Any]]:
 def get_CONFIG() -> Dict[str, BaseConfigSet]:
     return {}
 
-@hookspec
-def get_BINARIES() -> Dict[str, BaseBinary]:
-    return {}
 
-@hookspec
-def get_BINPROVIDERS() -> Dict[str, BaseBinProvider]:
-    return {}
 
 @hookspec
 def get_EXTRACTORS() -> Dict[str, BaseExtractor]:
@@ -45,3 +39,14 @@ def get_SEARCHBACKENDS() -> Dict[str, BaseSearchBackend]:
 # @hookspec
 # def get_QUEUES():
 #     return {}
+
+
+##############################################################
+# provided by abx.pydantic_pkgr.hookspec:
+# @hookspec
+# def get_BINARIES() -> Dict[str, BaseBinary]:
+#     return {}
+
+# @hookspec
+# def get_BINPROVIDERS() -> Dict[str, BaseBinProvider]:
+#     return {}
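
For context, `@hookspec` here is a pluggy-style marker, and the relocated `get_BINARIES`/`get_BINPROVIDERS` specs keep the same shape in their new home. A generic pluggy sketch of the spec/impl pairing (the `'abx'` project name is an assumption based on the package name, not confirmed by this diff):

```python
import pluggy

hookspec = pluggy.HookspecMarker('abx')
hookimpl = pluggy.HookimplMarker('abx')

class BinarySpec:
    @hookspec
    def get_BINARIES(self):
        """Plugins return a dict of {name: binary} entries."""

@hookimpl
def get_BINARIES():
    return {}  # a real plugin would return its binaries here
```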

+ 5 - 2
archivebox/abx/archivebox/reads.py

@@ -131,9 +131,12 @@ def get_SEARCHBACKENDS() -> Dict[str, 'BaseSearchBackend']:
 
 
 
-def get_scope_config(defaults=settings.CONFIG, persona=None, seed=None, crawl=None, snapshot=None, archiveresult=None, extra_config=None):
+def get_scope_config(defaults: benedict | None = None, persona=None, seed=None, crawl=None, snapshot=None, archiveresult=None, extra_config=None):
     """Get all the relevant config for the given scope, in correct precedence order"""
     
+    from django.conf import settings
+    default_config: benedict = defaults or settings.CONFIG
+    
     snapshot = snapshot or (archiveresult and archiveresult.snapshot)
     crawl = crawl or (snapshot and snapshot.crawl)
     seed = seed or (crawl and crawl.seed)
@@ -147,7 +150,7 @@ def get_scope_config(defaults=settings.CONFIG, persona=None, seed=None, crawl=No
     extra_config = extra_config or {}
     
     return {
-        **defaults,                     # defaults / config file / environment variables
+        **default_config,               # defaults / config file / environment variables
         **persona_config,               # lowest precedence
         **seed_config,
         **crawl_config,
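
The precedence order here is plain dict unpacking: later `**` entries overwrite earlier ones, so narrower-scope config wins over the defaults. A toy illustration with made-up keys:

```python
defaults     = {'TIMEOUT': 60, 'SAVE_WGET': True}   # config file / env defaults
crawl_config = {'TIMEOUT': 120}                     # narrower scope
extra_config = {'SAVE_WGET': False}                 # narrowest scope, wins last

merged = {**defaults, **crawl_config, **extra_config}
assert merged == {'TIMEOUT': 120, 'SAVE_WGET': False}
```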

+ 6 - 1
archivebox/cli/__init__.py

@@ -164,13 +164,18 @@ def run_subcommand(subcommand: str,
     # print('DATA_DIR is', DATA_DIR)
     # print('pwd is', os.getcwd())    
 
-    cmd_requires_db = subcommand in archive_cmds
+    cmd_requires_db = (subcommand in archive_cmds)
     init_pending = '--init' in subcommand_args or '--quick-init' in subcommand_args
 
     check_db = cmd_requires_db and not init_pending
 
     setup_django(in_memory_db=subcommand in fake_db, check_db=check_db)
 
+    for ignore_pattern in ('help', '-h', '--help', 'version', '--version'):
+        if ignore_pattern in sys.argv[:4]:
+            cmd_requires_db = False
+            break
+    
     if subcommand in archive_cmds:
         if cmd_requires_db:
             check_migrations()
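
Note that the new loop only inspects the leading argv entries, so a URL passed later in the command that happens to contain the substring 'version' cannot accidentally disable the migration check. Roughly equivalent standalone logic (the function name is hypothetical):

```python
import sys

NO_DB_PATTERNS = ('help', '-h', '--help', 'version', '--version')

def wants_help_or_version(argv: list[str]) -> bool:
    # only scan the first few args, mirroring the sys.argv[:4] slice above
    return any(pattern in argv[:4] for pattern in NO_DB_PATTERNS)
```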

+ 13 - 4
archivebox/config/common.py

@@ -1,18 +1,18 @@
 __package__ = 'archivebox.config'
 
+import os
 import sys
 import shutil
-
+import tempfile
 from typing import Dict, Optional
 from pathlib import Path
 
 from rich import print
-from pydantic import Field, field_validator, computed_field
+from pydantic import Field, field_validator, computed_field, model_validator
 from django.utils.crypto import get_random_string
 
 from abx.archivebox.base_configset import BaseConfigSet
 
-
 from .constants import CONSTANTS
 from .version import get_COMMIT_HASH, get_BUILD_TIME
 from .permissions import IN_DOCKER
@@ -35,7 +35,6 @@ class ShellConfig(BaseConfigSet):
     VERSIONS_AVAILABLE: bool = False             # .check_for_update.get_versions_available_on_github(c)},
     CAN_UPGRADE: bool = False                    # .check_for_update.can_upgrade(c)},
 
-    
     @computed_field
     @property
     def TERM_WIDTH(self) -> int:
@@ -57,6 +56,16 @@ SHELL_CONFIG = ShellConfig()
 
 
 class StorageConfig(BaseConfigSet):
+    # TMP_DIR must be a local, fast, readable/writable dir by archivebox user,
+    # must be a short path due to unix path length restrictions for socket files (<100 chars)
+    # must be a local SSD/tmpfs for speed and because bind mounts/network mounts/FUSE dont support unix sockets
+    TMP_DIR: Path                       = Field(default=CONSTANTS.DEFAULT_TMP_DIR)
+    
+    # LIB_DIR must be a local, fast, readable/writable dir by archivebox user,
+    # must be able to contain executable binaries (up to 5GB size)
+    # should not be a remote/network/FUSE mount for speed reasons, otherwise extractors will be slow
+    LIB_DIR: Path                       = Field(default=CONSTANTS.DEFAULT_LIB_DIR)
+    
     OUTPUT_PERMISSIONS: str             = Field(default='644')
     RESTRICT_FILE_NAMES: str            = Field(default='windows')
     ENFORCE_ATOMIC_WRITES: bool         = Field(default=True)
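
Because TMP_DIR and LIB_DIR are now ordinary pydantic-settings fields, they can be overridden the same way as any other config key. A sketch of the expected behavior, assuming the field name maps directly to the environment variable as it does for the other options in this class:

```python
import os
os.environ['TMP_DIR'] = '/tmp/archivebox'   # env var beats the schema default

from archivebox.config.common import StorageConfig
assert str(StorageConfig().TMP_DIR) == '/tmp/archivebox'  # str coerced to Path
```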

+ 3 - 92
archivebox/config/constants.py

@@ -1,6 +1,5 @@
 __package__ = 'archivebox.config'
 
-import os
 import re
 import sys
 
@@ -97,14 +96,10 @@ class ConstantsDict(Mapping):
     
     # Runtime dirs
     TMP_DIR_NAME: str                   = 'tmp'
-    TMP_DIR: Path                       = DATA_DIR / TMP_DIR_NAME / MACHINE_ID
+    DEFAULT_TMP_DIR: Path               = DATA_DIR / TMP_DIR_NAME / MACHINE_ID    # ./data/tmp/abc3244323
+    
     LIB_DIR_NAME: str                   = 'lib'
-    LIB_DIR: Path                       = DATA_DIR / LIB_DIR_NAME / MACHINE_TYPE
-    LIB_PIP_DIR: Path                   = LIB_DIR / 'pip'
-    LIB_NPM_DIR: Path                   = LIB_DIR / 'npm'
-    LIB_BROWSERS_DIR: Path              = LIB_DIR / 'browsers'
-    LIB_BIN_DIR: Path                   = LIB_DIR / 'bin'
-    BIN_DIR: Path                       = LIB_BIN_DIR
+    DEFAULT_LIB_DIR: Path               = DATA_DIR / LIB_DIR_NAME / MACHINE_TYPE  # ./data/lib/arm64-linux-docker
 
     # Config constants
     TIMEZONE: str                       = 'UTC'
@@ -198,91 +193,7 @@ class ConstantsDict(Mapping):
         ".archivebox_id",
         "Dockerfile",
     ))
-
-    CODE_LOCATIONS = benedict({
-        'PACKAGE_DIR': {
-            'path': (PACKAGE_DIR).resolve(),
-            'enabled': True,
-            'is_valid': os.access(PACKAGE_DIR / '__main__.py', os.X_OK),                                                                  # executable
-        },
-        'TEMPLATES_DIR': {
-            'path': TEMPLATES_DIR.resolve(),
-            'enabled': True,
-            'is_valid': os.access(STATIC_DIR, os.R_OK) and os.access(STATIC_DIR, os.X_OK),                                                # read + list
-        },
-        'CUSTOM_TEMPLATES_DIR': {
-            'path': CUSTOM_TEMPLATES_DIR.resolve(),
-            'enabled': os.path.isdir(CUSTOM_TEMPLATES_DIR),
-            'is_valid': os.path.isdir(CUSTOM_TEMPLATES_DIR) and os.access(CUSTOM_TEMPLATES_DIR, os.R_OK),                                      # read
-        },
-        'USER_PLUGINS_DIR': {
-            'path': USER_PLUGINS_DIR.resolve(),
-            'enabled': os.path.isdir(USER_PLUGINS_DIR),
-            'is_valid': os.path.isdir(USER_PLUGINS_DIR) and os.access(USER_PLUGINS_DIR, os.R_OK),                                              # read
-        },
-        'LIB_DIR': {
-            'path': LIB_DIR.resolve(),
-            'enabled': True,
-            'is_valid': os.path.isdir(LIB_DIR) and os.access(LIB_DIR, os.R_OK) and os.access(LIB_DIR, os.W_OK),                      # read + write
-        },
-    })
         
-    DATA_LOCATIONS = benedict({
-        "DATA_DIR": {
-            "path": DATA_DIR.resolve(),
-            "enabled": True,
-            "is_valid": os.path.isdir(DATA_DIR) and os.access(DATA_DIR, os.R_OK) and os.access(DATA_DIR, os.W_OK),
-            "is_mount": os.path.ismount(DATA_DIR.resolve()),
-        },
-        "CONFIG_FILE": {
-            "path": CONFIG_FILE.resolve(),
-            "enabled": True,
-            "is_valid": os.path.isfile(CONFIG_FILE) and os.access(CONFIG_FILE, os.R_OK) and os.access(CONFIG_FILE, os.W_OK),
-        },
-        "SQL_INDEX": {
-            "path": DATABASE_FILE.resolve(),
-            "enabled": True,
-            "is_valid": os.path.isfile(DATABASE_FILE) and os.access(DATABASE_FILE, os.R_OK) and os.access(DATABASE_FILE, os.W_OK),
-            "is_mount": os.path.ismount(DATABASE_FILE.resolve()),
-        },
-        "QUEUE_DATABASE": {
-            "path": QUEUE_DATABASE_FILE.resolve(),
-            "enabled": True,
-            "is_valid": os.path.isfile(QUEUE_DATABASE_FILE) and os.access(QUEUE_DATABASE_FILE, os.R_OK) and os.access(QUEUE_DATABASE_FILE, os.W_OK),
-            "is_mount": os.path.ismount(QUEUE_DATABASE_FILE.resolve()),
-        },
-        "ARCHIVE_DIR": {
-            "path": ARCHIVE_DIR.resolve(),
-            "enabled": True,
-            "is_valid": os.path.isdir(ARCHIVE_DIR) and os.access(ARCHIVE_DIR, os.R_OK) and os.access(ARCHIVE_DIR, os.W_OK),
-            "is_mount": os.path.ismount(ARCHIVE_DIR.resolve()),
-        },
-        "SOURCES_DIR": {
-            "path": SOURCES_DIR.resolve(),
-            "enabled": True,
-            "is_valid": os.path.isdir(SOURCES_DIR) and os.access(SOURCES_DIR, os.R_OK) and os.access(SOURCES_DIR, os.W_OK),
-        },
-        "PERSONAS_DIR": {
-            "path": PERSONAS_DIR.resolve(),
-            "enabled": os.path.isdir(PERSONAS_DIR),
-            "is_valid": os.path.isdir(PERSONAS_DIR) and os.access(PERSONAS_DIR, os.R_OK) and os.access(PERSONAS_DIR, os.W_OK),                 # read + write
-        },
-        "LOGS_DIR": {
-            "path": LOGS_DIR.resolve(),
-            "enabled": True,
-            "is_valid": os.path.isdir(LOGS_DIR) and os.access(LOGS_DIR, os.R_OK) and os.access(LOGS_DIR, os.W_OK),                              # read + write
-        },
-        'TMP_DIR': {
-            'path': TMP_DIR.resolve(),
-            'enabled': True,
-            'is_valid': os.path.isdir(TMP_DIR) and os.access(TMP_DIR, os.R_OK) and os.access(TMP_DIR, os.W_OK),                      # read + write
-        },
-        # "CACHE_DIR": {
-        #     "path": CACHE_DIR.resolve(),
-        #     "enabled": True,
-        #     "is_valid": os.access(CACHE_DIR, os.R_OK) and os.access(CACHE_DIR, os.W_OK),                        # read + write
-        # },
-    })
 
     @classmethod
     def __getitem__(cls, key: str):

+ 4 - 1
archivebox/config/legacy.py

@@ -258,6 +258,9 @@ def load_config_val(key: str,
 
     elif type is list or type is dict:
         return json.loads(val)
+    
+    elif type is Path:
+        return Path(val)
 
     raise Exception('Config values can only be str, bool, int, or json')
 
 
@@ -574,7 +577,7 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: benedict=CON
             with SudoPermission(uid=0):
             with SudoPermission(uid=0):
                 # running as root is a special case where it's ok to be a bit slower
                 # running as root is a special case where it's ok to be a bit slower
                 # make sure data dir is always owned by the correct user
                 # make sure data dir is always owned by the correct user
-                os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{CONSTANTS.DATA_DIR}"')
+                os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{CONSTANTS.DATA_DIR}" 2>/dev/null')
                 os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{CONSTANTS.DATA_DIR}"/* 2>/dev/null')
                 os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{CONSTANTS.DATA_DIR}"/* 2>/dev/null')
 
 
         bump_startup_progress_bar()
         bump_startup_progress_bar()

+ 194 - 3
archivebox/config/paths.py

@@ -1,12 +1,16 @@
 __package__ = 'archivebox.config'
 
 import os
+import socket
 import hashlib
+import tempfile
 import platform
 from pathlib import Path
 from functools import cache
 from datetime import datetime
 
+from benedict import benedict
+
 from .permissions import SudoPermission, IS_ROOT, ARCHIVEBOX_USER
 
 #############################################################################################
@@ -88,7 +92,7 @@ def get_machine_type() -> str:
     return LIB_DIR_SCOPE
 
 
-def dir_is_writable(dir_path: Path, uid: int | None = None, gid: int | None = None, fallback=True) -> bool:
+def dir_is_writable(dir_path: Path, uid: int | None = None, gid: int | None = None, fallback=True, chown=True) -> bool:
     """Check if a given directory is writable by a specific user and group (fallback=try as current user is unable to check with provided uid)"""
     current_uid, current_gid = os.geteuid(), os.getegid()
     uid, gid = uid or current_uid, gid or current_gid
@@ -101,10 +105,197 @@ def dir_is_writable(dir_path: Path, uid: int | None = None, gid: int | None = No
             test_file.unlink()
             return True
     except (IOError, OSError, PermissionError):
-        pass
-        
+        if chown:    
+            # try fixing it using sudo permissions
+            with SudoPermission(uid=uid, fallback=fallback):
+                os.system(f'chown {uid}:{gid} "{dir_path}" 2>/dev/null')
+            return dir_is_writable(dir_path, uid=uid, gid=gid, fallback=fallback, chown=False)
     return False
 
+def assert_dir_can_contain_unix_sockets(dir_path: Path) -> bool:
+    """Check if a given directory can contain unix sockets (e.g. /tmp/supervisord.sock)"""
+    from archivebox.logging_util import pretty_path
+    
+    try:
+        socket_path = str(dir_path / '.test_socket.sock')
+        s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+        try:
+            os.remove(socket_path)
+        except OSError:
+            pass
+        s.bind(socket_path)
+        s.close()
+        try:
+            os.remove(socket_path)
+        except OSError:
+            pass
+    except Exception as e:
+        raise Exception(f'ArchiveBox failed to create a test UNIX socket file in {pretty_path(dir_path, color=False)}') from e
+    
+    return True
+
+
+def create_and_chown_dir(dir_path: Path) -> None:
+    with SudoPermission(uid=0, fallback=True):
+        dir_path.mkdir(parents=True, exist_ok=True)
+        os.system(f'chown {ARCHIVEBOX_USER} "{dir_path}" 2>/dev/null')
+        os.system(f'chown {ARCHIVEBOX_USER} "{dir_path}"/* 2>/dev/null &')
+
+@cache
+def get_or_create_working_tmp_dir(autofix=True, quiet=False):
+    from archivebox import CONSTANTS
+    from archivebox.config.common import STORAGE_CONFIG
+    from archivebox.misc.checks import check_tmp_dir
+
+    # try a few potential directories in order of preference
+    CANDIDATES = [
+        STORAGE_CONFIG.TMP_DIR,                                                # <user-specified>
+        CONSTANTS.DEFAULT_TMP_DIR,                                             # ./data/tmp/<machine_id>
+        Path('/var/run/archivebox') / get_collection_id(),                     # /var/run/archivebox/abc5d8512
+        Path('/tmp') / 'archivebox' / get_collection_id(),                     # /tmp/archivebox/abc5d8512
+        Path('~/.tmp/archivebox').expanduser() / get_collection_id(),          # ~/.tmp/archivebox/abc5d8512
+        Path(tempfile.gettempdir()) / 'archivebox' / get_collection_id(),      # /var/folders/qy/6tpfrpx100j1t4l312nz683m0000gn/T/archivebox/abc5d8512
+        Path(tempfile.gettempdir()) / 'archivebox' / get_collection_id()[:4],  # /var/folders/qy/6tpfrpx100j1t4l312nz683m0000gn/T/archivebox/abc5d
+        Path(tempfile.gettempdir()) / 'abx' / get_collection_id()[:4],         # /var/folders/qy/6tpfrpx100j1t4l312nz683m0000gn/T/abx/abc5
+    ]
+    for candidate in CANDIDATES:
+        try:
+            create_and_chown_dir(candidate)
+        except Exception:
+            pass
+        if check_tmp_dir(candidate, throw=False, quiet=True, must_exist=True):
+            if autofix and STORAGE_CONFIG.TMP_DIR != candidate:
+                STORAGE_CONFIG.update_in_place(TMP_DIR=candidate, warn=not quiet)
+            return candidate
+    
+    if not quiet:
+        raise OSError(f'ArchiveBox is unable to find a writable TMP_DIR, tried {CANDIDATES}!')
+
+@cache
+def get_or_create_working_lib_dir(autofix=True, quiet=False):
+    from archivebox import CONSTANTS
+    from archivebox.config.common import STORAGE_CONFIG
+    from archivebox.misc.checks import check_lib_dir
+    
+    # try a few potential directories in order of preference
+    CANDIDATES = [
+        STORAGE_CONFIG.LIB_DIR,                                                   # <user-specified>
+        CONSTANTS.DEFAULT_LIB_DIR,                                                # ./data/lib/arm64-linux-docker
+        Path('/usr/local/share/archivebox') / get_collection_id(),                # /usr/local/share/archivebox/abc5
+        *([Path('/opt/homebrew/share/archivebox') / get_collection_id()] if os.path.isfile('/opt/homebrew/bin/archivebox') else []),  # /opt/homebrew/share/archivebox/abc5
+        Path('~/.local/share/archivebox').expanduser() / get_collection_id(),     # ~/.local/share/archivebox/abc5
+    ]
+    
+    for candidate in CANDIDATES:
+        try:
+            create_and_chown_dir(candidate)
+        except Exception:
+            pass
+        if check_lib_dir(candidate, throw=False, quiet=True, must_exist=True):
+            if autofix and STORAGE_CONFIG.LIB_DIR != candidate:
+                STORAGE_CONFIG.update_in_place(LIB_DIR=candidate, warn=not quiet)
+            return candidate
+    
+    if not quiet:
+        raise OSError(f'ArchiveBox is unable to find a writable LIB_DIR, tried {CANDIDATES}!')
+
+
+
+@cache
+def get_data_locations():
+    from archivebox.config import CONSTANTS
+    from archivebox.config.common import STORAGE_CONFIG
+    
+    return benedict({
+        "DATA_DIR": {
+            "path": DATA_DIR.resolve(),
+            "enabled": True,
+            "is_valid": os.path.isdir(DATA_DIR) and os.access(DATA_DIR, os.R_OK) and os.access(DATA_DIR, os.W_OK),
+            "is_mount": os.path.ismount(DATA_DIR.resolve()),
+        },
+        "CONFIG_FILE": {
+            "path": CONSTANTS.CONFIG_FILE.resolve(),
+            "enabled": True,
+            "is_valid": os.path.isfile(CONSTANTS.CONFIG_FILE) and os.access(CONSTANTS.CONFIG_FILE, os.R_OK) and os.access(CONSTANTS.CONFIG_FILE, os.W_OK),
+        },
+        "SQL_INDEX": {
+            "path": DATABASE_FILE.resolve(),
+            "enabled": True,
+            "is_valid": os.path.isfile(DATABASE_FILE) and os.access(DATABASE_FILE, os.R_OK) and os.access(DATABASE_FILE, os.W_OK),
+            "is_mount": os.path.ismount(DATABASE_FILE.resolve()),
+        },
+        "QUEUE_DATABASE": {
+            "path": CONSTANTS.QUEUE_DATABASE_FILE,
+            "enabled": True,
+            "is_valid": os.path.isfile(CONSTANTS.QUEUE_DATABASE_FILE) and os.access(CONSTANTS.QUEUE_DATABASE_FILE, os.R_OK) and os.access(CONSTANTS.QUEUE_DATABASE_FILE, os.W_OK),
+            "is_mount": os.path.ismount(CONSTANTS.QUEUE_DATABASE_FILE),
+        },
+        "ARCHIVE_DIR": {
+            "path": ARCHIVE_DIR.resolve(),
+            "enabled": True,
+            "is_valid": os.path.isdir(ARCHIVE_DIR) and os.access(ARCHIVE_DIR, os.R_OK) and os.access(ARCHIVE_DIR, os.W_OK),
+            "is_mount": os.path.ismount(ARCHIVE_DIR.resolve()),
+        },
+        "SOURCES_DIR": {
+            "path": CONSTANTS.SOURCES_DIR.resolve(),
+            "enabled": True,
+            "is_valid": os.path.isdir(CONSTANTS.SOURCES_DIR) and os.access(CONSTANTS.SOURCES_DIR, os.R_OK) and os.access(CONSTANTS.SOURCES_DIR, os.W_OK),
+        },
+        "PERSONAS_DIR": {
+            "path": CONSTANTS.PERSONAS_DIR.resolve(),
+            "enabled": os.path.isdir(CONSTANTS.PERSONAS_DIR),
+            "is_valid": os.path.isdir(CONSTANTS.PERSONAS_DIR) and os.access(CONSTANTS.PERSONAS_DIR, os.R_OK) and os.access(CONSTANTS.PERSONAS_DIR, os.W_OK),                 # read + write
+        },
+        "LOGS_DIR": {
+            "path": CONSTANTS.LOGS_DIR.resolve(),
+            "enabled": True,
+            "is_valid": os.path.isdir(CONSTANTS.LOGS_DIR) and os.access(CONSTANTS.LOGS_DIR, os.R_OK) and os.access(CONSTANTS.LOGS_DIR, os.W_OK),                             # read + write
+        },
+        'TMP_DIR': {
+            'path': STORAGE_CONFIG.TMP_DIR.resolve(),
+            'enabled': True,
+            'is_valid': os.path.isdir(STORAGE_CONFIG.TMP_DIR) and os.access(STORAGE_CONFIG.TMP_DIR, os.R_OK) and os.access(STORAGE_CONFIG.TMP_DIR, os.W_OK),        # read + write
+        },
+        # "CACHE_DIR": {
+        #     "path": CACHE_DIR.resolve(),
+        #     "enabled": True,
+        #     "is_valid": os.access(CACHE_DIR, os.R_OK) and os.access(CACHE_DIR, os.W_OK),                        # read + write
+        # },
+    })
+
+@cache
+def get_code_locations():
+    from archivebox.config import CONSTANTS
+    from archivebox.config.common import STORAGE_CONFIG
+    
+    return benedict({
+        'PACKAGE_DIR': {
+            'path': (PACKAGE_DIR).resolve(),
+            'enabled': True,
+            'is_valid': os.access(PACKAGE_DIR / '__main__.py', os.X_OK),                                                                  # executable
+        },
+        'TEMPLATES_DIR': {
+            'path': CONSTANTS.TEMPLATES_DIR.resolve(),
+            'enabled': True,
+            'is_valid': os.access(CONSTANTS.STATIC_DIR, os.R_OK) and os.access(CONSTANTS.STATIC_DIR, os.X_OK),                                                # read + list
+        },
+        'CUSTOM_TEMPLATES_DIR': {
+            'path': CONSTANTS.CUSTOM_TEMPLATES_DIR.resolve(),
+            'enabled': os.path.isdir(CONSTANTS.CUSTOM_TEMPLATES_DIR),
+            'is_valid': os.path.isdir(CONSTANTS.CUSTOM_TEMPLATES_DIR) and os.access(CONSTANTS.CUSTOM_TEMPLATES_DIR, os.R_OK),                                      # read
+        },
+        'USER_PLUGINS_DIR': {
+            'path': CONSTANTS.USER_PLUGINS_DIR.resolve(),
+            'enabled': os.path.isdir(CONSTANTS.USER_PLUGINS_DIR),
+            'is_valid': os.path.isdir(CONSTANTS.USER_PLUGINS_DIR) and os.access(CONSTANTS.USER_PLUGINS_DIR, os.R_OK),                                              # read
+        },
+        'LIB_DIR': {
+            'path': STORAGE_CONFIG.LIB_DIR.resolve(),
+            'enabled': True,
+            'is_valid': os.path.isdir(STORAGE_CONFIG.LIB_DIR) and os.access(STORAGE_CONFIG.LIB_DIR, os.R_OK) and os.access(STORAGE_CONFIG.LIB_DIR, os.W_OK),                      # read + write
+        },
+    })
+
 
 
 
 
 # @cache
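
Both `get_or_create_working_tmp_dir` and `get_or_create_working_lib_dir` boil down to the same idea: walk a candidate list in preference order, return the first usable dir, and persist the fix via `update_in_place`. A simplified stand-in that skips the chown/sudo and unix-socket checks the real functions perform:

```python
from pathlib import Path

def first_writable_dir(candidates: list[Path]) -> Path:
    """Return the first candidate dir that can be created and written to."""
    for candidate in candidates:
        try:
            candidate.mkdir(parents=True, exist_ok=True)
            probe = candidate / '.write_test'
            probe.write_text('test')   # prove we can actually write here
            probe.unlink()
            return candidate
        except OSError:
            continue                   # permission denied, read-only fs, etc.
    raise OSError(f'no writable dir among: {candidates}')
```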

+ 5 - 2
archivebox/logging_util.py

@@ -510,7 +510,7 @@ def log_removal_finished(all_links: int, to_remove: int):
 ### Helpers
 
 @enforce_types
-def pretty_path(path: Union[Path, str], pwd: Union[Path, str]=DATA_DIR) -> str:
+def pretty_path(path: Union[Path, str], pwd: Union[Path, str]=DATA_DIR, color: bool=True) -> str:
     """convert paths like .../ArchiveBox/archivebox/../output/abc into output/abc"""
     pwd = str(Path(pwd))  # .resolve()
     path = str(path)
@@ -520,7 +520,10 @@ def pretty_path(path: Union[Path, str], pwd: Union[Path, str]=DATA_DIR) -> str:
 
     # replace long absolute paths with ./ relative ones to save on terminal output width
     if path.startswith(pwd) and (pwd != '/') and path != pwd:
-        path = path.replace(pwd, '[light_slate_blue].[/light_slate_blue]', 1)
+        if color:
+            path = path.replace(pwd, '[light_slate_blue].[/light_slate_blue]', 1)
+        else:
+            path = path.replace(pwd, '.', 1)
     
     # quote paths containing spaces
     if ' ' in path:
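
The effect of the new `color` flag on a hypothetical path, per the replacement logic shown above (the `color=False` form is what lets errors reuse this helper in plain-text contexts):

```python
pretty_path('/data/archive/1716/index.html', pwd='/data')
# -> '[light_slate_blue].[/light_slate_blue]/archive/1716/index.html'
pretty_path('/data/archive/1716/index.html', pwd='/data', color=False)
# -> './archive/1716/index.html'
```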

+ 53 - 35
archivebox/main.py

@@ -189,6 +189,7 @@ def version(quiet: bool=False,
     if quiet or '--version' in sys.argv:
         return
     
+    from rich.panel import Panel
     from rich.console import Console
     console = Console()
     prnt = console.print
@@ -197,6 +198,7 @@ def version(quiet: bool=False,
     from django.conf import settings
     from archivebox.config.version import get_COMMIT_HASH, get_BUILD_TIME
     from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, RUNNING_AS_UID, RUNNING_AS_GID
+    from archivebox.config.paths import get_data_locations, get_code_locations
 
     from abx.archivebox.base_binary import BaseBinary, apt, brew, env
 
@@ -221,7 +223,7 @@ def version(quiet: bool=False,
         f'PLATFORM={platform.platform()}',
         f'PYTHON={sys.implementation.name.title()}' + (' (venv)' if CONSTANTS.IS_INSIDE_VENV else ''),
     )
-    OUTPUT_IS_REMOTE_FS = CONSTANTS.DATA_LOCATIONS.DATA_DIR.is_mount or CONSTANTS.DATA_LOCATIONS.ARCHIVE_DIR.is_mount
+    OUTPUT_IS_REMOTE_FS = get_data_locations().DATA_DIR.is_mount or get_data_locations().ARCHIVE_DIR.is_mount
     DATA_DIR_STAT = CONSTANTS.DATA_DIR.stat()
     prnt(
         f'EUID={os.geteuid()}:{os.getegid()} UID={RUNNING_AS_UID}:{RUNNING_AS_GID} PUID={ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}',
@@ -240,6 +242,21 @@ def version(quiet: bool=False,
         #f'DB=django.db.backends.sqlite3 (({CONFIG["SQLITE_JOURNAL_MODE"]})',  # add this if we have more useful info to show eventually
     )
     prnt()
+    
+    if not (os.access(CONSTANTS.ARCHIVE_DIR, os.R_OK) and os.access(CONSTANTS.CONFIG_FILE, os.R_OK)):
+        PANEL_TEXT = '\n'.join((
+            # '',
+            # f'[yellow]CURRENT DIR =[/yellow] [red]{os.getcwd()}[/red]',
+            '',
+            '[violet]Hint:[/violet] [green]cd[/green] into a collection [blue]DATA_DIR[/blue] and run [green]archivebox version[/green] again...',
+            '      [grey53]OR[/grey53] run [green]archivebox init[/green] to create a new collection in the current dir.',
+            '',
+            '      [i][grey53](this is [red]REQUIRED[/red] if you are opening a Github Issue to get help)[/grey53][/i]',
+            '',
+        ))
+        prnt(Panel(PANEL_TEXT, expand=False, border_style='grey53', title='[red]:exclamation: No collection [blue]DATA_DIR[/blue] is currently active[/red]', subtitle='Full version info is only available when inside a collection [light_slate_blue]DATA DIR[/light_slate_blue]'))
+        prnt()
+        return
 
     prnt('[pale_green1][i] Binary Dependencies:[/pale_green1]')
     failures = []
@@ -299,13 +316,13 @@ def version(quiet: bool=False,
         
         prnt()
         prnt('[deep_sky_blue3][i] Code locations:[/deep_sky_blue3]')
-        for name, path in CONSTANTS.CODE_LOCATIONS.items():
+        for name, path in get_code_locations().items():
             prnt(printable_folder_status(name, path), overflow='ignore', crop=False)
 
         prnt()
         if os.access(CONSTANTS.ARCHIVE_DIR, os.R_OK) or os.access(CONSTANTS.CONFIG_FILE, os.R_OK):
             prnt('[bright_yellow][i] Data locations:[/bright_yellow]')
-            for name, path in CONSTANTS.DATA_LOCATIONS.items():
+            for name, path in get_data_locations().items():
                 prnt(printable_folder_status(name, path), overflow='ignore', crop=False)
         
             from archivebox.misc.checks import check_data_dir_permissions
@@ -395,7 +412,7 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
     print(f'    √ ./{CONSTANTS.DATABASE_FILE.relative_to(DATA_DIR)}')
     
     # from django.contrib.auth.models import User
-    # if SHELL_CONFIG.IS_TTY and not User.objects.filter(is_superuser=True).exists():
+    # if SHELL_CONFIG.IS_TTY and not User.objects.filter(is_superuser=True).exclude(username='system').exists():
     #     print('{green}[+] Creating admin user account...{reset}'.format(**SHELL_CONFIG.ANSI))
     #     call_command("createsuperuser", interactive=True)
 
@@ -486,9 +503,13 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
         html_index.rename(f"{index_name}.html")
     
     CONSTANTS.PERSONAS_DIR.mkdir(parents=True, exist_ok=True)
-    CONSTANTS.TMP_DIR.mkdir(parents=True, exist_ok=True)
-    CONSTANTS.LIB_DIR.mkdir(parents=True, exist_ok=True)
-
+    CONSTANTS.DEFAULT_TMP_DIR.mkdir(parents=True, exist_ok=True)
+    CONSTANTS.DEFAULT_LIB_DIR.mkdir(parents=True, exist_ok=True)
+    
+    from archivebox.config.common import STORAGE_CONFIG
+    STORAGE_CONFIG.TMP_DIR.mkdir(parents=True, exist_ok=True)
+    STORAGE_CONFIG.LIB_DIR.mkdir(parents=True, exist_ok=True)
+    
     if install:
         run_subcommand('install', pwd=out_dir)
 
@@ -1115,7 +1136,7 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
     from django.contrib.auth import get_user_model
     User = get_user_model()
 
-    if not User.objects.filter(is_superuser=True).exists():
+    if not User.objects.filter(is_superuser=True).exclude(username='system').exists():
         stderr('\n[+] Don\'t forget to create a new admin user for the Web UI...', color='green')
         stderr('    archivebox manage createsuperuser')
         # run_subcommand('manage', subcommand_args=['createsuperuser'], pwd=out_dir)
@@ -1399,46 +1420,43 @@ def server(runserver_args: Optional[List[str]]=None,
     from django.core.management import call_command
     from django.contrib.auth.models import User
     
-    
-
-    print('[green][+] Starting ArchiveBox webserver...[/green]')
-    print('    > Logging errors to ./logs/errors.log')
-    if not User.objects.filter(is_superuser=True).exists():
-        print('[yellow][!] No admin users exist yet, you will not be able to edit links in the UI.[/yellow]')
+    if not User.objects.filter(is_superuser=True).exclude(username='system').exists():
         print()
-        print('    [violet]Hint:[/violet] To create an admin user, run:')
-        print('        archivebox manage createsuperuser')
+        # print('[yellow][!] No admin accounts exist, you must create one to be able to log in to the Admin UI![/yellow]')
+        print('[violet]Hint:[/violet] To create an [bold]admin username & password[/bold] for the [deep_sky_blue3][underline][link=http://{host}:{port}/admin]Admin UI[/link][/underline][/deep_sky_blue3], run:')
+        print('      [green]archivebox manage createsuperuser[/green]')
         print()
     
 
+    host = '127.0.0.1'
+    port = '8000'
+    
+    try:
+        host_and_port = [arg for arg in runserver_args if arg.replace('.', '').replace(':', '').isdigit()][0]
+        if ':' in host_and_port:
+            host, port = host_and_port.split(':')
+        else:
+            if '.' in host_and_port:
+                host = host_and_port
+            else:
+                port = host_and_port
+    except IndexError:
+        pass
+
+    print('[green][+] Starting ArchiveBox webserver...[/green]')
+    print(f'    [blink][green]>[/green][/blink] Starting ArchiveBox webserver on [deep_sky_blue4][link=http://{host}:{port}]http://{host}:{port}[/link][/deep_sky_blue4]')
+    print(f'    [green]>[/green] Log in to ArchiveBox Admin UI on [deep_sky_blue3][link=http://{host}:{port}/admin]http://{host}:{port}/admin[/link][/deep_sky_blue3]')
+    print('    > Writing ArchiveBox error log to ./logs/errors.log')
+
     if SHELL_CONFIG.DEBUG:
         if not reload:
             runserver_args.append('--noreload')  # '--insecure'
         call_command("runserver", *runserver_args)
     else:
-        host = '127.0.0.1'
-        port = '8000'
-        
-        try:
-            host_and_port = [arg for arg in runserver_args if arg.replace('.', '').replace(':', '').isdigit()][0]
-            if ':' in host_and_port:
-                host, port = host_and_port.split(':')
-            else:
-                if '.' in host_and_port:
-                    host = host_and_port
-                else:
-                    port = host_and_port
-        except IndexError:
-            pass
-
-        print(f'    [blink][green]>[/green][/blink] Starting ArchiveBox webserver on [deep_sky_blue4][link=http://{host}:{port}]http://{host}:{port}[/link][/deep_sky_blue4]')
-
         from queues.supervisor_util import start_server_workers
 
         print()
-        
         start_server_workers(host=host, port=port, daemonize=False)
-
         print("\n[i][green][🟩] ArchiveBox server shut down gracefully.[/green][/i]")
 
 
 
 

+ 118 - 9
archivebox/misc/checks.py

@@ -5,16 +5,24 @@ import sys
 from pathlib import Path
 
 from rich import print
+from rich.panel import Panel
 
-# DO NOT ADD ANY TOP-LEVEL IMPORTS HERE
+# DO NOT ADD ANY TOP-LEVEL IMPORTS HERE to anything other than builtin python libraries
 # this file is imported by archivebox/__init__.py
 # and any imports here will be imported by EVERYTHING else
 # so this file should only be used for pure python checks
 # that don't need to import other parts of ArchiveBox
 
+# if a check needs to import other parts of ArchiveBox,
+# the imports should be done inside the check function
+# and you should make sure if you need to import any django stuff
+# that the check is called after django.setup() has been called
+
 
 def check_data_folder() -> None:
     from archivebox import DATA_DIR, ARCHIVE_DIR
+    from archivebox.config import CONSTANTS
+    from archivebox.config.paths import create_and_chown_dir, get_or_create_working_tmp_dir, get_or_create_working_lib_dir
     
     archive_dir_exists = os.access(ARCHIVE_DIR, os.R_OK) and ARCHIVE_DIR.is_dir()
     if not archive_dir_exists:
@@ -30,8 +38,21 @@ def check_data_folder() -> None:
         raise SystemExit(2)
     
     
+    # Create data dir subdirs
+    create_and_chown_dir(CONSTANTS.SOURCES_DIR)
+    create_and_chown_dir(CONSTANTS.PERSONAS_DIR / 'Default')
+    create_and_chown_dir(CONSTANTS.LOGS_DIR)
+    # create_and_chown_dir(CONSTANTS.CACHE_DIR)
+    
+    # Create /tmp and /lib dirs if they don't exist
+    get_or_create_working_tmp_dir(autofix=True, quiet=False)
+    get_or_create_working_lib_dir(autofix=True, quiet=False)
+    
+    # Check data dir permissions, /tmp, and /lib permissions
+    check_data_dir_permissions()
+    
 def check_migrations():
-    from archivebox import DATA_DIR, CONSTANTS
+    from archivebox import DATA_DIR
     from ..index.sql import list_migrations
 
     pending_migrations = [name for status, name in list_migrations() if not status]
     pending_migrations = [name for status, name in list_migrations() if not status]
@@ -45,13 +66,6 @@ def check_migrations():
         print('        archivebox init', file=sys.stderr)
         raise SystemExit(3)
 
-    CONSTANTS.SOURCES_DIR.mkdir(exist_ok=True)
-    CONSTANTS.LOGS_DIR.mkdir(exist_ok=True)
-    # CONSTANTS.CACHE_DIR.mkdir(exist_ok=True)
-    (CONSTANTS.LIB_DIR / 'bin').mkdir(exist_ok=True, parents=True)
-    (CONSTANTS.PERSONAS_DIR / 'Default').mkdir(exist_ok=True, parents=True)
-
-
 def check_io_encoding():
     PYTHON_ENCODING = (sys.__stdout__ or sys.stdout or sys.__stderr__ or sys.stderr).encoding.upper().replace('UTF8', 'UTF-8')
             
@@ -128,3 +142,98 @@ def check_data_dir_permissions():
         STDERR.print('    [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#permissions]https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#permissions[/link]')
         STDERR.print('    [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#puid--pgid]https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#puid--pgid[/link]')
         STDERR.print('    [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#filesystem-doesnt-support-fsync-eg-network-mounts]https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#filesystem-doesnt-support-fsync-eg-network-mounts[/link]')
+
+    from archivebox.config.common import STORAGE_CONFIG
+
+    # Check /tmp dir permissions
+    check_tmp_dir(STORAGE_CONFIG.TMP_DIR, throw=False, must_exist=True)
+
+    # Check /lib dir permissions
+    check_lib_dir(STORAGE_CONFIG.LIB_DIR, throw=False, must_exist=True)
+
+
+def check_tmp_dir(tmp_dir=None, throw=False, quiet=False, must_exist=True):
+    from archivebox.config.paths import assert_dir_can_contain_unix_sockets, dir_is_writable, get_or_create_working_tmp_dir
+    from archivebox.misc.logging import STDERR
+    from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
+    from archivebox.config.common import STORAGE_CONFIG
+    from archivebox.logging_util import pretty_path
+    
+    tmp_dir = tmp_dir or STORAGE_CONFIG.TMP_DIR
+    socket_file = tmp_dir.absolute().resolve() / "supervisord.sock"
+
+    if not must_exist and not os.path.isdir(tmp_dir):
+        # just check that it's viable based on its length (the dir may not exist yet, so we can't check whether it's writable)
+        return len(f'file://{socket_file}') <= 96
+
+    tmp_is_valid = False
+    try:
+        tmp_is_valid = dir_is_writable(tmp_dir)
+        tmp_is_valid = tmp_is_valid and assert_dir_can_contain_unix_sockets(tmp_dir)
+        assert tmp_is_valid, f'ArchiveBox user PUID={ARCHIVEBOX_USER} PGID={ARCHIVEBOX_GROUP} is unable to write to TMP_DIR={tmp_dir}'            
+        assert len(f'file://{socket_file}') <= 96, f'ArchiveBox TMP_DIR={tmp_dir} is too long, dir containing unix socket files must be <90 chars.'
+        return True
+    except Exception as e:
+        if not quiet:
+            STDERR.print()
+            ERROR_TEXT = '\n'.join((
+                '',
+                f'[red]:cross_mark: ArchiveBox is unable to use TMP_DIR={pretty_path(tmp_dir)}[/red]',
+                f'   [yellow]{e}[/yellow]',
+                '',
+                '[blue]Info:[/blue] [grey53]The TMP_DIR is used for the supervisord unix socket file and other temporary files.',
+                '  - It [red]must[/red] be on a local drive (not inside a docker volume, remote network drive, or FUSE mount).',
+                f'  - It [red]must[/red] be readable and writable by the ArchiveBox user (PUID={ARCHIVEBOX_USER}, PGID={ARCHIVEBOX_GROUP}).',
+                '  - It [red]must[/red] be a *short* path (less than 90 characters) due to UNIX path length restrictions for sockets.',
+                '  - It [yellow]should[/yellow] be able to hold at least 200MB of data (in-progress downloads can be large).[/grey53]',
+                '',
+                '[violet]Hint:[/violet] Fix it by setting TMP_DIR to a path that meets these requirements, e.g.:',
+                f'      [green]archivebox config --set TMP_DIR={get_or_create_working_tmp_dir(autofix=False, quiet=True) or "/tmp/archivebox"}[/green]',
+                '',
+            ))
+            STDERR.print(Panel(ERROR_TEXT, expand=False, border_style='red', title='[red]:cross_mark: Error with configured TMP_DIR[/red]', subtitle='Background workers may fail to start until fixed.'))
+            STDERR.print()
+        if throw:
+            raise OSError(f'TMP_DIR={tmp_dir} is invalid, ArchiveBox is unable to use it and the server will fail to start!') from e
+    return False
+
+
+def check_lib_dir(lib_dir: Path | None = None, throw=False, quiet=False, must_exist=True):
+    from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
+    from archivebox.misc.logging import STDERR
+    from archivebox.config.paths import dir_is_writable, get_or_create_working_lib_dir
+    from archivebox.config.common import STORAGE_CONFIG
+    from archivebox.logging_util import pretty_path
+    
+    lib_dir = lib_dir or STORAGE_CONFIG.LIB_DIR
+    
+    if not must_exist and not os.path.isdir(lib_dir):
+        return True
+    
+    lib_is_valid = False
+    try:
+        lib_is_valid = dir_is_writable(lib_dir)
+        assert lib_is_valid, f'ArchiveBox user PUID={ARCHIVEBOX_USER} PGID={ARCHIVEBOX_GROUP} is unable to write to LIB_DIR={lib_dir}'
+        return True
+    except Exception as e:
+        if not quiet:
+            STDERR.print()
+            ERROR_TEXT = '\n'.join((
+                '',
+                f'[red]:cross_mark: ArchiveBox is unable to use LIB_DIR={pretty_path(lib_dir)}[/red]',
+                f'   [yellow]{e}[/yellow]',
+                '',
+                '[blue]Info:[/blue] [grey53]The LIB_DIR is used to store ArchiveBox auto-installed plugin library and binary dependencies.',
+                f'  - It [red]must[/red] be readable and writable by the ArchiveBox user (PUID={ARCHIVEBOX_USER}, PGID={ARCHIVEBOX_GROUP}).',
+                '  - It [yellow]should[/yellow] be on a local (ideally fast) drive like an SSD or HDD (not on a network drive or external HDD).',
+                '  - It [yellow]should[/yellow] be able to hold at least 1GB of data (some dependencies like Chrome can be large).[/grey53]',
+                '',
+                '[violet]Hint:[/violet] Fix it by setting LIB_DIR to a path that meets these requirements, e.g.:',
+                f'      [green]archivebox config --set LIB_DIR={get_or_create_working_lib_dir(autofix=False, quiet=True) or "/usr/local/share/archivebox"}[/green]',
+                '',
+            ))
+            STDERR.print(Panel(ERROR_TEXT, expand=False, border_style='red', title='[red]:cross_mark: Error with configured LIB_DIR[/red]', subtitle='[yellow]Dependencies may not auto-install properly until fixed.[/yellow]'))
+            STDERR.print()
+        if throw:
+            raise OSError(f'LIB_DIR={lib_dir} is invalid, ArchiveBox is unable to use it and dependencies will fail to install.') from e
+    return False
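The len(f'file://{socket_file}') <= 96 guard in check_tmp_dir() above exists because AF_UNIX socket paths are capped at roughly 104 bytes on macOS and 108 bytes on Linux (the same limits noted in the removed queues/settings.py below). A minimal standalone demo of the failure it guards against (illustrative only, not part of this commit):

    import os
    import socket
    import tempfile

    # build a deliberately long directory path, then try to bind a socket in it
    long_dir = os.path.join(tempfile.gettempdir(), 'x' * 120)
    os.makedirs(long_dir, exist_ok=True)
    sock_path = os.path.join(long_dir, 'supervisord.sock')

    s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
        s.bind(sock_path)   # raises OSError: AF_UNIX path too long
    except OSError as e:
        print(f'cannot bind {len(sock_path)}-char socket path: {e}')
    finally:
        s.close()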

+ 1 - 1
archivebox/misc/shell_welcome_message.py

@@ -49,7 +49,7 @@ if __name__ == '__main__':
         
         
     prnt('[i] :heavy_dollar_sign: Welcome to the ArchiveBox Shell!')
     prnt('    [deep_sky_blue4]Docs:[/deep_sky_blue4] [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Shell-Usage]https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Shell-Usage[/link]')
-    prnt('          [link=https://docs.archivebox.io/en/latest/modules.html]https://docs.archivebox.io/en/latest/modules.html[/link]')
+    prnt('          [link=https://docs.archivebox.io/en/dev/apidocs/archivebox/archivebox.html]https://docs.archivebox.io/en/dev/apidocs/archivebox/archivebox.html[/link]')
     prnt()
     prnt(' :grey_question: [violet]Hint[/] [i]Here are some examples to get started:[/]')
     prnt('    add[blink][deep_sky_blue4]?[/deep_sky_blue4][/blink]                                                                        [grey53]# add ? after anything to get help[/]')

+ 4 - 4
archivebox/misc/toml_util.py

@@ -82,10 +82,10 @@ class JSONSchemaWithLambdas(GenerateJsonSchema):
         if isinstance(default, Callable):
             return '{{lambda ' + inspect.getsource(default).split('=lambda ')[-1].strip()[:-1] + '}}'
         return to_jsonable_python(
-           default,
-           timedelta_mode=config.ser_json_timedelta,
-           bytes_mode=config.ser_json_bytes,
-           serialize_unknown=True
+            default,
+            timedelta_mode=config.ser_json_timedelta,
+            bytes_mode=config.ser_json_bytes,
+            serialize_unknown=True
         )
 
     # for computed_field properties render them like this instead:

+ 4 - 1
archivebox/plugins_extractor/chrome/binaries.py

@@ -104,7 +104,10 @@ class ChromeBinary(BaseBinary):
     }
 
     @staticmethod
-    def symlink_to_lib(binary, bin_dir=CONSTANTS.LIB_BIN_DIR) -> None:
+    def symlink_to_lib(binary, bin_dir=None) -> None:
+        from archivebox.config.common import STORAGE_CONFIG
+        bin_dir = bin_dir or STORAGE_CONFIG.LIB_DIR / 'bin'
+        
         if not (binary.abspath and os.access(binary.abspath, os.F_OK)):
             return
         

+ 10 - 8
archivebox/plugins_pkg/npm/binproviders.py

@@ -3,8 +3,6 @@ __package__ = 'plugins_pkg.npm'
 from pathlib import Path
 from typing import Optional
 
-from pydantic import model_validator
-
 from pydantic_pkgr import NpmProvider, PATHStr, BinProviderName
 
 from archivebox.config import DATA_DIR, CONSTANTS
@@ -14,7 +12,7 @@ from abx.archivebox.base_binary import BaseBinProvider
 
 
 
 
 OLD_NODE_BIN_PATH = DATA_DIR / 'node_modules' / '.bin'
-NEW_NODE_BIN_PATH = CONSTANTS.LIB_NPM_DIR / 'node_modules' / '.bin'
+NEW_NODE_BIN_PATH = CONSTANTS.DEFAULT_LIB_DIR / 'npm' / 'node_modules' / '.bin'
 
 
 class SystemNpmBinProvider(NpmProvider, BaseBinProvider):
@@ -27,12 +25,16 @@ class LibNpmBinProvider(NpmProvider, BaseBinProvider):
     name: BinProviderName = "lib_npm"
     PATH: PATHStr = f'{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}'
     
-    npm_prefix: Optional[Path] = CONSTANTS.LIB_NPM_DIR
+    npm_prefix: Optional[Path] = CONSTANTS.DEFAULT_LIB_DIR / 'npm'
     
-    @model_validator(mode='after')
-    def validate_path(self):
-        assert self.npm_prefix == NEW_NODE_BIN_PATH.parent.parent
-        return self
+    def setup(self) -> None:
+        # update paths from config if they aren't the default
+        from archivebox.config.common import STORAGE_CONFIG
+        if STORAGE_CONFIG.LIB_DIR != CONSTANTS.DEFAULT_LIB_DIR:
+            self.npm_prefix = STORAGE_CONFIG.LIB_DIR / 'npm'
+            self.PATH = f'{STORAGE_CONFIG.LIB_DIR / "npm" / "node_modules" / ".bin"}:{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}'
+
+        super().setup()
 
 
 SYS_NPM_BINPROVIDER = SystemNpmBinProvider()
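The same deferred-configuration pattern repeats in the pip, playwright, and puppeteer providers below: class attribute defaults are bound to CONSTANTS.DEFAULT_LIB_DIR at import time, then setup() re-reads STORAGE_CONFIG once the user's config has actually been loaded. A simplified standalone sketch of the idea (names here are illustrative, not the real ArchiveBox API):

    import os
    from pathlib import Path

    DEFAULT_LIB_DIR = Path('/usr/local/share/abx')

    def load_configured_lib_dir() -> Path:
        # stand-in for STORAGE_CONFIG.LIB_DIR: honor an env var if the user set one
        return Path(os.environ.get('ABX_LIB_DIR', str(DEFAULT_LIB_DIR)))

    class Provider:
        lib_dir: Path = DEFAULT_LIB_DIR   # import-time default

        def setup(self) -> None:
            # re-read config at setup() time, after user config is loaded,
            # and only override paths that differ from the default
            lib_dir = load_configured_lib_dir()
            if lib_dir != DEFAULT_LIB_DIR:
                self.lib_dir = lib_dir

    p = Provider()
    p.setup()
    print(p.lib_dir)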

+ 9 - 1
archivebox/plugins_pkg/pip/binproviders.py

@@ -49,7 +49,15 @@ class LibPipBinProvider(PipProvider, BaseBinProvider):
     name: BinProviderName = "lib_pip"
     INSTALLER_BIN: BinName = "pip"
     
-    pip_venv: Optional[Path] = CONSTANTS.LIB_PIP_DIR / 'venv'
+    pip_venv: Optional[Path] = CONSTANTS.DEFAULT_LIB_DIR / 'pip' / 'venv'
+    
+    def setup(self) -> None:
+        # update paths from config if they aren't the default
+        from archivebox.config.common import STORAGE_CONFIG
+        if STORAGE_CONFIG.LIB_DIR != CONSTANTS.DEFAULT_LIB_DIR:
+            self.pip_venv = STORAGE_CONFIG.LIB_DIR / 'pip' / 'venv'
+            
+        super().setup()
 
 SYS_PIP_BINPROVIDER = SystemPipBinProvider()
 PIPX_PIP_BINPROVIDER = SystemPipxBinProvider()

+ 6 - 1
archivebox/plugins_pkg/playwright/binproviders.py

@@ -35,7 +35,7 @@ class PlaywrightBinProvider(BaseBinProvider):
     name: BinProviderName = "playwright"
     INSTALLER_BIN: BinName = PLAYWRIGHT_BINARY.name
 
-    PATH: PATHStr = f"{CONSTANTS.LIB_BIN_DIR}:{DEFAULT_ENV_PATH}"
+    PATH: PATHStr = f"{CONSTANTS.DEFAULT_LIB_DIR / 'bin'}:{DEFAULT_ENV_PATH}"
 
     playwright_browsers_dir: Path = (
         MACOS_PLAYWRIGHT_CACHE_DIR.expanduser()
@@ -56,6 +56,11 @@ class PlaywrightBinProvider(BaseBinProvider):
         return PLAYWRIGHT_BINARY.load().abspath
 
     def setup(self) -> None:
+        # update paths from config if they aren't the default
+        from archivebox.config.common import STORAGE_CONFIG
+        if STORAGE_CONFIG.LIB_DIR != CONSTANTS.DEFAULT_LIB_DIR:
+            self.PATH = f"{STORAGE_CONFIG.LIB_DIR / 'bin'}:{DEFAULT_ENV_PATH}"
+
         assert SYS_PIP_BINPROVIDER.INSTALLER_BIN_ABSPATH, "Pip bin provider not initialized"
 
         if self.playwright_browsers_dir:

+ 9 - 7
archivebox/plugins_pkg/puppeteer/binproviders.py

@@ -23,19 +23,16 @@ from abx.archivebox.base_binary import BaseBinProvider
 from plugins_pkg.npm.binproviders import SYS_NPM_BINPROVIDER
 
 
-LIB_DIR_BROWSERS = CONSTANTS.LIB_BROWSERS_DIR
-
-
 class PuppeteerBinProvider(BaseBinProvider):
     name: BinProviderName = "puppeteer"
     INSTALLER_BIN: BinName = "npx"
 
-    PATH: PATHStr = str(CONSTANTS.LIB_BIN_DIR)
+    PATH: PATHStr = str(CONSTANTS.DEFAULT_LIB_DIR / 'bin')
     
     euid: Optional[int] = ARCHIVEBOX_USER
 
-    puppeteer_browsers_dir: Path = LIB_DIR_BROWSERS
-    puppeteer_install_args: List[str] = ['--yes', "@puppeteer/browsers", "install", "--path", str(LIB_DIR_BROWSERS)]
+    puppeteer_browsers_dir: Path = CONSTANTS.DEFAULT_LIB_DIR / 'browsers'
+    puppeteer_install_args: List[str] = ['--yes', "@puppeteer/browsers", "install"]
 
     packages_handler: BinProviderOverrides = Field(default={
         "chrome": lambda:
@@ -45,6 +42,11 @@ class PuppeteerBinProvider(BaseBinProvider):
     _browser_abspaths: ClassVar[Dict[str, HostBinPath]] = {}
     
     def setup(self) -> None:
+        # update paths from config
+        from archivebox.config.common import STORAGE_CONFIG
+        self.puppeteer_browsers_dir = STORAGE_CONFIG.LIB_DIR / 'browsers'
+        self.PATH = str(STORAGE_CONFIG.LIB_DIR / 'bin')
+        
         assert SYS_NPM_BINPROVIDER.INSTALLER_BIN_ABSPATH, "NPM bin provider not initialized"
         
         if self.puppeteer_browsers_dir:
@@ -90,7 +92,7 @@ class PuppeteerBinProvider(BaseBinProvider):
 
 
         # print(f'[*] {self.__class__.__name__}: Installing {bin_name}: {self.INSTALLER_BIN_ABSPATH} install {packages}')
 
-        install_args = [*self.puppeteer_install_args]
+        install_args = [*self.puppeteer_install_args, "--path", str(self.puppeteer_browsers_dir)]
 
         proc = self.exec(bin_name=self.INSTALLER_BIN_ABSPATH, cmd=[*install_args, *packages])
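With "--path" moved out of the class default and into the install call, the final npx invocation is assembled at call time so it honors the browsers dir resolved in setup(). Roughly (an illustrative reconstruction with a made-up configured path):

    install_args = ['--yes', '@puppeteer/browsers', 'install', '--path', '/data/lib/browsers']
    # resulting command:
    #   npx --yes @puppeteer/browsers install --path /data/lib/browsers chrome@stable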
 
 

+ 0 - 40
archivebox/queues/settings.py

@@ -1,40 +0,0 @@
-import tempfile
-from pathlib import Path
-from functools import cache
-
-from archivebox.config import CONSTANTS
-from archivebox.config.paths import get_collection_id
-
-DATA_DIR = CONSTANTS.DATA_DIR
-LOGS_DIR = CONSTANTS.LOGS_DIR
-TMP_DIR = CONSTANTS.TMP_DIR
-
-SUPERVISORD_CONFIG_FILE = TMP_DIR / "supervisord.conf"
-PID_FILE = TMP_DIR / "supervisord.pid"
-SOCK_FILE = TMP_DIR / "supervisord.sock"
-LOG_FILE = TMP_DIR / "supervisord.log"
-WORKERS_DIR = TMP_DIR / "workers"
-
-@cache
-def get_sock_file():
-    """Get the path to the supervisord socket file, symlinking to a shorter path if needed due to unix path length limits"""
-    TMP_DIR.mkdir(parents=True, exist_ok=True)
-    
-    if len(f'file://{SOCK_FILE.absolute().resolve()}') > 98:
-        # socket absolute paths cannot be longer than 104 bytes on macos, and 108 bytes on linux
-        # symlink it to a shorter path and use that instead
-        
-        # place the actual socket file in a shorter tmp dir
-        # /var/folders/qy/6tpfrpx100j1t4l312nz683m0000gn/T/archivebox_supervisord_3d1e544e.sock
-        shorter_sock_file = Path(tempfile.gettempdir()) / f"archivebox_supervisord_{get_collection_id()}.sock"
-        
-        # symlink ./data/tmp/<collection_id>/supervisord.sock -> /var/folders/qy/abc234235/T/archivebox_supervisord_3d1e544e.sock
-        # for convenience/consistency
-        symlink = SOCK_FILE
-        symlink.unlink(missing_ok=True)
-        symlink.symlink_to(shorter_sock_file)
-        
-        assert len(f'file://{shorter_sock_file}') <= 98, f'Failed to create supervisord SOCK_FILE, system tmp dir location is too long {shorter_sock_file} (unix only allows 108 characters for socket paths)'
-        return shorter_sock_file
-        
-    return SOCK_FILE

+ 120 - 49
archivebox/queues/supervisor_util.py

@@ -1,23 +1,39 @@
 __package__ = 'archivebox.queues'
 
+import sys
 import time
 import signal
 import psutil
 import shutil
 import subprocess
-from pathlib import Path
-from rich import print
 
-from typing import Dict, cast
+from typing import Dict, cast, Iterator
+from pathlib import Path
+from functools import cache
 
+from rich import print
 from supervisor.xmlrpc import SupervisorTransport
 from xmlrpc.client import ServerProxy
 
+from archivebox.config import CONSTANTS
+from archivebox.config.paths import get_or_create_working_tmp_dir
 from archivebox.config.permissions import ARCHIVEBOX_USER
+from archivebox.misc.logging import STDERR
+from archivebox.logging_util import pretty_path
 
-from .settings import SUPERVISORD_CONFIG_FILE, DATA_DIR, PID_FILE, get_sock_file, LOG_FILE, WORKERS_DIR, TMP_DIR, LOGS_DIR
+LOG_FILE_NAME = "supervisord.log"
+CONFIG_FILE_NAME = "supervisord.conf"
+PID_FILE_NAME = "supervisord.pid"
+WORKERS_DIR_NAME = "workers"
 
-from typing import Iterator
+@cache
+def get_sock_file():
+    """Get the path to the supervisord socket file inside the working TMP_DIR (which is chosen to be short enough for unix socket path length limits)"""
+    TMP_DIR = get_or_create_working_tmp_dir(autofix=True, quiet=False)
+    assert TMP_DIR, "Failed to find or create a writable TMP_DIR!"
+    socket_file = TMP_DIR / "supervisord.sock"
+
+    return socket_file
 
 def follow(file, sleep_sec=0.1) -> Iterator[str]:
     """ Yield each line from a file as they are written.
@@ -35,24 +51,30 @@ def follow(file, sleep_sec=0.1) -> Iterator[str]:
 
 
 
 
 def create_supervisord_config():
+    SOCK_FILE = get_sock_file()
+    WORKERS_DIR = SOCK_FILE.parent / WORKERS_DIR_NAME
+    CONFIG_FILE = SOCK_FILE.parent / CONFIG_FILE_NAME
+    PID_FILE = SOCK_FILE.parent / PID_FILE_NAME
+    LOG_FILE = CONSTANTS.LOGS_DIR / LOG_FILE_NAME
+    
     config_content = f"""
 [supervisord]
 nodaemon = true
 environment = IS_SUPERVISORD_PARENT="true"
-pidfile = {TMP_DIR}/{PID_FILE.name}
-logfile = {LOGS_DIR}/{LOG_FILE.name}
-childlogdir = {LOGS_DIR}
-directory = {DATA_DIR}
+pidfile = {PID_FILE}
+logfile = {LOG_FILE}
+childlogdir = {CONSTANTS.LOGS_DIR}
+directory = {CONSTANTS.DATA_DIR}
 strip_ansi = true
 nocleanup = true
 user = {ARCHIVEBOX_USER}
 
 [unix_http_server]
-file = {get_sock_file()}
+file = {SOCK_FILE}
 chmod = 0700
 
 [supervisorctl]
-serverurl = unix://{get_sock_file()}
+serverurl = unix://{SOCK_FILE}
 
 [rpcinterface:supervisor]
 supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
@@ -61,9 +83,14 @@ supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
 files = {WORKERS_DIR}/*.conf
 
 """
-    SUPERVISORD_CONFIG_FILE.write_text(config_content)
+    CONFIG_FILE.write_text(config_content)
+    Path.mkdir(WORKERS_DIR, exist_ok=True)
+    (WORKERS_DIR / 'initial_startup.conf').write_text('')   # hides error about "no files found to include" when supervisord starts
 
 def create_worker_config(daemon):
+    SOCK_FILE = get_sock_file()
+    WORKERS_DIR = SOCK_FILE.parent / WORKERS_DIR_NAME
     
     Path.mkdir(WORKERS_DIR, exist_ok=True)
     
     name = daemon['name']
 
 
 
 
 def get_existing_supervisord_process():
 def get_existing_supervisord_process():
+    SOCK_FILE = get_sock_file()
     try:
-        transport = SupervisorTransport(None, None, f"unix://{get_sock_file()}")
+        transport = SupervisorTransport(None, None, f"unix://{SOCK_FILE}")
         server = ServerProxy("http://localhost", transport=transport)
         current_state = cast(Dict[str, int | str], server.supervisor.getState())
         if current_state["statename"] == "RUNNING":
             pid = server.supervisor.getPID()
-            print(f"[🦸‍♂️] Supervisord connected (pid={pid}) via unix://{str(get_sock_file()).replace(str(DATA_DIR), '.')}.")
+            print(f"[🦸‍♂️] Supervisord connected (pid={pid}) via unix://{pretty_path(SOCK_FILE)}.")
             return server.supervisor
     except FileNotFoundError:
         return None
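For context, supervisord's XML-RPC API over a unix socket can be exercised standalone like this (a minimal sketch; the socket path is an assumed example, not a fixed ArchiveBox location):

    from xmlrpc.client import ServerProxy
    from supervisor.xmlrpc import SupervisorTransport

    # 'http://localhost' is a dummy URL; the transport routes requests over the socket
    transport = SupervisorTransport(None, None, 'unix:///tmp/abx/supervisord.sock')
    server = ServerProxy('http://localhost', transport=transport)
    print(server.supervisor.getState())   # e.g. {'statecode': 1, 'statename': 'RUNNING'}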
@@ -95,58 +123,83 @@ def get_existing_supervisord_process():
         return None
 
 def stop_existing_supervisord_process():
+    SOCK_FILE = get_sock_file()
+    PID_FILE = SOCK_FILE.parent / PID_FILE_NAME
+    
     try:
-        pid = int(PID_FILE.read_text())
-    except FileNotFoundError:
-        return
-    except ValueError:
-        PID_FILE.unlink()
-        return
+        try:
+            pid = int(PID_FILE.read_text())
+        except (FileNotFoundError, ValueError):
+            return
 
-    try:
-        print(f"[🦸‍♂️] Stopping supervisord process (pid={pid})...")
-        proc = psutil.Process(pid)
-        proc.terminate()
-        proc.wait()
-    except Exception:
-        pass
-    try:
-        PID_FILE.unlink()
-    except FileNotFoundError:
-        pass
+        try:
+            print(f"[🦸‍♂️] Stopping supervisord process (pid={pid})...")
+            proc = psutil.Process(pid)
+            proc.terminate()
+            proc.wait()
+        except (Exception, BrokenPipeError, IOError):
+            pass
+    finally:
+        try:
+            # clear PID file and socket file
+            PID_FILE.unlink(missing_ok=True)
+            get_sock_file().unlink(missing_ok=True)
+        except Exception:
+            pass
 
 def start_new_supervisord_process(daemonize=False):
+    SOCK_FILE = get_sock_file()
+    WORKERS_DIR = SOCK_FILE.parent / WORKERS_DIR_NAME
+    LOG_FILE = CONSTANTS.LOGS_DIR / LOG_FILE_NAME
+    CONFIG_FILE = SOCK_FILE.parent / CONFIG_FILE_NAME
+    PID_FILE = SOCK_FILE.parent / PID_FILE_NAME
+    
     print(f"[🦸‍♂️] Supervisord starting{' in background' if daemonize else ''}...")
-    # Create a config file in the current working directory
+    pretty_log_path = pretty_path(LOG_FILE)
+    print(f"    > Writing supervisord logs to: {pretty_log_path}")
+    print(f"    > Writing task worker logs to: {pretty_log_path.replace('supervisord.log', 'worker_*.log')}")
+    print(f'    > Using supervisord config file: {pretty_path(CONFIG_FILE)}')
+    print(f"    > Using supervisord UNIX socket: {pretty_path(SOCK_FILE)}")
+    print()
     
     # clear out existing stale state files
     shutil.rmtree(WORKERS_DIR, ignore_errors=True)
     PID_FILE.unlink(missing_ok=True)
     get_sock_file().unlink(missing_ok=True)
-    SUPERVISORD_CONFIG_FILE.unlink(missing_ok=True)
+    CONFIG_FILE.unlink(missing_ok=True)
     
+    # create the supervisord config file
     create_supervisord_config()
 
     # Start supervisord
+    # panel = Panel(f"Starting supervisord with config: {SUPERVISORD_CONFIG_FILE}")
+    # with Live(panel, refresh_per_second=1) as live:
+    
     subprocess.Popen(
-        f"supervisord --configuration={SUPERVISORD_CONFIG_FILE}",
+        f"supervisord --configuration={CONFIG_FILE}",
         stdin=None,
         shell=True,
         start_new_session=daemonize,
     )
 
     def exit_signal_handler(signum, frame):
-        if signum != 13:
-            print(f"\n[🦸‍♂️] Supervisord got stop signal ({signal.strsignal(signum)}). Terminating child processes...")
+        if signum == 2:
+            STDERR.print("\n[🛑] Got Ctrl+C. Terminating child processes...")
+        elif signum != 13:
+            STDERR.print(f"\n[🦸‍♂️] Supervisord got stop signal ({signal.strsignal(signum)}). Terminating child processes...")
         stop_existing_supervisord_process()
         raise SystemExit(0)
 
     # Monitor for termination signals and cleanup child processes
     if not daemonize:
-        signal.signal(signal.SIGINT, exit_signal_handler)
-        signal.signal(signal.SIGHUP, exit_signal_handler)
-        signal.signal(signal.SIGPIPE, exit_signal_handler)
-        signal.signal(signal.SIGTERM, exit_signal_handler)
+        try:
+            signal.signal(signal.SIGINT, exit_signal_handler)
+            signal.signal(signal.SIGHUP, exit_signal_handler)
+            signal.signal(signal.SIGPIPE, exit_signal_handler)
+            signal.signal(signal.SIGTERM, exit_signal_handler)
+        except Exception:
+            # signal handlers only work in main thread
+            pass
     # otherwise supervisord will continue in the background even if the parent proc ends (aka daemon mode)
 
     time.sleep(2)
@@ -154,14 +207,32 @@ def start_new_supervisord_process(daemonize=False):
     return get_existing_supervisord_process()
 
 def get_or_create_supervisord_process(daemonize=False):
+    SOCK_FILE = get_sock_file()
+    WORKERS_DIR = SOCK_FILE.parent / WORKERS_DIR_NAME
+    
     supervisor = get_existing_supervisord_process()
     if supervisor is None:
         stop_existing_supervisord_process()
         supervisor = start_new_supervisord_process(daemonize=daemonize)
         time.sleep(0.5)
 
+    # wait up to 5s in case supervisord is slow to start
+    if not supervisor:
+        for _ in range(10):
+            if supervisor is not None:
+                print()
+                break
+            sys.stdout.write('.')
+            sys.stdout.flush()
+            time.sleep(0.5)
+            supervisor = get_existing_supervisord_process()
+        else:
+            print()
+
     assert supervisor, "Failed to start supervisord or connect to it!"
     supervisor.getPID()  # make sure it doesn't throw an exception
+
+    (WORKERS_DIR / 'initial_startup.conf').unlink(missing_ok=True)
     
     return supervisor
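A hedged usage sketch of the lifecycle this function manages (assuming it is called from inside an ArchiveBox data dir; not an official CLI entrypoint):

    # get a live supervisord handle, starting a new daemon if none is running
    supervisor = get_or_create_supervisord_process(daemonize=False)
    print(supervisor.getState())   # e.g. {'statecode': 1, 'statename': 'RUNNING'}
    print(supervisor.getPID())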
 
 
@@ -242,9 +313,9 @@ def tail_worker_logs(log_path: str):
                 for line in follow(f):
                     if '://' in line:
                         live.console.print(f"Working on: {line.strip()}")
-                    table.add_row("123124234", line.strip())
-    except KeyboardInterrupt:
-        print("\n[🛑] Got Ctrl+C, stopping gracefully...")
+                    # table.add_row("123124234", line.strip())
+    except (KeyboardInterrupt, BrokenPipeError, IOError):
+        STDERR.print("\n[🛑] Got Ctrl+C, stopping gracefully...")
     except SystemExit:
         pass
 
 
@@ -321,12 +392,12 @@ def start_server_workers(host='0.0.0.0', port='8000', daemonize=False):
     if not daemonize:
         try:
             watch_worker(supervisor, "worker_daphne")
-        except KeyboardInterrupt:
-            print("\n[🛑] Got Ctrl+C, stopping gracefully...")
+        except (KeyboardInterrupt, BrokenPipeError, IOError):
+            STDERR.print("\n[🛑] Got Ctrl+C, stopping gracefully...")
         except SystemExit:
             pass
         except BaseException as e:
-            print(f"\n[🛑] Got {e.__class__.__name__} exception, stopping web server gracefully...")
+            STDERR.print(f"\n[🛑] Got {e.__class__.__name__} exception, stopping web server gracefully...")
             raise
         finally:
             stop_worker(supervisor, "worker_daphne")
@@ -350,12 +421,12 @@ def start_cli_workers(watch=False):
     if watch:
         try:
             watch_worker(supervisor, "worker_system_tasks")
-        except KeyboardInterrupt:
-            print("\n[🛑] Got Ctrl+C, stopping gracefully...")
+        except (KeyboardInterrupt, BrokenPipeError, IOError):
+            STDERR.print("\n[🛑] Got Ctrl+C, stopping gracefully...")
         except SystemExit:
             pass
         except BaseException as e:
-            print(f"\n[🛑] Got {e.__class__.__name__} exception, stopping web server gracefully...")
+            STDERR.print(f"\n[🛑] Got {e.__class__.__name__} exception, stopping web server gracefully...")
             raise
         finally:
             stop_worker(supervisor, "worker_system_tasks")