Browse Source

Go back to nested lib and tmp dirs inside the data dir, with a workaround for the supervisord socket path length limit

Nick Sweeting 1 year ago
parent
commit
4b34b729ab

+ 9 - 6
archivebox/config/constants.py

@@ -16,9 +16,9 @@ from .paths import (
     PACKAGE_DIR,
     DATA_DIR,
     ARCHIVE_DIR,
-    get_collection_id,
-    get_LIB_DIR,
-    get_TMP_DIR,
+    # get_collection_id,
+    # get_LIB_DIR,
+    # get_TMP_DIR,
 )
 from .permissions import (
     IS_ROOT,
@@ -39,13 +39,14 @@ class ConstantsDict(Mapping):
     PACKAGE_DIR: Path                   = PACKAGE_DIR
     DATA_DIR: Path                      = DATA_DIR
     ARCHIVE_DIR: Path                   = ARCHIVE_DIR
-    COLLECTION_ID: str                  = get_collection_id(DATA_DIR)
+    # COLLECTION_ID: str                  = get_collection_id(DATA_DIR)
     
     # Host system
     VERSION: str                        = detect_installed_version(PACKAGE_DIR)
     OS: str                             = platform.system().lower()    # darwin, linux, etc.
     ARCH: str                           = platform.machine().lower()   # arm64, x86_64, aarch64, etc.
     IN_DOCKER: bool                     = IN_DOCKER
+    LIB_DIR_SCOPE: str                  = f'{ARCH}-{OS}-docker' if IN_DOCKER else f'{ARCH}-{OS}'
     
     # Permissions
     IS_ROOT: bool                       = IS_ROOT
@@ -95,9 +96,11 @@ class ConstantsDict(Mapping):
     
     # Runtime dirs
     TMP_DIR_NAME: str                   = 'tmp'
-    TMP_DIR: Path                       = get_TMP_DIR()
+    # TMP_DIR: Path                     = get_TMP_DIR()
+    TMP_DIR: Path                       = DATA_DIR / TMP_DIR_NAME
     LIB_DIR_NAME: str                   = 'lib'
-    LIB_DIR: Path                       = get_LIB_DIR()
+    # LIB_DIR: Path                     = get_LIB_DIR()
+    LIB_DIR: Path                       = DATA_DIR / LIB_DIR_NAME / LIB_DIR_SCOPE
     LIB_PIP_DIR: Path                   = LIB_DIR / 'pip'
     LIB_NPM_DIR: Path                   = LIB_DIR / 'npm'
     LIB_BROWSERS_DIR: Path              = LIB_DIR / 'browsers'

+ 5 - 4
archivebox/config/paths.py

@@ -5,9 +5,10 @@ import sys
 import tempfile
 import hashlib
 from pathlib import Path
-
 from functools import cache
+
 from platformdirs import PlatformDirs
+from rich import print
 
 from .permissions import SudoPermission, IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
 
@@ -91,7 +92,7 @@ def get_LIB_DIR():
             lib_dir = HOST_DIRS.user_data_path
             lib_dir.mkdir(parents=True, exist_ok=True)
         
-        if not dir_is_writable(lib_dir):
+        if IS_ROOT or not dir_is_writable(lib_dir, uid=ARCHIVEBOX_USER):
             if IS_ROOT:
                 # make sure lib dir is owned by the archivebox user, not root
                 with SudoPermission(uid=0):
@@ -130,7 +131,7 @@ def get_TMP_DIR():
             run_dir = Path(os.environ['SYSTEM_TMP_DIR']).resolve() / get_collection_id(DATA_DIR=DATA_DIR)
             with SudoPermission(uid=0, fallback=True):
                 run_dir.mkdir(parents=True, exist_ok=True)
-            if not dir_is_writable(run_dir):
+            if not dir_is_writable(run_dir, uid=ARCHIVEBOX_USER):
                 if IS_ROOT:
                     with SudoPermission(uid=0, fallback=False):
                         if ARCHIVEBOX_USER == 0:
@@ -153,7 +154,7 @@ def get_TMP_DIR():
         with SudoPermission(uid=0, fallback=True):
             run_dir.mkdir(parents=True, exist_ok=True)
             
-        if not dir_is_writable(run_dir):
+        if IS_ROOT or not dir_is_writable(run_dir, uid=ARCHIVEBOX_USER):
             if IS_ROOT:
                 with SudoPermission(uid=0):
                     if ARCHIVEBOX_USER == 0:

+ 11 - 2
archivebox/main.py

@@ -450,6 +450,9 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
         json_index.rename(f"{index_name}.json")
     if os.access(html_index, os.F_OK):
         html_index.rename(f"{index_name}.html")
+    
+    CONSTANTS.TMP_DIR.mkdir(parents=True, exist_ok=True)
+    CONSTANTS.LIB_DIR.mkdir(parents=True, exist_ok=True)
 
     if install:
         run_subcommand('install', pwd=out_dir)
@@ -1004,14 +1007,20 @@ def install(out_dir: Path=DATA_DIR) -> None:
             print(binary.load_or_install(fresh=True).model_dump(exclude={'provider_overrides', 'bin_dir', 'hook_type'}))
             if IS_ROOT:
                 with SudoPermission(uid=0):
-                    os.system(f'chown -R {ARCHIVEBOX_USER} "{CONSTANTS.LIB_DIR.resolve()}"')
+                    if ARCHIVEBOX_USER == 0:
+                        os.system(f'chmod -R 777 "{CONSTANTS.LIB_DIR.resolve()}"')
+                    else:    
+                        os.system(f'chown -R {ARCHIVEBOX_USER} "{CONSTANTS.LIB_DIR.resolve()}"')
         except Exception as e:
             if IS_ROOT:
                 print(f'[yellow]:warning:  Retrying {binary.name} installation with [red]sudo[/red]...[/yellow]')
                 with SudoPermission(uid=0):
                     try:
                         print(binary.load_or_install(fresh=True).model_dump(exclude={'provider_overrides', 'bin_dir', 'hook_type'}))
-                        os.system(f'chown -R {ARCHIVEBOX_USER} "{CONSTANTS.LIB_DIR.resolve()}"')
+                        if ARCHIVEBOX_USER == 0:
+                            os.system(f'chmod -R 777 "{CONSTANTS.LIB_DIR.resolve()}"')
+                        else:    
+                            os.system(f'chown -R {ARCHIVEBOX_USER} "{CONSTANTS.LIB_DIR.resolve()}"')
                     except Exception as e:
                         print(f'[red]:cross_mark: Failed to install {binary.name} as root: {e}[/red]')
             else:

+ 2 - 2
archivebox/misc/checks.py

@@ -109,8 +109,8 @@ def check_data_dir_permissions():
     data_owned_by_root = data_dir_uid == 0
     
     # data_owned_by_default_user = data_dir_uid == DEFAULT_PUID or data_dir_gid == DEFAULT_PGID
-    data_owner_doesnt_match = (data_dir_uid != ARCHIVEBOX_USER and data_dir_gid != ARCHIVEBOX_GROUP) and not IS_ROOT
-    data_not_writable = not (os.access(DATA_DIR, os.W_OK) and os.access(CONSTANTS.LIB_DIR, os.W_OK) and os.access(CONSTANTS.TMP_DIR, os.W_OK))
+    data_owner_doesnt_match = (data_dir_uid != ARCHIVEBOX_USER and data_dir_gid != ARCHIVEBOX_GROUP) if not IS_ROOT else False
+    data_not_writable = not (os.isdir(DATA_DIR) and os.access(DATA_DIR, os.W_OK))     #  and os.access(CONSTANTS.LIB_DIR, os.W_OK) and os.access(CONSTANTS.TMP_DIR, os.W_OK))
     if data_owned_by_root:
         STDERR.print('\n[yellow]:warning: Warning: ArchiveBox [blue]DATA_DIR[/blue] is currently owned by [red]root[/red], ArchiveBox will refuse to run![/yellow]')
     elif data_owner_doesnt_match or data_not_writable:

+ 20 - 1
archivebox/queues/settings.py

@@ -1,14 +1,33 @@
+import tempfile
 from pathlib import Path
 
 from archivebox.config import CONSTANTS
+from archivebox.config.paths import get_collection_id
 
 DATA_DIR = CONSTANTS.DATA_DIR
 LOGS_DIR = CONSTANTS.LOGS_DIR
 TMP_DIR = CONSTANTS.TMP_DIR
 
-Path.mkdir(TMP_DIR, exist_ok=True)
 SUPERVISORD_CONFIG_FILE = TMP_DIR / "supervisord.conf"
 PID_FILE = TMP_DIR / "supervisord.pid"
 SOCK_FILE = TMP_DIR / "supervisord.sock"
 LOG_FILE = TMP_DIR / "supervisord.log"
 WORKERS_DIR = TMP_DIR / "workers"
+
+
+def get_sock_file():
+    """Return the path to use for the supervisord socket: SOCK_FILE, or a short symlink to it in the system tmp dir."""
+    TMP_DIR.mkdir(parents=True, exist_ok=True)
+    if len(str(SOCK_FILE)) > 100:
+        # socket absolute paths cannot be longer than 108 characters on some systems
+        # symlink it to a shorter path and use that instead
+        symlink = Path(tempfile.gettempdir()) / f"archivebox_supervisord_{get_collection_id()}.sock"
+        assert len(str(symlink)) <= 100, f'Failed to create supervisord SOCK_FILE, system tmp dir location is too long {symlink} (unix only allows 108 characters for socket paths)'
+        # build the link under a .tmp name, then atomically move it over any existing symlink
+        tmp_symlink = symlink.with_name(f'{symlink.name}.tmp')
+        tmp_symlink.unlink(missing_ok=True)
+        tmp_symlink.symlink_to(SOCK_FILE)
+        tmp_symlink.replace(symlink)
+        return symlink    # the final .sock name; the .tmp name no longer exists after the move
+        
+    return SOCK_FILE

+ 5 - 6
archivebox/queues/supervisor_util.py

@@ -1,6 +1,5 @@
 __package__ = 'archivebox.queues'
 
-import os
 import time
 import signal
 import psutil
@@ -15,7 +14,7 @@ from xmlrpc.client import ServerProxy
 
 from archivebox.config.permissions import ARCHIVEBOX_USER
 
-from .settings import SUPERVISORD_CONFIG_FILE, DATA_DIR, PID_FILE, SOCK_FILE, LOG_FILE, WORKERS_DIR, TMP_DIR, LOGS_DIR
+from .settings import SUPERVISORD_CONFIG_FILE, DATA_DIR, PID_FILE, get_sock_file, LOG_FILE, WORKERS_DIR, TMP_DIR, LOGS_DIR
 
 from typing import Iterator
 
@@ -48,11 +47,11 @@ nocleanup = true
 user = {ARCHIVEBOX_USER}
 
 [unix_http_server]
-file = {TMP_DIR}/{SOCK_FILE.name}
+file = {get_sock_file()}
 chmod = 0700
 
 [supervisorctl]
-serverurl = unix://{TMP_DIR}/{SOCK_FILE.name}
+serverurl = unix://{get_sock_file()}
 
 [rpcinterface:supervisor]
 supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
@@ -81,12 +80,12 @@ def create_worker_config(daemon):
 
 def get_existing_supervisord_process():
     try:
-        transport = SupervisorTransport(None, None, f"unix://{SOCK_FILE}")
+        transport = SupervisorTransport(None, None, f"unix://{get_sock_file()}")
         server = ServerProxy("http://localhost", transport=transport)
         current_state = cast(Dict[str, int | str], server.supervisor.getState())
         if current_state["statename"] == "RUNNING":
             pid = server.supervisor.getPID()
-            print(f"[🦸‍♂️] Supervisord connected (pid={pid}) via unix://{str(SOCK_FILE).replace(str(TMP_DIR), 'tmp')}.")
+            print(f"[🦸‍♂️] Supervisord connected (pid={pid}) via unix://{str(get_sock_file()).replace(str(TMP_DIR), 'tmp')}.")
             return server.supervisor
     except FileNotFoundError:
         return None