Browse Source

properly handle chowning DATA_DIR on init when using sudo

Nick Sweeting 1 year ago
parent
commit
ad675a8e7c

+ 13 - 1
archivebox/config/legacy.py

@@ -570,6 +570,18 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: benedict=CON
         output_dir = out_dir or CONSTANTS.DATA_DIR
 
         assert isinstance(output_dir, Path) and isinstance(CONSTANTS.PACKAGE_DIR, Path)
+        
+        from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, SudoPermission
+        from archivebox.config.paths import _get_collection_id
+    
+        # if running as root, chown the data dir to the archivebox user so that user can access it
+        if IS_ROOT:
+            with SudoPermission(uid=0):
+                os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{CONSTANTS.DATA_DIR}"')
+        _get_collection_id(DATA_DIR=CONSTANTS.DATA_DIR, force_create=True)
+        if IS_ROOT:
+            with SudoPermission(uid=0):
+                os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{CONSTANTS.DATA_DIR}"/*')
 
         bump_startup_progress_bar()
         try:
@@ -596,7 +608,7 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: benedict=CON
                 except Exception as e:
                     bump_startup_progress_bar(advance=1000)
                     
-                    is_using_meta_cmd = any(ignored_subcommand in sys.argv for ignored_subcommand in ('help', 'version', '--help', '--version'))
+                    is_using_meta_cmd = any(ignored_subcommand in sys.argv for ignored_subcommand in ('help', 'version', '--help', '--version', 'init'))
                     if not is_using_meta_cmd:
                         # show error message to user only if they're not running a meta command / just trying to get help
                         STDERR.print()

+ 7 - 4
archivebox/config/paths.py

@@ -21,9 +21,7 @@ DATABASE_FILE = DATA_DIR / 'index.sqlite3'
 
 #############################################################################################
 
-@cache
-def get_collection_id(DATA_DIR=DATA_DIR) -> str:
-    """Get a short, stable, unique ID for the current collection (e.g. abc45678)"""
+def _get_collection_id(DATA_DIR=DATA_DIR, force_create=False) -> str:
     collection_id_file = DATA_DIR / '.archivebox_id'
     
     try:
@@ -43,7 +41,7 @@ def get_collection_id(DATA_DIR=DATA_DIR) -> str:
     try:
         # only persist collection_id file if we already have an index.sqlite3 file present
         # otherwise we might be running in a directory that is not a collection, no point creating cruft files
-        if os.path.isfile(DATABASE_FILE) and os.access(DATA_DIR, os.W_OK):
+        if os.path.isfile(DATABASE_FILE) and os.access(DATA_DIR, os.W_OK) or force_create:
             collection_id_file.write_text(collection_id)
             
             # if we're running as root right now, make sure the collection_id file is owned by the archivebox user
@@ -57,6 +55,11 @@ def get_collection_id(DATA_DIR=DATA_DIR) -> str:
         pass
     return collection_id
 
+@cache
+def get_collection_id(DATA_DIR=DATA_DIR) -> str:
+    """Get a short, stable, unique ID for the current collection (e.g. abc45678)"""
+    return _get_collection_id(DATA_DIR=DATA_DIR)
+
 @cache
 def get_machine_id() -> str:
     """Get a short, stable, unique ID for the current machine (e.g. abc45678)"""

+ 18 - 9
archivebox/main.py

@@ -324,16 +324,25 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
     
     from core.models import Snapshot
     from rich import print
+    
+    from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
+    from archivebox.config.paths import _get_collection_id
 
-    out_dir.mkdir(exist_ok=True)
-    is_empty = not len(set(os.listdir(out_dir)) - CONSTANTS.ALLOWED_IN_DATA_DIR)
-
-    if os.access(out_dir / CONSTANTS.JSON_INDEX_FILENAME, os.F_OK):
-        print("[red]:warning: This folder contains a JSON index. It is deprecated, and will no longer be kept up to date automatically.[/red]", file=sys.stderr)
-        print("[red]    You can run `archivebox list --json --with-headers > static_index.json` to manually generate it.[/red]", file=sys.stderr)
-
-    existing_index = os.access(CONSTANTS.DATABASE_FILE, os.F_OK)
+    # if running as root, chown the data dir to the archivebox user so that user can access it
+    if IS_ROOT:
+        with SudoPermission(uid=0):
+            os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{CONSTANTS.DATA_DIR}"')
+    _get_collection_id()
+    if IS_ROOT:
+        with SudoPermission(uid=0):
+            os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{CONSTANTS.DATA_DIR}"/*')
+    
+    # if os.access(out_dir / CONSTANTS.JSON_INDEX_FILENAME, os.F_OK):
+    #     print("[red]:warning: This folder contains a JSON index. It is deprecated, and will no longer be kept up to date automatically.[/red]", file=sys.stderr)
+    #     print("[red]    You can run `archivebox list --json --with-headers > static_index.json` to manually generate it.[/red]", file=sys.stderr)
 
+    is_empty = not len(set(os.listdir(out_dir)) - CONSTANTS.ALLOWED_IN_DATA_DIR)
+    existing_index = os.path.isfile(CONSTANTS.DATABASE_FILE)
     if is_empty and not existing_index:
         print(f'[turquoise4][+] Initializing a new ArchiveBox v{VERSION} collection...[/turquoise4]')
         print('[green]----------------------------------------------------------------------[/green]')
@@ -376,7 +385,7 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
     for migration_line in apply_migrations(out_dir):
         sys.stdout.write(f'    {migration_line}\n')
 
-    assert os.access(CONSTANTS.DATABASE_FILE, os.R_OK)
+    assert os.path.isfile(CONSTANTS.DATABASE_FILE) and os.access(CONSTANTS.DATABASE_FILE, os.R_OK)
     print()
     print(f'    √ ./{CONSTANTS.DATABASE_FILE.relative_to(DATA_DIR)}')
     

+ 5 - 2
archivebox/plugins_auth/ldap/apps.py

@@ -19,13 +19,16 @@ from .settings import LDAP_CONFIG, get_ldap_lib
 
 ###################### Config ##########################
 
-def get_LDAP_LIB_path(paths):
+def get_LDAP_LIB_path(paths=()):
     LDAP_LIB = get_ldap_lib()[0]
     if not LDAP_LIB:
         return None
     
     # check that LDAP_LIB path is in one of the specified site packages dirs
     lib_path = Path(inspect.getfile(LDAP_LIB))
+    if not paths:
+        return lib_path
+    
     for site_packges_dir in paths:
         if str(lib_path.parent.parent.resolve()) == str(Path(site_packges_dir).resolve()):
             return lib_path
@@ -57,7 +60,7 @@ class LdapBinary(BaseBinary):
             "packages": lambda: ['python-ldap>=3.4.3', 'django-auth-ldap>=4.1.0'],
         },
         apt.name: {
-            "abspath": lambda: get_LDAP_LIB_path((*USER_SITE_PACKAGES, *SYS_SITE_PACKAGES)),
+            "abspath": lambda: get_LDAP_LIB_path(),
             "version": lambda: get_LDAP_LIB_version(),
             "packages": lambda: ['libssl-dev', 'libldap2-dev', 'libsasl2-dev', 'python3-ldap', 'python3-msgpack', 'python3-mutagen'],
         },

+ 1 - 1
archivebox/vendor/pydantic-pkgr

@@ -1 +1 @@
-Subproject commit ec4c2d5f5a034ea6c10a5337c3115fbe1504f52b
+Subproject commit e2f6b10550f41e64817908eef3feb0aa33071969

+ 1 - 1
pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "archivebox"
-version = "0.8.5rc28"
+version = "0.8.5rc31"
 requires-python = ">=3.10"
 description = "Self-hosted internet archiving solution."
 authors = [{name = "Nick Sweeting", email = "[email protected]"}]

+ 1 - 1
uv.lock

@@ -41,7 +41,7 @@ wheels = [
 
 [[package]]
 name = "archivebox"
-version = "0.8.5rc28"
+version = "0.8.5rc31"
 source = { editable = "." }
 dependencies = [
     { name = "atomicwrites" },