浏览代码

remove platformdirs dependency

Nick Sweeting 1 年之前
父节点
当前提交
9f274cf9f4
共有 4 个文件被更改,包括 151 次插入和 124 次删除
  1. 9 11
      archivebox/config/constants.py
  2. 138 108
      archivebox/config/paths.py
  3. 3 3
      archivebox/misc/checks.py
  4. 1 2
      pyproject.toml

+ 9 - 11
archivebox/config/constants.py

@@ -16,9 +16,9 @@ from .paths import (
     PACKAGE_DIR,
     DATA_DIR,
     ARCHIVE_DIR,
-    # get_collection_id,
-    # get_LIB_DIR,
-    # get_TMP_DIR,
+    get_collection_id,
+    get_machine_id,
+    get_machine_type,
 )
 from .permissions import (
     IS_ROOT,
@@ -39,14 +39,14 @@ class ConstantsDict(Mapping):
     PACKAGE_DIR: Path                   = PACKAGE_DIR
     DATA_DIR: Path                      = DATA_DIR
     ARCHIVE_DIR: Path                   = ARCHIVE_DIR
-    # COLLECTION_ID: str                  = get_collection_id(DATA_DIR)
+    
+    MACHINE_TYPE: str                   = get_machine_type()
+    MACHINE_ID: str                     = get_machine_id()
+    COLLECTION_ID: str                  = get_collection_id(DATA_DIR)
     
     # Host system
     VERSION: str                        = detect_installed_version(PACKAGE_DIR)
-    OS: str                             = platform.system().lower()    # darwin, linux, etc.
-    ARCH: str                           = platform.machine().lower()   # arm64, x86_64, aarch64, etc.
     IN_DOCKER: bool                     = IN_DOCKER
-    LIB_DIR_SCOPE: str                  = f'{ARCH}-{OS}-docker' if IN_DOCKER else f'{ARCH}-{OS}'
     
     # Permissions
     IS_ROOT: bool                       = IS_ROOT
@@ -96,11 +96,9 @@ class ConstantsDict(Mapping):
     
     # Runtime dirs
     TMP_DIR_NAME: str                   = 'tmp'
-    # TMP_DIR: Path                     = get_TMP_DIR()
-    TMP_DIR: Path                       = DATA_DIR / TMP_DIR_NAME
+    TMP_DIR: Path                       = DATA_DIR / TMP_DIR_NAME / MACHINE_ID
     LIB_DIR_NAME: str                   = 'lib'
-    # LIB_DIR: Path                     = get_LIB_DIR()
-    LIB_DIR: Path                       = DATA_DIR / LIB_DIR_NAME / LIB_DIR_SCOPE
+    LIB_DIR: Path                       = DATA_DIR / LIB_DIR_NAME / MACHINE_TYPE
     LIB_PIP_DIR: Path                   = LIB_DIR / 'pip'
     LIB_NPM_DIR: Path                   = LIB_DIR / 'npm'
     LIB_BROWSERS_DIR: Path              = LIB_DIR / 'browsers'

+ 138 - 108
archivebox/config/paths.py

@@ -1,16 +1,12 @@
 __package__ = 'archivebox.config'
 
 import os
-import sys
-import tempfile
 import hashlib
+import platform
 from pathlib import Path
 from functools import cache
 
-from platformdirs import PlatformDirs
-from rich import print
-
-from .permissions import SudoPermission, IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
+from .permissions import SudoPermission
 
 #############################################################################################
 
@@ -18,11 +14,15 @@ PACKAGE_DIR: Path = Path(__file__).resolve().parent.parent    # archivebox sourc
 DATA_DIR: Path = Path(os.getcwd()).resolve()                  # archivebox user data dir
 ARCHIVE_DIR: Path = DATA_DIR / 'archive'                      # archivebox snapshot data dir
 
+IN_DOCKER = os.environ.get('IN_DOCKER', False) in ('1', 'true', 'True', 'TRUE', 'yes')  # True only when the IN_DOCKER env var equals one of these exact strings; unset or any other value -> False
+
+DATABASE_FILE = DATA_DIR / 'index.sqlite3'  # get_collection_id() checks that this file exists before persisting an ID file into DATA_DIR
+
 #############################################################################################
 
 @cache
-def get_collection_id(DATA_DIR=DATA_DIR):
-    """Get a short, stable, unique ID for the current collection"""
+def get_collection_id(DATA_DIR=DATA_DIR) -> str:
+    """Get a short, stable, unique ID for the current collection (e.g. abc45678)"""
     collection_id_file = DATA_DIR / '.archivebox_id'
     
     try:
@@ -32,12 +32,42 @@ def get_collection_id(DATA_DIR=DATA_DIR):
     
     hash_key = str(DATA_DIR.resolve()).encode()
     collection_id = hashlib.sha256(hash_key).hexdigest()[:8]
+    
     try:
-        collection_id_file.write_text(collection_id)
+        # only persist collection_id file if we already have an index.sqlite3 file present
+        # otherwise we might be running in a directory that is not a collection, no point creating cruft files
+        if os.path.isfile(DATABASE_FILE) and os.access(DATA_DIR, os.W_OK):
+            collection_id_file.write_text(collection_id)
     except (OSError, FileNotFoundError, PermissionError):
         pass
     return collection_id
 
+@cache  # computed once per process; later calls reuse the first result
+def get_machine_id() -> str:
+    """Return an 8-character ID for the current machine (e.g. abc45678), or 'unknown' if none can be derived"""
+    
+    MACHINE_ID = 'unknown'  # returned unchanged if both strategies below fail
+    try:
+        import machineid  # not stdlib (presumably the py-machineid package); imported lazily so a missing install falls through to the fallback
+        MACHINE_ID = machineid.hashed_id('archivebox')[:8]  # presumably an app-scoped hash of the OS machine ID, truncated to 8 chars -- confirm against machineid docs
+    except Exception:  # import failure or any lookup error -> try the stdlib-only fallback
+        try:
+            import uuid
+            import hashlib  # already imported at module level; this local import is redundant but harmless
+            MACHINE_ID = hashlib.sha256(str(uuid.getnode()).encode()).hexdigest()[:8]  # NOTE(review): uuid.getnode() may return a random value when no hardware address is found, so this ID may not be stable across processes -- confirm
+        except Exception:
+            pass  # keep the 'unknown' default
+    return MACHINE_ID
+
+@cache  # computed once per process; later calls reuse the first result
+def get_machine_type() -> str:
+    """Get a short type identifier for the current machine in ARCH-OS[-docker] form (e.g. x86_64-linux-docker)"""
+    
+    OS: str                             = platform.system().lower()    # darwin, linux, etc.
+    ARCH: str                           = platform.machine().lower()   # arm64, x86_64, aarch64, etc.
+    LIB_DIR_SCOPE: str                  = f'{ARCH}-{OS}-docker' if IN_DOCKER else f'{ARCH}-{OS}'  # '-docker' suffix is appended only when IN_DOCKER is true
+    return LIB_DIR_SCOPE
+
 
 def dir_is_writable(dir_path: Path, uid: int | None = None, gid: int | None = None, fallback=True) -> bool:
     """Check if a given directory is writable by a specific user and group (fallback=try as current user is unable to check with provided uid)"""
@@ -58,116 +88,116 @@ def dir_is_writable(dir_path: Path, uid: int | None = None, gid: int | None = No
 
 
 
-@cache
-def get_LIB_DIR():
-    """
-    - should be shared with other collections on the same host
-    - must be scoped by CPU architecture, OS family, and archivebox version
-    - should not be shared with other hosts/archivebox versions
-    - must be writable by any archivebox user
-    - should be persistent across reboots
-    - can be on a docker bin mount but probably shouldnt be
-    - ok to have a long path (doesnt contain SOCKETS)
-    """
-    from .version import detect_installed_version
+# @cache
+# def get_LIB_DIR():
+#     """
+#     - should be shared with other collections on the same host
+#     - must be scoped by CPU architecture, OS family, and archivebox version
+#     - should not be shared with other hosts/archivebox versions
+#     - must be writable by any archivebox user
+#     - should be persistent across reboots
+#     - can be on a docker bin mount but probably shouldnt be
+#     - ok to have a long path (doesnt contain SOCKETS)
+#     """
+#     from .version import detect_installed_version
     
-    HOST_DIRS = PlatformDirs(appname='archivebox', appauthor='ArchiveBox', version=detect_installed_version(), opinion=True, ensure_exists=False)
+#     HOST_DIRS = PlatformDirs(appname='archivebox', appauthor='ArchiveBox', version=detect_installed_version(), opinion=True, ensure_exists=False)
     
-    lib_dir = tempfile.gettempdir()
-    try:
-        if 'SYSTEM_LIB_DIR' in os.environ:
-            lib_dir = Path(os.environ['SYSTEM_LIB_DIR'])
-        else:
-            with SudoPermission(uid=ARCHIVEBOX_USER, fallback=True):
-                lib_dir = HOST_DIRS.site_data_path
+#     lib_dir = tempfile.gettempdir()
+#     try:
+#         if 'SYSTEM_LIB_DIR' in os.environ:
+#             lib_dir = Path(os.environ['SYSTEM_LIB_DIR'])
+#         else:
+#             with SudoPermission(uid=ARCHIVEBOX_USER, fallback=True):
+#                 lib_dir = HOST_DIRS.site_data_path
         
-        # Docker: /usr/local/share/archivebox/0.8.5
-        # Ubuntu: /usr/local/share/archivebox/0.8.5
-        # macOS: /Library/Application Support/archivebox
-        try:
-            with SudoPermission(uid=0, fallback=True):
-                lib_dir.mkdir(parents=True, exist_ok=True)
-        except PermissionError:
-            # our user cannot 
-            lib_dir = HOST_DIRS.user_data_path
-            lib_dir.mkdir(parents=True, exist_ok=True)
+#         # Docker: /usr/local/share/archivebox/0.8.5
+#         # Ubuntu: /usr/local/share/archivebox/0.8.5
+#         # macOS: /Library/Application Support/archivebox
+#         try:
+#             with SudoPermission(uid=0, fallback=True):
+#                 lib_dir.mkdir(parents=True, exist_ok=True)
+#         except PermissionError:
+#             # our user cannot 
+#             lib_dir = HOST_DIRS.user_data_path
+#             lib_dir.mkdir(parents=True, exist_ok=True)
         
-        if IS_ROOT or not dir_is_writable(lib_dir, uid=ARCHIVEBOX_USER):
-            if IS_ROOT:
-                # make sure lib dir is owned by the archivebox user, not root
-                with SudoPermission(uid=0):
-                    if ARCHIVEBOX_USER == 0:
-                        # print(f'[yellow]:warning:  Waring: Creating SYSTEM_LIB_DIR {lib_dir} with mode 777 so that non-root archivebox users can share it.[/yellow] (caches shared libs used by archivebox for performance)', file=sys.stderr)
-                        os.system(f'chmod -R 777 "{lib_dir}"')
-                    else:
-                        os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{lib_dir}"')
-            else:
-                raise PermissionError()
-    except (PermissionError, AssertionError):
-        # raise PermissionError(f'SYSTEM_LIB_DIR {lib_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}')
-        print(f'[red]:cross_mark:  ERROR: SYSTEM_LIB_DIR {lib_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/red]', file=sys.stderr)
+#         if IS_ROOT or not dir_is_writable(lib_dir, uid=ARCHIVEBOX_USER):
+#             if IS_ROOT:
+#                 # make sure lib dir is owned by the archivebox user, not root
+#                 with SudoPermission(uid=0):
+#                     if ARCHIVEBOX_USER == 0:
+#                         # print(f'[yellow]:warning:  Waring: Creating SYSTEM_LIB_DIR {lib_dir} with mode 777 so that non-root archivebox users can share it.[/yellow] (caches shared libs used by archivebox for performance)', file=sys.stderr)
+#                         os.system(f'chmod -R 777 "{lib_dir}"')
+#                     else:
+#                         os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{lib_dir}"')
+#             else:
+#                 raise PermissionError()
+#     except (PermissionError, AssertionError):
+#         # raise PermissionError(f'SYSTEM_LIB_DIR {lib_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}')
+#         print(f'[red]:cross_mark:  ERROR: SYSTEM_LIB_DIR {lib_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/red]', file=sys.stderr)
         
-    return lib_dir
+#     return lib_dir
     
-@cache
-def get_TMP_DIR():
-    """
-    - must NOT be inside DATA_DIR / inside a docker volume bind mount
-    - must NOT have a long PATH (UNIX socket path length restrictions)
-    - must NOT be shared with other collections/hosts
-    - must be writable by archivebox user & root
-    - must be cleared on every boot / not persisted
-    - must be cleared on every archivebox version upgrade
-    """
-    from .version import detect_installed_version
+# @cache
+# def get_TMP_DIR():
+#     """
+#     - must NOT be inside DATA_DIR / inside a docker volume bind mount
+#     - must NOT have a long PATH (UNIX socket path length restrictions)
+#     - must NOT be shared with other collections/hosts
+#     - must be writable by archivebox user & root
+#     - must be cleared on every boot / not persisted
+#     - must be cleared on every archivebox version upgrade
+#     """
+#     from .version import detect_installed_version
     
-    HOST_DIRS = PlatformDirs(appname='archivebox', appauthor='ArchiveBox', version=detect_installed_version(), opinion=True, ensure_exists=False)
+#     HOST_DIRS = PlatformDirs(appname='archivebox', appauthor='ArchiveBox', version=detect_installed_version(), opinion=True, ensure_exists=False)
     
-    # print('DATA_DIR OWNED BY:', ARCHIVEBOX_USER, ARCHIVEBOX_GROUP)
-    # print('RUNNING AS:', self.PUID, self.PGID)
-    run_dir = tempfile.gettempdir()
-    try:
-        if 'SYSTEM_TMP_DIR' in os.environ:
-            run_dir = Path(os.environ['SYSTEM_TMP_DIR']).resolve() / get_collection_id(DATA_DIR=DATA_DIR)
-            with SudoPermission(uid=0, fallback=True):
-                run_dir.mkdir(parents=True, exist_ok=True)
-            if not dir_is_writable(run_dir, uid=ARCHIVEBOX_USER):
-                if IS_ROOT:
-                    with SudoPermission(uid=0, fallback=False):
-                        if ARCHIVEBOX_USER == 0:
-                            # print(f'[yellow]:warning:  Waring: Creating SYSTEM_TMP_DIR {run_dir} with mode 777 so that non-root archivebox users can access it.[/yellow]', file=sys.stderr)
-                            os.system(f'chmod -R 777 "{run_dir}"')
-                        else:
-                            os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{run_dir}"')
-                else:
-                    raise PermissionError()
-            assert len(str(run_dir / 'supervisord.conf')) < 95, 'SYSTEM_TMP_DIR path is too long, please set SYSTEM_TMP_DIR env variable to a shorter path (unfortunately unix requires socket paths be < 108 chars)'
-            return run_dir
+#     # print('DATA_DIR OWNED BY:', ARCHIVEBOX_USER, ARCHIVEBOX_GROUP)
+#     # print('RUNNING AS:', self.PUID, self.PGID)
+#     run_dir = tempfile.gettempdir()
+#     try:
+#         if 'SYSTEM_TMP_DIR' in os.environ:
+#             run_dir = Path(os.environ['SYSTEM_TMP_DIR']).resolve() / get_collection_id(DATA_DIR=DATA_DIR)
+#             with SudoPermission(uid=0, fallback=True):
+#                 run_dir.mkdir(parents=True, exist_ok=True)
+#             if not dir_is_writable(run_dir, uid=ARCHIVEBOX_USER):
+#                 if IS_ROOT:
+#                     with SudoPermission(uid=0, fallback=False):
+#                         if ARCHIVEBOX_USER == 0:
+#                             # print(f'[yellow]:warning:  Waring: Creating SYSTEM_TMP_DIR {run_dir} with mode 777 so that non-root archivebox users can access it.[/yellow]', file=sys.stderr)
+#                             os.system(f'chmod -R 777 "{run_dir}"')
+#                         else:
+#                             os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{run_dir}"')
+#                 else:
+#                     raise PermissionError()
+#             assert len(str(run_dir / 'supervisord.conf')) < 95, 'SYSTEM_TMP_DIR path is too long, please set SYSTEM_TMP_DIR env variable to a shorter path (unfortunately unix requires socket paths be < 108 chars)'
+#             return run_dir
         
-        run_dir = (HOST_DIRS.site_runtime_path / get_collection_id(DATA_DIR=DATA_DIR)).resolve()
-        try:
-            assert len(str(run_dir)) + len('/supervisord.sock') < 95
-        except AssertionError:
-            run_dir = Path(tempfile.gettempdir()).resolve() / 'archivebox' / get_collection_id(DATA_DIR=DATA_DIR)
-            assert len(str(run_dir)) + len('/supervisord.sock') < 95, 'SYSTEM_TMP_DIR path is too long, please set SYSTEM_TMP_DIR env variable to a shorter path (unfortunately unix requires socket paths be < 108 chars)'
+#         run_dir = (HOST_DIRS.site_runtime_path / get_collection_id(DATA_DIR=DATA_DIR)).resolve()
+#         try:
+#             assert len(str(run_dir)) + len('/supervisord.sock') < 95
+#         except AssertionError:
+#             run_dir = Path(tempfile.gettempdir()).resolve() / 'archivebox' / get_collection_id(DATA_DIR=DATA_DIR)
+#             assert len(str(run_dir)) + len('/supervisord.sock') < 95, 'SYSTEM_TMP_DIR path is too long, please set SYSTEM_TMP_DIR env variable to a shorter path (unfortunately unix requires socket paths be < 108 chars)'
         
-        with SudoPermission(uid=0, fallback=True):
-            run_dir.mkdir(parents=True, exist_ok=True)
+#         with SudoPermission(uid=0, fallback=True):
+#             run_dir.mkdir(parents=True, exist_ok=True)
             
-        if IS_ROOT or not dir_is_writable(run_dir, uid=ARCHIVEBOX_USER):
-            if IS_ROOT:
-                with SudoPermission(uid=0):
-                    if ARCHIVEBOX_USER == 0:
-                        # print(f'[yellow]:warning:  Waring: Creating SYSTEM_TMP_DIR {run_dir} with mode 777 so that non-root archivebox users can access it.[/yellow]', file=sys.stderr)
-                        os.system(f'chmod -R 777 "{run_dir}"')
-                    else:
-                        os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{run_dir}"')
-            else:
-                raise PermissionError()
+#         if IS_ROOT or not dir_is_writable(run_dir, uid=ARCHIVEBOX_USER):
+#             if IS_ROOT:
+#                 with SudoPermission(uid=0):
+#                     if ARCHIVEBOX_USER == 0:
+#                         # print(f'[yellow]:warning:  Waring: Creating SYSTEM_TMP_DIR {run_dir} with mode 777 so that non-root archivebox users can access it.[/yellow]', file=sys.stderr)
+#                         os.system(f'chmod -R 777 "{run_dir}"')
+#                     else:
+#                         os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{run_dir}"')
+#             else:
+#                 raise PermissionError()
             
-    except (PermissionError, AssertionError):
-        # raise PermissionError(f'SYSTEM_TMP_DIR {run_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}')
-        print(f'[red]:cross_mark:  ERROR: SYSTEM_TMP_DIR {run_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/red]', file=sys.stderr)
+#     except (PermissionError, AssertionError):
+#         # raise PermissionError(f'SYSTEM_TMP_DIR {run_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}')
+#         print(f'[red]:cross_mark:  ERROR: SYSTEM_TMP_DIR {run_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/red]', file=sys.stderr)
         
-    return run_dir
+#     return run_dir
 

+ 3 - 3
archivebox/misc/checks.py

@@ -100,7 +100,7 @@ def check_not_root():
 
 
 def check_data_dir_permissions():
-    from archivebox import DATA_DIR, CONSTANTS
+    from archivebox import DATA_DIR
     from archivebox.misc.logging import STDERR
     from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, DEFAULT_PUID, DEFAULT_PGID, IS_ROOT, USER
     
@@ -119,8 +119,8 @@ def check_data_dir_permissions():
     if data_owned_by_root or data_owner_doesnt_match or data_not_writable:
         STDERR.print(f'[violet]Hint:[/violet] Change the current ownership [red]{data_dir_uid}[/red]:{data_dir_gid} (PUID:PGID) to a non-user & group that will run ArchiveBox, e.g.:')
         STDERR.print(f'    [grey53]sudo[/grey53] chown -R [blue]{DEFAULT_PUID}:{DEFAULT_PGID}[/blue] {DATA_DIR.resolve()}')
-        STDERR.print(f'    [grey53]sudo[/grey53] chown -R [blue]{DEFAULT_PUID}:{DEFAULT_PGID}[/blue] {CONSTANTS.LIB_DIR.resolve()}')
-        STDERR.print(f'    [grey53]sudo[/grey53] chown -R [blue]{DEFAULT_PUID}:{DEFAULT_PGID}[/blue] {CONSTANTS.TMP_DIR.resolve()}')
+        # STDERR.print(f'    [grey53]sudo[/grey53] chown -R [blue]{DEFAULT_PUID}:{DEFAULT_PGID}[/blue] {CONSTANTS.LIB_DIR.resolve()}')
+        # STDERR.print(f'    [grey53]sudo[/grey53] chown -R [blue]{DEFAULT_PUID}:{DEFAULT_PGID}[/blue] {CONSTANTS.TMP_DIR.resolve()}')
         STDERR.print()
         STDERR.print('[blue]More info:[/blue]')
         STDERR.print('    [link=https://github.com/ArchiveBox/ArchiveBox#storage-requirements]https://github.com/ArchiveBox/ArchiveBox#storage-requirements[/link]')

+ 1 - 2
pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "archivebox"
-version = "0.8.5rc18"
+version = "0.8.5rc19"
 requires-python = ">=3.10"
 description = "Self-hosted internet archiving solution."
 authors = [{name = "Nick Sweeting", email = "[email protected]"}]
@@ -77,7 +77,6 @@ dependencies = [
     "atomicwrites==1.4.1",
     "django-taggit==1.3.0",
     "base32-crockford==0.3.0",
-    "platformdirs>=4.3.6",
     # "pocket@git+https://github.com/tapanpandita/[email protected]",
     "pydantic-pkgr>=0.4.13",
     ############# Plugin Dependencies ################