|
|
@@ -1,16 +1,12 @@
|
|
|
__package__ = 'archivebox.config'
|
|
|
|
|
|
import os
|
|
|
-import sys
|
|
|
-import tempfile
|
|
|
import hashlib
|
|
|
+import platform
|
|
|
from pathlib import Path
|
|
|
from functools import cache
|
|
|
|
|
|
-from platformdirs import PlatformDirs
|
|
|
-from rich import print
|
|
|
-
|
|
|
-from .permissions import SudoPermission, IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
|
|
|
+from .permissions import SudoPermission
|
|
|
|
|
|
#############################################################################################
|
|
|
|
|
|
@@ -18,11 +14,15 @@ PACKAGE_DIR: Path = Path(__file__).resolve().parent.parent # archivebox sourc
|
|
|
DATA_DIR: Path = Path(os.getcwd()).resolve() # archivebox user data dir
|
|
|
ARCHIVE_DIR: Path = DATA_DIR / 'archive' # archivebox snapshot data dir
|
|
|
|
|
|
+IN_DOCKER = os.environ.get('IN_DOCKER', False) in ('1', 'true', 'True', 'TRUE', 'yes')
|
|
|
+
|
|
|
+DATABASE_FILE = DATA_DIR / 'index.sqlite3'
|
|
|
+
|
|
|
#############################################################################################
|
|
|
|
|
|
@cache
|
|
|
-def get_collection_id(DATA_DIR=DATA_DIR):
|
|
|
- """Get a short, stable, unique ID for the current collection"""
|
|
|
+def get_collection_id(DATA_DIR=DATA_DIR) -> str:
|
|
|
+ """Get a short, stable, unique ID for the current collection (e.g. abc45678)"""
|
|
|
collection_id_file = DATA_DIR / '.archivebox_id'
|
|
|
|
|
|
try:
|
|
|
@@ -32,12 +32,42 @@ def get_collection_id(DATA_DIR=DATA_DIR):
|
|
|
|
|
|
hash_key = str(DATA_DIR.resolve()).encode()
|
|
|
collection_id = hashlib.sha256(hash_key).hexdigest()[:8]
|
|
|
+
|
|
|
try:
|
|
|
- collection_id_file.write_text(collection_id)
|
|
|
+ # only persist collection_id file if we already have an index.sqlite3 file present
|
|
|
+ # otherwise we might be running in a directory that is not a collection, no point creating cruft files
|
|
|
+ if os.path.isfile(DATABASE_FILE) and os.access(DATA_DIR, os.W_OK):
|
|
|
+ collection_id_file.write_text(collection_id)
|
|
|
except (OSError, FileNotFoundError, PermissionError):
|
|
|
pass
|
|
|
return collection_id
|
|
|
|
|
|
+@cache
|
|
|
+def get_machine_id() -> str:
|
|
|
+ """Get a short, stable, unique ID for the current machine (e.g. abc45678)"""
|
|
|
+
|
|
|
+ MACHINE_ID = 'unknown'
|
|
|
+ try:
|
|
|
+ import machineid
|
|
|
+ MACHINE_ID = machineid.hashed_id('archivebox')[:8]
|
|
|
+ except Exception:
|
|
|
+ try:
|
|
|
+ import uuid
|
|
|
+ import hashlib
|
|
|
+ MACHINE_ID = hashlib.sha256(str(uuid.getnode()).encode()).hexdigest()[:8]
|
|
|
+ except Exception:
|
|
|
+ pass
|
|
|
+ return MACHINE_ID
|
|
|
+
|
|
|
+@cache
|
|
|
+def get_machine_type() -> str:
|
|
|
+ """Get a short, stable, unique type identifier for the current machine (e.g. linux-x86_64-docker)"""
|
|
|
+
|
|
|
+ OS: str = platform.system().lower() # darwin, linux, etc.
|
|
|
+ ARCH: str = platform.machine().lower() # arm64, x86_64, aarch64, etc.
|
|
|
+ LIB_DIR_SCOPE: str = f'{ARCH}-{OS}-docker' if IN_DOCKER else f'{ARCH}-{OS}'
|
|
|
+ return LIB_DIR_SCOPE
|
|
|
+
|
|
|
|
|
|
def dir_is_writable(dir_path: Path, uid: int | None = None, gid: int | None = None, fallback=True) -> bool:
|
|
|
"""Check if a given directory is writable by a specific user and group (fallback=try as current user is unable to check with provided uid)"""
|
|
|
@@ -58,116 +88,116 @@ def dir_is_writable(dir_path: Path, uid: int | None = None, gid: int | None = No
|
|
|
|
|
|
|
|
|
|
|
|
-@cache
|
|
|
-def get_LIB_DIR():
|
|
|
- """
|
|
|
- - should be shared with other collections on the same host
|
|
|
- - must be scoped by CPU architecture, OS family, and archivebox version
|
|
|
- - should not be shared with other hosts/archivebox versions
|
|
|
- - must be writable by any archivebox user
|
|
|
- - should be persistent across reboots
|
|
|
- - can be on a docker bin mount but probably shouldnt be
|
|
|
- - ok to have a long path (doesnt contain SOCKETS)
|
|
|
- """
|
|
|
- from .version import detect_installed_version
|
|
|
+# @cache
|
|
|
+# def get_LIB_DIR():
|
|
|
+# """
|
|
|
+# - should be shared with other collections on the same host
|
|
|
+# - must be scoped by CPU architecture, OS family, and archivebox version
|
|
|
+# - should not be shared with other hosts/archivebox versions
|
|
|
+# - must be writable by any archivebox user
|
|
|
+# - should be persistent across reboots
|
|
|
+# - can be on a docker bin mount but probably shouldnt be
|
|
|
+# - ok to have a long path (doesnt contain SOCKETS)
|
|
|
+# """
|
|
|
+# from .version import detect_installed_version
|
|
|
|
|
|
- HOST_DIRS = PlatformDirs(appname='archivebox', appauthor='ArchiveBox', version=detect_installed_version(), opinion=True, ensure_exists=False)
|
|
|
+# HOST_DIRS = PlatformDirs(appname='archivebox', appauthor='ArchiveBox', version=detect_installed_version(), opinion=True, ensure_exists=False)
|
|
|
|
|
|
- lib_dir = tempfile.gettempdir()
|
|
|
- try:
|
|
|
- if 'SYSTEM_LIB_DIR' in os.environ:
|
|
|
- lib_dir = Path(os.environ['SYSTEM_LIB_DIR'])
|
|
|
- else:
|
|
|
- with SudoPermission(uid=ARCHIVEBOX_USER, fallback=True):
|
|
|
- lib_dir = HOST_DIRS.site_data_path
|
|
|
+# lib_dir = tempfile.gettempdir()
|
|
|
+# try:
|
|
|
+# if 'SYSTEM_LIB_DIR' in os.environ:
|
|
|
+# lib_dir = Path(os.environ['SYSTEM_LIB_DIR'])
|
|
|
+# else:
|
|
|
+# with SudoPermission(uid=ARCHIVEBOX_USER, fallback=True):
|
|
|
+# lib_dir = HOST_DIRS.site_data_path
|
|
|
|
|
|
- # Docker: /usr/local/share/archivebox/0.8.5
|
|
|
- # Ubuntu: /usr/local/share/archivebox/0.8.5
|
|
|
- # macOS: /Library/Application Support/archivebox
|
|
|
- try:
|
|
|
- with SudoPermission(uid=0, fallback=True):
|
|
|
- lib_dir.mkdir(parents=True, exist_ok=True)
|
|
|
- except PermissionError:
|
|
|
- # our user cannot
|
|
|
- lib_dir = HOST_DIRS.user_data_path
|
|
|
- lib_dir.mkdir(parents=True, exist_ok=True)
|
|
|
+# # Docker: /usr/local/share/archivebox/0.8.5
|
|
|
+# # Ubuntu: /usr/local/share/archivebox/0.8.5
|
|
|
+# # macOS: /Library/Application Support/archivebox
|
|
|
+# try:
|
|
|
+# with SudoPermission(uid=0, fallback=True):
|
|
|
+# lib_dir.mkdir(parents=True, exist_ok=True)
|
|
|
+# except PermissionError:
|
|
|
+# # our user cannot
|
|
|
+# lib_dir = HOST_DIRS.user_data_path
|
|
|
+# lib_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
- if IS_ROOT or not dir_is_writable(lib_dir, uid=ARCHIVEBOX_USER):
|
|
|
- if IS_ROOT:
|
|
|
- # make sure lib dir is owned by the archivebox user, not root
|
|
|
- with SudoPermission(uid=0):
|
|
|
- if ARCHIVEBOX_USER == 0:
|
|
|
- # print(f'[yellow]:warning: Waring: Creating SYSTEM_LIB_DIR {lib_dir} with mode 777 so that non-root archivebox users can share it.[/yellow] (caches shared libs used by archivebox for performance)', file=sys.stderr)
|
|
|
- os.system(f'chmod -R 777 "{lib_dir}"')
|
|
|
- else:
|
|
|
- os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{lib_dir}"')
|
|
|
- else:
|
|
|
- raise PermissionError()
|
|
|
- except (PermissionError, AssertionError):
|
|
|
- # raise PermissionError(f'SYSTEM_LIB_DIR {lib_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}')
|
|
|
- print(f'[red]:cross_mark: ERROR: SYSTEM_LIB_DIR {lib_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/red]', file=sys.stderr)
|
|
|
+# if IS_ROOT or not dir_is_writable(lib_dir, uid=ARCHIVEBOX_USER):
|
|
|
+# if IS_ROOT:
|
|
|
+# # make sure lib dir is owned by the archivebox user, not root
|
|
|
+# with SudoPermission(uid=0):
|
|
|
+# if ARCHIVEBOX_USER == 0:
|
|
|
+# # print(f'[yellow]:warning: Waring: Creating SYSTEM_LIB_DIR {lib_dir} with mode 777 so that non-root archivebox users can share it.[/yellow] (caches shared libs used by archivebox for performance)', file=sys.stderr)
|
|
|
+# os.system(f'chmod -R 777 "{lib_dir}"')
|
|
|
+# else:
|
|
|
+# os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{lib_dir}"')
|
|
|
+# else:
|
|
|
+# raise PermissionError()
|
|
|
+# except (PermissionError, AssertionError):
|
|
|
+# # raise PermissionError(f'SYSTEM_LIB_DIR {lib_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}')
|
|
|
+# print(f'[red]:cross_mark: ERROR: SYSTEM_LIB_DIR {lib_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/red]', file=sys.stderr)
|
|
|
|
|
|
- return lib_dir
|
|
|
+# return lib_dir
|
|
|
|
|
|
-@cache
|
|
|
-def get_TMP_DIR():
|
|
|
- """
|
|
|
- - must NOT be inside DATA_DIR / inside a docker volume bind mount
|
|
|
- - must NOT have a long PATH (UNIX socket path length restrictions)
|
|
|
- - must NOT be shared with other collections/hosts
|
|
|
- - must be writable by archivebox user & root
|
|
|
- - must be cleared on every boot / not persisted
|
|
|
- - must be cleared on every archivebox version upgrade
|
|
|
- """
|
|
|
- from .version import detect_installed_version
|
|
|
+# @cache
|
|
|
+# def get_TMP_DIR():
|
|
|
+# """
|
|
|
+# - must NOT be inside DATA_DIR / inside a docker volume bind mount
|
|
|
+# - must NOT have a long PATH (UNIX socket path length restrictions)
|
|
|
+# - must NOT be shared with other collections/hosts
|
|
|
+# - must be writable by archivebox user & root
|
|
|
+# - must be cleared on every boot / not persisted
|
|
|
+# - must be cleared on every archivebox version upgrade
|
|
|
+# """
|
|
|
+# from .version import detect_installed_version
|
|
|
|
|
|
- HOST_DIRS = PlatformDirs(appname='archivebox', appauthor='ArchiveBox', version=detect_installed_version(), opinion=True, ensure_exists=False)
|
|
|
+# HOST_DIRS = PlatformDirs(appname='archivebox', appauthor='ArchiveBox', version=detect_installed_version(), opinion=True, ensure_exists=False)
|
|
|
|
|
|
- # print('DATA_DIR OWNED BY:', ARCHIVEBOX_USER, ARCHIVEBOX_GROUP)
|
|
|
- # print('RUNNING AS:', self.PUID, self.PGID)
|
|
|
- run_dir = tempfile.gettempdir()
|
|
|
- try:
|
|
|
- if 'SYSTEM_TMP_DIR' in os.environ:
|
|
|
- run_dir = Path(os.environ['SYSTEM_TMP_DIR']).resolve() / get_collection_id(DATA_DIR=DATA_DIR)
|
|
|
- with SudoPermission(uid=0, fallback=True):
|
|
|
- run_dir.mkdir(parents=True, exist_ok=True)
|
|
|
- if not dir_is_writable(run_dir, uid=ARCHIVEBOX_USER):
|
|
|
- if IS_ROOT:
|
|
|
- with SudoPermission(uid=0, fallback=False):
|
|
|
- if ARCHIVEBOX_USER == 0:
|
|
|
- # print(f'[yellow]:warning: Waring: Creating SYSTEM_TMP_DIR {run_dir} with mode 777 so that non-root archivebox users can access it.[/yellow]', file=sys.stderr)
|
|
|
- os.system(f'chmod -R 777 "{run_dir}"')
|
|
|
- else:
|
|
|
- os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{run_dir}"')
|
|
|
- else:
|
|
|
- raise PermissionError()
|
|
|
- assert len(str(run_dir / 'supervisord.conf')) < 95, 'SYSTEM_TMP_DIR path is too long, please set SYSTEM_TMP_DIR env variable to a shorter path (unfortunately unix requires socket paths be < 108 chars)'
|
|
|
- return run_dir
|
|
|
+# # print('DATA_DIR OWNED BY:', ARCHIVEBOX_USER, ARCHIVEBOX_GROUP)
|
|
|
+# # print('RUNNING AS:', self.PUID, self.PGID)
|
|
|
+# run_dir = tempfile.gettempdir()
|
|
|
+# try:
|
|
|
+# if 'SYSTEM_TMP_DIR' in os.environ:
|
|
|
+# run_dir = Path(os.environ['SYSTEM_TMP_DIR']).resolve() / get_collection_id(DATA_DIR=DATA_DIR)
|
|
|
+# with SudoPermission(uid=0, fallback=True):
|
|
|
+# run_dir.mkdir(parents=True, exist_ok=True)
|
|
|
+# if not dir_is_writable(run_dir, uid=ARCHIVEBOX_USER):
|
|
|
+# if IS_ROOT:
|
|
|
+# with SudoPermission(uid=0, fallback=False):
|
|
|
+# if ARCHIVEBOX_USER == 0:
|
|
|
+# # print(f'[yellow]:warning: Waring: Creating SYSTEM_TMP_DIR {run_dir} with mode 777 so that non-root archivebox users can access it.[/yellow]', file=sys.stderr)
|
|
|
+# os.system(f'chmod -R 777 "{run_dir}"')
|
|
|
+# else:
|
|
|
+# os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{run_dir}"')
|
|
|
+# else:
|
|
|
+# raise PermissionError()
|
|
|
+# assert len(str(run_dir / 'supervisord.conf')) < 95, 'SYSTEM_TMP_DIR path is too long, please set SYSTEM_TMP_DIR env variable to a shorter path (unfortunately unix requires socket paths be < 108 chars)'
|
|
|
+# return run_dir
|
|
|
|
|
|
- run_dir = (HOST_DIRS.site_runtime_path / get_collection_id(DATA_DIR=DATA_DIR)).resolve()
|
|
|
- try:
|
|
|
- assert len(str(run_dir)) + len('/supervisord.sock') < 95
|
|
|
- except AssertionError:
|
|
|
- run_dir = Path(tempfile.gettempdir()).resolve() / 'archivebox' / get_collection_id(DATA_DIR=DATA_DIR)
|
|
|
- assert len(str(run_dir)) + len('/supervisord.sock') < 95, 'SYSTEM_TMP_DIR path is too long, please set SYSTEM_TMP_DIR env variable to a shorter path (unfortunately unix requires socket paths be < 108 chars)'
|
|
|
+# run_dir = (HOST_DIRS.site_runtime_path / get_collection_id(DATA_DIR=DATA_DIR)).resolve()
|
|
|
+# try:
|
|
|
+# assert len(str(run_dir)) + len('/supervisord.sock') < 95
|
|
|
+# except AssertionError:
|
|
|
+# run_dir = Path(tempfile.gettempdir()).resolve() / 'archivebox' / get_collection_id(DATA_DIR=DATA_DIR)
|
|
|
+# assert len(str(run_dir)) + len('/supervisord.sock') < 95, 'SYSTEM_TMP_DIR path is too long, please set SYSTEM_TMP_DIR env variable to a shorter path (unfortunately unix requires socket paths be < 108 chars)'
|
|
|
|
|
|
- with SudoPermission(uid=0, fallback=True):
|
|
|
- run_dir.mkdir(parents=True, exist_ok=True)
|
|
|
+# with SudoPermission(uid=0, fallback=True):
|
|
|
+# run_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
- if IS_ROOT or not dir_is_writable(run_dir, uid=ARCHIVEBOX_USER):
|
|
|
- if IS_ROOT:
|
|
|
- with SudoPermission(uid=0):
|
|
|
- if ARCHIVEBOX_USER == 0:
|
|
|
- # print(f'[yellow]:warning: Waring: Creating SYSTEM_TMP_DIR {run_dir} with mode 777 so that non-root archivebox users can access it.[/yellow]', file=sys.stderr)
|
|
|
- os.system(f'chmod -R 777 "{run_dir}"')
|
|
|
- else:
|
|
|
- os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{run_dir}"')
|
|
|
- else:
|
|
|
- raise PermissionError()
|
|
|
+# if IS_ROOT or not dir_is_writable(run_dir, uid=ARCHIVEBOX_USER):
|
|
|
+# if IS_ROOT:
|
|
|
+# with SudoPermission(uid=0):
|
|
|
+# if ARCHIVEBOX_USER == 0:
|
|
|
+# # print(f'[yellow]:warning: Waring: Creating SYSTEM_TMP_DIR {run_dir} with mode 777 so that non-root archivebox users can access it.[/yellow]', file=sys.stderr)
|
|
|
+# os.system(f'chmod -R 777 "{run_dir}"')
|
|
|
+# else:
|
|
|
+# os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{run_dir}"')
|
|
|
+# else:
|
|
|
+# raise PermissionError()
|
|
|
|
|
|
- except (PermissionError, AssertionError):
|
|
|
- # raise PermissionError(f'SYSTEM_TMP_DIR {run_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}')
|
|
|
- print(f'[red]:cross_mark: ERROR: SYSTEM_TMP_DIR {run_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/red]', file=sys.stderr)
|
|
|
+# except (PermissionError, AssertionError):
|
|
|
+# # raise PermissionError(f'SYSTEM_TMP_DIR {run_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}')
|
|
|
+# print(f'[red]:cross_mark: ERROR: SYSTEM_TMP_DIR {run_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/red]', file=sys.stderr)
|
|
|
|
|
|
- return run_dir
|
|
|
+# return run_dir
|
|
|
|