2
0
Эх сурвалжийг харах

move tmp dir to machine-id scoped dir

Nick Sweeting 1 жил өмнө
parent
commit
396a7ffcd8

+ 0 - 6
Dockerfile

@@ -74,12 +74,7 @@ ENV CODE_DIR=/app \
     PLAYWRIGHT_BROWSERS_PATH=/browsers
     # TODO: add TMP_DIR and LIB_DIR?
 
-# Application-level paths
-ENV APP_VENV=/app/.venv \
-    NODE_MODULES=/app/node_modules
-
 # Build shell config
-ENV PATH="$PATH:$GLOBAL_VENV/bin:$APP_VENV/bin:$NODE_MODULES/.bin"
 SHELL ["/bin/bash", "-o", "pipefail", "-o", "errexit", "-o", "errtrace", "-o", "nounset", "-c"] 
 
 ######### System Environment ####################################
@@ -99,7 +94,6 @@ RUN (echo "[i] Docker build for ArchiveBox $(cat /VERSION.txt) starting..." \
     && echo "PLATFORM=${TARGETPLATFORM} ARCH=$(uname -m) ($(uname -s) ${TARGETARCH} ${TARGETVARIANT})" \
     && echo "BUILD_START_TIME=$(date +"%Y-%m-%d %H:%M:%S %s") TZ=${TZ} LANG=${LANG}" \
     && echo \
-    && echo "GLOBAL_VENV=${GLOBAL_VENV} APP_VENV=${APP_VENV} NODE_MODULES=${NODE_MODULES}" \
     && echo "PYTHON=${PYTHON_VERSION} NODE=${NODE_VERSION} PATH=${PATH}" \
     && echo "CODE_DIR=${CODE_DIR} DATA_DIR=${DATA_DIR}" \
     && echo \

+ 16 - 12
archivebox/__init__.py

@@ -1,12 +1,5 @@
 #!/usr/bin/env python3
-ASCII_LOGO = """
- █████╗ ██████╗  ██████╗██╗  ██╗██╗██╗   ██╗███████╗ ██████╗  ██████╗ ██╗  ██╗
-██╔══██╗██╔══██╗██╔════╝██║  ██║██║██║   ██║██╔════╝ ██╔══██╗██╔═══██╗╚██╗██╔╝
-███████║██████╔╝██║     ███████║██║██║   ██║█████╗   ██████╔╝██║   ██║ ╚███╔╝ 
-██╔══██║██╔══██╗██║     ██╔══██║██║╚██╗ ██╔╝██╔══╝   ██╔══██╗██║   ██║ ██╔██╗ 
-██║  ██║██║  ██║╚██████╗██║  ██║██║ ╚████╔╝ ███████╗ ██████╔╝╚██████╔╝██╔╝ ██╗
-╚═╝  ╚═╝╚═╝  ╚═╝ ╚═════╝╚═╝  ╚═╝╚═╝  ╚═══╝  ╚══════╝ ╚═════╝  ╚═════╝ ╚═╝  ╚═╝
-"""
+
 # Welcome to the ArchiveBox source code! Thanks for checking it out!
 #
 # "We are swimming upstream against a great torrent of disorganization.
@@ -23,14 +16,25 @@ import sys
 import tempfile
 from pathlib import Path
 
-USING_TMP_DATA_DIR = None
+ASCII_LOGO = """
+ █████╗ ██████╗  ██████╗██╗  ██╗██╗██╗   ██╗███████╗ ██████╗  ██████╗ ██╗  ██╗
+██╔══██╗██╔══██╗██╔════╝██║  ██║██║██║   ██║██╔════╝ ██╔══██╗██╔═══██╗╚██╗██╔╝
+███████║██████╔╝██║     ███████║██║██║   ██║█████╗   ██████╔╝██║   ██║ ╚███╔╝ 
+██╔══██║██╔══██╗██║     ██╔══██║██║╚██╗ ██╔╝██╔══╝   ██╔══██╗██║   ██║ ██╔██╗ 
+██║  ██║██║  ██║╚██████╗██║  ██║██║ ╚████╔╝ ███████╗ ██████╔╝╚██████╔╝██╔╝ ██╗
+╚═╝  ╚═╝╚═╝  ╚═╝ ╚═════╝╚═╝  ╚═╝╚═╝  ╚═══╝  ╚══════╝ ╚═════╝  ╚═════╝ ╚═╝  ╚═╝
+"""
+
+SYSTEM_TMP_DIR = Path(tempfile.gettempdir()) / 'archivebox'
+SYSTEM_TMP_DIR.mkdir(parents=True, exist_ok=True)
+os.environ['SYSTEM_TMP_DIR'] = str(SYSTEM_TMP_DIR)
 
+# if we are outside a data dir, cd into an ephemeral tmp dir so that
+# we can run version/help without polluting cwd with an index.sqlite3
 if len(sys.argv) > 1 and sys.argv[1] in ('version', 'help'):
     current_dir = Path(os.getcwd()).resolve()
     if not (current_dir / 'index.sqlite3').exists():
-        USING_TMP_DATA_DIR = Path(tempfile.gettempdir()) / 'archivebox'
-        USING_TMP_DATA_DIR.mkdir(parents=True, exist_ok=True)
-        os.chdir(USING_TMP_DATA_DIR)
+        os.chdir(SYSTEM_TMP_DIR)
 
 # make sure PACKAGE_DIR is in sys.path so we can import all subfolders
 # without necessarily waiting for django to load them through INSTALLED_APPS

+ 2 - 0
archivebox/__main__.py

@@ -2,7 +2,9 @@
 """This is the main entry point for the ArchiveBox CLI."""
 __package__ = 'archivebox'
 
+import archivebox      # noqa # make sure monkey patches are applied before anything else
 import sys
+
 from .cli import main
 
 ASCII_LOGO_MINI = r"""

+ 17 - 1
archivebox/config/constants.py

@@ -4,6 +4,8 @@ __package__ = 'archivebox.config'
 import os
 import re
 import platform
+import machineid
+import tempfile
 
 from typing import Dict
 from pathlib import Path
@@ -53,6 +55,17 @@ def _detect_installed_version(PACKAGE_DIR: Path):
 VERSION: str = _detect_installed_version(PACKAGE_DIR)
 
 
+# Ensure system tmp dir and data dir exist as we need them to run almost everything
+if 'SYSTEM_TMP_DIR' in os.environ:
+    SYSTEM_TMP_DIR = Path(os.environ['SYSTEM_TMP_DIR'])
+else:
+    SYSTEM_TMP_DIR = Path(tempfile.gettempdir()) / 'archivebox'
+    SYSTEM_TMP_DIR.mkdir(parents=True, exist_ok=True)
+
+DATA_DIR_TMP_DIR = DATA_DIR / 'tmp' / machineid.hashed_id('archivebox')[:16]
+DATA_DIR_TMP_DIR.mkdir(parents=True, exist_ok=True)
+
+
 class ConstantsDict(Mapping):
     IN_DOCKER = os.environ.get('IN_DOCKER', False) in ('1', 'true', 'True', 'yes')
     OS = platform.system().lower()      # darwin, linux, etc.
@@ -81,13 +94,16 @@ class ConstantsDict(Mapping):
     LIB_DIR_NAME: str = 'lib'
     TMP_DIR_NAME: str = 'tmp'
 
+    SYSTEM_TMP_DIR: Path                = SYSTEM_TMP_DIR
+    DATA_DIR_TMP_DIR: Path              = DATA_DIR_TMP_DIR
+
     ARCHIVE_DIR: Path                   = DATA_DIR / ARCHIVE_DIR_NAME
     SOURCES_DIR: Path                   = DATA_DIR / SOURCES_DIR_NAME
     PERSONAS_DIR: Path                  = DATA_DIR / PERSONAS_DIR_NAME
     CACHE_DIR: Path                     = DATA_DIR / CACHE_DIR_NAME
     LOGS_DIR: Path                      = DATA_DIR / LOGS_DIR_NAME
     LIB_DIR: Path                       = DATA_DIR / LIB_DIR_NAME / LIB_DIR_SCOPE   # e.g. data/lib/arm64-darwin-docker
-    TMP_DIR: Path                       = (Path('/tmp') if IN_DOCKER else DATA_DIR) / TMP_DIR_NAME
+    TMP_DIR: Path                       = SYSTEM_TMP_DIR if IN_DOCKER else DATA_DIR_TMP_DIR  # e.g. /var/folders/bk/63jsns1s.../T/archivebox or ./data/tmp/abcwe324234
     CUSTOM_TEMPLATES_DIR: Path          = DATA_DIR / CUSTOM_TEMPLATES_DIR_NAME
     USER_PLUGINS_DIR: Path              = DATA_DIR / USER_PLUGINS_DIR_NAME
 

+ 1 - 1
archivebox/vendor/pydantic-pkgr

@@ -1 +1 @@
-Subproject commit 0f610c2ab688d81711acec73c73d4286ba14d033
+Subproject commit 5bb42056bda9269e600885d83369b89f8dd916a5

+ 4 - 4
pdm.lock

@@ -5,7 +5,7 @@
 groups = ["default", "all", "ldap", "sonic"]
 strategy = ["inherit_metadata"]
 lock_version = "4.5.0"
-content_hash = "sha256:be2740879f6045b108b48e90997be10a6a670805e7682a0c86fc3cd1e98811c4"
+content_hash = "sha256:0aa8e150b08d1c571c1f8bfa844fdb3ad0995f9b01eb9199b6449ed0230acbd5"
 
 [[metadata.targets]]
 requires_python = "==3.12.*"
@@ -1158,7 +1158,7 @@ files = [
 
 [[package]]
 name = "pydantic-pkgr"
-version = "0.3.9"
+version = "0.4.2"
 requires_python = ">=3.10"
 summary = "System package manager APIs in strongly typed Python"
 groups = ["default"]
@@ -1169,8 +1169,8 @@ dependencies = [
     "typing-extensions>=4.11.0",
 ]
 files = [
-    {file = "pydantic_pkgr-0.3.9-py3-none-any.whl", hash = "sha256:8cdece5142b79127cdf96baf84cefdc0b811297e3b0e13526f88e10019ae60c1"},
-    {file = "pydantic_pkgr-0.3.9.tar.gz", hash = "sha256:f811600e9222b98b7d52df27375cab92ccfa702020f80a46076c4e5eeb099dc4"},
+    {file = "pydantic_pkgr-0.4.2-py3-none-any.whl", hash = "sha256:b78e421a58c1777098792236ed6da50b70167e18579c6e4353309ab121972f7b"},
+    {file = "pydantic_pkgr-0.4.2.tar.gz", hash = "sha256:879654052a22122484bebd2616c4ade6887f2f6fb3afae397937a5bb23473f76"},
 ]
 
 [[package]]

+ 1 - 1
pyproject.toml

@@ -83,7 +83,7 @@ dependencies = [
     ############# VENDORED LIBS ######################
     # these can be safely omitted when installation subsystem does not provide these as packages (e.g. apt/debian)
     # archivebox will automatically load fallback vendored copies bundled via archivebox/vendor/__init__.py
-    "pydantic-pkgr>=0.3.9",
+    "pydantic-pkgr>=0.4.2",
     "atomicwrites==1.4.1",
     "pocket@git+https://github.com/tapanpandita/pocket.git@v0.3.7",
     "django-taggit==1.3.0",

+ 1 - 1
requirements.txt

@@ -77,7 +77,7 @@ pycparser==2.22; platform_python_implementation != "PyPy" and python_version ==
 pycryptodomex==3.21.0; python_version == "3.12"
 pydantic==2.9.2; python_version == "3.12"
 pydantic-core==2.23.4; python_version == "3.12"
-pydantic-pkgr==0.3.9; python_version == "3.12"
+pydantic-pkgr==0.4.2; python_version == "3.12"
 pydantic-settings==2.5.2; python_version == "3.12"
 pygments==2.18.0; python_version == "3.12"
 pyopenssl==24.2.1; python_version == "3.12"