Jelajahi Sumber

Merge branch 'dev' into side-fixes

Nick Sweeting 2 tahun lalu
induk
melakukan
89bdda85e9
12 mengubah file dengan 238 tambahan dan 925 penghapusan
  1. 1 0
      .dockerignore
  2. 13 6
      .github/workflows/docker.yml
  3. 12 12
      Dockerfile
  4. 8 3
      README.md
  5. 25 6
      archivebox/config.py
  6. 27 16
      archivebox/main.py
  7. 6 1
      bin/build_docker.sh
  8. 65 24
      bin/docker_entrypoint.sh
  9. 6 1
      bin/release_docker.sh
  10. 33 815
      pdm.lock
  11. 20 27
      pyproject.toml
  12. 22 14
      requirements.txt

+ 1 - 0
.dockerignore

@@ -16,6 +16,7 @@ venv/
 .docker-venv/
 node_modules/
 
+docs/
 build/
 dist/
 brew_dist/

+ 13 - 6
.github/workflows/docker.yml

@@ -11,8 +11,7 @@ on:
 
 env:
   DOCKER_IMAGE: archivebox-ci
-
-
+      
 jobs:
   buildx:
     runs-on: ubuntu-latest
@@ -60,13 +59,11 @@ jobs:
         uses: docker/metadata-action@v5
         with:
           images: archivebox/archivebox,nikisweeting/archivebox
-          flavor: |
-              latest=auto
           tags: |
               type=ref,event=branch
               type=semver,pattern={{version}}
               type=semver,pattern={{major}}.{{minor}}
-              type=sha
+              type=raw,value=latest,enable={{is_default_branch}}
       
       - name: Build and push
         id: docker_build
@@ -78,8 +75,18 @@ jobs:
           push: ${{ github.event_name != 'pull_request' }}
           tags: ${{ steps.docker_meta.outputs.tags }}
           cache-from: type=local,src=/tmp/.buildx-cache
-          cache-to: type=local,dest=/tmp/.buildx-cache
+          cache-to: type=local,dest=/tmp/.buildx-cache-new
           platforms: linux/amd64,linux/arm64,linux/arm/v7
 
       - name: Image digest
         run: echo ${{ steps.docker_build.outputs.digest }}
+       
+      # This ugly bit is necessary if you don't want your cache to grow forever
+      # until it hits GitHub's limit of 5GB.
+      # Temp fix
+      # https://github.com/docker/build-push-action/issues/252
+      # https://github.com/moby/buildkit/issues/1896
+      - name: Move cache
+        run: |
+          rm -rf /tmp/.buildx-cache
+          mv /tmp/.buildx-cache-new /tmp/.buildx-cache

+ 12 - 12
Dockerfile

@@ -73,7 +73,8 @@ COPY --chown=root:root --chmod=755 package.json "$CODE_DIR/"
 RUN grep '"version": ' "${CODE_DIR}/package.json" | awk -F'"' '{print $4}' > /VERSION.txt
 
 # Force apt to leave downloaded binaries in /var/cache/apt (massively speeds up Docker builds)
-RUN rm -f /etc/apt/apt.conf.d/docker-clean; echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache
+RUN echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache \
+    && rm -f /etc/apt/apt.conf.d/docker-clean
 
 # Print debug info about build and save it to disk, for human eyes only, not used by anything else
 RUN (echo "[i] Docker build for ArchiveBox $(cat /VERSION.txt) starting..." \
@@ -123,7 +124,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
 RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.npm,sharing=locked,id=npm-$TARGETARCH$TARGETVARIANT \
     echo "[+] Installing Node $NODE_VERSION environment in $NODE_MODULES..." \
     && echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_${NODE_VERSION}.x nodistro main" >> /etc/apt/sources.list.d/nodejs.list \
-    && curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
+    && curl -fsSL "https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key" | gpg --dearmor | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
     && apt-get update -qq \
     && apt-get install -qq -y -t bookworm-backports --no-install-recommends \
         nodejs libatomic1 python3-minimal \
@@ -202,7 +203,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
     && chown -R $ARCHIVEBOX_USER "$PLAYWRIGHT_BROWSERS_PATH" \
     # Save version info
     && ( \
-        which chromium-browser && /usr/bin/chromium-browser --version \
+        which chromium-browser && /usr/bin/chromium-browser --version || /usr/lib/chromium/chromium --version \
         && echo -e '\n\n' \
     ) | tee -a /VERSION.txt
 
@@ -246,15 +247,15 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
 COPY --chown=root:root --chmod=755 "." "$CODE_DIR/"
 RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT \
     echo "[*] Installing PIP ArchiveBox package from $CODE_DIR..." \
-    && apt-get update -qq \
+    # && apt-get update -qq \
     # install C compiler to build deps on platforms that dont have 32-bit wheels available on pypi
-    && apt-get install -qq -y -t bookworm-backports --no-install-recommends \
-        build-essential  \
+    # && apt-get install -qq -y -t bookworm-backports --no-install-recommends \
+    #     build-essential  \
     # INSTALL ARCHIVEBOX python package globally from CODE_DIR, with all optional dependencies
     && pip install -e "$CODE_DIR"[sonic,ldap] \
     # save docker image size and always remove compilers / build tools after building is complete
-    && apt-get purge -y build-essential \
-    && apt-get autoremove -y \
+    # && apt-get purge -y build-essential \
+    # && apt-get autoremove -y \
     && rm -rf /var/lib/apt/lists/*
 
 ####################################################
@@ -276,11 +277,10 @@ ENV IN_DOCKER=True
 
 # Print version for nice docker finish summary
 RUN (echo -e "\n\n[√] Finished Docker build succesfully. Saving build summary in: /VERSION.txt" \
-    && echo -e "PLATFORM=${TARGETPLATFORM} ARCH=$(uname -m) ($(uname -s) ${TARGETARCH} ${TARGETVARIANT})" \
-    && echo -e "BUILD_END_TIME=$(date +"%Y-%m-%d %H:%M:%S %s") TZ=${TZ}\n\n" \
-    && "$CODE_DIR/bin/docker_entrypoint.sh" \
-        archivebox version 2>&1 \
+    && echo -e "PLATFORM=${TARGETPLATFORM} ARCH=$(uname -m) ($(uname -s) ${TARGETARCH} ${TARGETVARIANT})\n" \
+    && echo -e "BUILD_END_TIME=$(date +"%Y-%m-%d %H:%M:%S %s")\n\n" \
     ) | tee -a /VERSION.txt
+RUN "$CODE_DIR"/bin/docker_entrypoint.sh version 2>&1 | tee -a /VERSION.txt
 
 ####################################################
 

+ 8 - 3
README.md

@@ -1,5 +1,5 @@
 <div align="center">
-<em><img src="icon.png" height="90px"></em>
+<em><img src="https://archivebox.io/icon.png" height="90px"></em>
 <h1>ArchiveBox<br/><sub>Open-source self-hosted web archiving.</sub></h1>
 
 <br/>
@@ -33,9 +33,9 @@ curl -sSL 'https://get.archivebox.io' | sh    # (or see pip/brew/Docker instruct
 
 **ArchiveBox is a powerful, self-hosted internet archiving solution to collect, save, and view websites offline.**
 
-Without active preservation effort, everything on the internet eventually dissapears or degrades. Archive.org does a great job as a free central archive, but they require all archives to be public, and they cant save every type of content.
+Without active preservation effort, everything on the internet eventually disappears or degrades. Archive.org does a great job as a free central archive, but they require all archives to be public, and they can't save every type of content.
 
-*ArchiveBox is an open source tool that helps you archive web content on your own (or privately within an organization): save sharable copies of browser bookmarks, preserve evidence for legal cases, backup photos on FB / Insta / Flickr, download your media from YT / Soundcloud / etc., snapshot research papers in academic citations, and more...*
+*ArchiveBox is an open source tool that helps you archive web content on your own (or privately within an organization): save copies of browser bookmarks, preserve evidence for legal cases, backup photos from FB / Insta / Flickr, download your media from YT / Soundcloud / etc., snapshot research papers & academic citations, and more...*
 
 > ➡️ *Use ArchiveBox as a [command-line package](#quickstart) and/or [self-hosted web app](#quickstart) on Linux, macOS, or in [Docker](#quickstart).*
 
@@ -320,6 +320,10 @@ See the <a href="https://github.com/ArchiveBox/pip-archivebox"><code>pip-archive
 <details>
 <summary><img src="https://user-images.githubusercontent.com/511499/118077361-f0616580-b381-11eb-973c-ee894a3349fb.png" alt="Arch" height="28px" align="top"/> <code>pacman</code> / <img src="https://user-images.githubusercontent.com/511499/118077946-29e6a080-b383-11eb-94f0-d4871da08c3f.png" alt="FreeBSD" height="28px" align="top"/> <code>pkg</code> / <img src="https://user-images.githubusercontent.com/511499/118077861-002d7980-b383-11eb-86a7-5936fad9190f.png" alt="Nix" height="28px" align="top"/> <code>nix</code> (Arch/FreeBSD/NixOS/more)</summary>
 <br/>
+
+> [!WARNING]  
+> *These are contributed by external volunteers and may lag behind the official `pip` channel.*
+
 <ul>
 <li>Arch: <a href="https://aur.archlinux.org/packages/archivebox/"><code>yay -S archivebox</code></a> (contributed by <a href="https://github.com/imlonghao"><code>@imlonghao</code></a>)</li>
 <li>FreeBSD: <a href="https://github.com/ArchiveBox/ArchiveBox#%EF%B8%8F-easy-setup"><code>curl -sSL 'https://get.archivebox.io' | sh</code></a> (uses <code>pkg</code> + <code>pip3</code> under-the-hood)</li>
@@ -366,6 +370,7 @@ See <a href="#%EF%B8%8F-cli-usage">below</a> for usage examples using the CLI, W
 <br/>
 Other providers of paid ArchiveBox hosting (not officially endorsed):<br/>
 <br/><br/>
+<li><a href="https://elest.io/open-source/archivebox"><img src="https://img.shields.io/badge/Managed_Hosting-Elest.io-%23193f7e.svg?style=flat" height="22px"/></a></li>
 <li><a href="https://www.stellarhosted.com/archivebox/"><img src="https://img.shields.io/badge/Semi_Managed_Hosting-StellarHosted.com-%23193f7e.svg?style=flat" height="22px"/></a> (USD $29-250/mo, <a href="https://www.stellarhosted.com/archivebox/#pricing">pricing</a>)</li>
 <li><a href="https://www.pikapods.com/pods?run=archivebox"><img src="https://img.shields.io/badge/Semi_Managed_Hosting-PikaPods.com-%2343a047.svg?style=flat" height="22px"/></a> (from USD $2.6/mo)</li>
 <li><a href="https://m.do.co/c/cbc4c0c17840">

+ 25 - 6
archivebox/config.py

@@ -391,7 +391,7 @@ def get_version(config):
 
     raise Exception('Failed to detect installed archivebox version!')
 
-def get_commit_hash(config):
+def get_commit_hash(config) -> Optional[str]:
     try:
         git_dir = config['PACKAGE_DIR'] / '../'
         ref = (git_dir / 'HEAD').read_text().strip().split(' ')[-1]
@@ -400,6 +400,14 @@ def get_commit_hash(config):
     except Exception:
         return None
 
+def get_build_time(config) -> str:
+    if config['IN_DOCKER']:
+        docker_build_end_time = Path('/VERSION.txt').read_text().rsplit('BUILD_END_TIME=')[-1].split('\n', 1)[0]
+        return docker_build_end_time
+
+    src_last_modified_unix_timestamp = (config['PACKAGE_DIR'] / 'config.py').stat().st_mtime
+    return datetime.fromtimestamp(src_last_modified_unix_timestamp).strftime('%Y-%m-%d %H:%M:%S %s')
+
 ############################## Derived Config ##################################
 
 
@@ -426,8 +434,9 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
     'DIR_OUTPUT_PERMISSIONS':   {'default': lambda c: c['OUTPUT_PERMISSIONS'].replace('6', '7').replace('4', '5')},
 
     'ARCHIVEBOX_BINARY':        {'default': lambda c: sys.argv[0] or bin_path('archivebox')},
-    'VERSION':                  {'default': lambda c: get_version(c)},
+    'VERSION':                  {'default': lambda c: get_version(c).split('+', 1)[0]},
     'COMMIT_HASH':              {'default': lambda c: get_commit_hash(c)},
+    'BUILD_TIME':               {'default': lambda c: get_build_time(c)},
     
     'PYTHON_BINARY':            {'default': lambda c: sys.executable},
     'PYTHON_ENCODING':          {'default': lambda c: sys.stdout.encoding.upper()},
@@ -1113,14 +1122,25 @@ if not CONFIG['CHECK_SSL_VALIDITY']:
 
 def check_system_config(config: ConfigDict=CONFIG) -> None:
     ### Check system environment
-    if config['USER'] == 'root':
+    if config['USER'] == 'root' or str(config['PUID']) == "0":
         stderr('[!] ArchiveBox should never be run as root!', color='red')
         stderr('    For more information, see the security overview documentation:')
         stderr('        https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root')
+        
+        if config['IN_DOCKER']:
+            attempted_command = ' '.join(sys.argv[:3])
+            stderr('')
+            stderr('    {lightred}Hint{reset}: When using Docker, you must run commands with {green}docker run{reset} instead of {lightyellow}docker exec{reset}, e.g.:'.format(**config['ANSI']))
+            stderr(f'        docker compose run archivebox {attempted_command}')
+            stderr(f'        docker compose exec --user=archivebox archivebox {attempted_command}')
+            stderr('        or')
+            stderr(f'        docker run -it -v ... -p ... archivebox/archivebox {attempted_command}')
+            stderr(f'        docker exec -it --user=archivebox <container id> /bin/bash')
+        
         raise SystemExit(2)
 
     ### Check Python environment
-    if sys.version_info[:3] < (3, 6, 0):
+    if sys.version_info[:3] < (3, 7, 0):
         stderr(f'[X] Python version is not new enough: {config["PYTHON_VERSION"]} (>3.6 is required)', color='red')
         stderr('    See https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#python for help upgrading your Python installation.')
         raise SystemExit(2)
@@ -1284,8 +1304,7 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG,
         with open(settings.ERROR_LOG, "a", encoding='utf-8') as f:
             command = ' '.join(sys.argv)
             ts = datetime.now(timezone.utc).strftime('%Y-%m-%d__%H:%M:%S')
-            f.write(f"\n> {command}; ts={ts} version={config['VERSION']} docker={config['IN_DOCKER']} is_tty={config['IS_TTY']}\n")
-
+            f.write(f"\n> {command}; TS={ts} VERSION={config['VERSION']} IN_DOCKER={config['IN_DOCKER']} IS_TTY={config['IS_TTY']}\n")
 
         if check_db:
             # Enable WAL mode in sqlite3

+ 27 - 16
archivebox/main.py

@@ -93,11 +93,14 @@ from .config import (
     SQL_INDEX_FILENAME,
     ALLOWED_IN_OUTPUT_DIR,
     SEARCH_BACKEND_ENGINE,
+    LDAP,
+    get_version,
     check_dependencies,
     check_data_folder,
     write_config_file,
     VERSION,
     COMMIT_HASH,
+    BUILD_TIME,
     CODE_LOCATIONS,
     EXTERNAL_LOCATIONS,
     DATA_LOCATIONS,
@@ -218,31 +221,39 @@ def version(quiet: bool=False,
     
     if not quiet:
         # 0.7.1
-        # ArchiveBox v0.7.1 Cpython Linux Linux-4.19.121-linuxkit-x86_64-with-glibc2.28 x86_64 (in Docker) (in TTY)
-        # DEBUG=False IN_DOCKER=True IN_QEMU=False IS_TTY=True TZ=UTC FS_ATOMIC=True FS_REMOTE=False FS_PERMS=644 FS_USER=501:20 SEARCH_BACKEND=ripgrep
+        # ArchiveBox v0.7.1+editable COMMIT_HASH=951bba5 BUILD_TIME=2023-12-17 16:46:05 1702860365
+        # IN_DOCKER=False IN_QEMU=False ARCH=arm64 OS=Darwin PLATFORM=macOS-14.2-arm64-arm-64bit PYTHON=Cpython
+        # FS_ATOMIC=True FS_REMOTE=False FS_USER=501:20 FS_PERMS=644
+        # DEBUG=False IS_TTY=True TZ=UTC SEARCH_BACKEND=ripgrep LDAP=False
         
         p = platform.uname()
         print(
-            'ArchiveBox v{}'.format(VERSION),
-            *((COMMIT_HASH[:7],) if COMMIT_HASH else ()),
-            sys.implementation.name.title(),
-            p.system,
-            platform.platform(),
-            p.machine,
+            'ArchiveBox v{}'.format(get_version(CONFIG)),
+            *((f'COMMIT_HASH={COMMIT_HASH[:7]}',) if COMMIT_HASH else ()),
+            f'BUILD_TIME={BUILD_TIME}',
         )
-        OUTPUT_IS_REMOTE_FS = DATA_LOCATIONS['OUTPUT_DIR']['is_mount'] or DATA_LOCATIONS['ARCHIVE_DIR']['is_mount']
         print(
-            f'DEBUG={DEBUG}',
             f'IN_DOCKER={IN_DOCKER}',
             f'IN_QEMU={IN_QEMU}',
-            f'IS_TTY={IS_TTY}',
-            f'TZ={TIMEZONE}',
-            #f'DB=django.db.backends.sqlite3 (({CONFIG["SQLITE_JOURNAL_MODE"]})',  # add this if we have more useful info to show eventually
+            f'ARCH={p.machine}',
+            f'OS={p.system}',
+            f'PLATFORM={platform.platform()}',
+            f'PYTHON={sys.implementation.name.title()}',
+        )
+        OUTPUT_IS_REMOTE_FS = DATA_LOCATIONS['OUTPUT_DIR']['is_mount'] or DATA_LOCATIONS['ARCHIVE_DIR']['is_mount']
+        print(
             f'FS_ATOMIC={ENFORCE_ATOMIC_WRITES}',
             f'FS_REMOTE={OUTPUT_IS_REMOTE_FS}',
             f'FS_USER={PUID}:{PGID}',
             f'FS_PERMS={OUTPUT_PERMISSIONS}',
+        )
+        print(
+            f'DEBUG={DEBUG}',
+            f'IS_TTY={IS_TTY}',
+            f'TZ={TIMEZONE}',
             f'SEARCH_BACKEND={SEARCH_BACKEND_ENGINE}',
+            f'LDAP={LDAP}',
+            #f'DB=django.db.backends.sqlite3 (({CONFIG["SQLITE_JOURNAL_MODE"]})',  # add this if we have more useful info to show eventually
         )
         print()
 
@@ -271,7 +282,7 @@ def version(quiet: bool=False,
                 print(printable_folder_status(name, path))
         else:
             print()
-            print('{white}[i] Data locations:{reset}'.format(**ANSI))
+            print('{white}[i] Data locations:{reset} (not in a data directory)'.format(**ANSI))
 
         print()
         check_dependencies()
@@ -1005,9 +1016,9 @@ def setup(out_dir: Path=OUTPUT_DIR) -> None:
 
     stderr('\n    Installing SINGLEFILE_BINARY, READABILITY_BINARY, MERCURY_BINARY automatically using npm...')
     if not NODE_VERSION:
-        stderr('[X] You must first install node using your system package manager', color='red')
+        stderr('[X] You must first install node & npm using your system package manager', color='red')
         hint([
-            'curl -sL https://deb.nodesource.com/setup_15.x | sudo -E bash -',
+            'https://github.com/nodesource/distributions#table-of-contents',
             'or to disable all node-based modules run: archivebox config --set USE_NODE=False',
         ])
         raise SystemExit(1)

+ 6 - 1
bin/build_docker.sh

@@ -23,6 +23,7 @@ SUPPORTED_PLATFORMS="linux/amd64,linux/arm64,linux/arm/v7"
 TAG_NAME="${1:-$(git rev-parse --abbrev-ref HEAD)}"
 VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
 SHORT_VERSION="$(echo "$VERSION" | perl -pe 's/(\d+)\.(\d+)\.(\d+)/$1.$2/g')"
+GIT_SHA=sha-"$(git rev-parse --short HEAD)"
 SELECTED_PLATFORMS="${2:-$SUPPORTED_PLATFORMS}"
 
 echo "[+] Building Docker image: tag=$TAG_NAME version=$SHORT_VERSION arch=$SELECTED_PLATFORMS"
@@ -85,12 +86,16 @@ docker buildx build --platform "$SELECTED_PLATFORMS" --load . \
                -t archivebox/archivebox:$TAG_NAME \
                -t archivebox/archivebox:$VERSION \
                -t archivebox/archivebox:$SHORT_VERSION \
+               -t archivebox/archivebox:$GIT_SHA \
                -t archivebox/archivebox:latest \
                -t nikisweeting/archivebox \
                -t nikisweeting/archivebox:$TAG_NAME \
                -t nikisweeting/archivebox:$VERSION \
                -t nikisweeting/archivebox:$SHORT_VERSION \
+               -t nikisweeting/archivebox:$GIT_SHA \
                -t nikisweeting/archivebox:latest \
                -t ghcr.io/archivebox/archivebox/archivebox:$TAG_NAME \
                -t ghcr.io/archivebox/archivebox/archivebox:$VERSION \
-               -t ghcr.io/archivebox/archivebox/archivebox:$SHORT_VERSION
+               -t ghcr.io/archivebox/archivebox/archivebox:$SHORT_VERSION \
+               -t ghcr.io/archivebox/archivebox/archivebox:$GIT_SHA \
+               -t ghcr.io/archivebox/archivebox/archivebox:latest

+ 65 - 24
bin/docker_entrypoint.sh

@@ -1,20 +1,55 @@
 #!/bin/bash
 
+# This Docker ENTRYPOINT script is called by `docker run archivebox ...` or `docker compose run archivebox ...`.
+# It takes a CMD as $* shell arguments and runs it following these setup steps:
+
+# - Set the archivebox user to use the correct PUID & PGID
+#     1. highest precedence is for valid PUID and PGID env vars passed in explicitly
+#     2. fall back to DETECTED_PUID of files found within existing data dir
+#     3. fall back to DEFAULT_PUID if no data dir or it's owned by root
+# - Create a new /data dir if necessary and set the correct ownership on it
+# - Create a new /browsers dir if necessary and set the correct ownership on it
+# - Check whether we're running inside QEMU emulation and show a warning if so.
+# - Drop down to archivebox user permissions and execute passed CMD command.
+
+# Bash Environment Setup
+# http://redsymbol.net/articles/unofficial-bash-strict-mode/
+# https://www.gnu.org/software/bash/manual/html_node/The-Set-Builtin.html
+# set -o xtrace
+# set -o nounset
+set -o errexit
+set -o errtrace
+set -o pipefail
+# IFS=$'\n'
+
+# Load global invariants (set by Dockerfile during image build time, not intended to be customized by users at runtime)
 export DATA_DIR="${DATA_DIR:-/data}"
 export ARCHIVEBOX_USER="${ARCHIVEBOX_USER:-archivebox}"
 
-# default PUID and PGID if data dir is empty and no PUID+PGID is set
+# Global default PUID and PGID if data dir is empty and no intended PUID+PGID is set manually by user
 export DEFAULT_PUID=911
 export DEFAULT_PGID=911
 
-# if data directory already exists, autodetect detect owner by looking at files within
-export DETECTED_UID="$(stat -c '%u' "$DATA_DIR/logs/errors.log" 2>/dev/null || echo "$DEFAULT_PUID")"
-export DETECTED_GID="$(stat -c '%g' "$DATA_DIR/logs/errors.log" 2>/dev/null || echo "$DEFAULT_PGID")"
+# If user tries to set PUID and PGID to root values manually, catch and reject because root is not allowed
+if [[ "$PUID" == "0" ]] || [[ "$PGID" == "0" ]]; then
+    echo -e "\n[X] Error: Got PUID=$PUID and PGID=$PGID but ArchiveBox is not allowed to be run as root, please change or unset PUID & PGID and try again." > /dev/stderr
+    echo -e "    Hint: some NFS/SMB/FUSE/etc. filesystems force-remap all permissions, leave PUID/PGID blank" > /dev/stderr
+    echo -e "          or set PUID/PGID to the same value as the user/group they remap to (e.g. $DEFAULT_PUID:$DEFAULT_PGID)." > /dev/stderr
+    echo -e "    https://linux.die.net/man/8/mount.cifs#:~:text=does%20not%20provide%20unix%20ownership" > /dev/stderr
+    exit 3
+fi
+
+# If data directory already exists, autodetect the owner by looking at files within
+export DETECTED_PUID="$(stat -c '%u' "$DATA_DIR/logs/errors.log" 2>/dev/null || echo "$DEFAULT_PUID")"
+export DETECTED_PGID="$(stat -c '%g' "$DATA_DIR/logs/errors.log" 2>/dev/null || echo "$DEFAULT_PGID")"
+
+# If data directory exists but is owned by root, use defaults instead of root because root is not allowed
+[[ "$DETECTED_PUID" == "0" ]] && export DETECTED_PUID="$DEFAULT_PUID"
+[[ "$DETECTED_PGID" == "0" ]] && export DETECTED_PGID="$DEFAULT_PGID"
 
-# Set the archivebox user to use the configured UID & GID
-# prefers PUID and PGID env vars passsed in explicitly, falls back to autodetected defaults
-usermod -o -u "${PUID:-$DETECTED_UID}" "$ARCHIVEBOX_USER" > /dev/null 2>&1
-groupmod -o -g "${PGID:-$DETECTED_GID}" "$ARCHIVEBOX_USER" > /dev/null 2>&1
+# Set archivebox user and group ids to desired PUID/PGID
+usermod -o -u "${PUID:-$DETECTED_PUID}" "$ARCHIVEBOX_USER" > /dev/null 2>&1
+groupmod -o -g "${PGID:-$DETECTED_PGID}" "$ARCHIVEBOX_USER" > /dev/null 2>&1
 
 # re-set PUID and PGID to values reported by system instead of values we tried to set,
 # in case wonky filesystems or Docker setups try to play UID/GID remapping tricks on us
@@ -29,12 +64,12 @@ if [[ -d "$DATA_DIR/archive" ]]; then
         # echo "[√] Permissions are correct"
     else
      # the only time this fails is if the host filesystem doesn't allow us to write as root (e.g. some NFS mapall/maproot problems, connection issues, drive dissapeared, etc.)
-        echo -e "\n[X] Error: archivebox user (PUID=$PUID) is not able to write to your ./data dir." >&2
-        echo -e "    Change ./data to be owned by PUID=$PUID PGID=$PGID on the host and retry:"
-        echo -e "       \$ chown -R $PUID:$PGID ./data\n" >&2
-        echo -e "    Configure the PUID & PGID environment variables to change the desired owner:" >&2
-        echo -e "       https://docs.linuxserver.io/general/understanding-puid-and-pgid\n" >&2
-        exit 1
+        echo -e "\n[X] Error: archivebox user (PUID=$PUID) is not able to write to your ./data dir (currently owned by $(stat -c '%u' "$DATA_DIR"):$(stat -c '%g' "$DATA_DIR"))." >&2
+        echo -e "    Change ./data to be owned by PUID=$PUID PGID=$PGID on the host and retry:" > /dev/stderr
+        echo -e "       \$ chown -R $PUID:$PGID ./data\n" > /dev/stderr
+        echo -e "    Configure the PUID & PGID environment variables to change the desired owner:" > /dev/stderr
+        echo -e "       https://docs.linuxserver.io/general/understanding-puid-and-pgid\n" > /dev/stderr
+        exit 3
     fi
 else
     # create data directory
@@ -46,29 +81,35 @@ fi
 chown $PUID:$PGID "$DATA_DIR"
 chown $PUID:$PGID "$DATA_DIR"/*
 
-# also chown BROWSERS_DIR because otherwise 'archivebox setup' wont be able to install chrome
+# also chown BROWSERS_DIR because otherwise 'archivebox setup' won't be able to install chrome at runtime
 PLAYWRIGHT_BROWSERS_PATH="${PLAYWRIGHT_BROWSERS_PATH:-/browsers}"
+mkdir -p "$PLAYWRIGHT_BROWSERS_PATH/permissions_test_safe_to_delete"
 chown $PUID:$PGID "$PLAYWRIGHT_BROWSERS_PATH"
-chown $PUID:$PGID "${PLAYWRIGHT_BROWSERS_PATH}/*"
+chown $PUID:$PGID "$PLAYWRIGHT_BROWSERS_PATH"/*
+rm -Rf "$PLAYWRIGHT_BROWSERS_PATH/permissions_test_safe_to_delete"
+
 
 # (this check is written in blood, QEMU silently breaks things in ways that are not obvious)
-export IN_QEMU="$(pmap 1 | grep qemu | wc -l | grep -E '^0$' >/dev/null && echo 'False' || echo 'True')"
-if [[ "$IN_QEMU" == 'True' ]]; then
-    echo -e "\n[!] Warning: Running $(uname -m) emulated container in QEMU, some things will break!" >&2
-    echo -e "    chromium (screenshot, pdf, dom), singlefile, and any dependencies that rely on inotify will not run in QEMU." >&2
-    echo -e "    See here for more info: https://github.com/microsoft/playwright/issues/17395#issuecomment-1250830493\n" >&2
+export IN_QEMU="$(pmap 1 | grep qemu >/dev/null && echo 'True' || echo 'False')"
+if [[ "$IN_QEMU" == "True" ]]; then
+    echo -e "\n[!] Warning: Running $(uname -m) docker image using QEMU emulation, some things will break!" > /dev/stderr
+    echo -e "    chromium (screenshot, pdf, dom), singlefile, and any dependencies that rely on inotify will not run in QEMU." > /dev/stderr
+    echo -e "    See here for more info: https://github.com/microsoft/playwright/issues/17395#issuecomment-1250830493\n" > /dev/stderr
 fi
 
+
 # Drop permissions to run commands as the archivebox user
 if [[ "$1" == /* || "$1" == "bash" || "$1" == "sh" || "$1" == "echo" || "$1" == "cat" || "$1" == "archivebox" ]]; then
     # handle "docker run archivebox /some/non-archivebox/command --with=some args" by passing args directly to bash -c
-    # e.g. "docker run archivebox /venv/bin/archivebox-alt init"
+    # e.g. "docker run archivebox archivebox init"
+    #      "docker run archivebox /venv/bin/archivebox-alt init"
     #      "docker run archivebox /bin/bash -c '...'"
-    #      "docker run archivebox echo test"
+    #      "docker run archivebox cat /VERSION.txt"
     exec gosu "$PUID" bash -c "$*"
 else
     # handle "docker run archivebox add some subcommand --with=args abc" by calling archivebox to run as args as CLI subcommand
-    # e.g. "docker run archivebox add --depth=1 https://example.com"
+    # e.g. "docker run archivebox help"
+    #      "docker run archivebox add --depth=1 https://example.com"
     #      "docker run archivebox manage createsuperuser"
     #      "docker run archivebox server 0.0.0.0:8000"
     exec gosu "$PUID" bash -c "archivebox $*"

+ 6 - 1
bin/release_docker.sh

@@ -18,6 +18,7 @@ SUPPORTED_PLATFORMS="linux/amd64,linux/arm64,linux/arm/v7"
 TAG_NAME="${1:-$(git rev-parse --abbrev-ref HEAD)}"
 VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
 SHORT_VERSION="$(echo "$VERSION" | perl -pe 's/(\d+)\.(\d+)\.(\d+)/$1.$2/g')"
+GIT_SHA=sha-"$(git rev-parse --short HEAD)"
 SELECTED_PLATFORMS="${2:-$SUPPORTED_PLATFORMS}"
 
 
@@ -34,12 +35,16 @@ docker buildx build --platform "$SELECTED_PLATFORMS" --push . \
                -t archivebox/archivebox:$TAG_NAME \
                -t archivebox/archivebox:$VERSION \
                -t archivebox/archivebox:$SHORT_VERSION \
+               -t archivebox/archivebox:$GIT_SHA \
                -t archivebox/archivebox:latest \
                -t nikisweeting/archivebox \
                -t nikisweeting/archivebox:$TAG_NAME \
                -t nikisweeting/archivebox:$VERSION \
                -t nikisweeting/archivebox:$SHORT_VERSION \
+               -t nikisweeting/archivebox:$GIT_SHA \
                -t nikisweeting/archivebox:latest \
                -t ghcr.io/archivebox/archivebox/archivebox:$TAG_NAME \
                -t ghcr.io/archivebox/archivebox/archivebox:$VERSION \
-               -t ghcr.io/archivebox/archivebox/archivebox:$SHORT_VERSION
+               -t ghcr.io/archivebox/archivebox/archivebox:$SHORT_VERSION \
+               -t ghcr.io/archivebox/archivebox/archivebox:$GIT_SHA
+

File diff ditekan karena terlalu besar
+ 33 - 815
pdm.lock


+ 20 - 27
pyproject.toml

@@ -3,10 +3,9 @@ name = "archivebox"
 version = "0.7.1"
 description = "Self-hosted internet archiving solution."
 authors = [
-    {name = "Nick Sweeting", email = "setup.py@archivebox.io"},
+    {name = "Nick Sweeting", email = "pyproject.toml@archivebox.io"},
 ]
 dependencies = [
-    # "setuptools>=68.2.2",
     "croniter>=0.3.34",
     "dateparser>=1.0.0",
     "django-extensions>=3.0.3",
@@ -16,8 +15,7 @@ dependencies = [
     "python-crontab>=2.5.1",
     "requests>=2.24.0",
     "w3lib>=1.22.0",
-    # "youtube-dl>=2021.04.17",
-    "yt-dlp>=2021.4.11",
+    "yt-dlp>=2023.10.13",
     # "playwright>=1.39.0; platform_machine != 'armv7l'",
 ]
 requires-python = ">=3.9"
@@ -41,6 +39,9 @@ classifiers = [
     "Programming Language :: Python :: 3.7",
     "Programming Language :: Python :: 3.8",
     "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
     "Topic :: Internet :: WWW/HTTP",
     "Topic :: Internet :: WWW/HTTP :: Indexing/Search",
     "Topic :: Internet :: WWW/HTTP :: WSGI :: Application",
@@ -56,34 +57,26 @@ classifiers = [
 # pdm lock -G:all
 # pdm install -G:all
 [tool.pdm.dev-dependencies]
-build = [
-    "setuptools",
+dev = [
+    # build
+    "setuptools>=68.2.2",
     "wheel",
     "pdm",
-    # "bottle",
-    # "stdeb",
-    # "twine",
-]
-lint = [
-    "flake8",
-    "mypy",
-    "django-stubs",
-]
-test = [
-    "pytest",
-]
-debug = [
-    "django-debug-toolbar",
-    "djdt_flamegraph",
-    "ipdb",
-]
-doc = [
+    "homebrew-pypi-poet>=0.10.0",
+    # docs
     "recommonmark",
     "sphinx",
     "sphinx-rtd-theme",
-]
-dev = [
-    "homebrew-pypi-poet>=0.10.0",
+    # debug
+    "django-debug-toolbar",
+    "djdt_flamegraph",
+    "ipdb",
+    # test
+    "pytest",
+    # lint
+    "flake8",
+    "mypy",
+    "django-stubs",
 ]
 
 [tool.pdm.scripts]

+ 22 - 14
requirements.txt

@@ -1,35 +1,41 @@
 # This file is @generated by PDM.
 # Please do not edit it manually.
 
-appnope==0.1.3
 asgiref==3.7.2
 asttokens==2.4.1
-brotli==1.1.0
-certifi==2023.7.22
+brotli==1.1.0; implementation_name == "cpython"
+brotlicffi==1.1.0.0; implementation_name != "cpython"
+certifi==2023.11.17
+cffi==1.16.0; implementation_name != "cpython"
 charset-normalizer==3.3.2
+colorama==0.4.6; sys_platform == "win32"
 croniter==2.0.1
-dateparser==1.1.8
+dateparser==1.2.0
 decorator==5.1.1
 django==3.1.14
+django-auth-ldap==4.1.0
 django-extensions==3.1.5
+exceptiongroup==1.2.0; python_version < "3.11"
 executing==2.0.1
-idna==3.4
-ipython==8.17.2
+idna==3.6
+ipython==8.18.1
 jedi==0.19.1
 matplotlib-inline==0.1.6
 mutagen==1.47.0
 mypy-extensions==1.0.0
 parso==0.8.3
-pexpect==4.8.0
-prompt-toolkit==3.0.40
-ptyprocess==0.7.0
+pexpect==4.9.0; sys_platform != "win32"
+prompt-toolkit==3.0.43
+ptyprocess==0.7.0; sys_platform != "win32"
 pure-eval==0.2.2
-pyasn1==0.5.0
+pyasn1==0.5.1
 pyasn1-modules==0.3.0
+pycparser==2.21; implementation_name != "cpython"
 pycryptodomex==3.19.0
-pygments==2.16.1
+pygments==2.17.2
 python-crontab==3.0.0
 python-dateutil==2.8.2
+python-ldap==3.4.4
 pytz==2023.3.post1
 regex==2023.10.3
 requests==2.31.0
@@ -37,10 +43,12 @@ six==1.16.0
 sonic-client==1.0.0
 sqlparse==0.4.4
 stack-data==0.6.3
-traitlets==5.13.0
+traitlets==5.14.0
+typing-extensions==4.9.0; python_version < "3.11"
+tzdata==2023.3; platform_system == "Windows"
 tzlocal==5.2
 urllib3==2.1.0
 w3lib==2.1.2
-wcwidth==0.2.10
+wcwidth==0.2.12
 websockets==12.0
-yt-dlp==2023.10.13
+yt-dlp==2023.11.16

Beberapa file tidak ditampilkan karena terlalu banyak file yang berubah dalam diff ini