|
@@ -1,84 +1,77 @@
|
|
|
-# This Dockerfile for ArchiveBox installs the following in a container:
|
|
|
|
|
-# - curl, wget, python3, youtube-dl, google-chrome-beta
|
|
|
|
|
-# - ArchiveBox
|
|
|
|
|
|
|
+# This is the Dockerfile for ArchiveBox. It includes the following major pieces:
|
|
|
|
|
+# git, curl, wget, python3, youtube-dl, google-chrome-stable, ArchiveBox
|
|
|
# Usage:
|
|
# Usage:
|
|
|
-# docker build github.com/pirate/ArchiveBox -t archivebox
|
|
|
|
|
-# echo 'https://example.com' | docker run -i --mount type=bind,source=./data,target=/data archivebox /bin/archive
|
|
|
|
|
-# docker run --mount type=bind,source=./data,target=/data archivebox /bin/archive 'https://example.com/some/rss/feed.xml'
|
|
|
|
|
|
|
+# docker build . -t archivebox:latest
|
|
|
|
|
+#     docker run -v "$PWD/data":/data archivebox:latest init
|
|
|
|
|
+#     docker run -v "$PWD/data":/data archivebox:latest add 'https://example.com'
|
|
|
# Documentation:
|
|
# Documentation:
|
|
|
# https://github.com/pirate/ArchiveBox/wiki/Docker#docker
|
|
# https://github.com/pirate/ArchiveBox/wiki/Docker#docker
|
|
|
|
|
|
|
|
-FROM node:13-slim
|
|
|
|
|
-LABEL maintainer="Nick Sweeting <[email protected]>"
|
|
|
|
|
|
|
+# Base image: official Python 3.8 image, slim variant on Debian buster.
|
|
|
|
|
+FROM python:3.8-slim-buster
|
|
|
|
|
+# Image metadata (name, maintainer, version, description) stored as labels.
|
|
|
|
|
+LABEL name="archivebox" \
|
|
|
|
|
+    maintainer="Nick Sweeting <[email protected]>" \
|
|
|
|
|
+    version="0.4.3" \
|
|
|
|
|
+    description="All-in-one personal internet archiving container"
|
|
|
|
|
|
|
|
-RUN apt-get update \
|
|
|
|
|
- && apt-get install -yq --no-install-recommends \
|
|
|
|
|
- jq git zlib1g-dev wget curl youtube-dl gnupg2 libgconf-2-4 python3 python3-pip \
|
|
|
|
|
- && rm -rf /var/lib/apt/lists/*
|
|
|
|
|
|
|
+# Global environment: UTF-8 locale and Python I/O settings, plus the three
+# install locations (CODE_PATH, VENV_PATH, DATA_PATH) used by later instructions.
+# NOTE(review): APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE presumably silences the
+# warning apt-key prints when used non-interactively - confirm against apt-key docs.
|
|
|
|
|
+ENV LANG=C.UTF-8 \
|
|
|
|
|
+    LANGUAGE=en_US:en \
|
|
|
|
|
+    LC_ALL=C.UTF-8 \
|
|
|
|
|
+    PYTHONIOENCODING=UTF-8 \
|
|
|
|
|
+    PYTHONUNBUFFERED=1 \
|
|
|
|
|
+    APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1 \
|
|
|
|
|
+    CODE_PATH=/app \
|
|
|
|
|
+    VENV_PATH=/venv \
|
|
|
|
|
+    DATA_PATH=/data
|
|
|
|
|
|
|
|
# Install latest chrome package and fonts to support major charsets (Chinese, Japanese, Arabic, Hebrew, Thai and a few others)
|
|
# Install latest chrome package and fonts to support major charsets (Chinese, Japanese, Arabic, Hebrew, Thai and a few others)
|
|
|
-RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \
|
|
|
|
|
- && sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list' \
|
|
|
|
|
|
|
+# Install system dependencies non-interactively, then register Google's apt
+# signing key and repository so chrome can be installed by the steps that follow.
+# "avconv" is not an installable package on Debian buster, so it is not listed
+# (it would make apt-get install abort); ffmpeg is kept.
+# curl uses -f so an HTTP error fails the step instead of feeding an error page to apt-key.
|
|
|
|
|
+RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections \
|
|
|
|
|
+    && apt-get update -qq \
|
|
|
|
|
+    && apt-get install -qq -y --no-install-recommends \
|
|
|
|
|
+        apt-transport-https ca-certificates apt-utils gnupg gnupg2 libgconf-2-4 zlib1g-dev dumb-init \
|
|
|
|
|
+        wget curl youtube-dl jq git ffmpeg \
|
|
|
|
|
+    && curl -fsSL https://dl.google.com/linux/linux_signing_key.pub | apt-key add - \
|
|
|
|
|
+    && echo "deb https://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list \
|
|
|
&& apt-get update \
|
|
&& apt-get update \
|
|
|
- && apt-get install -y google-chrome-beta fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst ttf-freefont \
|
|
|
|
|
- --no-install-recommends \
|
|
|
|
|
- && rm -rf /var/lib/apt/lists/* \
|
|
|
|
|
- && rm -rf /src/*.deb
|
|
|
|
|
-
|
|
|
|
|
-# It's a good idea to use dumb-init to help prevent zombie chrome processes.
|
|
|
|
|
-ADD https://github.com/Yelp/dumb-init/releases/download/v1.2.0/dumb-init_1.2.0_amd64 /usr/local/bin/dumb-init
|
|
|
|
|
-RUN chmod +x /usr/local/bin/dumb-init
|
|
|
|
|
-
|
|
|
|
|
-# Uncomment to skip the chromium download when installing puppeteer. If you do,
|
|
|
|
|
-# you'll need to launch puppeteer with:
|
|
|
|
|
-# browser.launch({executablePath: 'google-chrome-beta'})
|
|
|
|
|
-ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD true
|
|
|
|
|
-
|
|
|
|
|
-# Install puppeteer so it's available in the container.
|
|
|
|
|
-RUN npm install puppeteer
|
|
|
|
|
-
|
|
|
|
|
-# Add user so we don't need --no-sandbox.
|
|
|
|
|
-RUN groupadd -r pptruser && useradd -r -g pptruser -G audio,video pptruser \
|
|
|
|
|
- && mkdir -p /home/pptruser/Downloads \
|
|
|
|
|
- && chown -R pptruser:pptruser /home/pptruser \
|
|
|
|
|
- && chown -R pptruser:pptruser /node_modules
|
|
|
|
|
|
|
+ && apt-get install -qq -y --no-install-recommends \
|
|
|
|
|
+ google-chrome-stable \
|
|
|
|
|
+ fontconfig \
|
|
|
|
|
+ fonts-ipafont-gothic \
|
|
|
|
|
+ fonts-wqy-zenhei \
|
|
|
|
|
+ fonts-thai-tlwg \
|
|
|
|
|
+ fonts-kacst \
|
|
|
|
|
+ fonts-symbola \
|
|
|
|
|
+ fonts-noto \
|
|
|
|
|
+ fonts-freefont-ttf \
|
|
|
|
|
+ && rm -rf /var/lib/apt/lists/*
|
|
|
|
|
|
|
|
-WORKDIR /home/pptruser/app
|
|
|
|
|
|
|
+# Create an unprivileged "archivebox" system user and group (also in the audio
+# and video groups) so chrome does not need --no-sandbox, and give it a Downloads directory it owns.
|
|
|
|
|
+RUN groupadd --system archivebox \
|
|
|
|
|
+    && useradd --system --gid archivebox --groups audio,video archivebox \
|
|
|
|
|
+    && mkdir --parents /home/archivebox/Downloads \
|
|
|
|
|
+    && chown --recursive archivebox:archivebox /home/archivebox
|
|
|
|
|
|
|
|
-RUN python3 -m pip install --upgrade pip setuptools && python3 -m pip install virtualenv \
|
|
|
|
|
- && python3 -m virtualenv ".docker-venv"
|
|
|
|
|
-ENV PATH="/home/pttruser/app/.docker-venv/bin:${PATH}"
|
|
|
|
|
-COPY ./Pipfile.lock "/home/pttruser/app/Pipfile.lock"
|
|
|
|
|
-RUN jq -r \
|
|
|
|
|
- '.default,.develop | to_entries[] | .key + .value.version' \
|
|
|
|
|
- "/home/pttruser/app/Pipfile.lock" \
|
|
|
|
|
- | /home/pttruser/app/.docker-venv/bin/python -m pip install --no-cache-dir -r /dev/stdin \
|
|
|
|
|
- && rm "/home/pttruser/app/Pipfile.lock"
|
|
|
|
|
|
|
+# Copy the build context into $CODE_PATH and hand it to the archivebox user.
+# The chown runs BEFORE the VOLUME declaration: build steps that modify a path
+# after it has been declared a volume are discarded by the legacy builder.
+# COPY is used instead of ADD because only a plain local directory is copied.
+# NOTE(review): later build steps that write into $CODE_PATH are still subject
+# to that VOLUME caveat - confirm they do not rely on persisting files there.
|
|
|
|
|
+WORKDIR "$CODE_PATH"
|
|
|
|
|
+COPY . "$CODE_PATH"
|
|
|
|
|
+RUN chown -R archivebox:archivebox "$CODE_PATH"
|
|
|
|
|
+VOLUME "$CODE_PATH"
|
|
|
|
|
|
|
|
-# Install the ArchiveBox repository and pip requirements
|
|
|
|
|
-# RUN git clone https://github.com/pirate/ArchiveBox /home/pptruser/app \
|
|
|
|
|
-ADD . /home/pptruser/app
|
|
|
|
|
-RUN mkdir -p /data \
|
|
|
|
|
- && chown -R pptruser:pptruser /data \
|
|
|
|
|
- && ln -s /data /home/pptruser/app/archivebox/output \
|
|
|
|
|
- && ln -s /home/pptruser/app/bin/* /bin/ \
|
|
|
|
|
- && ln -s /home/pptruser/app/bin/archivebox /bin/archive \
|
|
|
|
|
- && chown -R pptruser:pptruser /home/pptruser/app/archivebox
|
|
|
|
|
|
|
+# Put the virtualenv's bin directory first on PATH, create the virtualenv at
+# $VENV_PATH, and install the project from the current WORKDIR in editable mode.
+# --no-cache-dir keeps pip's download/wheel cache out of the image layer.
+# The virtualenv is finally handed to the archivebox user.
|
|
|
|
|
+ENV PATH="$VENV_PATH/bin:${PATH}"
|
|
|
|
|
+RUN python --version \
|
|
|
|
|
+    && python -m venv "$VENV_PATH" \
|
|
|
|
|
+    && pip install --no-cache-dir --upgrade pip \
|
|
|
|
|
+    && pip install --no-cache-dir -e . \
|
|
|
|
|
+    && chown -R archivebox:archivebox "$VENV_PATH"
|
|
|
|
|
|
|
|
-VOLUME /data
|
|
|
|
|
-EXPOSE 8000
|
|
|
|
|
|
|
+# Create the data directory (WORKDIR creates it if missing) and give it to the
+# archivebox user. Ownership is set BEFORE declaring the volume, because build
+# steps that change a path after VOLUME are discarded by the legacy builder.
|
|
|
|
|
+WORKDIR "$DATA_PATH"
|
|
|
|
|
+RUN chown -R archivebox:archivebox "$DATA_PATH"
|
|
|
|
|
+VOLUME "$DATA_PATH"
|
|
|
|
|
|
|
|
-ENV LANG=C.UTF-8 \
|
|
|
|
|
- LANGUAGE=en_US:en \
|
|
|
|
|
- LC_ALL=C.UTF-8 \
|
|
|
|
|
- PYTHONIOENCODING=UTF-8 \
|
|
|
|
|
|
|
+# Run everything from here on out as non-privileged user
|
|
|
|
|
+USER archivebox
|
|
|
|
|
+# Runtime settings: chrome binary name, sandbox toggle, and OUTPUT_DIR set to the data path.
+# NOTE(review): CHROME_SANDBOX=False looks like it disables chrome's sandbox even though a
+# dedicated non-root user exists - confirm this is intended.
|
|
|
|
|
+ENV CHROME_BINARY=google-chrome \
|
|
|
    CHROME_SANDBOX=False \
|
|
    CHROME_SANDBOX=False \
|
|
|
-    CHROME_BINARY=google-chrome-beta \
|
|
|
|
|
-    OUTPUT_DIR=/data
|
|
|
|
|
|
|
+    OUTPUT_DIR="$DATA_PATH"
|
|
|
|
|
|
|
|
-# Run everything from here on out as non-privileged user
|
|
|
|
|
-USER pptruser
|
|
|
|
|
-WORKDIR /home/pptruser/app
|
|
|
|
|
|
|
+# Build-time smoke test: the build fails here if the archivebox command cannot run as the archivebox user.
|
|
|
|
|
+RUN archivebox version
|
|
|
|
|
|
|
|
ENTRYPOINT ["dumb-init", "--"]
|
|
ENTRYPOINT ["dumb-init", "--"]
|
|
|
-CMD ["/bin/archivebox"]
|
|
|
|
|
|
|
+CMD ["archivebox"]
|