# Dockerfile
# This is the Dockerfile for ArchiveBox. It bundles the following dependencies:
#     python3, ArchiveBox, curl, wget, git, chromium, ffmpeg, youtube-dl, nodejs, single-file
# Usage:
#     docker build . -t archivebox --no-cache
#     docker run -v "$PWD/data":/data archivebox init
#     docker run -v "$PWD/data":/data archivebox add 'https://example.com'
#     docker run -v "$PWD/data":/data -it archivebox manage createsuperuser
#     docker run -v "$PWD/data":/data -p 8000:8000 archivebox server
  9. FROM python:3.8-slim-buster
  10. LABEL name="archivebox" \
  11. maintainer="Nick Sweeting <[email protected]>" \
  12. description="All-in-one personal internet archiving container" \
  13. homepage="https://github.com/pirate/ArchiveBox" \
  14. documentation="https://github.com/pirate/ArchiveBox/wiki/Docker#docker"
  15. # System-level base config
  16. ENV TZ=UTC \
  17. LANGUAGE=en_US:en \
  18. LC_ALL=C.UTF-8 \
  19. LANG=C.UTF-8 \
  20. PYTHONIOENCODING=UTF-8 \
  21. PYTHONUNBUFFERED=1 \
  22. DEBIAN_FRONTEND=noninteractive \
  23. APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1
  24. # Application-level base config
  25. ENV CODE_DIR=/app \
  26. VENV_PATH=/venv \
  27. DATA_DIR=/data \
  28. NODE_DIR=/node \
  29. ARCHIVEBOX_USER="archivebox"
  30. # Create non-privileged user for archivebox and chrome
  31. RUN groupadd --system $ARCHIVEBOX_USER \
  32. && useradd --system --create-home --gid $ARCHIVEBOX_USER --groups audio,video $ARCHIVEBOX_USER
  33. # Install system dependencies
  34. RUN apt-get update -qq \
  35. && apt-get install -qq -y --no-install-recommends \
  36. apt-transport-https ca-certificates gnupg2 zlib1g-dev \
  37. dumb-init gosu unzip curl \
  38. && rm -rf /var/lib/apt/lists/*
  39. # Install apt dependencies
  40. RUN apt-get update -qq \
  41. && apt-get install -qq -y --no-install-recommends \
  42. wget curl chromium git ffmpeg youtube-dl \
  43. fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
  44. && rm -rf /var/lib/apt/lists/*
  45. # Install Node environment
  46. RUN curl -s https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add - \
  47. && echo 'deb https://deb.nodesource.com/node_14.x buster main' >> /etc/apt/sources.list \
  48. && apt-get update -qq \
  49. && apt-get install -qq -y --no-install-recommends \
  50. nodejs \
  51. && rm -rf /var/lib/apt/lists/*
  52. # Install Python dependencies
  53. WORKDIR "$CODE_DIR"
  54. ENV PATH="${PATH}:$VENV_PATH/bin"
  55. RUN python -m venv --clear --symlinks "$VENV_PATH" \
  56. && pip install --upgrade --quiet pip setuptools
  57. ADD ./archivebox.egg-info/requires.txt "$CODE_DIR/archivebox.egg-info/requires.txt"
  58. RUN apt-get update -qq \
  59. && apt-get install -qq -y --no-install-recommends \
  60. build-essential python-dev python3-dev \
  61. && grep -B 1000 -E '^$' "$CODE_DIR/archivebox.egg-info/requires.txt" | pip install --quiet -r /dev/stdin \
  62. && apt-get purge -y build-essential python-dev python3-dev \
  63. && apt-get autoremove -y \
  64. && rm -rf /var/lib/apt/lists/*
  65. # Install Node dependencies
  66. WORKDIR "$NODE_DIR"
  67. ENV PATH="${PATH}:$NODE_DIR/node_modules/.bin" \
  68. npm_config_loglevel=error
  69. RUN npm install -g npm
  70. ADD ./package.json ./package.json
  71. RUN npm install
  72. # Install ArchiveBox Python package
  73. WORKDIR "$CODE_DIR"
  74. ADD . "$CODE_DIR"
  75. RUN pip install -e .
  76. # Setup ArchiveBox runtime config
  77. WORKDIR "$DATA_DIR"
  78. ENV IN_DOCKER=True \
  79. CHROME_SANDBOX=False \
  80. CHROME_BINARY="chromium" \
  81. SINGLEFILE_BINARY="$NODE_DIR/node_modules/.bin/single-file" \
  82. READABILITY_BINARY="$NODE_DIR/node_modules/.bin/readability-extractor"
  83. # Print version for nice docker finish summary
  84. RUN archivebox version
  85. # Open up the interfaces to the outside world
  86. VOLUME "$DATA_DIR"
  87. VOLUME "$CODE_DIR"
  88. EXPOSE 8000
  89. ENTRYPOINT ["dumb-init", "--", "/app/bin/docker_entrypoint.sh"]
  90. CMD ["archivebox", "server", "0.0.0.0:8000"]