# Dockerfile (4.4 KB)
  # This is the Dockerfile for ArchiveBox. It bundles the following dependencies:
  #     python3, ArchiveBox, curl, wget, git, chromium, youtube-dl, single-file
  # Usage:
  #     docker build . -t archivebox --no-cache
  #     docker run -v "$PWD/data":/data archivebox init
  #     docker run -v "$PWD/data":/data archivebox add 'https://example.com'
  #     docker run -v "$PWD/data":/data -it archivebox manage createsuperuser
  #     docker run -v "$PWD/data":/data -p 8000:8000 archivebox server
  # Base image: Debian buster "slim" variant shipping Python 3.9.
  FROM python:3.9-slim-buster

  # Image metadata.
  # NOTE(review): the maintainer address below looks like an e-mail-obfuscation
  # placeholder rather than a real address -- confirm and restore the original.
  LABEL name="archivebox" \
        maintainer="Nick Sweeting <[email protected]>" \
        description="All-in-one personal internet archiving container" \
        homepage="https://github.com/ArchiveBox/ArchiveBox" \
        documentation="https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker"
  # System-level base config (persisted into the runtime environment):
  # timezone, locale, and Python stdio encoding/buffering.
  ENV TZ=UTC \
      LANGUAGE=en_US:en \
      LC_ALL=C.UTF-8 \
      LANG=C.UTF-8 \
      PYTHONIOENCODING=UTF-8 \
      PYTHONUNBUFFERED=1

  # Build-time-only apt settings. Declared with ARG (not ENV) so they are
  # exported to every RUN step below in this stage but are not baked into the
  # environment of containers started from the final image.
  ARG DEBIAN_FRONTEND=noninteractive
  ARG APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1
  # Application-level base config: where the code, virtualenv, user data and
  # node modules live inside the image, and the account name used for the
  # unprivileged user.
  ENV CODE_DIR=/app \
      VENV_PATH=/venv \
      DATA_DIR=/data \
      NODE_DIR=/node \
      ARCHIVEBOX_USER="archivebox"
  # Create the non-privileged system group and user (with a home directory)
  # for archivebox and chrome; the user is also added to the audio and video
  # groups.
  RUN groupadd --system "$ARCHIVEBOX_USER" \
      && useradd \
          --system \
          --create-home \
          --gid "$ARCHIVEBOX_USER" \
          --groups audio,video \
          "$ARCHIVEBOX_USER"
  # Install system dependencies (one package per line, sorted, for easy
  # diffing). The apt package index is removed in the same layer so it does
  # not end up in the image.
  RUN apt-get update -qq \
      && apt-get install -qq -y --no-install-recommends \
          apt-transport-https \
          ca-certificates \
          cron \
          curl \
          dumb-init \
          gnupg2 \
          gosu \
          unzip \
          zlib1g-dev \
      && rm -rf /var/lib/apt/lists/*
  # Install apt dependencies: the archiving tools plus a broad set of fonts.
  # curl is not repeated here because the system-dependencies layer above
  # already installs it.
  RUN apt-get update -qq \
      && apt-get install -qq -y --no-install-recommends \
          chromium \
          ffmpeg \
          git \
          ripgrep \
          wget \
          youtube-dl \
          fontconfig \
          fonts-freefont-ttf \
          fonts-ipafont-gothic \
          fonts-kacst \
          fonts-noto \
          fonts-symbola \
          fonts-thai-tlwg \
          fonts-wqy-zenhei \
      && rm -rf /var/lib/apt/lists/*
  # Install apt development dependencies (currently disabled)
  # RUN apt-get update -qq \
  #     && apt-get install -qq -y --no-install-recommends \
  #         python3 python3-dev python3-pip python3-venv python3-all \
  #         dh-python debhelper devscripts dput software-properties-common \
  #         python3-distutils python3-setuptools python3-wheel python3-stdeb
  # Install Node environment from the NodeSource apt repository.
  # The signing key is downloaded to a file with `curl -f` and then added,
  # instead of being piped into apt-key: under the default /bin/sh a pipe
  # hides a failed download, and without -f an HTTP error page would be
  # handed to apt-key as if it were the key.
  RUN curl -fsSL https://deb.nodesource.com/gpgkey/nodesource.gpg.key -o /tmp/nodesource.gpg.key \
      && apt-key add /tmp/nodesource.gpg.key \
      && rm -f /tmp/nodesource.gpg.key \
      && echo 'deb https://deb.nodesource.com/node_15.x buster main' >> /etc/apt/sources.list \
      && apt-get update -qq \
      && apt-get install -qq -y --no-install-recommends \
          nodejs \
      && rm -rf /var/lib/apt/lists/*
  # Install Node dependencies.
  # Only the manifest and lockfile are copied before `npm ci`, so this layer
  # is rebuilt only when they change. COPY is used instead of ADD because
  # these are plain local files (no archive extraction or URL fetch needed).
  WORKDIR "$NODE_DIR"
  ENV PATH="${PATH}:$NODE_DIR/node_modules/.bin" \
      npm_config_loglevel=error
  COPY ./package.json ./package-lock.json ./
  # Drop npm's download cache in the same layer so it is not kept in the image.
  RUN npm ci \
      && npm cache clean --force
  # Install Python dependencies: create the virtualenv and upgrade the
  # packaging tools.
  # NOTE(review): the venv's bin directory is appended AFTER the existing PATH
  # entries, so any `python`/`pip` found earlier on PATH takes precedence over
  # the venv's copies -- confirm this ordering is intended.
  WORKDIR "$CODE_DIR"
  ENV PATH="${PATH}:$VENV_PATH/bin"
  # --no-cache-dir keeps pip's download cache out of the image layer.
  RUN python -m venv --clear --symlinks "$VENV_PATH" \
      && pip install --no-cache-dir --upgrade --quiet pip setuptools
  # Install the Python requirements before copying the full source tree, so
  # this layer is rebuilt only when requires.txt changes. COPY replaces ADD
  # because this is a plain local file.
  COPY ./pip_dist/archivebox.egg-info/requires.txt "$CODE_DIR/pip_dist/archivebox.egg-info/requires.txt"
  # The compiler toolchain and Python headers are installed, used, and purged
  # within this single layer so they do not remain in the image.
  # The grep feeds pip the blank line(s) of requires.txt together with up to
  # 1000 lines preceding each one (presumably the base requirements listed
  # ahead of any extras sections -- verify against the generated file).
  # --no-cache-dir keeps pip's download cache out of the layer.
  RUN apt-get update -qq \
      && apt-get install -qq -y --no-install-recommends \
          build-essential python-dev python3-dev \
      && grep -B 1000 -E '^$' "$CODE_DIR/pip_dist/archivebox.egg-info/requires.txt" | pip install --no-cache-dir --quiet -r /dev/stdin \
      && pip install --no-cache-dir --quiet "sonic-client==0.0.5" \
      && apt-get purge -y build-essential python-dev python3-dev \
      && apt-get autoremove -y \
      && rm -rf /var/lib/apt/lists/*
  # Install ArchiveBox Python package and its dependencies.
  # The whole build context is copied in (COPY rather than ADD: plain local
  # files) and installed in editable mode, so the package runs from $CODE_DIR.
  # --no-cache-dir keeps pip's download cache out of the image layer.
  WORKDIR "$CODE_DIR"
  COPY . "$CODE_DIR"
  RUN pip install --no-cache-dir -e .
  # Setup ArchiveBox runtime config. Commands run from the data directory.
  WORKDIR "$DATA_DIR"

  # Container flag and Chrome settings.
  ENV IN_DOCKER=True \
      CHROME_SANDBOX=False \
      CHROME_BINARY="chromium"

  # Node-based extractors: enable each one and point it at the binary
  # under $NODE_DIR/node_modules/.bin.
  ENV USE_SINGLEFILE=True \
      SINGLEFILE_BINARY="$NODE_DIR/node_modules/.bin/single-file" \
      USE_READABILITY=True \
      READABILITY_BINARY="$NODE_DIR/node_modules/.bin/readability-extractor" \
      USE_MERCURY=True \
      MERCURY_BINARY="$NODE_DIR/node_modules/.bin/mercury-parser"
  # Print the version through the entrypoint script as a final build-time
  # check and a summary at the end of the build log.
  # RUN archivebox version
  RUN /app/bin/docker_entrypoint.sh archivebox version

  # Open up the interfaces to the outside world: the data volume and the
  # web server port. VOLUME is declared after all build-time steps.
  VOLUME "$DATA_DIR"
  EXPOSE 8000

  # dumb-init is the container's entry process and launches the entrypoint
  # script; the default command starts the server listening on all
  # interfaces, port 8000.
  ENTRYPOINT ["dumb-init", "--", "/app/bin/docker_entrypoint.sh"]
  CMD ["archivebox", "server", "0.0.0.0:8000"]