Dockerfile 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  # This is the Dockerfile for ArchiveBox. It bundles the following dependencies:
  # python3, ArchiveBox, curl, wget, git, chromium, ffmpeg, youtube-dl, ripgrep,
  # nodejs, single-file, readability-extractor, mercury-parser
  3. # Usage:
  4. # docker build . -t archivebox --no-cache
  5. # docker run -v "$PWD/data":/data archivebox init
  6. # docker run -v "$PWD/data":/data archivebox add 'https://example.com'
  7. # docker run -v "$PWD/data":/data -it archivebox manage createsuperuser
  8. # docker run -v "$PWD/data":/data -p 8000:8000 archivebox server
  # Base image: the "slim" Debian buster variant of the Python 3.9 image.
  FROM python:3.9-slim-buster

  # Image metadata, one label per instruction.
  # NOTE(review): the maintainer address reads "[email protected]", which looks like
  # a redaction placeholder rather than a real address -- confirm the intended value.
  LABEL name="archivebox"
  LABEL maintainer="Nick Sweeting <[email protected]>"
  LABEL description="All-in-one personal internet archiving container"
  LABEL homepage="https://github.com/ArchiveBox/ArchiveBox"
  LABEL documentation="https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker"
  # System-level base config (persisted into the running container).
  ENV TZ=UTC \
      LANGUAGE=en_US:en \
      LC_ALL=C.UTF-8 \
      LANG=C.UTF-8 \
      PYTHONIOENCODING=UTF-8 \
      PYTHONUNBUFFERED=1

  # Build-time-only apt settings. Declared as ARG so that they are visible to
  # every RUN step in this stage but are not baked into the runtime environment.
  ARG DEBIAN_FRONTEND=noninteractive
  ARG APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1
  # Application-level base config: where the code, virtualenv, user data and
  # node modules live, plus the name of the unprivileged account.
  ENV CODE_DIR=/app \
      VENV_PATH=/venv \
      DATA_DIR=/data \
      NODE_DIR=/node \
      ARCHIVEBOX_USER="archivebox"
  # Create the non-privileged system group and user (with a home directory)
  # used for archivebox and chrome; the user also joins the audio and video groups.
  RUN groupadd \
          --system \
          ${ARCHIVEBOX_USER} \
      && useradd \
          --system \
          --create-home \
          --gid ${ARCHIVEBOX_USER} \
          --groups audio,video \
          ${ARCHIVEBOX_USER}
  # Install system dependencies (one package per line, sorted for easy diffing).
  # The apt package lists are removed in the same layer to keep the image small.
  RUN apt-get update -qq \
      && apt-get install -qq -y --no-install-recommends \
          apt-transport-https \
          ca-certificates \
          cron \
          curl \
          dumb-init \
          gnupg2 \
          gosu \
          unzip \
          zlib1g-dev \
      && rm -rf /var/lib/apt/lists/*
  # Install apt dependencies: the archiving tools first, then the font packages.
  # The apt package lists are removed in the same layer to keep the image small.
  RUN apt-get update -qq \
      && apt-get install -qq -y --no-install-recommends \
          chromium \
          curl \
          ffmpeg \
          git \
          ripgrep \
          wget \
          youtube-dl \
          fontconfig \
          fonts-freefont-ttf \
          fonts-ipafont-gothic \
          fonts-kacst \
          fonts-noto \
          fonts-symbola \
          fonts-thai-tlwg \
          fonts-wqy-zenhei \
      && rm -rf /var/lib/apt/lists/*
  # Install apt development dependencies (disabled; uncomment to enable)
  # RUN apt-get update -qq \
  #     && apt-get install -qq -y --no-install-recommends \
  #         python3 python3-dev python3-pip python3-venv python3-all \
  #         dh-python debhelper devscripts dput software-properties-common \
  #         python3-distutils python3-setuptools python3-wheel python3-stdeb
  # Install Node environment from the NodeSource apt repository.
  # The signing key is downloaded to a file instead of being piped into apt-key,
  # and curl runs with -f, so a failed or error-status download aborts the build
  # instead of silently feeding bad input to `apt-key add`.
  # The npm self-upgrade step (`npm install -g npm`) remains disabled.
  RUN curl -fsSL https://deb.nodesource.com/gpgkey/nodesource.gpg.key -o /tmp/nodesource.gpg.key \
      && apt-key add /tmp/nodesource.gpg.key \
      && rm -f /tmp/nodesource.gpg.key \
      && echo 'deb https://deb.nodesource.com/node_15.x buster main' >> /etc/apt/sources.list \
      && apt-get update -qq \
      && apt-get install -qq -y --no-install-recommends \
          nodejs \
      && rm -rf /var/lib/apt/lists/*
  # Install Node dependencies.
  # Only the manifest and lockfile are copied before `npm ci`, so this layer is
  # reused from cache until one of those two files changes.
  WORKDIR "$NODE_DIR"
  # Expose the locally installed node binaries on PATH and quiet npm's logging.
  ENV PATH="${PATH}:$NODE_DIR/node_modules/.bin" \
      npm_config_loglevel=error
  # COPY rather than ADD: these are plain local files, so none of ADD's extra
  # behaviors (archive extraction, URL fetching) are wanted.
  COPY ./package.json ./package-lock.json ./
  RUN npm ci
  # Install Python dependencies: create the virtualenv and upgrade the packaging tools.
  WORKDIR "$CODE_DIR"
  # NOTE(review): the venv's bin directory is appended to PATH, so any `python`
  # or `pip` found earlier on PATH takes precedence over the venv's copies --
  # confirm that this ordering is intended.
  ENV PATH="${PATH}:$VENV_PATH/bin"
  # --no-cache-dir keeps pip's download cache out of the image layer.
  RUN python -m venv --clear --symlinks "$VENV_PATH" \
      && pip install --no-cache-dir --upgrade --quiet pip setuptools
  # Copy only the files setup.py needs, so the dependency layer below stays
  # cached when other source files change. COPY rather than ADD: these are plain
  # local files.
  COPY "./setup.py" "$CODE_DIR/"
  COPY "./README.md" "./package.json" "$CODE_DIR/archivebox/"
  # Install the build toolchain, extract the requirement list (install_requires
  # plus the "sonic" extra) from setup.py, install it with pip, then purge the
  # toolchain again in the same layer so it does not add to the image size.
  # --no-cache-dir keeps pip's download cache out of the layer.
  RUN apt-get update -qq \
      && apt-get install -qq -y --no-install-recommends \
          build-essential python-dev python3-dev \
      && python3 -c 'from distutils.core import run_setup; result = run_setup("./setup.py", stop_after="init"); print("\n".join(result.install_requires + result.extras_require["sonic"]))' > /tmp/requirements.txt \
      && pip install --no-cache-dir --quiet -r /tmp/requirements.txt \
      && apt-get purge -y build-essential python-dev python3-dev \
      && apt-get autoremove -y \
      && rm -rf /var/lib/apt/lists/*
  # Install ArchiveBox Python package and its dependencies.
  # The full build context is copied in (COPY rather than ADD: plain local
  # files), then the package is installed in editable mode from $CODE_DIR.
  WORKDIR "$CODE_DIR"
  COPY . "$CODE_DIR"
  # --no-cache-dir keeps pip's download cache out of the image layer.
  RUN pip install --no-cache-dir -e .
  # Setup ArchiveBox runtime config.
  # The data directory becomes the working directory.
  WORKDIR "$DATA_DIR"
  # Extractor switches and binary locations; the node-based tools are
  # referenced under $NODE_DIR/node_modules/.bin.
  ENV IN_DOCKER=True \
      CHROME_SANDBOX=False \
      CHROME_BINARY="chromium" \
      USE_SINGLEFILE=True \
      SINGLEFILE_BINARY="$NODE_DIR/node_modules/.bin/single-file" \
      USE_READABILITY=True \
      READABILITY_BINARY="$NODE_DIR/node_modules/.bin/readability-extractor" \
      USE_MERCURY=True \
      MERCURY_BINARY="$NODE_DIR/node_modules/.bin/mercury-parser"
  # Print version for nice docker finish summary. The command goes through the
  # entrypoint script, the same wrapper the container uses at runtime.
  # RUN archivebox version
  RUN "$CODE_DIR/bin/docker_entrypoint.sh" archivebox version

  # Open up the interfaces to the outside world: the data volume and the web port.
  VOLUME "$DATA_DIR"
  EXPOSE 8000

  # dumb-init runs as the init process and launches the entrypoint script.
  # Exec form performs no variable expansion, so the script path is spelled out.
  ENTRYPOINT ["dumb-init", "--", "/app/bin/docker_entrypoint.sh"]
  # Default command: serve on all interfaces on port 8000.
  CMD ["archivebox", "server", "0.0.0.0:8000"]