docker_entrypoint.sh 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. #!/bin/bash
  2. # This Docker ENTRYPOINT script is called by `docker run archivebox ...` or `docker compose run archivebox ...`.
  3. # It takes a CMD as $* shell arguments and runs it following these setup steps:
  4. # - Set the archivebox user to use the correct PUID & PGID
  5. # 1. highest precedence is for valid PUID and PGID env vars passed in explicitly
  6. # 2. fall back to DETECTED_PUID of files found within existing data dir
  7. # 3. fall back to DEFAULT_PUID if no data dir or it's owned by root
  8. # - Create a new /data dir if necessary and set the correct ownership on it
  9. # - Create a new /browsers dir if necessary and set the correct ownership on it
  10. # - Check whether we're running inside QEMU emulation and show a warning if so.
  11. # - Check that enough free space is available on / and /data
  12. # - Drop down to archivebox user permissions and execute passed CMD command.
  13. # Bash Environment Setup
  14. # http://redsymbol.net/articles/unofficial-bash-strict-mode/
  15. # https://www.gnu.org/software/bash/manual/html_node/The-Set-Builtin.html
  16. # set -o xtrace
  17. # set -o nounset
  # Shell behaviour for the rest of the script:
  #   nullglob - a glob that matches nothing expands to nothing (instead of to itself)
  #   errexit  - abort as soon as an unchecked command fails
  #   errtrace - ERR traps are inherited by functions, command substitutions and subshells
  #   pipefail - a pipeline fails when any of its stages fails, not only the last one
  shopt -s nullglob
  set -o errexit -o errtrace -o pipefail
  # IFS=$'\n'

  # Global invariants (set by Dockerfile during image build time, not intended to be
  # customized by users at runtime). Values already present in the environment win;
  # unset or empty ones get the defaults below.
  : "${DATA_DIR:=/data}" "${ARCHIVEBOX_USER:=archivebox}"
  export DATA_DIR ARCHIVEBOX_USER

  # Default PUID and PGID, used when the data dir is empty and the user
  # has not set an intended PUID+PGID manually
  export DEFAULT_PUID=911 DEFAULT_PGID=911
  # If the user tries to set PUID to root (0) manually, catch and reject it because
  # ArchiveBox is not allowed to run as root. Only PUID is checked; PGID is merely
  # echoed in the message for context.
  #
  # Globals:  PUID, PGID, DEFAULT_PUID, DEFAULT_PGID (read)
  # Outputs:  explanation + hint on stderr when PUID is 0
  # Returns:  0 when PUID is unset or non-zero; exits the script with status 3 otherwise
  #
  # Messages are written through >&2 (a duplicate of the already-open stderr descriptor)
  # instead of re-opening /dev/stderr for every line: re-opening truncates the target when
  # stderr has been redirected to a regular file, and fails when it cannot be opened by path.
  _reject_root_puid() {
    [[ "${PUID:-}" == "0" ]] || return 0
    {
      echo -e "\n[X] Error: Got PUID=$PUID and PGID=${PGID:-} but ArchiveBox is not allowed to be run as root, please change or unset PUID & PGID and try again."
      echo -e " Hint: some NFS/SMB/FUSE/etc. filesystems force-remap/ignore all permissions,"
      echo -e " leave PUID/PGID unset, disable root_squash, or use values the drive prefers (default is $DEFAULT_PUID:$DEFAULT_PGID)"
      echo -e " https://linux.die.net/man/8/mount.cifs#:~:text=does%20not%20provide%20unix%20ownership"
    } >&2
    exit 3
  }
  _reject_root_puid
  # If the data directory already exists, autodetect its owner by looking at a file within it
  # (falls back to the defaults when logs/errors.log is missing or cannot be stat'ed)
  export DETECTED_PUID="$(stat -c '%u' "$DATA_DIR/logs/errors.log" 2>/dev/null || echo "$DEFAULT_PUID")"
  export DETECTED_PGID="$(stat -c '%g' "$DATA_DIR/logs/errors.log" 2>/dev/null || echo "$DEFAULT_PGID")"

  # If the data directory exists but is owned by root, use the default instead because root is not allowed
  # (PGID / DETECTED_PGID is allowed to be 0 though)
  if [[ "$DETECTED_PUID" == "0" ]]; then
    export DETECTED_PUID="$DEFAULT_PUID"
  fi

  # Set the archivebox user and group ids to the desired PUID/PGID (-o permits non-unique ids).
  # The tools' own output stays suppressed, but a failure is reported before exiting with the
  # tool's status: with errexit enabled the script would otherwise stop here without any message.
  usermod -o -u "${PUID:-$DETECTED_PUID}" "$ARCHIVEBOX_USER" > /dev/null 2>&1 || {
    idmod_status=$?
    echo -e "\n[X] Error: Failed to set the UID of user $ARCHIVEBOX_USER to ${PUID:-$DETECTED_PUID} (usermod exited with status $idmod_status)." >&2
    exit "$idmod_status"
  }
  groupmod -o -g "${PGID:-$DETECTED_PGID}" "$ARCHIVEBOX_USER" > /dev/null 2>&1 || {
    idmod_status=$?
    echo -e "\n[X] Error: Failed to set the GID of group $ARCHIVEBOX_USER to ${PGID:-$DETECTED_PGID} (groupmod exited with status $idmod_status)." >&2
    exit "$idmod_status"
  }

  # re-set PUID and PGID to the values reported by the system instead of the values we tried to set,
  # in case wonky filesystems or Docker setups try to play UID/GID remapping tricks on us.
  # The ids are read back for $ARCHIVEBOX_USER (the account modified above), and assignment is kept
  # separate from export so that a failing `id` is not masked by export's own exit status.
  PUID="$(id -u "$ARCHIVEBOX_USER")"
  PGID="$(id -g "$ARCHIVEBOX_USER")"
  export PUID PGID
  # Check if the user attempted to run it in the root of their home folder or hard drive
  # (common mistake), recognised by typical home/system entries inside the data dir.
  #
  # Globals:  DATA_DIR (read; /data is assumed when it is unset or empty)
  # Outputs:  error message on stderr, like the other fatal errors in this script
  # Returns:  0 when none of the marker entries exist; exits the script with status 3 otherwise
  _abort_if_data_dir_is_home() {
    local data_dir="${DATA_DIR:-/data}"
    local marker
    for marker in Documents .config usr; do
      [[ -d "$data_dir/$marker" ]] && break
      marker=''
    done
    if [[ -z "$marker" && ! -f "$data_dir/.bashrc" && ! -f "$data_dir/.zshrc" ]]; then
      return 0
    fi
    {
      echo -e "\n[X] ERROR: ArchiveBox was run from inside a home folder"
      echo -e " Make sure you are inside an existing collection directory or a new empty directory and try again"
    } >&2
    exit 3
  }
  _abort_if_data_dir_is_home
  # Check the permissions of the data dir (or create it if it doesn't exist).
  #
  # - no $DATA_DIR/archive yet: create $DATA_DIR/logs (the first dir ArchiveBox needs to write to)
  # - $DATA_DIR/archive exists: create and remove a probe file inside it to prove it is writable
  #
  # Globals:  DATA_DIR, PUID, PGID, DEFAULT_PUID, DEFAULT_PGID (read)
  # Outputs:  explanation + remediation hints on stderr when the probe file cannot be created
  # Returns:  0 on success; exits the script with status 3 when the archive dir is not writable
  #
  # Messages go through >&2 rather than re-opening /dev/stderr per line (re-opening truncates a
  # redirected log file and can fail when stderr cannot be opened by path).
  _ensure_data_dir_writable() {
    local archive_dir="$DATA_DIR/archive"
    local probe="$archive_dir/.permissions_test_safe_to_delete"

    if [[ ! -d "$archive_dir" ]]; then
      mkdir -p "$DATA_DIR/logs"
      return
    fi

    if touch "$probe" 2>/dev/null; then
      # It's fine, we are able to write to the data directory (as root inside the container)
      rm -f "$probe"
      return
    fi

    # the only time this fails is if the host filesystem doesn't allow us to write as root
    # (e.g. some NFS mapall/maproot problems, connection issues, drive disappeared, etc.)
    {
      echo -e "\n[X] Error: archivebox user (PUID=$PUID) is not able to write to your ./data/archive dir (currently owned by $(stat -c '%u' "$archive_dir"):$(stat -c '%g' "$archive_dir"))."
      echo -e " Change ./data to be owned by PUID=$PUID PGID=$PGID on the host and retry:"
      echo -e " \$ chown -R $PUID:$PGID ./data\n"
      echo -e " Configure the PUID & PGID environment variables to change the desired owner:"
      echo -e " https://docs.linuxserver.io/general/understanding-puid-and-pgid\n"
      echo -e " Hint: some NFS/SMB/FUSE/etc. filesystems force-remap/ignore all permissions,"
      echo -e " leave PUID/PGID unset, disable root_squash, or use values the drive prefers (default is $DEFAULT_PUID:$DEFAULT_PGID)"
      echo -e " https://linux.die.net/man/8/mount.cifs#:~:text=does%20not%20provide%20unix%20ownership"
    } >&2
    exit 3
  }
  _ensure_data_dir_writable
  # Use the novnc X11 display unless the caller already provided a DISPLAY
  : "${DISPLAY:=novnc:0.0}"
  export DISPLAY
  # If the X11 display can't be reached, unset DISPLAY so that chrome
  # doesn't try to connect to it and hang indefinitely
  xdpyinfo > /dev/null 2>&1 || unset DISPLAY
  # Force the ownership of the data dir and its direct contents to the archivebox user and group.
  # This is needed because Docker Desktop often does not map user permissions from the host properly.
  chown "$PUID:$PGID" "$DATA_DIR"
  if ! chown "$PUID:$PGID" "$DATA_DIR"/* > /dev/null 2>&1; then
    # Users may keep ./data/archive on a network mount that prevents chmod/chown:
    # fall back to chowning everything else in ./data and leave ./data/archive alone.
    # (one chown per entry via `-exec ... ;`, all output discarded)
    find "$DATA_DIR" -type d ! -path "$DATA_DIR/archive*" -exec chown "$PUID:$PGID" {} ';' > /dev/null 2>&1
    find "$DATA_DIR" -type f ! -path "$DATA_DIR/archive/*" -exec chown "$PUID:$PGID" {} ';' > /dev/null 2>&1
  fi
  # also chown BROWSERS_DIR because otherwise 'archivebox setup' wont be able to
  # 'playwright install chromium' at runtime

  # _chown_browsers_dir DIR OWNER
  # Gives DIR to OWNER (a "uid:gid" spec). When DIR/.links exists, also gives OWNER every direct
  # entry of DIR (hidden ones included) and, without following symlinks, every entry of DIR/.links.
  #
  # The "." and ".." entries are filtered out explicitly: bash versions older than 5.2 expand
  # DIR/.* to include them, which would chown DIR's parent directory as well. Matches are
  # collected first and chown is only invoked when there is something to change, so an empty
  # match cannot turn into a "missing operand" failure.
  _chown_browsers_dir() {
    local dir="$1" owner="$2"
    local entry
    local -a entries=() links=()

    chown "$owner" "$dir"
    [[ -d "$dir/.links" ]] || return 0

    for entry in "$dir"/* "$dir"/.*; do
      case "${entry##*/}" in
        . | ..) continue ;;
      esac
      # skip patterns that matched nothing and were left unexpanded
      [[ -e "$entry" || -L "$entry" ]] || continue
      entries+=("$entry")
    done
    if (( ${#entries[@]} > 0 )); then
      chown "$owner" "${entries[@]}"
    fi

    for entry in "$dir"/.links/*; do
      [[ -e "$entry" || -L "$entry" ]] || continue
      links+=("$entry")
    done
    if (( ${#links[@]} > 0 )); then
      chown -h "$owner" "${links[@]}"
    fi
  }

  export PLAYWRIGHT_BROWSERS_PATH="${PLAYWRIGHT_BROWSERS_PATH:-/browsers}"
  # creating and removing a scratch dir makes sure the browsers dir exists and is writable
  mkdir -p "$PLAYWRIGHT_BROWSERS_PATH/permissions_test_safe_to_delete"
  rm -Rf "$PLAYWRIGHT_BROWSERS_PATH/permissions_test_safe_to_delete"
  _chown_browsers_dir "$PLAYWRIGHT_BROWSERS_PATH" "$PUID:$PGID"
  # also chown tmp dir and lib dir
  # mkdir -p "$DATA_DIR"/lib/bin
  # chown $PUID:$PGID "$DATA_DIR"/lib "$DATA_DIR"/lib/*
  # mkdir -p "$DATA_DIR"/tmp/workers
  # chown $PUID:$PGID "$DATA_DIR"/tmp "$DATA_DIR"/tmp/*

  # _chown_dir_shallow DIR OWNER
  # Best-effort: gives DIR and its direct children to OWNER (a "uid:gid" spec).
  # - Does nothing when DIR is empty or not an existing directory, so an unset LIB_DIR/TMP_DIR
  #   neither aborts the script under errexit nor lets the children glob expand to /*.
  # - The glob is outside the quotes so it really expands to the children
  #   (a quoted "$DIR/*" only ever names a file literally called "*").
  # - Errors are discarded and the children are chowned in the background, as before.
  # Always returns 0.
  _chown_dir_shallow() {
    local dir="$1" owner="$2"
    local child
    local -a children=()

    if [[ -z "$dir" || ! -d "$dir" ]]; then
      return 0
    fi
    chown "$owner" "$dir" 2>/dev/null || true

    for child in "$dir"/*; do
      # skip a pattern that matched nothing and was left unexpanded
      if [[ -e "$child" || -L "$child" ]]; then
        children+=("$child")
      fi
    done
    if (( ${#children[@]} > 0 )); then
      chown "$owner" "${children[@]}" 2>/dev/null &
    fi
    return 0
  }

  _chown_dir_shallow "${LIB_DIR:-}" "$PUID:$PGID"
  _chown_dir_shallow "${TMP_DIR:-}" "$PUID:$PGID"
  # (this check is written in blood in 2023, QEMU silently breaks things in ways that are not obvious)
  #
  # Sets and exports IN_QEMU to 'True' when the memory map of PID 1 (as printed by `pmap 1`)
  # mentions "qemu", otherwise to 'False', and prints a warning on stderr in the 'True' case.
  #
  # grep's output is sent to /dev/null (instead of using grep -q) so grep keeps reading until
  # pmap is done. The warning goes through >&2 rather than re-opening /dev/stderr per line
  # (re-opening truncates a redirected log file and can fail when stderr cannot be opened by path).
  _detect_qemu() {
    if pmap 1 | grep qemu > /dev/null; then
      export IN_QEMU='True'
    else
      export IN_QEMU='False'
    fi

    if [[ "$IN_QEMU" == "True" ]]; then
      {
        echo -e "\n[!] Warning: Running $(uname -m) docker image using QEMU emulation, some things will break!"
        echo -e " chromium (screenshot, pdf, dom), singlefile, and any dependencies that rely on inotify will not run in QEMU."
        echo -e " See here for more info: https://github.com/microsoft/playwright/issues/17395#issuecomment-1250830493\n"
      } >&2
    fi
  }
  _detect_qemu
  # check disk space free on /, /data, and /data/archive:
  # warn on <500Mb free (or >=99% used), error on <100Mb free

  # _disk_space_level USED_PCT AVAIL_KB
  # Classifies one filesystem from its used percentage and available space in KB. Prints:
  #   unknown  - either value is not a plain non-negative integer (e.g. df produced no output)
  #   critical - fewer than 100000 KB available
  #   low      - at least 99% used, or fewer than 500000 KB available
  #   ok       - anything else
  # "unknown" keeps a failed df from being read as 0 KB free and reported as "out of space".
  _disk_space_level() {
    local used_pct="$1" avail_kb="$2"
    if [[ ! "$used_pct" =~ ^[0-9]+$ || ! "$avail_kb" =~ ^[0-9]+$ ]]; then
      echo 'unknown'
    elif (( 10#$avail_kb < 100000 )); then
      echo 'critical'
    elif (( 10#$used_pct >= 99 || 10#$avail_kb < 500000 )); then
      echo 'low'
    else
      echo 'ok'
    fi
  }

  # Each *_USAGE value is the last line of df's output squeezed to "<pcent>% <avail>";
  # *_USED_PCT is the part before the first '%', *_AVAIL_KB is the second field.
  # All messages go through >&2 rather than re-opening /dev/stderr for every line.

  # --- / : running completely out of space here is fatal (exit 3)
  export ROOT_USAGE="$(df --output=pcent,avail / | tail -n 1 | xargs)"
  export ROOT_USED_PCT="${ROOT_USAGE%%%*}"
  export ROOT_AVAIL_KB="$(echo "$ROOT_USAGE" | awk '{print $2}')"
  case "$(_disk_space_level "$ROOT_USED_PCT" "$ROOT_AVAIL_KB")" in
    critical)
      {
        echo -e "\n[!] Warning: Docker root filesystem is completely out of space! (${ROOT_USED_PCT}% used on /)"
        echo -e " you need to free up at least 100Mb in your Docker VM to continue:"
        echo -e " \$ docker system prune\n"
        df -kh /
      } >&2
      exit 3
      ;;
    low)
      {
        echo -e "\n[!] Warning: Docker root filesystem is running out of space! (${ROOT_USED_PCT}% used on /)"
        echo -e " you may need to free up space in your Docker VM soon:"
        echo -e " \$ docker system prune\n"
        df -kh /
      } >&2
      ;;
  esac

  # --- data dir : warnings only; the critical case pauses 5 seconds and then continues.
  # The "low" decision uses the data volume's own available space (not the root filesystem's).
  export DATA_USAGE="$(df --output=pcent,avail "$DATA_DIR" | tail -n 1 | xargs)"
  export DATA_USED_PCT="${DATA_USAGE%%%*}"
  export DATA_AVAIL_KB="$(echo "$DATA_USAGE" | awk '{print $2}')"
  case "$(_disk_space_level "$DATA_USED_PCT" "$DATA_AVAIL_KB")" in
    critical)
      {
        echo -e "\n[!] Warning: Docker data volume is completely out of space! (${DATA_USED_PCT}% used on $DATA_DIR)"
        echo -e " you need to free up at least 100Mb on the drive holding your data directory"
        echo -e " \$ ncdu -x data\n"
        df -kh "$DATA_DIR"
      } >&2
      sleep 5
      ;;
    low)
      {
        echo -e "\n[!] Warning: Docker data volume is running out of space! (${DATA_USED_PCT}% used on $DATA_DIR)"
        echo -e " you may need to free up space on the drive holding your data directory soon"
        echo -e " \$ ncdu -x data\n"
        df -kh "$DATA_DIR"
      } >&2
      ;;
    *)
      # data/ is not known to be short on space, but check data/archive separately,
      # because it might be on a network mount or external drive
      if [[ -d "$DATA_DIR/archive" ]]; then
        export ARCHIVE_USAGE="$(df --output=pcent,avail "$DATA_DIR/archive" | tail -n 1 | xargs)"
        export ARCHIVE_USED_PCT="${ARCHIVE_USAGE%%%*}"
        export ARCHIVE_AVAIL_KB="$(echo "$ARCHIVE_USAGE" | awk '{print $2}')"
        case "$(_disk_space_level "$ARCHIVE_USED_PCT" "$ARCHIVE_AVAIL_KB")" in
          critical)
            {
              echo -e "\n[!] Warning: data/archive folder is completely out of space! (${ARCHIVE_USED_PCT}% used on $DATA_DIR/archive)"
              echo -e " you need to free up at least 100Mb on the drive holding your data/archive directory"
              echo -e " \$ ncdu -x data/archive\n"
              df -kh "$DATA_DIR/archive"
            } >&2
            sleep 5
            ;;
          low)
            {
              echo -e "\n[!] Warning: data/archive folder is running out of space! (${ARCHIVE_USED_PCT}% used on $DATA_DIR/archive)"
              echo -e " you may need to free up space on the drive holding your data/archive directory soon"
              echo -e " \$ ncdu -x data/archive\n"
              df -kh "$DATA_DIR/archive"
            } >&2
            ;;
        esac
      fi
      ;;
  esac
  # Keep crontabs inside the data dir and symlink the system location to it
  mkdir -p "$DATA_DIR/crontabs"
  if [[ ! -L /var/spool/cron/crontabs ]]; then
    # the system path is not a symlink yet: move its files into the data dir location first...
    for legacy_crontab in /var/spool/cron/crontabs/*; do
      mv "$legacy_crontab" "$DATA_DIR/crontabs/"
    done
    # ...then replace the old system path with a symlink to the data dir location
    rm -Rf /var/spool/cron/crontabs
    ln -sf "$DATA_DIR/crontabs" /var/spool/cron/crontabs
  fi
  chown -R $PUID "$DATA_DIR/crontabs"
  # set DBUS_SYSTEM_BUS_ADDRESS & DBUS_SESSION_BUS_ADDRESS
  # (dbus is not actually needed, it makes chrome log fewer warnings but isn't worth making our docker images bigger)
  # service dbus start >/dev/null 2>&1 &
  # export $(dbus-launch --close-stderr)

  export ARCHIVEBOX_BIN_PATH="$(which archivebox)"

  # Drop permissions and run the requested command as the archivebox user
  case "$1" in
    /* | bash | sh | echo | cat | whoami | archivebox)
      # First argument is an absolute path or one of a few known commands:
      # pass all args through to bash -c, e.g.
      #   "docker run archivebox archivebox init"
      #   "docker run archivebox /venv/bin/ipython3"
      #   "docker run archivebox /bin/bash -c '...'"
      #   "docker run archivebox cat /VERSION.txt"
      #
      # - printf %q requotes the shell parameters properly https://stackoverflow.com/a/39463371/2156113
      # - gosu spawns an ephemeral bash process owned by the archivebox user
      #   (the bash wrapper is needed to load env vars, PATH, and setup terminal TTY)
      # - the outermost exec hands over the current process ID to the inner bash process,
      #   the inner exec hands over the inner bash PID to the user's command
      exec gosu "$PUID" /bin/bash -c "exec $(printf ' %q' "$@")"
      ;;
    *)
      # Anything else is treated as an archivebox CLI subcommand plus its args, e.g.
      #   "docker run archivebox help"
      #   "docker run archivebox add --depth=1 https://example.com"
      #   "docker run archivebox manage createsuperuser"
      #   "docker run archivebox server 0.0.0.0:8000"
      exec gosu "$PUID" "$ARCHIVEBOX_BIN_PATH" "$@"
      ;;
  esac