2
0
Эх сурвалжийг харах

feat: Add support for singlefile in docker

Cristian 5 жил өмнө
parent
commit
06d0e9de6c

+ 29 - 16
Dockerfile

@@ -10,8 +10,8 @@
 FROM python:3.8-slim-buster
 
 LABEL name="archivebox" \
-      maintainer="Nick Sweeting <[email protected]>" \
-      description="All-in-one personal internet archiving container"
+    maintainer="Nick Sweeting <[email protected]>" \
+    description="All-in-one personal internet archiving container"
 
 ENV TZ=UTC \
     LANGUAGE=en_US:en \
@@ -22,28 +22,41 @@ ENV TZ=UTC \
     APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1 \
     CODE_PATH=/app \
     VENV_PATH=/venv \
-    DATA_PATH=/data
+    DATA_PATH=/data \
+    EXTRA_PATH=/extra
 
-# First install CLI utils and base deps, then Chrome + Fons
+# First install CLI utils and base deps, then Chrome + Fonts + nodejs
 RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections \
     && apt-get update -qq \
     && apt-get install -qq -y --no-install-recommends \
-       apt-transport-https ca-certificates apt-utils gnupg gosu gnupg2 libgconf-2-4 zlib1g-dev \
-       dumb-init jq git wget curl youtube-dl ffmpeg \
+    apt-transport-https ca-certificates apt-utils gnupg gosu gnupg2 libgconf-2-4 zlib1g-dev \
+    dumb-init jq git wget curl youtube-dl ffmpeg \
     && curl -sSL "https://dl.google.com/linux/linux_signing_key.pub" | apt-key add - \
     && echo "deb https://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list \
+    && curl -sL https://deb.nodesource.com/setup_14.x | bash - \
     && apt-get update -qq \
     && apt-get install -qq -y --no-install-recommends \
-       google-chrome-stable \
-       fontconfig \
-       fonts-ipafont-gothic \
-       fonts-wqy-zenhei \
-       fonts-thai-tlwg \
-       fonts-kacst \
-       fonts-symbola \
-       fonts-noto \
-       fonts-freefont-ttf \
-    && rm -rf /var/lib/apt/lists/*
+    google-chrome-stable \
+    fontconfig \
+    fonts-ipafont-gothic \
+    fonts-wqy-zenhei \
+    fonts-thai-tlwg \
+    fonts-kacst \
+    fonts-symbola \
+    fonts-noto \
+    fonts-freefont-ttf \
+    nodejs \
+    unzip \
+    && rm -rf /var/lib/apt/lists/* 
+
+# Download SingleFile, install its CLI dependencies, and symlink the CLI into /bin so archivebox can find it
+
+WORKDIR "$EXTRA_PATH"
+RUN wget -qO - https://github.com/gildas-lormeau/SingleFile/archive/master.zip > SingleFile.zip \
+    && unzip -q SingleFile.zip \
+    && npm install --prefix SingleFile-master/cli --production > /dev/null 2>&1 \
+    && chmod +x SingleFile-master/cli/single-file \
+    && ln -s "$EXTRA_PATH/SingleFile-master/cli/single-file" "/bin/single-file" 
 
 # Run everything from here on out as non-privileged user
 RUN groupadd --system archivebox \

+ 5 - 0
archivebox/extractors/singlefile.py

@@ -3,11 +3,13 @@ __package__ = 'archivebox.extractors'
 from pathlib import Path
 
 from typing import Optional
+import json
 
 from ..index.schema import Link, ArchiveResult, ArchiveError
 from ..system import run, chmod_file
 from ..util import (
     enforce_types,
+    chrome_args
 )
 from ..config import (
     TIMEOUT,
@@ -34,10 +36,13 @@ def save_singlefile(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOU
     out_dir = out_dir or link.link_dir
     output = str(Path(out_dir).absolute() / "singlefile.html")
 
+    browser_args = chrome_args(TIMEOUT=0)
+
     # SingleFile CLI Docs: https://github.com/gildas-lormeau/SingleFile/tree/master/cli
     cmd = [
         SINGLEFILE_BINARY,
         '--browser-executable-path={}'.format(CHROME_BINARY),
+        '--browser-args="{}"'.format(json.dumps(browser_args[1:])),
         link.url,
         output
     ]

+ 1 - 0
archivebox/logging_util.py

@@ -518,6 +518,7 @@ def printable_folder_status(name: str, folder: Dict) -> str:
 
 @enforce_types
 def printable_dependency_version(name: str, dependency: Dict) -> str:
+    version = None
     if dependency['enabled']:
         if dependency['is_valid']:
             color, symbol, note, version = 'green', '√', 'valid', ''