소스 검색

use yt-dlp for media archiving instead of youtube-dl

Nick Sweeting 3년 전
부모
커밋
57df65f28f
3개의 파일 변경, 6줄 추가, 3줄 삭제
  1. 3 2
      Dockerfile
  2. 2 1
      archivebox/config.py
  3. 1 0
      archivebox/extractors/media.py

+ 3 - 2
Dockerfile

@@ -81,7 +81,8 @@ RUN apt-get update -qq \
         build-essential python-dev python3-dev \
     && echo 'empty placeholder for setup.py to use' > "$CODE_DIR/archivebox/README.md" \
     && python3 -c 'from distutils.core import run_setup; result = run_setup("./setup.py", stop_after="init"); print("\n".join(result.install_requires + result.extras_require["sonic"]))' > /tmp/requirements.txt \
-    && pip install --quiet -r /tmp/requirements.txt \
+    && pip install -r /tmp/requirements.txt \
+    && pip install --upgrade youtube-dl yt-dlp \
     && apt-get purge -y build-essential python-dev python3-dev \
     && apt-get autoremove -y \
     && rm -rf /var/lib/apt/lists/*
@@ -104,7 +105,7 @@ RUN pip install -e .
 WORKDIR "$DATA_DIR"
 ENV IN_DOCKER=True \
     CHROME_SANDBOX=False \
-    CHROME_BINARY="chromium" \
+    CHROME_BINARY="/usr/bin/chromium-browser" \
     USE_SINGLEFILE=True \
     SINGLEFILE_BINARY="$NODE_DIR/node_modules/.bin/single-file" \
     USE_READABILITY=True \

+ 2 - 1
archivebox/config.py

@@ -142,6 +142,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
                                                                 '--yes-playlist',
                                                                 '--continue',
                                                                 '--ignore-errors',
+                                                                '--no-abort-on-error',
                                                                 '--geo-bypass',
                                                                 '--add-metadata',
                                                                 '--max-filesize={}'.format(c['MEDIA_MAX_SIZE']),
@@ -929,7 +930,7 @@ def get_chrome_info(config: ConfigDict) -> ConfigValue:
         'TIMEOUT': config['TIMEOUT'],
         'RESOLUTION': config['RESOLUTION'],
         'CHECK_SSL_VALIDITY': config['CHECK_SSL_VALIDITY'],
-        'CHROME_BINARY': config['CHROME_BINARY'],
+        'CHROME_BINARY': bin_path(config['CHROME_BINARY']),
         'CHROME_HEADLESS': config['CHROME_HEADLESS'],
         'CHROME_SANDBOX': config['CHROME_SANDBOX'],
         'CHROME_USER_AGENT': config['CHROME_USER_AGENT'],

+ 1 - 0
archivebox/extractors/media.py

@@ -43,6 +43,7 @@ def save_media(link: Link, out_dir: Optional[Path]=None, timeout: int=MEDIA_TIME
         YOUTUBEDL_BINARY,
         *YOUTUBEDL_ARGS,
         *([] if CHECK_SSL_VALIDITY else ['--no-check-certificate']),
+        # TODO: add --cookies-from-browser={CHROME_USER_DATA_DIR}
         link.url,
     ]
     status = 'succeeded'