Browse Source

feat: Add config for youtubedl (YOUTUBEDL_ARGS)

Cristian 5 years ago
parent
commit
bc02e0ffe3
3 changed files with 24 additions and 22 deletions
  1. archivebox/config/__init__.py (+20 -3)
  2. archivebox/config/stubs.py (+2 -1)
  3. archivebox/extractors/media.py (+2 -18)

+ 20 - 3
archivebox/config/__init__.py

@@ -89,7 +89,6 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
         'SAVE_WARC':                {'type': bool,  'default': True, 'aliases': ('FETCH_WARC',)},
         'SAVE_GIT':                 {'type': bool,  'default': True, 'aliases': ('FETCH_GIT',)},
         'SAVE_MEDIA':               {'type': bool,  'default': True, 'aliases': ('FETCH_MEDIA',)},
-        'SAVE_PLAYLISTS':           {'type': bool,  'default': True, 'aliases': ('FETCH_PLAYLISTS',)},
         'SAVE_ARCHIVE_DOT_ORG':     {'type': bool,  'default': True, 'aliases': ('SUBMIT_ARCHIVE_DOT_ORG',)},
     },
 
@@ -107,6 +106,21 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
 
         'CHROME_HEADLESS':          {'type': bool,  'default': True},
         'CHROME_SANDBOX':           {'type': bool,  'default': lambda c: not c['IN_DOCKER']},
+        'YOUTUBEDL_ARGS':           {'type': list,  'default': ['--write-description',
+                                                                '--write-info-json',
+                                                                '--write-annotations',
+                                                                '--write-thumbnail',
+                                                                '--no-call-home',
+                                                                '--user-agent',
+                                                                '--all-subs',
+                                                                '--extract-audio',
+                                                                '--keep-video',
+                                                                '--ignore-errors',
+                                                                '--geo-bypass',
+                                                                '--audio-format', 'mp3',
+                                                                '--audio-quality', '320K',
+                                                                '--embed-thumbnail',
+                                                                '--add-metadata']}
     },
 
     'DEPENDENCY_CONFIG': {
@@ -279,7 +293,7 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
     'USE_YOUTUBEDL':            {'default': lambda c: c['USE_YOUTUBEDL'] and c['SAVE_MEDIA']},
     'YOUTUBEDL_VERSION':        {'default': lambda c: bin_version(c['YOUTUBEDL_BINARY']) if c['USE_YOUTUBEDL'] else None},
     'SAVE_MEDIA':               {'default': lambda c: c['USE_YOUTUBEDL'] and c['SAVE_MEDIA']},
-    'SAVE_PLAYLISTS':           {'default': lambda c: c['SAVE_PLAYLISTS'] and c['SAVE_MEDIA']},
+    'YOUTUBEDL_ARGS':           {'default': lambda c: c['YOUTUBEDL_ARGS'] or []},
 
     'USE_CHROME':               {'default': lambda c: c['USE_CHROME'] and (c['SAVE_PDF'] or c['SAVE_SCREENSHOT'] or c['SAVE_DOM'] or c['SAVE_SINGLEFILE'])},
     'CHROME_BINARY':            {'default': lambda c: c['CHROME_BINARY'] if c['CHROME_BINARY'] else find_chrome_binary()},
@@ -349,7 +363,10 @@ def load_config_val(key: str,
             raise ValueError(f'Invalid configuration option {key}={val} (expected an integer)')
         return int(val)
 
-    raise Exception('Config values can only be str, bool, or int')
+    elif type is list:
+        return val.split(" ")
+
+    raise Exception('Config values can only be str, bool, int or list')
 
 
 def load_config_file(out_dir: str=None) -> Optional[Dict[str, str]]:

+ 2 - 1
archivebox/config/stubs.py

@@ -64,7 +64,6 @@ class ConfigDict(BaseConfig, total=False):
     SAVE_WARC: bool
     SAVE_GIT: bool
     SAVE_MEDIA: bool
-    SAVE_PLAYLISTS: bool
     SAVE_ARCHIVE_DOT_ORG: bool
 
     RESOLUTION: str
@@ -95,6 +94,8 @@ class ConfigDict(BaseConfig, total=False):
     YOUTUBEDL_BINARY: str
     CHROME_BINARY: Optional[str]
 
+    YOUTUBEDL_ARGS: Optional[str]
+
 
 ConfigDefaultValueGetter = Callable[[ConfigDict], ConfigValue]
 ConfigDefaultValue = Union[ConfigValue, ConfigDefaultValueGetter]

+ 2 - 18
archivebox/extractors/media.py

@@ -12,7 +12,7 @@ from ..util import (
 from ..config import (
     MEDIA_TIMEOUT,
     SAVE_MEDIA,
-    SAVE_PLAYLISTS,
+    YOUTUBEDL_ARGS,
     YOUTUBEDL_BINARY,
     YOUTUBEDL_VERSION,
     CHECK_SSL_VALIDITY
@@ -42,23 +42,7 @@ def save_media(link: Link, out_dir: Optional[Path]=None, timeout: int=MEDIA_TIME
     output_path.mkdir(exist_ok=True)
     cmd = [
         YOUTUBEDL_BINARY,
-        '--write-description',
-        '--write-info-json',
-        '--write-annotations',
-        '--write-thumbnail',
-        '--no-call-home',
-        '--no-check-certificate',
-        '--user-agent',
-        '--all-subs',
-        '--extract-audio',
-        '--keep-video',
-        '--ignore-errors',
-        '--geo-bypass',
-        '--audio-format', 'mp3',
-        '--audio-quality', '320K',
-        '--embed-thumbnail',
-        '--add-metadata',
-        *(['--yes-playlist'] if SAVE_PLAYLISTS else []),
+        *YOUTUBEDL_ARGS,
         *([] if CHECK_SSL_VALIDITY else ['--no-check-certificate']),
         link.url,
     ]