瀏覽代碼

feat: Use CURL_ARGS on title extractor

Cristian 5 年之前
父節點
當前提交
c12fe0e3d7
共有 4 個文件被更改,包括 8 次插入和 5 次刪除
  1. 2 1
      archivebox/config/__init__.py
  2. 2 0
      archivebox/config/stubs.py
  3. 2 1
      archivebox/extractors/git.py
  4. 2 3
      archivebox/extractors/title.py

+ 2 - 1
archivebox/config/__init__.py

@@ -134,7 +134,8 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
         'CURL_ARGS':                {'type': list,  'default': ['--silent',
         'CURL_ARGS':                {'type': list,  'default': ['--silent',
                                                                 '--location',
                                                                 '--location',
                                                                 '--compressed'
                                                                 '--compressed'
-                                                               ]}
+                                                               ]},
+        'GIT_ARGS':                 {'type': list,  'default': ['--recursive']},
     },
     },
 
 
     'DEPENDENCY_CONFIG': {
     'DEPENDENCY_CONFIG': {

+ 2 - 0
archivebox/config/stubs.py

@@ -96,6 +96,8 @@ class ConfigDict(BaseConfig, total=False):
 
 
     YOUTUBEDL_ARGS: Optional[str]
     YOUTUBEDL_ARGS: Optional[str]
     WGET_ARGS: Optional[str]
     WGET_ARGS: Optional[str]
+    CURL_ARGS: Optional[str]
+    GIT_ARGS: Optional[str]
 
 
 
 
 ConfigDefaultValueGetter = Callable[[ConfigDict], ConfigValue]
 ConfigDefaultValueGetter = Callable[[ConfigDict], ConfigValue]

+ 2 - 1
archivebox/extractors/git.py

@@ -18,6 +18,7 @@ from ..config import (
     TIMEOUT,
     TIMEOUT,
     SAVE_GIT,
     SAVE_GIT,
     GIT_BINARY,
     GIT_BINARY,
+    GIT_ARGS,
     GIT_VERSION,
     GIT_VERSION,
     GIT_DOMAINS,
     GIT_DOMAINS,
     CHECK_SSL_VALIDITY
     CHECK_SSL_VALIDITY
@@ -56,7 +57,7 @@ def save_git(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
     cmd = [
     cmd = [
         GIT_BINARY,
         GIT_BINARY,
         'clone',
         'clone',
-        '--recursive',
+        *GIT_ARGS,
         *([] if CHECK_SSL_VALIDITY else ['-c', 'http.sslVerify=false']),
         *([] if CHECK_SSL_VALIDITY else ['-c', 'http.sslVerify=false']),
         without_query(without_fragment(link.url)),
         without_query(without_fragment(link.url)),
     ]
     ]

+ 2 - 3
archivebox/extractors/title.py

@@ -16,6 +16,7 @@ from ..config import (
     CHECK_SSL_VALIDITY,
     CHECK_SSL_VALIDITY,
     SAVE_TITLE,
     SAVE_TITLE,
     CURL_BINARY,
     CURL_BINARY,
+    CURL_ARGS,
     CURL_VERSION,
     CURL_VERSION,
     CURL_USER_AGENT,
     CURL_USER_AGENT,
     setup_django,
     setup_django,
@@ -51,10 +52,8 @@ def save_title(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -
     output: ArchiveOutput = None
     output: ArchiveOutput = None
     cmd = [
     cmd = [
         CURL_BINARY,
         CURL_BINARY,
-        '--silent',
+        *CURL_ARGS,
         '--max-time', str(timeout),
         '--max-time', str(timeout),
-        '--location',
-        '--compressed',
         *(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []),
         *(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []),
         *([] if CHECK_SSL_VALIDITY else ['--insecure']),
         *([] if CHECK_SSL_VALIDITY else ['--insecure']),
         link.url,
         link.url,