misha 5 yıl önce
ebeveyn
işleme
1aa2a5b069

+ 2 - 1
archivebox/archive_methods.py

@@ -30,6 +30,7 @@ from config import (
     OUTPUT_DIR,
     GIT_DOMAINS,
     GIT_SHA,
+    RESTRICT_FILE_NAMES,
     CURL_USER_AGENT,
     WGET_USER_AGENT,
     CHECK_SSL_VALIDITY,
@@ -227,7 +228,7 @@ def fetch_wget(link_dir, link, timeout=TIMEOUT):
         '--span-hosts',
         '--no-parent',
         '-e', 'robots=off',
-        '--restrict-file-names=nocontrol',
+        *(('--restrict-file-names={}'.format(RESTRICT_FILE_NAMES),) if RESTRICT_FILE_NAMES else ()),
         '--timeout={}'.format(timeout),
         *(('--compression=auto',) if WGET_AUTO_COMPRESSION else ()),
         *(() if FETCH_WARC else ('--timestamping',)),

+ 1 - 0
archivebox/config.py

@@ -34,6 +34,7 @@ SUBMIT_ARCHIVE_DOT_ORG = os.getenv('SUBMIT_ARCHIVE_DOT_ORG', 'True'
 
 CHECK_SSL_VALIDITY =     os.getenv('CHECK_SSL_VALIDITY',     'True'             ).lower() == 'true'
 RESOLUTION =             os.getenv('RESOLUTION',             '1440,2000'        )
+RESTRICT_FILE_NAMES =    os.getenv('RESTRICT_FILE_NAMES',    'windows'        )  # passed to wget as --restrict-file-names=<value>; an empty value omits the flag
 GIT_DOMAINS =            os.getenv('GIT_DOMAINS',            'github.com,bitbucket.org,gitlab.com').split(',')
 CURL_USER_AGENT =        os.getenv('CURL_USER_AGENT',        'ArchiveBox/{GIT_SHA} (+https://github.com/pirate/ArchiveBox/)')
 WGET_USER_AGENT =        os.getenv('WGET_USER_AGENT',        'ArchiveBox/{GIT_SHA} (+https://github.com/pirate/ArchiveBox/) wget/{WGET_VERSION}')

+ 1 - 0
etc/ArchiveBox.conf.default

@@ -39,6 +39,7 @@
 
 #CHECK_SSL_VALIDITY=True
 #FETCH_WGET_REQUISITES=True
+#RESTRICT_FILE_NAMES="windows"
 #RESOLUTION="1440,900"
 #CURL_USER_AGENT="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36"
 #WGET_USER_AGENT="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36"