7 years ago · 7ea36c4adb
--- a/archivebox/archive_methods.py
+++ b/archivebox/archive_methods.py
@@ -214,6 +214,7 @@ def fetch_wget(link_dir, link, requisites=FETCH_WGET_REQUISITES, warc=FETCH_WARC
 
				         '--span-hosts',
			
 
				         '--no-parent',
			
 
				         '--restrict-file-names=unix',
			
 
				+        f'--timeout={timeout}',
			
 
				         *(('--warc-file={}'.format(warc_path),) if warc else ()),
			
 
				         *(('--page-requisites',) if FETCH_WGET_REQUISITES else ()),
			
 
				         *(('--user-agent="{}"'.format(WGET_USER_AGENT),) if WGET_USER_AGENT else ()),
			
@@ -222,7 +223,7 @@ def fetch_wget(link_dir, link, requisites=FETCH_WGET_REQUISITES, warc=FETCH_WARC
 
				     ]
			
 
				     end = progress(timeout, prefix='      ')
			
 
				     try:
			
 
				-        result = run(CMD, stdout=PIPE, stderr=PIPE, cwd=link_dir, timeout=timeout + 1)  # index.html
			
 
				+        result = run(CMD, stdout=PIPE, stderr=PIPE, cwd=link_dir, timeout=timeout + 5)  # index.html
			
 
				         end()
			
 
				         output = wget_output_path(link, look_in=domain_dir)
			
 
				 
			
@@ -265,13 +266,13 @@ def fetch_pdf(link_dir, link, timeout=TIMEOUT, user_data_dir=CHROME_USER_DATA_DI
 
				         *chrome_headless(user_data_dir=user_data_dir),
			
 
				         '--print-to-pdf',
			
 
				         '--hide-scrollbars',
			
 
				-        '--timeout=58000',
			
 
				+        f'--timeout={timeout * 1000}',
			
 
				         *(() if CHECK_SSL_VALIDITY else ('--disable-web-security', '--ignore-certificate-errors')),
			
 
				         link['url']
			
 
				     ]
			
 
				     end = progress(timeout, prefix='      ')
			
 
				     try:
			
 
				-        result = run(CMD, stdout=PIPE, stderr=PIPE, cwd=link_dir, timeout=timeout + 1)  # output.pdf
			
 
				+        result = run(CMD, stdout=PIPE, stderr=PIPE, cwd=link_dir, timeout=timeout + 5)  # output.pdf
			
 
				         end()
			
 
				         if result.returncode:
			
 
				             print('     ', (result.stderr or result.stdout).decode())
			
@@ -304,14 +305,14 @@ def fetch_screenshot(link_dir, link, timeout=TIMEOUT, user_data_dir=CHROME_USER_
 
				         '--screenshot',
			
 
				         '--window-size={}'.format(resolution),
			
 
				         '--hide-scrollbars',
			
 
				-        '--timeout=58000',
			
 
				+        f'--timeout={timeout * 1000}',
			
 
				         *(() if CHECK_SSL_VALIDITY else ('--disable-web-security', '--ignore-certificate-errors')),
			
 
				         # '--full-page',   # TODO: make this actually work using ./bin/screenshot fullPage: true
			
 
				         link['url'],
			
 
				     ]
			
 
				     end = progress(timeout, prefix='      ')
			
 
				     try:
			
 
				-        result = run(CMD, stdout=PIPE, stderr=PIPE, cwd=link_dir, timeout=timeout + 1)  # sreenshot.png
			
 
				+        result = run(CMD, stdout=PIPE, stderr=PIPE, cwd=link_dir, timeout=timeout + 5)  # screenshot.png
			
 
				         end()
			
 
				         if result.returncode:
			
 
				             print('     ', (result.stderr or result.stdout).decode())
			
@@ -344,12 +345,13 @@ def fetch_dom(link_dir, link, timeout=TIMEOUT, user_data_dir=CHROME_USER_DATA_DI
 
				     CMD = [
			
 
				         *chrome_headless(user_data_dir=user_data_dir),
			
 
				         '--dump-dom',
			
 
				+        f'--timeout={timeout * 1000}',
			
 
				         link['url']
			
 
				     ]
			
 
				     end = progress(timeout, prefix='      ')
			
 
				     try:
			
 
				         with open(output_path, 'w+') as f:
			
 
				-            result = run(CMD, stdout=f, stderr=PIPE, cwd=link_dir, timeout=timeout + 1)  # output.html
			
 
				+            result = run(CMD, stdout=f, stderr=PIPE, cwd=link_dir, timeout=timeout + 5)  # output.html
			
 
				         end()
			
 
				         if result.returncode:
			
 
				             print('     ', (result.stderr).decode())
			
@@ -379,7 +381,15 @@ def archive_dot_org(link_dir, link, timeout=TIMEOUT):
 
				     submit_url = 'https://web.archive.org/save/{}'.format(link['url'])
			
 
				 
			
 
				     success = False
			
 
				-    CMD = ['curl', '-L', '-I', '-X', 'GET', submit_url]
			
 
				+    CMD = [
			
 
				+        'curl',
			
 
				+        '--location',
			
 
				+        '--head',
			
 
				+        '--max-time', str(timeout),
			
 
				+        '--get',
			
 
				+        *(() if CHECK_SSL_VALIDITY else ('--insecure',)),
			
 
				+        submit_url,
			
 
				+    ]
			
 
				     end = progress(timeout, prefix='      ')
			
 
				     try:
			
 
				         result = run(CMD, stdout=PIPE, stderr=DEVNULL, cwd=link_dir, timeout=timeout + 1)  # archive.org.txt
			
--- a/archivebox/util.py
+++ b/archivebox/util.py
@@ -123,7 +123,7 @@ def progress(seconds=TIMEOUT, prefix=''):
 
				     chunk = '█' if sys.stdout.encoding == 'UTF-8' else '#'
			
 
				     chunks = TERM_WIDTH - len(prefix) - 20  # number of progress chunks to show (aka max bar width)
			
 
				 
			
 
				-    def progress_bar(seconds=seconds, prefix=prefix):
			
 
				+    def progress_bar(seconds, prefix):
			
 
				         """show timer in the form of progress bar, with percentage and seconds remaining"""
			
 
				         try:
			
 
				             for s in range(seconds * chunks):