Browse Source

Replaced get method

Angel Rey 5 năm trước
mục cha
commit
f0915a56aa
1 tập tin đã thay đổi với 16 bổ sung và 6 xóa
  1. 16 6
      archivebox/util.py

+ 16 - 6
archivebox/util.py

@@ -15,6 +15,7 @@ from datetime import datetime
 from dateparser import parse as dateparser
 from dateparser import parse as dateparser
 
 
 import requests
 import requests
+from requests.exceptions import RequestException
 from base32_crockford import encode as base32_encode                            # type: ignore
 from base32_crockford import encode as base32_encode                            # type: ignore
 from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding
 from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding
 
 
@@ -178,12 +179,21 @@ def get_headers(url: str, timeout: int=None) -> str:
     """Download the contents of a remote url and return the headers"""
     """Download the contents of a remote url and return the headers"""
     from .config import TIMEOUT, CHECK_SSL_VALIDITY, WGET_USER_AGENT
     from .config import TIMEOUT, CHECK_SSL_VALIDITY, WGET_USER_AGENT
     timeout = timeout or TIMEOUT
     timeout = timeout or TIMEOUT
-    response = requests.get(
-        url,
-        headers={'User-Agent': WGET_USER_AGENT},
-        verify=CHECK_SSL_VALIDITY,
-        timeout=timeout,
-    )
+
+    try:
+        response = requests.head(
+            url,
+            headers={'User-Agent': WGET_USER_AGENT},
+            verify=CHECK_SSL_VALIDITY,
+            timeout=timeout,
+        )
+    except RequestException:
+        response = requests.get(
+            url,
+            headers={'User-Agent': WGET_USER_AGENT},
+            verify=CHECK_SSL_VALIDITY,
+            timeout=timeout,
+        )
     
     
     return pyjson.dumps(dict(response.headers), indent=4)
     return pyjson.dumps(dict(response.headers), indent=4)