|
@@ -160,6 +160,15 @@ def download_url(url: str, timeout: int=None) -> str:
|
|
|
verify=CHECK_SSL_VALIDITY,
|
|
verify=CHECK_SSL_VALIDITY,
|
|
|
timeout=timeout,
|
|
timeout=timeout,
|
|
|
)
|
|
)
|
|
|
|
|
+ if response.headers.get('Content-Type') == 'application/rss+xml':
|
|
|
|
|
+ # Based on https://github.com/scrapy/w3lib/blob/master/w3lib/encoding.py
|
|
|
|
|
+ _TEMPLATE = r'''%s\s*=\s*["']?\s*%s\s*["']?'''
|
|
|
|
|
+ _XML_ENCODING_RE = _TEMPLATE % ('encoding', r'(?P<xmlcharset>[\w-]+)')
|
|
|
|
|
+ _BODY_ENCODING_PATTERN = r'<\s*(\?xml\s[^>]+%s)' % (_XML_ENCODING_RE)
|
|
|
|
|
+ _BODY_ENCODING_STR_RE = re.compile(_BODY_ENCODING_PATTERN, re.I | re.VERBOSE)
|
|
|
|
|
+ match = _BODY_ENCODING_STR_RE.search(response.text[:1024])
|
|
|
|
|
+ if match:
|
|
|
|
|
+ response.encoding = match.group('xmlcharset')
|
|
|
return response.text
|
|
return response.text
|
|
|
|
|
|
|
|
|
|
|