Browse Source

Add FAVICON_PROVIDER option for custom favicon service

Micah R Ledbetter 2 years ago
parent
commit
1e50ca243e
3 changed files with 6 additions and 4 deletions
  1. +2 -1
      archivebox/config.py
  2. +2 -1
      archivebox/extractors/favicon.py
  3. +2 -2
      archivebox/index/schema.py

+ 2 - 1
archivebox/config.py

@@ -183,7 +183,8 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
                                                                 '--compressed'
                                                                 '--compressed'
                                                                ]},
                                                                ]},
         'GIT_ARGS':                 {'type': list,  'default': ['--recursive']},
         'GIT_ARGS':                 {'type': list,  'default': ['--recursive']},
-        'SINGLEFILE_ARGS':          {'type': list,  'default' : None}
+        'SINGLEFILE_ARGS':          {'type': list,  'default' : None},
+        'FAVICON_PROVIDER':         {'type': str,   'default': 'https://www.google.com/s2/favicons?domain={}'},
     },
     },
 
 
     'SEARCH_BACKEND_CONFIG' : {
     'SEARCH_BACKEND_CONFIG' : {

+ 2 - 1
archivebox/extractors/favicon.py

@@ -10,6 +10,7 @@ from ..util import enforce_types, domain
 from ..config import (
 from ..config import (
     TIMEOUT,
     TIMEOUT,
     SAVE_FAVICON,
     SAVE_FAVICON,
+    FAVICON_PROVIDER,
     CURL_BINARY,
     CURL_BINARY,
     CURL_ARGS,
     CURL_ARGS,
     CURL_VERSION,
     CURL_VERSION,
@@ -40,7 +41,7 @@ def save_favicon(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT)
         '--output', str(output),
         '--output', str(output),
         *(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []),
         *(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []),
         *([] if CHECK_SSL_VALIDITY else ['--insecure']),
         *([] if CHECK_SSL_VALIDITY else ['--insecure']),
-        'https://www.google.com/s2/favicons?domain={}'.format(domain(link.url)),
+        FAVICON_PROVIDER.format(domain(link.url)),
     ]
     ]
     status = 'failed'
     status = 'failed'
     timer = TimedProgress(timeout, prefix='      ')
     timer = TimedProgress(timeout, prefix='      ')

+ 2 - 2
archivebox/index/schema.py

@@ -20,7 +20,7 @@ from django.utils.functional import cached_property
 
 
 from ..system import get_dir_size
 from ..system import get_dir_size
 from ..util import ts_to_date_str, parse_date
 from ..util import ts_to_date_str, parse_date
-from ..config import OUTPUT_DIR, ARCHIVE_DIR_NAME
+from ..config import OUTPUT_DIR, ARCHIVE_DIR_NAME, FAVICON_PROVIDER
 
 
 class ArchiveError(Exception):
 class ArchiveError(Exception):
     def __init__(self, message, hints=None):
     def __init__(self, message, hints=None):
@@ -423,7 +423,7 @@ class Link:
         canonical = {
         canonical = {
             'index_path': 'index.html',
             'index_path': 'index.html',
             'favicon_path': 'favicon.ico',
             'favicon_path': 'favicon.ico',
-            'google_favicon_path': 'https://www.google.com/s2/favicons?domain={}'.format(self.domain),
+            'google_favicon_path': FAVICON_PROVIDER.format(self.domain),
             'wget_path': wget_output_path(self),
             'wget_path': wget_output_path(self),
             'warc_path': 'warc/',
             'warc_path': 'warc/',
             'singlefile_path': 'singlefile.html',
             'singlefile_path': 'singlefile.html',