2 سال پیش · d8aa84ac98
--- a/archivebox/config.py
+++ b/archivebox/config.py
@@ -209,6 +209,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
 
				         'SEARCH_BACKEND_HOST_NAME': {'type': str,   'default': 'localhost'},
			
 
				         'SEARCH_BACKEND_PORT':      {'type': int,   'default': 1491},
			
 
				         'SEARCH_BACKEND_PASSWORD':  {'type': str,   'default': 'SecretPassword'},
			
 
				+        'SEARCH_PROCESS_HTML':      {'type': bool,  'default': True},
			
 
				         # SONIC
			
 
				         'SONIC_COLLECTION':         {'type': str,   'default': 'archivebox'},
			
 
				         'SONIC_BUCKET':             {'type': str,   'default': 'snapshots'},
			
--- a/archivebox/search/utils.py
+++ b/archivebox/search/utils.py
@@ -4,7 +4,7 @@ import io
 
				 from django.db.models import QuerySet
			
 
				 
			
 
				 from archivebox.util import enforce_types
			
 
				-from archivebox.config import ANSI
			
 
				+from archivebox.config import ANSI, SEARCH_PROCESS_HTML
			
 
				 
			
 
				 BLOCK_SIZE = 32768
			
 
				 
			
@@ -128,7 +128,8 @@ def get_indexable_content(results: QuerySet):
 
				     if method == 'readability':
			
 
				         return get_file_result_content(res, 'content.txt', use_pwd=True)
			
 
				     elif method == 'singlefile':
			
 
				-        return get_file_result_content(res, '', use_pwd=True, filter=_extract_html_text)
			
 
				+        filter = _extract_html_text if SEARCH_PROCESS_HTML else _read_all
			
 
				+        return get_file_result_content(res, '', use_pwd=True, filter=filter)
			
 
				     elif method == 'dom':
			
 
				         return get_file_result_content(res, '', use_pwd=True)
			
 
				     elif method == 'wget':