Browse Source

Add config for search backend

JDC 5 years ago
parent
commit
c2c01af3ad
3 changed files with 23 additions and 13 deletions
  1. archivebox/config.py (+13 -1)
  2. archivebox/search/__init__.py (+4 -7)
  3. archivebox/search/backends/sonic.py (+6 -5)

+ 13 - 1
archivebox/config.py

@@ -139,6 +139,18 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
         'GIT_ARGS':                 {'type': list,  'default': ['--recursive']},
     },
 
+    'SEARCH_BACKEND_CONFIG' : {
+        'USE_INDEXING_BACKEND':     {'type': bool,  'default': True},
+        'USE_SEARCHING_BACKEND':    {'type': bool,  'default': True},
+        'SEARCH_BACKEND_ENGINE':    {'type': str,   'default': 'sonic'},
+        'SEARCH_BACKEND_HOST_NAME': {'type': str,   'default': 'localhost'},
+        'SEARCH_BACKEND_PORT':      {'type': int,   'default': 1491},
+        'SEARCH_BACKEND_PASSWORD':  {'type': str,   'default': 'SecretPassword'},
+        # SONIC
+        'SONIC_BUCKET':             {'type': str,   'default': 'archivebox'},
+        'SONIC_COLLECTION':         {'type': str,   'default': 'snapshots'},
+    },
+
     'DEPENDENCY_CONFIG': {
         'USE_CURL':                 {'type': bool,  'default': True},
         'USE_WGET':                 {'type': bool,  'default': True},
@@ -149,7 +161,7 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
         'USE_CHROME':               {'type': bool,  'default': True},
         'USE_NODE':                 {'type': bool,  'default': True},
         'USE_YOUTUBEDL':            {'type': bool,  'default': True},
-
+        
         'CURL_BINARY':              {'type': str,   'default': 'curl'},
         'GIT_BINARY':               {'type': str,   'default': 'git'},
         'WGET_BINARY':              {'type': str,   'default': 'wget'},

+ 4 - 7
archivebox/search/__init__.py

@@ -5,19 +5,16 @@ from importlib import import_module
 
 from archivebox.index.schema import Link
 from archivebox.util import enforce_types
-from archivebox.config import setup_django, OUTPUT_DIR
-
+from archivebox.config import setup_django, OUTPUT_DIR, USE_INDEXING_BACKEND, USE_SEARCHING_BACKEND, SEARCH_BACKEND_ENGINE
 
 def indexing_enabled():
-    return True
-    # return FULLTEXT_INDEXING_ENABLED
+    return USE_INDEXING_BACKEND
 
 def search_backend_enabled():
-    return True
-    # return FULLTEXT_SEARCH_ENABLED
+    return USE_SEARCHING_BACKEND
 
 def get_backend():
-    return 'search.backends.sonic'
+    return f'search.backends.{SEARCH_BACKEND_ENGINE}'
 
 def import_backend():
     backend_string = get_backend()

+ 6 - 5
archivebox/search/backends/sonic.py

@@ -3,17 +3,18 @@ from typing import List
 from sonic import IngestClient, SearchClient
 
 from archivebox.util import enforce_types
+from archivebox.config import SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD, SONIC_BUCKET, SONIC_COLLECTION
+
 
 @enforce_types
 def index(snapshot_id: str, texts: List[str]):
-    # TODO add variables to localhost, port, password, bucket, collection
-    with IngestClient("localhost", 1491, "SecretPassword") as ingestcl:
+    with IngestClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as ingestcl:
         for text in texts:
-            ingestcl.push("archivebox", "snapshots", snapshot_id, str(text))
+            ingestcl.push(SONIC_BUCKET, SONIC_COLLECTION, snapshot_id, str(text))
 
 @enforce_types
 def search(text: str) -> List:
-    with SearchClient("localhost", 1491, "SecretPassword") as querycl:
-        snap_ids = querycl.query("archivebox", "snapshots", text)
+    with SearchClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as querycl:
+        snap_ids = querycl.query(SONIC_BUCKET, SONIC_COLLECTION, text)
     return snap_ids