Browse Source

Implement flush for search backend after remove command

JDC 5 năm trước
mục cha
commit
47daa038eb

+ 2 - 2
archivebox/config.py

@@ -147,8 +147,8 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
         'SEARCH_BACKEND_PORT':      {'type': int,   'default': 1491},
         'SEARCH_BACKEND_PASSWORD':  {'type': str,   'default': 'SecretPassword'},
         # SONIC
-        'SONIC_BUCKET':             {'type': str,   'default': 'archivebox'},
-        'SONIC_COLLECTION':         {'type': str,   'default': 'snapshots'},
+        'SONIC_COLLECTION':         {'type': str,   'default': 'archivebox'},
+        'SONIC_BUCKET':             {'type': str,   'default': 'snapshots'},
     },
 
     'DEPENDENCY_CONFIG': {

+ 1 - 1
archivebox/core/mixins.py

@@ -18,7 +18,7 @@ class SearchResultsAdminMixin(object):
         except Exception as err:
             messages.add_message(request, messages.WARNING, f'Error from the search backend, only showing results from default admin search fields - Error: {err}')
         else:
-            qsearch = queryset.filter(id__in=snapshot_ids)
+            qsearch = queryset.filter(pk__in=snapshot_ids)
             qs |= qsearch
 
         finally:

+ 2 - 0
archivebox/main.py

@@ -115,6 +115,7 @@ from .logging_util import (
     printable_dependency_version,
 )
 
+from .search import flush_search_index
 
 ALLOWED_IN_OUTPUT_DIR = {
     'lost+found',
@@ -665,6 +666,7 @@ def remove(filter_str: Optional[str]=None,
     to_remove = snapshots.count()
 
     remove_from_sql_main_index(snapshots=snapshots, out_dir=out_dir)
+    flush_search_index(snapshot_ids=[str(pk) for pk in snapshots.values_list('pk',flat=True)])
     all_snapshots = load_main_index(out_dir=out_dir)
     log_removal_finished(all_snapshots.count(), to_remove)
     

+ 8 - 1
archivebox/search/__init__.py

@@ -45,4 +45,11 @@ def query_search_index(text: str) -> List:
         return backend.search(text)
     else:
         return []
-        
+
+@enforce_types
+def flush_search_index(snapshot_ids: List[str]):
+    """Forward ``snapshot_ids`` to the search backend's ``flush``.
+
+    Does nothing when indexing is disabled or when ``snapshot_ids`` is empty.
+    """
+    if indexing_enabled() and snapshot_ids:
+        import_backend().flush(snapshot_ids)
+    

+ 8 - 3
archivebox/search/backends/sonic.py

@@ -10,11 +10,16 @@ from archivebox.config import SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEA
 def index(snapshot_id: str, texts: List[str]):
+    """Push each entry of ``texts`` (stringified) to Sonic under ``snapshot_id``."""
     with IngestClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as ingestcl:
         for text in texts:
-            ingestcl.push(SONIC_BUCKET, SONIC_COLLECTION, snapshot_id, str(text))
+            ingestcl.push(SONIC_COLLECTION, SONIC_BUCKET, snapshot_id, str(text))
 
 @enforce_types
 def search(text: str) -> List:
+    """Query Sonic for ``text`` and return the result of ``SearchClient.query``."""
     with SearchClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as querycl:
-        snap_ids = querycl.query(SONIC_BUCKET, SONIC_COLLECTION, text)
+        snap_ids = querycl.query(SONIC_COLLECTION, SONIC_BUCKET, text)
     return snap_ids
-    
+
+@enforce_types
+def flush(snapshot_ids: List[str]):
+    """Call ``flush_object`` on Sonic once for every id in ``snapshot_ids``.
+
+    A single ingest connection is opened and reused for all ids; each id is
+    stringified before being sent, addressed by ``SONIC_COLLECTION`` and
+    ``SONIC_BUCKET``.
+    """
+    with IngestClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as ingestcl:
+        # Loop variable is not called ``id`` so the builtin stays unshadowed.
+        for snapshot_id in snapshot_ids:
+            ingestcl.flush_object(SONIC_COLLECTION, SONIC_BUCKET, str(snapshot_id))