소스 검색

Use a generator for snapshot flush from index

JDC 5 년 전
부모
커밋
f383648ffc
3개의 파일이 변경됨: 7줄 추가, 8줄 삭제
  1. archivebox/main.py (+1 -1)
  2. archivebox/search/__init__.py (+3 -4)
  3. archivebox/search/backends/sonic.py (+3 -3)

+ 1 - 1
archivebox/main.py

@@ -666,7 +666,7 @@ def remove(filter_str: Optional[str]=None,
     to_remove = snapshots.count()
 
     remove_from_sql_main_index(snapshots=snapshots, out_dir=out_dir)
-    flush_search_index(snapshot_ids=[str(pk) for pk in snapshots.values_list('pk',flat=True)])
+    flush_search_index(snapshot_ids=(str(pk) for pk in snapshots.values_list('pk',flat=True)))
     all_snapshots = load_main_index(out_dir=out_dir)
     log_removal_finished(all_snapshots.count(), to_remove)
     

+ 3 - 4
archivebox/search/__init__.py

@@ -1,4 +1,4 @@
-from typing import List, Union
+from typing import List, Union, Generator
 from pathlib import Path
 from importlib import import_module
 
@@ -39,7 +39,7 @@ def write_search_index(link: Link, texts: Union[List[str], None]=None, out_dir:
             backend.index(snapshot_id=str(snap.id), texts=texts)
 
 @enforce_types
-def query_search_index(text: str) -> List:
+def query_search_index(text: str) -> List[str]:  
     if search_backend_enabled():
         backend = import_backend()
         return backend.search(text)
@@ -47,9 +47,8 @@ def query_search_index(text: str) -> List:
         return []
 
 @enforce_types
-def flush_search_index(snapshot_ids: List[str]):
+def flush_search_index(snapshot_ids: Generator[str, None, None]):
     if not indexing_enabled() or not snapshot_ids:
         return
     backend = import_backend()
     backend.flush(snapshot_ids)
-    

+ 3 - 3
archivebox/search/backends/sonic.py

@@ -1,4 +1,4 @@
-from typing import List
+from typing import List, Generator
 
 from sonic import IngestClient, SearchClient
 
@@ -13,13 +13,13 @@ def index(snapshot_id: str, texts: List[str]):
             ingestcl.push(SONIC_COLLECTION, SONIC_BUCKET, snapshot_id, str(text))
 
 @enforce_types
-def search(text: str) -> List:
+def search(text: str) -> List[str]:
     with SearchClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as querycl:
         snap_ids = querycl.query(SONIC_COLLECTION, SONIC_BUCKET, text)
     return snap_ids
 
 @enforce_types
-def flush(snapshot_ids: List[str]):
+def flush(snapshot_ids: Generator[str, None, None]):
     with IngestClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as ingestcl:
         for id in snapshot_ids:
             ingestcl.flush_object(SONIC_COLLECTION, SONIC_BUCKET, str(id))