Przeglądaj źródła

feat: add search filter-type to list command

JDC 5 lat temu
rodzic
commit
0f7dba07df
2 zmienione pliki z 34 dodaniami i 2 usunięciami
  1. 1 1
      archivebox/cli/archivebox_list.py
  2. 33 1
      archivebox/index/__init__.py

+ 1 - 1
archivebox/cli/archivebox_list.py

@@ -98,7 +98,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
     parser.add_argument(
         '--filter-type',
         type=str,
-        choices=('exact', 'substring', 'domain', 'regex','tag'),
+        choices=('exact', 'substring', 'domain', 'regex', 'tag', 'search'),
         default='exact',
         help='Type of pattern matching to use when filtering URLs',
     )

+ 33 - 1
archivebox/index/__init__.py

@@ -51,6 +51,8 @@ from .sql import (
     write_sql_link_details,
 )
 
+from ..search import search_backend_enabled, query_search_index
+
 ### Link filtering and checking
 
 @enforce_types
@@ -365,7 +367,7 @@ LINK_FILTERS = {
 }
 
 @enforce_types
-def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='exact') -> QuerySet:
+def q_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='exact') -> QuerySet:
     q_filter = Q()
     for pattern in filter_patterns:
         try:
@@ -380,6 +382,36 @@ def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type
             raise SystemExit(2)
     return snapshots.filter(q_filter)
 
+def search_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='search') -> QuerySet:
+    if not search_backend_enabled():
+        stderr()
+        stderr(
+                '[X] The search backend is not enabled',
+                color='red',
+            )
+        raise SystemExit(2)
+
+    qsearch = get_empty_snapshot_queryset()
+    for pattern in filter_patterns:
+        try:
+            qsearch |= query_search_index(pattern)
+        except Exception as err:
+            stderr()
+            stderr(
+                f'[X] The search backend threw an exception={err}:',
+                color='red',
+            )
+            raise SystemExit(2)
+    
+    return snapshots & qsearch
+
+@enforce_types
+def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='exact') -> QuerySet:
+    if filter_type != 'search':
+        return q_filter(snapshots, filter_patterns, filter_type)
+    else:
+        return search_filter(snapshots, filter_patterns, filter_type)
+
 
 def get_indexed_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
     """indexed links without checking archive status or data directory validity"""