
fully migrate all search backends to new plugin system

Nick Sweeting 1 year ago
Commit
fbfd16e195

+ 1 - 1
archivebox/index/__init__.py

@@ -381,7 +381,7 @@ def search_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type:
     from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG
     from ..search import query_search_index
     
-    if not SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENABLED:
+    if not SEARCH_BACKEND_CONFIG.USE_SEARCHING_BACKEND:
         stderr()
         stderr(
                 '[X] The search backend is not enabled, set config.USE_SEARCHING_BACKEND = True',

+ 3 - 3
archivebox/plugantic/base_hook.py

@@ -4,12 +4,12 @@ import inspect
 from huey.api import TaskWrapper
 
 from pathlib import Path
-from typing import List, Literal, ClassVar
+from typing import Tuple, Literal, ClassVar, get_args
 from pydantic import BaseModel, ConfigDict
 
 
-HookType = Literal['CONFIG', 'BINPROVIDER', 'BINARY', 'EXTRACTOR', 'REPLAYER', 'CHECK', 'ADMINDATAVIEW', 'QUEUE']
-hook_type_names: List[HookType] = ['CONFIG', 'BINPROVIDER', 'BINARY', 'EXTRACTOR', 'REPLAYER', 'CHECK', 'ADMINDATAVIEW', 'QUEUE']
+HookType = Literal['CONFIG', 'BINPROVIDER', 'BINARY', 'EXTRACTOR', 'REPLAYER', 'CHECK', 'ADMINDATAVIEW', 'QUEUE', 'SEARCHBACKEND']
+hook_type_names: Tuple[HookType, ...] = get_args(HookType)
 
 class BaseHook(BaseModel):
     """

+ 39 - 0
archivebox/plugantic/base_searchbackend.py

@@ -0,0 +1,39 @@
+__package__ = 'archivebox.plugantic'
+
+from typing import Iterable, List
+from benedict import benedict
+from pydantic import Field
+
+
+from .base_hook import BaseHook, HookType
+
+
+
+class BaseSearchBackend(BaseHook):
+    hook_type: HookType = 'SEARCHBACKEND'
+
+    name: str = Field()       # e.g. 'ripgrep'
+
+    @staticmethod
+    def index(snapshot_id: str, texts: List[str]):
+        return
+
+    @staticmethod
+    def flush(snapshot_ids: Iterable[str]):
+        return
+
+    @staticmethod
+    def search(text: str) -> List[str]:
+        raise NotImplementedError("search method must be implemented by subclass")
+    
+    
+    def register(self, settings, parent_plugin=None):
+        # self._plugin = parent_plugin                                      # for debugging only, never rely on this!
+
+        # Install the search backend into settings.SEARCH_BACKENDS
+        settings.SEARCH_BACKENDS = getattr(settings, "SEARCH_BACKENDS", None) or benedict({})
+        settings.SEARCH_BACKENDS[self.id] = self
+
+        # Record installed hook into settings.HOOKS
+        super().register(settings, parent_plugin=parent_plugin)
+
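
With this base class, a search backend is just a hook that implements index(), flush(), and search(), then gets listed in a plugin's hooks so register() installs it into settings.SEARCH_BACKENDS. A hypothetical minimal backend to illustrate the contract (the 'inmemory' name and dict store are invented for this sketch):

    from typing import Dict, Iterable, List

    from plugantic.base_searchbackend import BaseSearchBackend

    _FAKE_INDEX: Dict[str, str] = {}    # snapshot_id -> concatenated lowercase text

    class InMemorySearchBackend(BaseSearchBackend):
        name: str = 'inmemory'

        @staticmethod
        def index(snapshot_id: str, texts: List[str]):
            _FAKE_INDEX[snapshot_id] = ' '.join(texts).lower()

        @staticmethod
        def flush(snapshot_ids: Iterable[str]):
            for snapshot_id in snapshot_ids:
                _FAKE_INDEX.pop(snapshot_id, None)

        @staticmethod
        def search(text: str) -> List[str]:
            return [sid for sid, body in _FAKE_INDEX.items() if text.lower() in body]

    INMEMORY_SEARCH_BACKEND = InMemorySearchBackend()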

+ 58 - 7
archivebox/plugins_search/ripgrep/apps.py

@@ -1,6 +1,8 @@
 __package__ = 'archivebox.plugins_search.ripgrep'
 
-from typing import List, Dict, ClassVar
+import re
+from subprocess import run
+from typing import List, Dict, ClassVar, Iterable
 # from typing_extensions import Self
 
 from django.conf import settings
@@ -14,10 +16,10 @@ from plugantic.base_plugin import BasePlugin
 from plugantic.base_configset import BaseConfigSet, ConfigSectionName
 from plugantic.base_binary import BaseBinary, env, apt, brew
 from plugantic.base_hook import BaseHook
-# from plugantic.base_search import BaseSearchBackend
+from plugantic.base_searchbackend import BaseSearchBackend
 
 # Depends on Other Plugins:
-# from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG
+from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG
 
 ###################### Config ##########################
 
@@ -39,11 +41,59 @@ class RipgrepBinary(BaseBinary):
 
 RIPGREP_BINARY = RipgrepBinary()
 
-# TODO:
-# class RipgrepSearchBackend(BaseSearchBackend):
-#     name: str = 'ripgrep'
 
-# RIPGREP_SEARCH_BACKEND = RipgrepSearchBackend()
+RG_IGNORE_EXTENSIONS = ('css','js','orig','svg')
+
+RG_ADD_TYPE = '--type-add'
+RG_IGNORE_ARGUMENTS = f"ignore:*.{{{','.join(RG_IGNORE_EXTENSIONS)}}}"
+RG_DEFAULT_ARGUMENTS = "-ilTignore" # case-insensitive (-i), list matching files only (-l), exclude the custom 'ignore' type (-Tignore)
+RG_REGEX_ARGUMENT = '-e'
+
+TIMESTAMP_REGEX = r'\/([\d]+\.[\d]+)\/'
+ts_regex = re.compile(TIMESTAMP_REGEX)
+
+
+class RipgrepSearchBackend(BaseSearchBackend):
+    name: str = 'ripgrep'
+    docs_url: str = 'https://github.com/BurntSushi/ripgrep'
+    
+    @staticmethod
+    def index(snapshot_id: str, texts: List[str]):
+        return
+
+    @staticmethod
+    def flush(snapshot_ids: Iterable[str]):
+        return
+
+    @staticmethod
+    def search(text: str) -> List[str]:
+        from core.models import Snapshot  # deferred import, as in the old ripgrep.py: models aren't loadable at plugin import time
+        rg_bin = RIPGREP_BINARY.load()
+        if not rg_bin.version:
+            raise Exception("ripgrep binary not found, install ripgrep to use this search backend")
+    
+        rg_cmd = [
+            rg_bin.abspath, 
+            RG_ADD_TYPE, 
+            RG_IGNORE_ARGUMENTS, 
+            RG_DEFAULT_ARGUMENTS, 
+            RG_REGEX_ARGUMENT, 
+            text, 
+            str(settings.ARCHIVE_DIR)
+        ]
+        rg = run(rg_cmd, timeout=SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_TIMEOUT, capture_output=True, text=True)
+        timestamps = set()
+        for path in rg.stdout.splitlines():
+            ts = ts_regex.findall(path)
+            if ts:
+                timestamps.add(ts[0])
+        
+        snap_ids = [str(id) for id in Snapshot.objects.filter(timestamp__in=timestamps).values_list('pk', flat=True)]
+    
+        return snap_ids
+    
+RIPGREP_SEARCH_BACKEND = RipgrepSearchBackend()
+
+
 
 
 class RipgrepSearchPlugin(BasePlugin):
@@ -53,6 +103,7 @@ class RipgrepSearchPlugin(BasePlugin):
     hooks: List[InstanceOf[BaseHook]] = [
         RIPGREP_CONFIG,
         RIPGREP_BINARY,
+        RIPGREP_SEARCH_BACKEND,
     ]
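
The new backend shells out to rg with -l to list matching files, then recovers snapshot timestamps from the returned paths, since each snapshot lives in a directory named after its timestamp. The parsing step in isolation, using a made-up archive path:

    import re

    TIMESTAMP_REGEX = r'\/([\d]+\.[\d]+)\/'
    ts_regex = re.compile(TIMESTAMP_REGEX)

    # rg prints one matching file path per line:
    example_stdout = '/data/archive/1712345678.123/singlefile.html'

    timestamps = set()
    for path in example_stdout.splitlines():
        ts = ts_regex.findall(path)     # -> ['1712345678.123']
        if ts:
            timestamps.add(ts[0])

    print(timestamps)   # {'1712345678.123'}

The timestamps are then mapped back to Snapshot pks with Snapshot.objects.filter(timestamp__in=timestamps), as shown above.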
 
 

+ 0 - 45
archivebox/plugins_search/ripgrep/ripgrep.py

@@ -1,45 +0,0 @@
-import re
-from subprocess import run, PIPE
-from typing import List, Generator
-
-from archivebox.config import ARCHIVE_DIR, RIPGREP_VERSION, SEARCH_BACKEND_TIMEOUT
-from archivebox.util import enforce_types
-
-RG_IGNORE_EXTENSIONS = ('css','js','orig','svg')
-
-RG_ADD_TYPE = '--type-add'
-RG_IGNORE_ARGUMENTS = f"ignore:*.{{{','.join(RG_IGNORE_EXTENSIONS)}}}"
-RG_DEFAULT_ARGUMENTS = "-ilTignore" # Case insensitive(i), matching files results(l)
-RG_REGEX_ARGUMENT = '-e'
-
-TIMESTAMP_REGEX = r'\/([\d]+\.[\d]+)\/'
-
-ts_regex =  re.compile(TIMESTAMP_REGEX)
-
-@enforce_types
-def index(snapshot_id: str, texts: List[str]):
-    return
-
-@enforce_types
-def flush(snapshot_ids: Generator[str, None, None]):
-    return
-
-@enforce_types
-def search(text: str) -> List[str]:
-    if not RIPGREP_VERSION:
-        raise Exception("ripgrep binary not found, install ripgrep to use this search backend")
-
-    from core.models import Snapshot
-
-    rg_cmd = ['rg', RG_ADD_TYPE, RG_IGNORE_ARGUMENTS, RG_DEFAULT_ARGUMENTS, RG_REGEX_ARGUMENT, text, str(ARCHIVE_DIR)]
-    rg = run(rg_cmd, stdout=PIPE, stderr=PIPE, timeout=SEARCH_BACKEND_TIMEOUT)
-    file_paths = [p.decode() for p in rg.stdout.splitlines()]
-    timestamps = set()
-    for path in file_paths:
-        ts = ts_regex.findall(path)
-        if ts:
-            timestamps.add(ts[0])
-    
-    snap_ids = [str(id) for id in Snapshot.objects.filter(timestamp__in=timestamps).values_list('pk', flat=True)]
-
-    return snap_ids

+ 0 - 0
archivebox/plugins_search/sonic/__init__.py


+ 132 - 0
archivebox/plugins_search/sonic/apps.py

@@ -0,0 +1,132 @@
+__package__ = 'archivebox.plugins_search.sonic'
+
+import sys
+from typing import List, Dict, ClassVar, Generator, cast
+
+from django.conf import settings
+
+# Depends on other PyPI/vendor packages:
+from pydantic import InstanceOf, Field, model_validator
+from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName
+
+# Depends on other Django apps:
+from plugantic.base_plugin import BasePlugin
+from plugantic.base_configset import BaseConfigSet, ConfigSectionName
+from plugantic.base_binary import BaseBinary, env, brew
+from plugantic.base_hook import BaseHook
+from plugantic.base_searchbackend import BaseSearchBackend
+
+# Depends on Other Plugins:
+from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG
+
+SONIC_LIB = None
+try:
+    import sonic
+    SONIC_LIB = sonic
+except ImportError:
+    SONIC_LIB = None
+
+###################### Config ##########################
+
+class SonicConfig(BaseConfigSet):
+    section: ClassVar[ConfigSectionName] = 'DEPENDENCY_CONFIG'
+
+    SONIC_BINARY: str       = Field(default='sonic')
+    
+    SONIC_HOST: str         = Field(default='localhost', alias='SEARCH_BACKEND_HOST_NAME')
+    SONIC_PORT: int         = Field(default=1491, alias='SEARCH_BACKEND_PORT')
+    SONIC_PASSWORD: str     = Field(default='SecretPassword', alias='SEARCH_BACKEND_PASSWORD')
+    SONIC_COLLECTION: str   = Field(default='archivebox')
+    SONIC_BUCKET: str       = Field(default='archivebox')
+
+    @model_validator(mode='after')
+    def validate_sonic_port(self):
+        if SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE == 'sonic':
+            if SONIC_LIB is None:
+                sys.stderr.write('[!] Sonic search backend is enabled but not installed. Install Sonic to use the Sonic search backend.\n')
+        return self
+
+SONIC_CONFIG = SonicConfig()
+
+class SonicBinary(BaseBinary):
+    name: BinName = SONIC_CONFIG.SONIC_BINARY
+    binproviders_supported: List[InstanceOf[BinProvider]] = [brew, env]   # TODO: add cargo
+
+    provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
+        brew.name: {'packages': lambda: ['sonic']},
+        # cargo.name: {'packages': lambda: ['sonic-server']},             # TODO: add cargo
+    }
+    
+    # def on_get_version(self):
+    #     with sonic.IngestClient(SONIC_CONFIG.SONIC_HOST, str(SONIC_CONFIG.SONIC_PORT), SONIC_CONFIG.SONIC_PASSWORD) as ingestcl:
+    #         return SemVer.parse(str(ingestcl.protocol))
+
+SONIC_BINARY = SonicBinary()
+
+
+MAX_SONIC_TEXT_TOTAL_LENGTH = 100000000     # don't index more than 100 million characters per text
+MAX_SONIC_TEXT_CHUNK_LENGTH = 2000          # don't index more than 2000 characters per chunk
+MAX_SONIC_ERRORS_BEFORE_ABORT = 5
+
+
+
+class SonicSearchBackend(BaseSearchBackend):
+    name: str = 'sonic'
+    docs_url: str = 'https://github.com/valeriansaliou/sonic'
+    
+    @staticmethod
+    def index(snapshot_id: str, texts: List[str]):
+        error_count = 0
+        with sonic.IngestClient(SONIC_CONFIG.SONIC_HOST, str(SONIC_CONFIG.SONIC_PORT), SONIC_CONFIG.SONIC_PASSWORD) as ingestcl:
+            for text in texts:
+                chunks = (
+                    text[i:i+MAX_SONIC_TEXT_CHUNK_LENGTH]
+                    for i in range(
+                        0,
+                        min(len(text), MAX_SONIC_TEXT_TOTAL_LENGTH),
+                        MAX_SONIC_TEXT_CHUNK_LENGTH,
+                    )
+                )
+                try:
+                    for chunk in chunks:
+                        ingestcl.push(SONIC_CONFIG.SONIC_COLLECTION, SONIC_CONFIG.SONIC_BUCKET, snapshot_id, str(chunk))
+                except Exception as err:
+                    print(f'[!] Sonic search backend threw an error while indexing: {err.__class__.__name__} {err}')
+                    error_count += 1
+                    if error_count > MAX_SONIC_ERRORS_BEFORE_ABORT:
+                        raise
+
+    @staticmethod
+    def flush(snapshot_ids: Generator[str, None, None]):
+        with sonic.IngestClient(SONIC_CONFIG.SONIC_HOST, str(SONIC_CONFIG.SONIC_PORT), SONIC_CONFIG.SONIC_PASSWORD) as ingestcl:
+            for id in snapshot_ids:
+                ingestcl.flush_object(SONIC_CONFIG.SONIC_COLLECTION, SONIC_CONFIG.SONIC_BUCKET, str(id))
+    
+
+    @staticmethod
+    def search(text: str) -> List[str]:
+        with sonic.SearchClient(SONIC_CONFIG.SONIC_HOST, SONIC_CONFIG.SONIC_PORT, SONIC_CONFIG.SONIC_PASSWORD) as querycl:
+            snap_ids = cast(List[str], querycl.query(SONIC_CONFIG.SONIC_COLLECTION, SONIC_CONFIG.SONIC_BUCKET, text))
+        return [str(id) for id in snap_ids]
+    
+    
+SONIC_SEARCH_BACKEND = SonicSearchBackend()
+
+
+
+
+class SonicSearchPlugin(BasePlugin):
+    app_label: str = 'sonic'
+    verbose_name: str = 'Sonic'
+
+    hooks: List[InstanceOf[BaseHook]] = [
+        SONIC_CONFIG,
+        SONIC_BINARY,
+        SONIC_SEARCH_BACKEND,
+    ]
+
+
+
+PLUGIN = SonicSearchPlugin()
+PLUGIN.register(settings)
+DJANGO_APP = PLUGIN.AppConfig
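
Sonic rejects oversized pushes, so index() slices each text into MAX_SONIC_TEXT_CHUNK_LENGTH-sized pieces and stops slicing at MAX_SONIC_TEXT_TOTAL_LENGTH. The same generator with tiny stand-in limits, to make the behavior visible (the real values are 2000 and 100000000):

    MAX_CHUNK = 5       # stand-in for MAX_SONIC_TEXT_CHUNK_LENGTH
    MAX_TOTAL = 12      # stand-in for MAX_SONIC_TEXT_TOTAL_LENGTH

    text = 'abcdefghijklmnopqrstuvwxyz'

    chunks = (
        text[i:i + MAX_CHUNK]
        for i in range(0, min(len(text), MAX_TOTAL), MAX_CHUNK)
    )

    print(list(chunks))   # ['abcde', 'fghij', 'klmno']
    # note the cap is approximate: the last chunk starts before MAX_TOTAL
    # but may run up to MAX_CHUNK - 1 characters past it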

+ 0 - 44
archivebox/plugins_search/sonic/sonic.py

@@ -1,44 +0,0 @@
-from typing import List, Generator
-
-from sonic import IngestClient, SearchClient
-
-from archivebox.util import enforce_types
-from archivebox.config import SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD, SONIC_BUCKET, SONIC_COLLECTION
-
-MAX_SONIC_TEXT_TOTAL_LENGTH = 100000000     # dont index more than 100 million characters per text
-MAX_SONIC_TEXT_CHUNK_LENGTH = 2000          # dont index more than 2000 characters per chunk
-MAX_SONIC_ERRORS_BEFORE_ABORT = 5
-
-@enforce_types
-def index(snapshot_id: str, texts: List[str]):
-    error_count = 0
-    with IngestClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as ingestcl:
-        for text in texts:
-            chunks = (
-                text[i:i+MAX_SONIC_TEXT_CHUNK_LENGTH]
-                for i in range(
-                    0,
-                    min(len(text), MAX_SONIC_TEXT_TOTAL_LENGTH),
-                    MAX_SONIC_TEXT_CHUNK_LENGTH,
-                )
-            )
-            try:
-                for chunk in chunks:
-                    ingestcl.push(SONIC_COLLECTION, SONIC_BUCKET, snapshot_id, str(chunk))
-            except Exception as err:
-                print(f'[!] Sonic search backend threw an error while indexing: {err.__class__.__name__} {err}')
-                error_count += 1
-                if error_count > MAX_SONIC_ERRORS_BEFORE_ABORT:
-                    raise
-
-@enforce_types
-def search(text: str) -> List[str]:
-    with SearchClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as querycl:
-        snap_ids = querycl.query(SONIC_COLLECTION, SONIC_BUCKET, text)
-    return snap_ids
-
-@enforce_types
-def flush(snapshot_ids: Generator[str, None, None]):
-    with IngestClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as ingestcl:
-        for id in snapshot_ids:
-            ingestcl.flush_object(SONIC_COLLECTION, SONIC_BUCKET, str(id))

+ 0 - 0
archivebox/plugins_search/sqlite/__init__.py


+ 257 - 0
archivebox/plugins_search/sqlite/apps.py

@@ -0,0 +1,257 @@
+__package__ = 'archivebox.plugins_search.sqlite'
+
+import sqlite3
+import codecs
+from typing import List, ClassVar, Generator, Callable
+
+from django.conf import settings
+from django.db import connection as database
+
+# Depends on other PyPI/vendor packages:
+from pydantic import InstanceOf, Field, model_validator
+
+# Depends on other Django apps:
+from plugantic.base_plugin import BasePlugin
+from plugantic.base_configset import BaseConfigSet, ConfigSectionName
+from plugantic.base_hook import BaseHook
+from plugantic.base_searchbackend import BaseSearchBackend
+
+# Depends on Other Plugins:
+# from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG
+
+
+
+###################### Config ##########################
+
+class SqliteftsConfig(BaseConfigSet):
+    section: ClassVar[ConfigSectionName] = 'DEPENDENCY_CONFIG'
+
+    SQLITEFTS_SEPARATE_DATABASE: bool = Field(default=True, alias='FTS_SEPARATE_DATABASE')
+    SQLITEFTS_TOKENIZERS: str = Field(default='porter unicode61 remove_diacritics 2', alias='FTS_TOKENIZERS')
+    SQLITEFTS_MAX_LENGTH: int = Field(default=int(1e9), alias='FTS_SQLITE_MAX_LENGTH')
+    
+    SQLITEFTS_DB: str = Field(default='search.sqlite3')
+    SQLITEFTS_TABLE: str = Field(default='snapshot_fts')
+    SQLITEFTS_ID_TABLE: str = Field(default='snapshot_id_fts')
+    SQLITEFTS_COLUMN: str = Field(default='texts')
+    
+    @model_validator(mode='after')
+    def validate_fts_separate_database(self):
+        if self.SQLITEFTS_SEPARATE_DATABASE:
+            assert self.SQLITEFTS_DB, "SQLITEFTS_DB must be set if SQLITEFTS_SEPARATE_DATABASE is True"
+        return self
+    
+    @property
+    def get_connection(self) -> Callable[[], sqlite3.Connection]:
+        # Make get_connection callable, because `django.db.connection.cursor()`
+        # has to be called to get a context manager, but sqlite3.Connection
+        # is a context manager without being called.
+        if self.SQLITEFTS_SEPARATE_DATABASE:
+            return lambda: sqlite3.connect(self.SQLITEFTS_DB)
+        else:
+            return database.cursor
+        
+    @property
+    def SQLITE_BIND(self) -> str:
+        if self.SQLITEFTS_SEPARATE_DATABASE:
+            return "?"
+        else:
+            return "%s"
+        
+    @property
+    def SQLITE_LIMIT_LENGTH(self) -> int:
+        # Only Python >= 3.11 supports sqlite3.Connection.getlimit(),
+        # so fall back to the default if the API to get the real value isn't present
+        try:
+            limit_id = sqlite3.SQLITE_LIMIT_LENGTH
+            try:
+                with database.temporary_connection() as cursor:  # type: ignore[attr-defined]
+                    return cursor.connection.getlimit(limit_id)
+            except AttributeError:
+                return database.getlimit(limit_id)
+        except AttributeError:
+            return self.SQLITEFTS_MAX_LENGTH
+
+SQLITEFTS_CONFIG = SqliteftsConfig()
+
+
+
+def _escape_sqlite3(value: str, *, quote: str, errors='strict') -> str:
+    assert isinstance(quote, str), "quote is not a str"
+    assert len(quote) == 1, "quote must be a single character"
+
+    encodable = value.encode('utf-8', errors).decode('utf-8')
+
+    nul_index = encodable.find("\x00")
+    if nul_index >= 0:
+        error = UnicodeEncodeError("NUL-terminated utf-8", encodable,
+                                   nul_index, nul_index + 1, "NUL not allowed")
+        error_handler = codecs.lookup_error(errors)
+        replacement, _ = error_handler(error)
+        assert isinstance(replacement, str), "handling a UnicodeEncodeError should return a str replacement"
+        encodable = encodable.replace("\x00", replacement)
+
+    return quote + encodable.replace(quote, quote * 2) + quote
+
+def _escape_sqlite3_value(value: str, errors='strict') -> str:
+    return _escape_sqlite3(value, quote="'", errors=errors)
+
+def _escape_sqlite3_identifier(value: str) -> str:
+    return _escape_sqlite3(value, quote='"', errors='strict')
+
+def _create_tables():
+    table = _escape_sqlite3_identifier(SQLITEFTS_CONFIG.SQLITEFTS_TABLE)
+    # Escape as value, because fts5() expects
+    # string literal column names
+    column = _escape_sqlite3_value(SQLITEFTS_CONFIG.SQLITEFTS_COLUMN)
+    id_table = _escape_sqlite3_identifier(SQLITEFTS_CONFIG.SQLITEFTS_ID_TABLE)
+    tokenizers = _escape_sqlite3_value(SQLITEFTS_CONFIG.SQLITEFTS_TOKENIZERS)
+    trigger_name = _escape_sqlite3_identifier(f"{SQLITEFTS_CONFIG.SQLITEFTS_ID_TABLE}_ad")
+
+    with SQLITEFTS_CONFIG.get_connection() as cursor:
+        # Create a contentless-delete FTS5 table that indexes
+        # but does not store the texts of snapshots
+        try:
+            cursor.execute(
+                f"CREATE VIRTUAL TABLE {table}"
+                f" USING fts5({column},"
+                f" tokenize={tokenizers},"
+                " content='', contentless_delete=1);"
+                )
+        except Exception as e:
+            msg = str(e)
+            if 'unrecognized option: "contentlessdelete"' in msg:
+                sqlite_version = getattr(sqlite3, "sqlite_version", "Unknown")
+                raise RuntimeError(
+                    "SQLite full-text search requires SQLite >= 3.43.0;"
+                    f" the running version is {sqlite_version}"
+                ) from e
+            else:
+                raise
+        # Create a one-to-one mapping between ArchiveBox snapshot_id
+        # and FTS5 rowid, because the column type of rowid can't be
+        # customized.
+        cursor.execute(
+            f"CREATE TABLE {id_table}("
+            " rowid INTEGER PRIMARY KEY AUTOINCREMENT,"
+            " snapshot_id char(32) NOT NULL UNIQUE"
+            ");"
+            )
+        # Create a trigger to delete items from the FTS5 index when
+        # the snapshot_id is deleted from the mapping, to maintain
+        # consistency and make the `flush()` query simpler.
+        cursor.execute(
+            f"CREATE TRIGGER {trigger_name}"
+            f" AFTER DELETE ON {id_table} BEGIN"
+            f" DELETE FROM {table} WHERE rowid=old.rowid;"
+            " END;"
+            )
+
+def _handle_query_exception(exc: Exception):
+    message = str(exc)
+    if message.startswith("no such table:"):
+        raise RuntimeError(
+            "SQLite full-text search index has not yet"
+            " been created; run `archivebox update --index-only`."
+        )
+    else:
+        raise exc
+
+
+
+
+class SqliteftsSearchBackend(BaseSearchBackend):
+    name: str = 'sqlite'
+    docs_url: str = 'https://www.sqlite.org/fts5.html'
+    
+    @staticmethod
+    def index(snapshot_id: str, texts: List[str]):
+        text = ' '.join(texts)[:SQLITEFTS_CONFIG.SQLITE_LIMIT_LENGTH]
+
+        table = _escape_sqlite3_identifier(SQLITEFTS_CONFIG.SQLITEFTS_TABLE)
+        column = _escape_sqlite3_identifier(SQLITEFTS_CONFIG.SQLITEFTS_COLUMN)
+        id_table = _escape_sqlite3_identifier(SQLITEFTS_CONFIG.SQLITEFTS_ID_TABLE)
+
+        with SQLITEFTS_CONFIG.get_connection() as cursor:
+            retries = 2
+            while retries > 0:
+                retries -= 1
+                try:
+                    # If there is already an FTS index rowid to snapshot_id mapping,
+                    # then don't insert a new one, silently ignoring the operation.
+                    # {id_table}.rowid is AUTOINCREMENT, so will generate an unused
+                    # rowid for the index if it is an unindexed snapshot_id.
+                    cursor.execute(
+                        f"INSERT OR IGNORE INTO {id_table}(snapshot_id) VALUES({SQLITEFTS_CONFIG.SQLITE_BIND})",
+                        [snapshot_id])
+                    # Fetch the FTS index rowid for the given snapshot_id
+                    id_res = cursor.execute(
+                        f"SELECT rowid FROM {id_table} WHERE snapshot_id = {SQLITEFTS_CONFIG.SQLITE_BIND}",
+                        [snapshot_id])
+                    rowid = id_res.fetchone()[0]
+                    # (Re-)index the content
+                    cursor.execute(
+                        "INSERT OR REPLACE INTO"
+                        f" {table}(rowid, {column}) VALUES ({SQLITEFTS_CONFIG.SQLITE_BIND}, {SQLITEFTS_CONFIG.SQLITE_BIND})",
+                        [rowid, text])
+                    # All statements succeeded; return
+                    return
+                except Exception as e:
+                    if str(e).startswith("no such table:") and retries > 0:
+                        _create_tables()
+                    else:
+                        raise
+
+        raise RuntimeError("Failed to create tables for SQLite FTS5 search")
+
+    @staticmethod
+    def search(text: str) -> List[str]:
+        table = _escape_sqlite3_identifier(SQLITEFTS_CONFIG.SQLITEFTS_TABLE)
+        id_table = _escape_sqlite3_identifier(SQLITEFTS_CONFIG.SQLITEFTS_ID_TABLE)
+
+        with SQLITEFTS_CONFIG.get_connection() as cursor:
+            try:
+                res = cursor.execute(
+                    f"SELECT snapshot_id FROM {table}"
+                    f" INNER JOIN {id_table}"
+                    f" ON {id_table}.rowid = {table}.rowid"
+                    f" WHERE {table} MATCH {SQLITEFTS_CONFIG.SQLITE_BIND}",
+                    [text])
+            except Exception as e:
+                _handle_query_exception(e)
+
+            snap_ids = [row[0] for row in res.fetchall()]
+        return snap_ids
+
+    @staticmethod
+    def flush(snapshot_ids: Generator[str, None, None]):
+        snapshot_ids = list(snapshot_ids)  # type: ignore[assignment]
+
+        id_table = _escape_sqlite3_identifier(SQLITEFTS_CONFIG.SQLITEFTS_ID_TABLE)
+
+        with SQLITEFTS_CONFIG.get_connection() as cursor:
+            try:
+                cursor.executemany(
+                    f"DELETE FROM {id_table} WHERE snapshot_id={SQLITEFTS_CONFIG.SQLITE_BIND}",
+                    [(snapshot_id,) for snapshot_id in snapshot_ids])  # executemany expects one parameter tuple per row
+            except Exception as e:
+                _handle_query_exception(e)
+    
+SQLITEFTS_SEARCH_BACKEND = SqliteftsSearchBackend()
+
+
+
+class SqliteftsSearchPlugin(BasePlugin):
+    app_label: str = 'sqlitefts'
+    verbose_name: str = 'Sqlitefts'
+
+    hooks: List[InstanceOf[BaseHook]] = [
+        SQLITEFTS_CONFIG,
+        SQLITEFTS_SEARCH_BACKEND,
+    ]
+
+
+
+PLUGIN = SqliteftsSearchPlugin()
+PLUGIN.register(settings)
+DJANGO_APP = PLUGIN.AppConfig
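
FTS5 table and column names can't be passed as bound parameters, so the module escapes them by hand: identifiers are wrapped in double quotes with embedded quotes doubled, string literals likewise with single quotes. A quick check of the two helpers (assuming the _escape_sqlite3 definitions above are in scope):

    print(_escape_sqlite3_identifier('snapshot_fts'))   # "snapshot_fts"
    print(_escape_sqlite3_identifier('weird"name'))     # "weird""name"
    print(_escape_sqlite3_value('porter unicode61'))    # 'porter unicode61'
    print(_escape_sqlite3_value("o'brien"))             # 'o''brien'

Everything user-controlled (snapshot ids, search text) still goes through SQLITE_BIND placeholders; only the fixed, config-derived names are escaped this way.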

+ 0 - 195
archivebox/plugins_search/sqlite/sqlite.py

@@ -1,195 +0,0 @@
-import codecs
-from typing import List, Generator
-import sqlite3
-
-from archivebox.util import enforce_types
-from archivebox.config import (
-    FTS_SEPARATE_DATABASE,
-    FTS_TOKENIZERS,
-    FTS_SQLITE_MAX_LENGTH
-)
-
-FTS_TABLE = "snapshot_fts"
-FTS_ID_TABLE = "snapshot_id_fts"
-FTS_COLUMN = "texts"
-
-if FTS_SEPARATE_DATABASE:
-    database = sqlite3.connect("search.sqlite3")
-    # Make get_connection callable, because `django.db.connection.cursor()`
-    # has to be called to get a context manager, but sqlite3.Connection
-    # is a context manager without being called.
-    def get_connection():
-        return database
-    SQLITE_BIND = "?"
-else:
-    from django.db import connection as database  # type: ignore[no-redef, assignment]
-    get_connection = database.cursor
-    SQLITE_BIND = "%s"
-
-# Only Python >= 3.11 supports sqlite3.Connection.getlimit(),
-# so fall back to the default if the API to get the real value isn't present
-try:
-    limit_id = sqlite3.SQLITE_LIMIT_LENGTH
-    try:
-        with database.temporary_connection() as cursor:  # type: ignore[attr-defined]
-            SQLITE_LIMIT_LENGTH = cursor.connection.getlimit(limit_id)
-    except AttributeError:
-        SQLITE_LIMIT_LENGTH = database.getlimit(limit_id)
-except AttributeError:
-    SQLITE_LIMIT_LENGTH = FTS_SQLITE_MAX_LENGTH
-
-
-def _escape_sqlite3(value: str, *, quote: str, errors='strict') -> str:
-    assert isinstance(quote, str), "quote is not a str"
-    assert len(quote) == 1, "quote must be a single character"
-
-    encodable = value.encode('utf-8', errors).decode('utf-8')
-
-    nul_index = encodable.find("\x00")
-    if nul_index >= 0:
-        error = UnicodeEncodeError("NUL-terminated utf-8", encodable,
-                                   nul_index, nul_index + 1, "NUL not allowed")
-        error_handler = codecs.lookup_error(errors)
-        replacement, _ = error_handler(error)
-        assert isinstance(replacement, str), "handling a UnicodeEncodeError should return a str replacement"
-        encodable = encodable.replace("\x00", replacement)
-
-    return quote + encodable.replace(quote, quote * 2) + quote
-
-def _escape_sqlite3_value(value: str, errors='strict') -> str:
-    return _escape_sqlite3(value, quote="'", errors=errors)
-
-def _escape_sqlite3_identifier(value: str) -> str:
-    return _escape_sqlite3(value, quote='"', errors='strict')
-
-@enforce_types
-def _create_tables():
-    table = _escape_sqlite3_identifier(FTS_TABLE)
-    # Escape as value, because fts5() expects
-    # string literal column names
-    column = _escape_sqlite3_value(FTS_COLUMN)
-    id_table = _escape_sqlite3_identifier(FTS_ID_TABLE)
-    tokenizers = _escape_sqlite3_value(FTS_TOKENIZERS)
-    trigger_name = _escape_sqlite3_identifier(f"{FTS_ID_TABLE}_ad")
-
-    with get_connection() as cursor:
-        # Create a contentless-delete FTS5 table that indexes
-        # but does not store the texts of snapshots
-        try:
-            cursor.execute(
-                f"CREATE VIRTUAL TABLE {table}"
-                f" USING fts5({column},"
-                f" tokenize={tokenizers},"
-                " content='', contentless_delete=1);"
-                )
-        except Exception as e:
-            msg = str(e)
-            if 'unrecognized option: "contentlessdelete"' in msg:
-                sqlite_version = getattr(sqlite3, "sqlite_version", "Unknown")
-                raise RuntimeError(
-                    "SQLite full-text search requires SQLite >= 3.43.0;"
-                    f" the running version is {sqlite_version}"
-                ) from e
-            else:
-                raise
-        # Create a one-to-one mapping between ArchiveBox snapshot_id
-        # and FTS5 rowid, because the column type of rowid can't be
-        # customized.
-        cursor.execute(
-            f"CREATE TABLE {id_table}("
-            " rowid INTEGER PRIMARY KEY AUTOINCREMENT,"
-            " snapshot_id char(32) NOT NULL UNIQUE"
-            ");"
-            )
-        # Create a trigger to delete items from the FTS5 index when
-        # the snapshot_id is deleted from the mapping, to maintain
-        # consistency and make the `flush()` query simpler.
-        cursor.execute(
-            f"CREATE TRIGGER {trigger_name}"
-            f" AFTER DELETE ON {id_table} BEGIN"
-            f" DELETE FROM {table} WHERE rowid=old.rowid;"
-            " END;"
-            )
-
-def _handle_query_exception(exc: Exception):
-    message = str(exc)
-    if message.startswith("no such table:"):
-        raise RuntimeError(
-            "SQLite full-text search index has not yet"
-            " been created; run `archivebox update --index-only`."
-        )
-    else:
-        raise exc
-
-@enforce_types
-def index(snapshot_id: str, texts: List[str]):
-    text = ' '.join(texts)[:SQLITE_LIMIT_LENGTH]
-
-    table = _escape_sqlite3_identifier(FTS_TABLE)
-    column = _escape_sqlite3_identifier(FTS_COLUMN)
-    id_table = _escape_sqlite3_identifier(FTS_ID_TABLE)
-
-    with get_connection() as cursor:
-        retries = 2
-        while retries > 0:
-            retries -= 1
-            try:
-                # If there is already an FTS index rowid to snapshot_id mapping,
-                # then don't insert a new one, silently ignoring the operation.
-                # {id_table}.rowid is AUTOINCREMENT, so will generate an unused
-                # rowid for the index if it is an unindexed snapshot_id.
-                cursor.execute(
-                    f"INSERT OR IGNORE INTO {id_table}(snapshot_id) VALUES({SQLITE_BIND})",
-                    [snapshot_id])
-                # Fetch the FTS index rowid for the given snapshot_id
-                id_res = cursor.execute(
-                    f"SELECT rowid FROM {id_table} WHERE snapshot_id = {SQLITE_BIND}",
-                    [snapshot_id])
-                rowid = id_res.fetchone()[0]
-                # (Re-)index the content
-                cursor.execute(
-                    "INSERT OR REPLACE INTO"
-                    f" {table}(rowid, {column}) VALUES ({SQLITE_BIND}, {SQLITE_BIND})",
-                    [rowid, text])
-                # All statements succeeded; return
-                return
-            except Exception as e:
-                if str(e).startswith("no such table:") and retries > 0:
-                    _create_tables()
-                else:
-                    raise
-
-    raise RuntimeError("Failed to create tables for SQLite FTS5 search")
-
-@enforce_types
-def search(text: str) -> List[str]:
-    table = _escape_sqlite3_identifier(FTS_TABLE)
-    id_table = _escape_sqlite3_identifier(FTS_ID_TABLE)
-
-    with get_connection() as cursor:
-        try:
-            res = cursor.execute(
-                f"SELECT snapshot_id FROM {table}"
-                f" INNER JOIN {id_table}"
-                f" ON {id_table}.rowid = {table}.rowid"
-                f" WHERE {table} MATCH {SQLITE_BIND}",
-                [text])
-        except Exception as e:
-            _handle_query_exception(e)
-
-        snap_ids = [row[0] for row in res.fetchall()]
-    return snap_ids
-
-@enforce_types
-def flush(snapshot_ids: Generator[str, None, None]):
-    snapshot_ids = list(snapshot_ids)  # type: ignore[assignment]
-
-    id_table = _escape_sqlite3_identifier(FTS_ID_TABLE)
-
-    with get_connection() as cursor:
-        try:
-            cursor.executemany(
-                f"DELETE FROM {id_table} WHERE snapshot_id={SQLITE_BIND}",
-                [snapshot_ids])
-        except Exception as e:
-            _handle_query_exception(e)

+ 1 - 0
archivebox/plugins_sys/config/apps.py

@@ -119,6 +119,7 @@ class SearchBackendConfig(BaseConfigSet):
     SEARCH_BACKEND_PORT: int            = Field(default=1491)
     SEARCH_BACKEND_PASSWORD: str        = Field(default='SecretPassword')
     SEARCH_PROCESS_HTML: bool           = Field(default=True)
+    SEARCH_BACKEND_TIMEOUT: int         = Field(default=10)
 
 SEARCH_BACKEND_CONFIG = SearchBackendConfig()
 

+ 4 - 7
archivebox/search/__init__.py

@@ -1,6 +1,5 @@
 from typing import List, Union
 from pathlib import Path
-from importlib import import_module
 
 from django.db.models import QuerySet
 from django.conf import settings
@@ -15,12 +14,10 @@ from .utils import get_indexable_content, log_index_started
 
 
 def import_backend():
-    backend_string = f'plugins_search.{settings.CONFIGS.SearchBackendConfig.SEARCH_BACKEND_ENGINE}.{settings.CONFIGS.SearchBackendConfig.SEARCH_BACKEND_ENGINE}'
-    try:
-        backend = import_module(backend_string)
-    except Exception as err:
-        raise Exception("Could not load '%s' as a backend: %s" % (backend_string, err))
-    return backend
+    for backend in settings.SEARCH_BACKENDS.values():
+        if backend.name == settings.CONFIGS.SearchBackendConfig.SEARCH_BACKEND_ENGINE:
+            return backend
+    raise Exception(f'Could not load {settings.CONFIGS.SearchBackendConfig.SEARCH_BACKEND_ENGINE} as search backend')
 
 @enforce_types
 def write_search_index(link: Link, texts: Union[List[str], None]=None, out_dir: Path=settings.DATA_DIR, skip_text_index: bool=False) -> None:
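
import_backend() now resolves the configured engine by name from the registry that each backend's register() call populates, rather than importing plugins_search.<engine>.<engine> by dotted string. Roughly, the lookup at query time behaves like this sketch (the registry keys are illustrative; the real keys are the hook ids assigned by BaseHook):

    from benedict import benedict

    # settings.SEARCH_BACKENDS after plugin registration, schematically:
    SEARCH_BACKENDS = benedict({
        'hook_id_ripgrep': RIPGREP_SEARCH_BACKEND,    # hypothetical ids
        'hook_id_sonic': SONIC_SEARCH_BACKEND,
    })

    def import_backend_sketch(engine: str):
        for backend in SEARCH_BACKENDS.values():
            if backend.name == engine:
                return backend
        raise Exception(f'Could not load {engine} as search backend')

    backend = import_backend_sketch('ripgrep')
    snapshot_pks = backend.search('some query')   # -> List[str] of Snapshot pks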