|
@@ -8,6 +8,8 @@ from archivebox.index.schema import Link
|
|
|
from archivebox.util import enforce_types
|
|
from archivebox.util import enforce_types
|
|
|
from archivebox.config import setup_django,stderr, OUTPUT_DIR, USE_INDEXING_BACKEND, USE_SEARCHING_BACKEND, SEARCH_BACKEND_ENGINE
|
|
from archivebox.config import setup_django,stderr, OUTPUT_DIR, USE_INDEXING_BACKEND, USE_SEARCHING_BACKEND, SEARCH_BACKEND_ENGINE
|
|
|
|
|
|
|
|
|
|
+from .utils import get_indexable_content
|
|
|
|
|
+
|
|
|
def indexing_enabled():
    """Return the value of the ``USE_INDEXING_BACKEND`` config setting."""
    return USE_INDEXING_BACKEND
|
|
|
|
|
|
|
@@ -83,3 +85,17 @@ def flush_search_index(snapshots: QuerySet):
|
|
|
f'[X] The search backend threw an exception={err}:',
|
|
f'[X] The search backend threw an exception={err}:',
|
|
|
color='red',
|
|
color='red',
|
|
|
)
|
|
)
|
|
|
|
|
+
|
|
|
|
|
@enforce_types
def index_links(links: Union[List[Link],None], out_dir: Path=OUTPUT_DIR):
    """Write a search-index entry for each link that has a stored Snapshot.

    Does nothing when ``links`` is ``None`` or empty. Otherwise Django is set
    up against ``out_dir`` (with ``check_db=True``) and, for every link whose
    URL matches a ``Snapshot`` row, the content extracted from that
    snapshot's indexable ``ArchiveResult`` rows is handed to
    ``write_search_index``. Links without a matching snapshot are skipped.
    """
    if not links:
        return

    setup_django(out_dir=out_dir, check_db=True)
    # Imported only after setup_django() has run -- presumably the models
    # cannot be imported before Django is configured; confirm before moving.
    from core.models import Snapshot, ArchiveResult

    for link in links:
        # Only the first snapshot matching this URL is considered.
        snapshot = Snapshot.objects.filter(url=link.url).first()
        if not snapshot:
            continue

        indexable_results = ArchiveResult.objects.indexable().filter(snapshot=snapshot)
        content = get_indexable_content(indexable_results)
        write_search_index(link, content, out_dir=out_dir)
|