5 年之前 · 95382b3812
--- a/Dockerfile
+++ b/Dockerfile
@@ -46,7 +46,7 @@ RUN apt-get update -qq \
 
				 # Install apt dependencies
			
 
				 RUN apt-get update -qq \
			
 
				     && apt-get install -qq -y --no-install-recommends \
			
 
				-        wget curl chromium git ffmpeg youtube-dl \
			
 
				+        wget curl chromium git ffmpeg youtube-dl ripgrep \
			
 
				         fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
			
 
				     && rm -rf /var/lib/apt/lists/*
			
 
				 
			
--- a/archivebox/config.py
+++ b/archivebox/config.py
@@ -142,7 +142,7 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
 
				     'SEARCH_BACKEND_CONFIG' : {
			
 
				         'USE_INDEXING_BACKEND':     {'type': bool,  'default': True},
			
 
				         'USE_SEARCHING_BACKEND':    {'type': bool,  'default': True},
			
 
				-        'SEARCH_BACKEND_ENGINE':    {'type': str,   'default': 'sonic'},
			
 
				+        'SEARCH_BACKEND_ENGINE':    {'type': str,   'default': 'ripgrep'},
			
 
				         'SEARCH_BACKEND_HOST_NAME': {'type': str,   'default': 'localhost'},
			
 
				         'SEARCH_BACKEND_PORT':      {'type': int,   'default': 1491},
			
 
				         'SEARCH_BACKEND_PASSWORD':  {'type': str,   'default': 'SecretPassword'},
			
--- a/archivebox/search/backends/ripgrep.py
+++ b/archivebox/search/backends/ripgrep.py
@@ -0,0 +1,43 @@
 
				+import re
			
 
				+from subprocess import run, PIPE, DEVNULL
			
 
				+from typing import List, Generator
			
 
				+
			
 
				+from archivebox.config import setup_django, ARCHIVE_DIR, ARCHIVE_DIR_NAME
			
 
				+from archivebox.util import enforce_types
			
 
				+
			
 
				+# ripgrep short flags bundled into one argument:
			
 
				+#   -i  case-insensitive matching
			
 
				+#   -l  print only the paths of files that contain a match
			
 
				+#   -t  restrict the search to the file type given by the next argument
			
 
				+DEFAULT_ARGUMENTS = '-ilt' # Case insensitive, matching files, types
			
 
				+# File type passed right after DEFAULT_ARGUMENTS, i.e. the value consumed by -t.
			
 
				+DEFAULT_EXTENSIONS = 'html'
			
 
				+# Flag placed immediately before the search pattern on the rg command line.
			
 
				+REGEX_ARGUMENT = '-e'
			
 
				+
			
 
				+# Captures a "<digits>.<digits>" path segment enclosed in slashes; search() uses
			
 
				+# the captured value as a snapshot timestamp.
			
 
				+TIMESTAMP_REGEX = r'\/([\d]+\.[\d]+)\/'
			
 
				+
			
 
				+# Compiled once at import time and reused by every search() call.
			
 
				+ts_regex =  re.compile(TIMESTAMP_REGEX)
			
 
				+
			
 
				+@enforce_types
			
 
				+def index(snapshot_id: str, texts: List[str]):
			
 
				+    """No-op: nothing is stored for this backend; both arguments are ignored."""
			
 
				+    return None
			
 
				+
			
 
				+@enforce_types
			
 
				+def flush(snapshot_ids: Generator[str, None, None]):
			
 
				+    """No-op: nothing is removed for this backend; the generator is not consumed."""
			
 
				+    return None
			
 
				+
			
 
				+@enforce_types
			
 
				+def search(text: str) -> List[str]:
			
 
				+    """Return the primary keys of snapshots whose archived HTML matches *text*.
			
 
				+
			
 
				+    Runs ripgrep over ARCHIVE_DIR, pulls the timestamp directory out of each
			
 
				+    matching file path, and looks those timestamps up in the Snapshot table.
			
 
				+
			
 
				+    Args:
			
 
				+        text: Pattern handed to ripgrep after ``-e``.
			
 
				+
			
 
				+    Returns:
			
 
				+        Snapshot primary keys as strings; empty when nothing matched.
			
 
				+
			
 
				+    Raises:
			
 
				+        Exception: If no ``rg`` executable can be found.
			
 
				+        subprocess.TimeoutExpired: If ripgrep runs for more than 60 seconds.
			
 
				+    """
			
 
				+    # Function-scope import so this block does not depend on a file-level change.
			
 
				+    from shutil import which
			
 
				+
			
 
				+    # shutil.which is portable and avoids spawning a `which` process per query.
			
 
				+    if which('rg') is None:
			
 
				+        raise Exception("rg binary not found, install ripgrep to use this backend")
			
 
				+
			
 
				+    setup_django(check_db=True)
			
 
				+    from core.models import Snapshot
			
 
				+
			
 
				+    archive_root = str(ARCHIVE_DIR)
			
 
				+    rg = run(
			
 
				+        ['rg', DEFAULT_ARGUMENTS, DEFAULT_EXTENSIONS, REGEX_ARGUMENT, text, archive_root],
			
 
				+        stdout=PIPE,
			
 
				+        stderr=PIPE,
			
 
				+        timeout=60,
			
 
				+    )
			
 
				+
			
 
				+    # Length of the archive root without any trailing slash, so the remainder of
			
 
				+    # each path still starts with the "/" that ts_regex requires.
			
 
				+    root_length = len(archive_root.rstrip('/'))
			
 
				+
			
 
				+    timestamps = set()
			
 
				+    for raw_path in rg.stdout.splitlines():
			
 
				+        # Only the ASCII timestamp directory matters, so undecodable bytes
			
 
				+        # elsewhere in a file name must not abort the whole search.
			
 
				+        path = raw_path.decode(errors='replace')
			
 
				+        # Look only below the archive root so digits in the data directory's
			
 
				+        # own path are never mistaken for a snapshot timestamp.
			
 
				+        if path.startswith(archive_root):
			
 
				+            path = path[root_length:]
			
 
				+        if match := ts_regex.search(path):
			
 
				+            timestamps.add(match.group(1))
			
 
				+
			
 
				+    matching_pks = Snapshot.objects.filter(timestamp__in=timestamps).values_list('pk', flat=True)
			
 
				+    return [str(pk) for pk in matching_pks]
			
 
				+
			
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -21,21 +21,8 @@ services:
 
				         environment:
			
 
				             - USE_COLOR=True
			
 
				             - SHOW_PROGRESS=False
			
 
				-            - SEARCH_BACKEND_HOST_NAME=sonic
			
 
				-            - SEARCH_BACKEND_PASSWORD=SecretPassword
			
 
				         volumes:
			
 
				             - ./data:/data
			
 
				-        depends_on:
			
 
				-            - sonic
			
 
				-    sonic:
			
 
				-        image: valeriansaliou/sonic:v1.3.0    
			
 
				-        ports:
			
 
				-            - 1491:1491
			
 
				-        environment:
			
 
				-            - SEARCH_BACKEND_PASSWORD=SecretPassword
			
 
				-        volumes:
			
 
				-            - ./etc/sonic/config.cfg:/etc/sonic.cfg
			
 
				-            - ./data:/var/lib/sonic/store/
			
 
				     
			
 
				 
			
 
				 
			
@@ -87,3 +74,14 @@ services:
 
				     #     volumes:
			
 
				     #         ./data:/archivebox
			
 
				     #         ./data/wayback:/webarchive
			
 
				+
			
 
				+    # Example: Run sonic search backend
			
 
				+    # sonic:
			
 
				+    #    image: valeriansaliou/sonic:v1.3.0    
			
 
				+    #    ports:
			
 
				+    #        - 1491:1491
			
 
				+    #    environment:
			
 
				+    #        - SEARCH_BACKEND_PASSWORD=SecretPassword
			
 
				+    #    volumes:
			
 
				+    #        - ./etc/sonic/config.cfg:/etc/sonic.cfg
			
 
				+    #        - ./data:/var/lib/sonic/store/