Kaynağa Gözat

add video subtitles and description to full-text index

Nick Sweeting 4 yıl önce
ebeveyn
işleme
193df5c8d3

+ 1 - 0
archivebox/config.py

@@ -126,6 +126,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
                                                                 '--write-sub',
                                                                 '--all-subs',
                                                                 '--write-auto-sub',
+                                                                '--convert-subs=srt',
                                                                 '--yes-playlist',
                                                                 '--continue',
                                                                 '--ignore-errors',

+ 1 - 1
archivebox/core/models.py

@@ -41,7 +41,7 @@ class Tag(models.Model):
     Based on django-taggit model
     """
     id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID')
-    
+
     name = models.CharField(unique=True, blank=False, max_length=100)
 
     # slug is autoset on save from name, never set it manually

+ 13 - 0
archivebox/extractors/media.py

@@ -70,11 +70,24 @@ def save_media(link: Link, out_dir: Optional[Path]=None, timeout: int=MEDIA_TIME
     finally:
         timer.end()
 
+    # add video description and subtitles to full-text index
+    index_texts = [
+        text_file.read_text(encoding='utf-8').strip()
+        for text_file in (
+            *output_path.glob('*.description'),
+            *output_path.glob('*.srt'),
+            *output_path.glob('*.vtt'),
+            *output_path.glob('*.lrc'),
+            *output_path.glob('*.lrc'),
+        )
+    ]
+
     return ArchiveResult(
         cmd=cmd,
         pwd=str(out_dir),
         cmd_version=YOUTUBEDL_VERSION,
         output=output,
         status=status,
+        index_texts=index_texts,
         **timer.stats,
     )