Selaa lähdekoodia

tag URLs immediately once added instead of waiting until archival completes

Nick Sweeting 1 vuosi sitten
vanhempi
sitoutus
c1fd2cfa42
3 muutettua tiedostoa, joissa 19 lisäystä ja 20 poistoa
  1. + 0 - 1
      archivebox/extractors/readability.py
  2. + 5 - 3
      archivebox/index/sql.py
  3. + 14 - 16
      archivebox/main.py

+ 0 - 1
archivebox/extractors/readability.py

@@ -67,7 +67,6 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO
             temp_doc.name,
             temp_doc.name,
             link.url,
             link.url,
         ]
         ]
-
         result = run(cmd, cwd=out_dir, timeout=timeout)
         result = run(cmd, cwd=out_dir, timeout=timeout)
         try:
         try:
             result_json = json.loads(result.stdout)
             result_json = json.loads(result.stdout)

+ 5 - 3
archivebox/index/sql.py

@@ -109,11 +109,13 @@ def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR) -> None:
         snap = Snapshot.objects.get(url=link.url)
         snap = Snapshot.objects.get(url=link.url)
     except Snapshot.DoesNotExist:
     except Snapshot.DoesNotExist:
         snap = write_link_to_sql_index(link)
         snap = write_link_to_sql_index(link)
+
     snap.title = link.title
     snap.title = link.title
 
 
-    tag_list = list(dict.fromkeys(
-        tag.strip() for tag in re.split(TAG_SEPARATOR_PATTERN, link.tags or '')
-    ))
+    tag_list = list(
+        {tag.strip() for tag in re.split(TAG_SEPARATOR_PATTERN, link.tags or '')}
+        | set(snap.tags.values_list('name', flat=True))
+    )
 
 
     snap.save()
     snap.save()
     snap.save_tags(tag_list)
     snap.save_tags(tag_list)

+ 14 - 16
archivebox/main.py

@@ -604,7 +604,7 @@ def add(urls: Union[str, List[str]],
         out_dir: Path=OUTPUT_DIR) -> List[Link]:
         out_dir: Path=OUTPUT_DIR) -> List[Link]:
     """Add a new URL or list of URLs to your archive"""
     """Add a new URL or list of URLs to your archive"""
 
 
-    from core.models import Tag
+    from core.models import Snapshot, Tag
 
 
     assert depth in (0, 1), 'Depth must be 0 or 1 (depth >1 is not supported yet)'
     assert depth in (0, 1), 'Depth must be 0 or 1 (depth >1 is not supported yet)'
 
 
@@ -648,6 +648,19 @@ def add(urls: Union[str, List[str]],
     write_main_index(links=new_links, out_dir=out_dir)
     write_main_index(links=new_links, out_dir=out_dir)
     all_links = load_main_index(out_dir=out_dir)
     all_links = load_main_index(out_dir=out_dir)
 
 
+    tags = [
+        Tag.objects.get_or_create(name=name.strip())[0]
+        for name in tag.split(',')
+        if name.strip()
+    ]
+    if tags:
+        for link in imported_links:
+            snapshot = Snapshot.objects.get(url=link.url)
+            snapshot.tags.add(*tags)
+            snapshot.tags_str(nocache=True)
+            snapshot.save()
+        # print(f'    √ Tagged {len(imported_links)} Snapshots with {len(tags)} tags {tags_str}')
+
     if index_only:
     if index_only:
         # mock archive all the links using the fake index_only extractor method in order to update their state
         # mock archive all the links using the fake index_only extractor method in order to update their state
         if overwrite:
         if overwrite:
@@ -679,21 +692,6 @@ def add(urls: Union[str, List[str]],
             stderr(f'[*] [{ts}] Archiving {len(new_links)}/{len(all_links)} URLs from added set...', color='green')
             stderr(f'[*] [{ts}] Archiving {len(new_links)}/{len(all_links)} URLs from added set...', color='green')
             archive_links(new_links, overwrite=False, **archive_kwargs)
             archive_links(new_links, overwrite=False, **archive_kwargs)
 
 
-
-    # add any tags to imported links
-    tags = [
-        Tag.objects.get_or_create(name=name.strip())[0]
-        for name in tag.split(',')
-        if name.strip()
-    ]
-    if tags:
-        for link in imported_links:
-            snapshot = link.as_snapshot()
-            snapshot.tags.add(*tags)
-            snapshot.tags_str(nocache=True)
-            snapshot.save()
-        # print(f'    √ Tagged {len(imported_links)} Snapshots with {len(tags)} tags {tags_str}')
-
     if CAN_UPGRADE:
     if CAN_UPGRADE:
         hint(f"There's a new version of ArchiveBox available! Your current version is {VERSION}. You can upgrade to {VERSIONS_AVAILABLE['recommended_version']['tag_name']} ({VERSIONS_AVAILABLE['recommended_version']['html_url']}). For more on how to upgrade: https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives\n")
         hint(f"There's a new version of ArchiveBox available! Your current version is {VERSION}. You can upgrade to {VERSIONS_AVAILABLE['recommended_version']['tag_name']} ({VERSIONS_AVAILABLE['recommended_version']['html_url']}). For more on how to upgrade: https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives\n")