Browse Source

Add --tag CLI option

Nick Sweeting 4 years ago
parent
commit
fea0b89dbe
3 changed files with 40 additions and 14 deletions
  1. 7 0
      archivebox/cli/archivebox_add.py
  2. 3 0
      archivebox/core/models.py
  3. 30 14
      archivebox/main.py

+ 7 - 0
archivebox/cli/archivebox_add.py

@@ -22,6 +22,12 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         add_help=True,
         formatter_class=SmartFormatter,
     )
+    parser.add_argument(
+        '--tag', '-t',
+        type=str,
+        default='',
+        help="Tag the added URLs with the provided tags e.g. --tag=tag1,tag2,tag3",
+    )
     parser.add_argument(
         '--update-all', #'-n',
         action='store_true',
@@ -89,6 +95,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
     add(
         urls=stdin_urls or urls,
         depth=command.depth,
+        tag=command.tag,
         update_all=command.update_all,
         index_only=command.index_only,
         overwrite=command.overwrite,

+ 3 - 0
archivebox/core/models.py

@@ -33,8 +33,11 @@ class Tag(models.Model):
     Based on django-taggit model
     """
     name = models.CharField(unique=True, blank=False, max_length=100)
+
+    # slug is auto-set from name on save; never set it manually
     slug = models.SlugField(unique=True, blank=True, max_length=100)
 
+
     class Meta:
         verbose_name = "Tag"
         verbose_name_plural = "Tags"

+ 30 - 14
archivebox/main.py

@@ -561,6 +561,7 @@ def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR):
 
 @enforce_types
 def add(urls: Union[str, List[str]],
+        tag: str='',
         depth: int=0,
         update_all: bool=not ONLY_NEW,
         index_only: bool=False,
@@ -570,6 +571,8 @@ def add(urls: Union[str, List[str]],
         out_dir: Path=OUTPUT_DIR) -> List[Link]:
     """Add a new URL or list of URLs to your archive"""
 
+    from core.models import Tag
+
     assert depth in (0, 1), 'Depth must be 0 or 1 (depth >1 is not supported yet)'
 
     extractors = extractors.split(",") if extractors else []
@@ -602,31 +605,44 @@ def add(urls: Union[str, List[str]],
             new_links_depth += parse_links_from_source(downloaded_file, root_url=new_link.url)
 
     imported_links = list({link.url: link for link in (new_links + new_links_depth)}.values())
+    
     new_links = dedupe_links(all_links, imported_links)
 
     write_main_index(links=new_links, out_dir=out_dir)
     all_links = load_main_index(out_dir=out_dir)
 
+    # add any tags to imported links
+    tags = [
+        Tag.objects.get_or_create(name=name.strip())
+        for name in tag.split(',')
+        if name.strip()
+    ]
+    if tags:
+        for link in imported_links:
+            link.as_snapshot().tags.add(*tags)
+
+    
     if index_only:
+        # "mock archive" all the links using the fake index_only extractor method so that their index state gets updated
         if overwrite:
             archive_links(imported_links, overwrite=overwrite, methods=['index_only'], out_dir=out_dir)
         else:
             archive_links(new_links, overwrite=False, methods=['index_only'], out_dir=out_dir)
-        return all_links
-
-    # Run the archive methods for each link
-    archive_kwargs = {
-        "out_dir": out_dir,
-    }
-    if extractors:
-        archive_kwargs["methods"] = extractors
+    else:
+        # fully run the archive extractor methods for each link
+        archive_kwargs = {
+            "out_dir": out_dir,
+        }
+        if extractors:
+            archive_kwargs["methods"] = extractors
+
+        if update_all:
+            archive_links(all_links, overwrite=overwrite, **archive_kwargs)
+        elif overwrite:
+            archive_links(imported_links, overwrite=True, **archive_kwargs)
+        elif new_links:
+            archive_links(new_links, overwrite=False, **archive_kwargs)
 
-    if update_all:
-        archive_links(all_links, overwrite=overwrite, **archive_kwargs)
-    elif overwrite:
-        archive_links(imported_links, overwrite=True, **archive_kwargs)
-    elif new_links:
-        archive_links(new_links, overwrite=False, **archive_kwargs)
 
     return all_links