Bläddra i källkod

tweak wording of parser cli output

Nick Sweeting 6 år sedan
förälder
incheckning
ee93807a0a
2 ändrade filer med 26 tillägg och 18 borttagningar
  1. +3 −4   archivebox/archive.py
  2. +23 −14  archivebox/parse.py

+ 3 - 4
archivebox/archive.py

@@ -67,14 +67,13 @@ def merge_links(archive_path=OUTPUT_DIR, import_path=None, only_new=False):
     if archive_path:
         existing_links = parse_json_links_index(archive_path)
         all_links = validate_links(existing_links + all_links)
-    
+
     num_new_links = len(all_links) - len(existing_links)
     if num_new_links and not only_new:
-        print('[{green}+{reset}] [{}] Adding {} new links from {} to {}/index.json (detected {} format)'.format(
+        print('{green}[+] [{}] Adding {} new links to index from {} ({} format){reset}'.format(
             datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
             num_new_links,
             pretty_path(import_path),
-            pretty_path(archive_path),
             parser_name,
             **ANSI,
         ))
@@ -103,7 +102,7 @@ def update_archive(archive_path, links, source=None, resume=None, append=True):
              **ANSI,
         ))
     else:
-        print('{green}[▶] [{}] Updating files for {} links in archive...{reset}'.format(
+        print('{green}[▶] [{}] Downloading content for {} pages in archive...{reset}'.format(
              datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
              len(links),
              **ANSI,

+ 23 - 14
archivebox/parse.py

@@ -25,6 +25,7 @@ import xml.etree.ElementTree as etree
 
 from datetime import datetime
 
+from config import ANSI
 from util import (
     domain,
     base_url,
@@ -39,14 +40,14 @@ def get_parsers(file):
     """return all parsers that work on a given file, defaults to all of them"""
 
     return OrderedDict([
-        ('pocket', parse_pocket_export),
-        ('pinboard', parse_json_export),
-        ('bookmarks', parse_bookmarks_export),
-        ('rss', parse_rss_export),
-        ('pinboard_rss', parse_pinboard_rss_feed),
-        ('shaarli_rss', parse_shaarli_rss_export),
-        ('medium_rss', parse_medium_rss_feed),
-        ('plain_text', parse_plain_text),
+        ('Pocket HTML', parse_pocket_html_export),
+        ('Pinboard JSON', parse_pinboard_json_export),
+        ('Netscape HTML', parse_netscape_html_export),
+        ('RSS', parse_rss_export),
+        ('Pinboard RSS', parse_pinboard_rss_export),
+        ('Shaarli RSS', parse_shaarli_rss_export),
+        ('Medium RSS', parse_medium_rss_export),
+        ('Plain Text', parse_plain_text_export),
     ])
 
 def parse_links(path):
 
 
 def parse_links(path):
 def parse_links(path):
@@ -54,6 +55,12 @@ def parse_links(path):
     
     links = []
     with open(path, 'r', encoding='utf-8') as file:
+        print('{green}[*] [{}] Parsing new links from output/sources/{} and fetching titles...{reset}'.format(
+            datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
+            path.rsplit('/', 1)[-1],
+            **ANSI,
+        ))
+
         for parser_name, parser_func in get_parsers(file).items():
             # otherwise try all parsers until one works
             try:
@@ -64,10 +71,12 @@ def parse_links(path):
                 # parser not supported on this file
                 pass
 
+    print()
+
     return links, parser_name
 
 
-def parse_pocket_export(html_file):
+def parse_pocket_html_export(html_file):
     """Parse Pocket-format bookmarks export files (produced by getpocket.com/export/)"""
 
     html_file.seek(0)
@@ -91,7 +100,7 @@ def parse_pocket_export(html_file):
             info['type'] = get_link_type(info)
             yield info
 
-def parse_json_export(json_file):
+def parse_pinboard_json_export(json_file):
     """Parse JSON-format bookmarks export files (produced by pinboard.in/export/, or wallabag)"""
     json_file.seek(0)
     json_content = json.load(json_file)
@@ -210,7 +219,7 @@ def parse_shaarli_rss_export(rss_file):
 
         yield info
 
-def parse_bookmarks_export(html_file):
+def parse_netscape_html_export(html_file):
     """Parse netscape-format bookmarks export files (produced by all browsers)"""
 
     html_file.seek(0)
@@ -237,7 +246,7 @@ def parse_netscape_html_export(html_file):
 
             yield info
 
-def parse_pinboard_rss_feed(rss_file):
+def parse_pinboard_rss_export(rss_file):
     """Parse Pinboard RSS feed files into links"""
 
     rss_file.seek(0)
@@ -269,7 +278,7 @@ def parse_pinboard_rss_feed(rss_file):
         info['type'] = get_link_type(info)
         yield info
 
-def parse_medium_rss_feed(rss_file):
+def parse_medium_rss_export(rss_file):
     """Parse Medium RSS feed files into links"""
 
     rss_file.seek(0)
@@ -295,7 +304,7 @@ def parse_medium_rss_feed(rss_file):
         yield info
 
 
-def parse_plain_text(text_file):
+def parse_plain_text_export(text_file):
     """Parse raw links from each line in a text file"""
 
     text_file.seek(0)