6 years ago · eff0100971
--- a/archivebox/archive.py
+++ b/archivebox/archive.py
@@ -59,7 +59,6 @@ def load_links(archive_path=OUTPUT_DIR, import_path=None):
 
															     existing_links = []
														
 
															     if archive_path:
														
 
															         existing_links = parse_json_links_index(archive_path)
														
 
															-        existing_links = validate_links(existing_links)
														
 
															     new_links = []
														
 
															     if import_path:
														
@@ -178,6 +177,7 @@ if __name__ == '__main__':
 
															     elif stdin_raw_text:
														
 
															         source = save_source(stdin_raw_text)
														
 
															+
														
 
															     # Step 1: Parse the links and dedupe them with existing archive
														
 
															     all_links, new_links = load_links(archive_path=out_dir, import_path=source)
														
--- a/archivebox/parse.py
+++ b/archivebox/parse.py
@@ -161,7 +161,7 @@ def parse_rss_export(rss_file):
 
															         rows = leading_removed.split('\n')
														
 
															         def get_row(key):
														
 
															-            return [r for r in rows if r.startswith('<{}>'.format(key))][0]
														
 
															+            return [r for r in rows if r.strip().startswith('<{}>'.format(key))][0]
														
 
															         title = str_between(get_row('title'), '<![CDATA[', ']]').strip()
														
 
															         url = str_between(get_row('link'), '<link>', '</link>')
														
@@ -209,7 +209,6 @@ def parse_shaarli_rss_export(rss_file):
 
															         ts_str = str_between(get_row('published'), '<published>', '</published>')
														
 
															         time = datetime.strptime(ts_str, "%Y-%m-%dT%H:%M:%S%z")
														
 
															-
														
 
															         info = {
														
 
															             'url': url,
														
 
															             'domain': domain(url),