浏览代码

fix pinboard RSS parser

Nick Sweeting 6 年之前
父节点
当前提交
f9a7c530b9
共有 2 个文件被更改,包括 5 次插入和 5 次删除
  1. 4 4
      archivebox/parse.py
  2. 1 1
      archivebox/util.py

+ 4 - 4
archivebox/parse.py

@@ -228,9 +228,9 @@ def parse_pinboard_rss_export(rss_file):
     items = root.findall("{http://purl.org/rss/1.0/}item")
     items = root.findall("{http://purl.org/rss/1.0/}item")
     for item in items:
     for item in items:
         url = item.find("{http://purl.org/rss/1.0/}link").text
         url = item.find("{http://purl.org/rss/1.0/}link").text
-        tags = item.find("{http://purl.org/dc/elements/1.1/}subject").text
-        title = item.find("{http://purl.org/rss/1.0/}title").text.strip()
-        ts_str = item.find("{http://purl.org/dc/elements/1.1/}date").text
+        tags = item.find("{http://purl.org/dc/elements/1.1/}subject").text if item.find("{http://purl.org/dc/elements/1.1/}subject") else None
+        title = item.find("{http://purl.org/rss/1.0/}title").text.strip() if item.find("{http://purl.org/rss/1.0/}title").text.strip() else None
+        ts_str = item.find("{http://purl.org/dc/elements/1.1/}date").text if item.find("{http://purl.org/dc/elements/1.1/}date").text else None
         #       = 🌈🌈🌈🌈
         #       = 🌈🌈🌈🌈
         #        = 🌈🌈🌈🌈
         #        = 🌈🌈🌈🌈
         #         = 🏆🏆🏆🏆
         #         = 🏆🏆🏆🏆
@@ -243,7 +243,7 @@ def parse_pinboard_rss_export(rss_file):
         info = {
         info = {
             'url': url,
             'url': url,
             'timestamp': str(time.timestamp()),
             'timestamp': str(time.timestamp()),
-            'tags': tags,
+            'tags': tags or '',
             'title': title or None,
             'title': title or None,
             'sources': [rss_file.name],
             'sources': [rss_file.name],
         }
         }

+ 1 - 1
archivebox/util.py

@@ -413,7 +413,7 @@ def derived_link_info(link):
         'is_archived': os.path.exists(os.path.join(
         'is_archived': os.path.exists(os.path.join(
             ARCHIVE_DIR,
             ARCHIVE_DIR,
             link['timestamp'],
             link['timestamp'],
-            wget_output_path(link) or domain(url)
+            domain(url),
         )),
         )),
         'num_outputs': len([entry for entry in link['latest'].values() if entry]) if 'latest' in link else 0,
         'num_outputs': len([entry for entry in link['latest'].values() if entry]) if 'latest' in link else 0,
     }
     }