Forráskód Böngészése

also set snapshot title inside of fetch_title directly

Nick Sweeting 5 éve
szülő
commit
55a237a435
1 módosított fájl, 4 hozzáadás és 1 törlés
  1. 4 1
      archivebox/extractors/title.py

+ 4 - 1
archivebox/extractors/title.py

@@ -63,7 +63,10 @@ def save_title(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOUT) ->
         html = download_url(link.url, timeout=timeout)
         match = re.search(HTML_TITLE_REGEX, html)
         output = htmldecode(match.group(1).strip()) if match else None
-        if not output:
+        if output:
+            if not link.title or len(output) >= len(link.title):
+                Snapshot.objects.filter(url=link.url, timestamp=link.timestamp).update(title=output)
+        else:
             raise ArchiveError('Unable to detect page title')
     except Exception as err:
         status = 'failed'