Browse Source

don't match closing tags in full text

Nick Sweeting 6 years ago
parent
commit
c37941efd1
1 changed file with 1 addition and 1 deletion
  1. 1 1
      archivebox/util.py

+ 1 - 1
archivebox/util.py

@@ -43,7 +43,7 @@ base_url = lambda url: without_scheme(url)  # uniq base url used to dedupe links
 
 
 short_ts = lambda ts: ts.split('.')[0]
 short_ts = lambda ts: ts.split('.')[0]
 
 
-URL_REGEX = 'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
+URL_REGEX = 'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))[^<]+'
 
 
 
 
 def check_dependencies():
 def check_dependencies():