6 жил өмнө · c9c5b04df0
--- a/archivebox/archive.py
+++ b/archivebox/archive.py
@@ -24,6 +24,7 @@ from config import (
 
				     GIT_SHA,
			
 
				 )
			
 
				 from util import (
			
 
				+    enforce_types,
			
 
				     save_remote_source,
			
 
				     save_stdin_source,
			
 
				 )
			
@@ -100,7 +101,8 @@ def main(*args) -> List[Link]:
 
				     return update_archive_data(import_path=import_path, resume=resume)
			
 
				 
			
 
				 
			
 
				-def update_archive_data(import_path: str=None, resume: float=None) -> List[Link]:
			
 
				+@enforce_types
			
 
				+def update_archive_data(import_path: Optional[str]=None, resume: Optional[float]=None) -> List[Link]:
			
 
				     """The main ArchiveBox entrancepoint. Everything starts here."""
			
 
				 
			
 
				     # Step 1: Load list of links from the existing index
			
--- a/archivebox/archive_methods.py
+++ b/archivebox/archive_methods.py
@@ -42,6 +42,7 @@ from config import (
 
				     YOUTUBEDL_VERSION,
			
 
				 )
			
 
				 from util import (
			
 
				+    enforce_types,
			
 
				     domain,
			
 
				     extension,
			
 
				     without_query,
			
@@ -63,6 +64,7 @@ from logs import (
 
				 )
			
 
				 
			
 
				 
			
 
				+@enforce_types
			
 
				 def archive_link(link: Link, page=None) -> Link:
			
 
				     """download the DOM, PDF, and a screenshot into a folder named after the link's timestamp"""
			
 
				 
			
@@ -126,6 +128,7 @@ def archive_link(link: Link, page=None) -> Link:
 
				 
			
 
				 ### Archive Method Functions
			
 
				 
			
 
				+@enforce_types
			
 
				 def should_fetch_title(link_dir: str, link: Link) -> bool:
			
 
				     # if link already has valid title, skip it
			
 
				     if link.title and not link.title.lower().startswith('http'):
			
@@ -136,6 +139,7 @@ def should_fetch_title(link_dir: str, link: Link) -> bool:
 
				 
			
 
				     return FETCH_TITLE
			
 
				 
			
 
				+@enforce_types
			
 
				 def fetch_title(link_dir: str, link: Link, timeout: int=TIMEOUT) -> ArchiveResult:
			
 
				     """try to guess the page's title from its content"""
			
 
				 
			
@@ -169,12 +173,14 @@ def fetch_title(link_dir: str, link: Link, timeout: int=TIMEOUT) -> ArchiveResul
 
				     )
			
 
				 
			
 
				 
			
 
				+@enforce_types
			
 
				 def should_fetch_favicon(link_dir: str, link: Link) -> bool:
			
 
				     if os.path.exists(os.path.join(link_dir, 'favicon.ico')):
			
 
				         return False
			
 
				 
			
 
				     return FETCH_FAVICON
			
 
				-
			
 
				+    
			
 
				+@enforce_types
			
 
				 def fetch_favicon(link_dir: str, link: Link, timeout: int=TIMEOUT) -> ArchiveResult:
			
 
				     """download site favicon from google's favicon api"""
			
 
				 
			
@@ -207,6 +213,7 @@ def fetch_favicon(link_dir: str, link: Link, timeout: int=TIMEOUT) -> ArchiveRes
 
				         **timer.stats,
			
 
				     )
			
 
				 
			
 
				+@enforce_types
			
 
				 def should_fetch_wget(link_dir: str, link: Link) -> bool:
			
 
				     output_path = wget_output_path(link)
			
 
				     if output_path and os.path.exists(os.path.join(link_dir, output_path)):
			
@@ -215,6 +222,7 @@ def should_fetch_wget(link_dir: str, link: Link) -> bool:
 
				     return FETCH_WGET
			
 
				 
			
 
				 
			
 
				+@enforce_types
			
 
				 def fetch_wget(link_dir: str, link: Link, timeout: int=TIMEOUT) -> ArchiveResult:
			
 
				     """download full site using wget"""
			
 
				 
			
@@ -294,6 +302,7 @@ def fetch_wget(link_dir: str, link: Link, timeout: int=TIMEOUT) -> ArchiveResult
 
				         **timer.stats,
			
 
				     )
			
 
				 
			
 
				+@enforce_types
			
 
				 def should_fetch_pdf(link_dir: str, link: Link) -> bool:
			
 
				     if is_static_file(link.url):
			
 
				         return False
			
@@ -304,6 +313,7 @@ def should_fetch_pdf(link_dir: str, link: Link) -> bool:
 
				     return FETCH_PDF
			
 
				 
			
 
				 
			
 
				+@enforce_types
			
 
				 def fetch_pdf(link_dir: str, link: Link, timeout: int=TIMEOUT) -> ArchiveResult:
			
 
				     """print PDF of site to file using chrome --headless"""
			
 
				 
			
@@ -338,6 +348,7 @@ def fetch_pdf(link_dir: str, link: Link, timeout: int=TIMEOUT) -> ArchiveResult:
 
				         **timer.stats,
			
 
				     )
			
 
				 
			
 
				+@enforce_types
			
 
				 def should_fetch_screenshot(link_dir: str, link: Link) -> bool:
			
 
				     if is_static_file(link.url):
			
 
				         return False
			
@@ -347,6 +358,7 @@ def should_fetch_screenshot(link_dir: str, link: Link) -> bool:
 
				 
			
 
				     return FETCH_SCREENSHOT
			
 
				 
			
 
				+@enforce_types
			
 
				 def fetch_screenshot(link_dir: str, link: Link, timeout: int=TIMEOUT) -> ArchiveResult:
			
 
				     """take screenshot of site using chrome --headless"""
			
 
				 
			
@@ -381,6 +393,7 @@ def fetch_screenshot(link_dir: str, link: Link, timeout: int=TIMEOUT) -> Archive
 
				         **timer.stats,
			
 
				     )
			
 
				 
			
 
				+@enforce_types
			
 
				 def should_fetch_dom(link_dir: str, link: Link) -> bool:
			
 
				     if is_static_file(link.url):
			
 
				         return False
			
@@ -390,6 +403,7 @@ def should_fetch_dom(link_dir: str, link: Link) -> bool:
 
				 
			
 
				     return FETCH_DOM
			
 
				     
			
 
				+@enforce_types
			
 
				 def fetch_dom(link_dir: str, link: Link, timeout: int=TIMEOUT) -> ArchiveResult:
			
 
				     """print HTML of site to file using chrome --dump-html"""
			
 
				 
			
@@ -426,6 +440,7 @@ def fetch_dom(link_dir: str, link: Link, timeout: int=TIMEOUT) -> ArchiveResult:
 
				         **timer.stats,
			
 
				     )
			
 
				 
			
 
				+@enforce_types
			
 
				 def should_fetch_git(link_dir: str, link: Link) -> bool:
			
 
				     if is_static_file(link.url):
			
 
				         return False
			
@@ -443,6 +458,7 @@ def should_fetch_git(link_dir: str, link: Link) -> bool:
 
				     return FETCH_GIT
			
 
				 
			
 
				 
			
 
				+@enforce_types
			
 
				 def fetch_git(link_dir: str, link: Link, timeout: int=TIMEOUT) -> ArchiveResult:
			
 
				     """download full site using git"""
			
 
				 
			
@@ -485,6 +501,7 @@ def fetch_git(link_dir: str, link: Link, timeout: int=TIMEOUT) -> ArchiveResult:
 
				     )
			
 
				 
			
 
				 
			
 
				+@enforce_types
			
 
				 def should_fetch_media(link_dir: str, link: Link) -> bool:
			
 
				     if is_static_file(link.url):
			
 
				         return False
			
@@ -494,6 +511,7 @@ def should_fetch_media(link_dir: str, link: Link) -> bool:
 
				 
			
 
				     return FETCH_MEDIA
			
 
				 
			
 
				+@enforce_types
			
 
				 def fetch_media(link_dir: str, link: Link, timeout: int=MEDIA_TIMEOUT) -> ArchiveResult:
			
 
				     """Download playlists or individual video, audio, and subtitles using youtube-dl"""
			
 
				 
			
@@ -557,6 +575,7 @@ def fetch_media(link_dir: str, link: Link, timeout: int=MEDIA_TIMEOUT) -> Archiv
 
				     )
			
 
				 
			
 
				 
			
 
				+@enforce_types
			
 
				 def should_fetch_archive_dot_org(link_dir: str, link: Link) -> bool:
			
 
				     if is_static_file(link.url):
			
 
				         return False
			
@@ -567,6 +586,7 @@ def should_fetch_archive_dot_org(link_dir: str, link: Link) -> bool:
 
				 
			
 
				     return SUBMIT_ARCHIVE_DOT_ORG
			
 
				 
			
 
				+@enforce_types
			
 
				 def archive_dot_org(link_dir: str, link: Link, timeout: int=TIMEOUT) -> ArchiveResult:
			
 
				     """submit site to archive.org for archiving via their service, save returned archive url"""
			
 
				 
			
@@ -622,6 +642,7 @@ def archive_dot_org(link_dir: str, link: Link, timeout: int=TIMEOUT) -> ArchiveR
 
				         **timer.stats,
			
 
				     )
			
 
				 
			
 
				+@enforce_types
			
 
				 def parse_archive_dot_org_response(response: bytes) -> Tuple[List[str], List[str]]:
			
 
				     # Parse archive.org response headers
			
 
				     headers: Dict[str, List[str]] = defaultdict(list)
			
--- a/archivebox/config.py
+++ b/archivebox/config.py
@@ -46,6 +46,10 @@ CHROME_USER_DATA_DIR =   os.getenv('CHROME_USER_DATA_DIR',   None)
 
				 CHROME_HEADLESS =        os.getenv('CHROME_HEADLESS',        'True'             ).lower() == 'true'
			
 
				 CHROME_USER_AGENT =      os.getenv('CHROME_USER_AGENT',      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36')
			
 
				 
			
 
				+USE_CURL =               os.getenv('USE_CURL',               'True'             ).lower() == 'true'
			
 
				+USE_WGET =               os.getenv('USE_WGET',               'True'             ).lower() == 'true'
			
 
				+USE_CHROME =             os.getenv('USE_CHROME',             'True'             ).lower() == 'true'
			
 
				+
			
 
				 CURL_BINARY =            os.getenv('CURL_BINARY',            'curl')
			
 
				 GIT_BINARY =             os.getenv('GIT_BINARY',             'git')
			
 
				 WGET_BINARY =            os.getenv('WGET_BINARY',            'wget')
			
@@ -195,13 +199,19 @@ try:
 
				         print('        env PYTHONIOENCODING=UTF-8 ./archive.py export.html')
			
 
				 
			
 
				     ### Make sure curl is installed
			
 
				-    USE_CURL = FETCH_FAVICON or SUBMIT_ARCHIVE_DOT_ORG
			
 
				+    if USE_CURL:
			
 
				+        USE_CURL = FETCH_FAVICON or SUBMIT_ARCHIVE_DOT_ORG
			
 
				+    else:
			
 
				+        FETCH_FAVICON = SUBMIT_ARCHIVE_DOT_ORG = False
			
 
				     CURL_VERSION = None
			
 
				     if USE_CURL:
			
 
				         CURL_VERSION = check_version(CURL_BINARY)
			
 
				 
			
 
				     ### Make sure wget is installed and calculate version
			
 
				-    USE_WGET = FETCH_WGET or FETCH_WARC
			
 
				+    if USE_WGET:
			
 
				+        USE_WGET = FETCH_WGET or FETCH_WARC
			
 
				+    else:
			
 
				+        FETCH_WGET = FETCH_WARC = False
			
 
				     WGET_VERSION = None
			
 
				     if USE_WGET:
			
 
				         WGET_VERSION = check_version(WGET_BINARY)
			
@@ -222,17 +232,21 @@ try:
 
				         check_version(YOUTUBEDL_BINARY)
			
 
				 
			
 
				     ### Make sure chrome is installed and calculate version
			
 
				-    USE_CHROME = FETCH_PDF or FETCH_SCREENSHOT or FETCH_DOM
			
 
				+    if USE_CHROME:
			
 
				+        USE_CHROME = FETCH_PDF or FETCH_SCREENSHOT or FETCH_DOM
			
 
				+    else:
			
 
				+        FETCH_PDF = FETCH_SCREENSHOT = FETCH_DOM = False
			
 
				     CHROME_VERSION = None
			
 
				     if USE_CHROME:
			
 
				         if CHROME_BINARY is None:
			
 
				             CHROME_BINARY = find_chrome_binary()
			
 
				-        CHROME_VERSION = check_version(CHROME_BINARY)
			
 
				-        # print('[i] Using Chrome binary: {}'.format(shutil.which(CHROME_BINARY) or CHROME_BINARY))
			
 
				+        if CHROME_BINARY:
			
 
				+            CHROME_VERSION = check_version(CHROME_BINARY)
			
 
				+            # print('[i] Using Chrome binary: {}'.format(shutil.which(CHROME_BINARY) or CHROME_BINARY))
			
 
				 
			
 
				-        if CHROME_USER_DATA_DIR is None:
			
 
				-            CHROME_USER_DATA_DIR = find_chrome_data_dir()
			
 
				-        # print('[i] Using Chrome data dir: {}'.format(os.path.abspath(CHROME_USER_DATA_DIR)))
			
 
				+            if CHROME_USER_DATA_DIR is None:
			
 
				+                CHROME_USER_DATA_DIR = find_chrome_data_dir()
			
 
				+            # print('[i] Using Chrome data dir: {}'.format(os.path.abspath(CHROME_USER_DATA_DIR)))
			
 
				 
			
 
				     CHROME_OPTIONS = {
			
 
				         'TIMEOUT': TIMEOUT,
			
--- a/archivebox/index.py
+++ b/archivebox/index.py
@@ -58,7 +58,7 @@ def write_links_index(out_dir: str, links: List[Link], finished: bool=False) ->
 
				 
			
 
				 
			
 
				 @enforce_types
			
 
				-def load_links_index(out_dir: str=OUTPUT_DIR, import_path: str=None) -> Tuple[List[Link], List[Link]]:
			
 
				+def load_links_index(out_dir: str=OUTPUT_DIR, import_path: Optional[str]=None) -> Tuple[List[Link], List[Link]]:
			
 
				     """parse and load existing index with any new links from import_path merged in"""
			
 
				 
			
 
				     existing_links: List[Link] = []
			
--- a/archivebox/parse.py
+++ b/archivebox/parse.py
@@ -32,9 +32,11 @@ from util import (
 
				     check_url_parsing_invariants,
			
 
				     TimedProgress,
			
 
				     Link,
			
 
				+    enforce_types,
			
 
				 )
			
 
				 
			
 
				 
			
 
				+@enforce_types
			
 
				 def parse_links(source_file: str) -> Tuple[List[Link], str]:
			
 
				     """parse a list of URLs with their metadata from an 
			
 
				        RSS feed, bookmarks export, or text file
			
@@ -77,6 +79,7 @@ def parse_links(source_file: str) -> Tuple[List[Link], str]:
 
				 
			
 
				 ### Import Parser Functions
			
 
				 
			
 
				+@enforce_types
			
 
				 def parse_pocket_html_export(html_file: IO[str]) -> Iterable[Link]:
			
 
				     """Parse Pocket-format bookmarks export files (produced by getpocket.com/export/)"""
			
 
				 
			
@@ -101,6 +104,7 @@ def parse_pocket_html_export(html_file: IO[str]) -> Iterable[Link]:
 
				             )
			
 
				 
			
 
				 
			
 
				+@enforce_types
			
 
				 def parse_json_export(json_file: IO[str]) -> Iterable[Link]:
			
 
				     """Parse JSON-format bookmarks export files (produced by pinboard.in/export/, or wallabag)"""
			
 
				 
			
@@ -153,6 +157,7 @@ def parse_json_export(json_file: IO[str]) -> Iterable[Link]:
 
				             )
			
 
				 
			
 
				 
			
 
				+@enforce_types
			
 
				 def parse_rss_export(rss_file: IO[str]) -> Iterable[Link]:
			
 
				     """Parse RSS XML-format files into links"""
			
 
				 
			
@@ -190,6 +195,7 @@ def parse_rss_export(rss_file: IO[str]) -> Iterable[Link]:
 
				         )
			
 
				 
			
 
				 
			
 
				+@enforce_types
			
 
				 def parse_shaarli_rss_export(rss_file: IO[str]) -> Iterable[Link]:
			
 
				     """Parse Shaarli-specific RSS XML-format files into links"""
			
 
				 
			
@@ -227,6 +233,7 @@ def parse_shaarli_rss_export(rss_file: IO[str]) -> Iterable[Link]:
 
				         )
			
 
				 
			
 
				 
			
 
				+@enforce_types
			
 
				 def parse_netscape_html_export(html_file: IO[str]) -> Iterable[Link]:
			
 
				     """Parse netscape-format bookmarks export files (produced by all browsers)"""
			
 
				 
			
@@ -251,6 +258,7 @@ def parse_netscape_html_export(html_file: IO[str]) -> Iterable[Link]:
 
				             )
			
 
				 
			
 
				 
			
 
				+@enforce_types
			
 
				 def parse_pinboard_rss_export(rss_file: IO[str]) -> Iterable[Link]:
			
 
				     """Parse Pinboard RSS feed files into links"""
			
 
				 
			
@@ -282,6 +290,7 @@ def parse_pinboard_rss_export(rss_file: IO[str]) -> Iterable[Link]:
 
				         )
			
 
				 
			
 
				 
			
 
				+@enforce_types
			
 
				 def parse_medium_rss_export(rss_file: IO[str]) -> Iterable[Link]:
			
 
				     """Parse Medium RSS feed files into links"""
			
 
				 
			
@@ -303,6 +312,7 @@ def parse_medium_rss_export(rss_file: IO[str]) -> Iterable[Link]:
 
				         )
			
 
				 
			
 
				 
			
 
				+@enforce_types
			
 
				 def parse_plain_text_export(text_file: IO[str]) -> Iterable[Link]:
			
 
				     """Parse raw links from each line in a text file"""
			
 
				 
			
--- a/archivebox/schema.py
+++ b/archivebox/schema.py
@@ -23,6 +23,10 @@ class ArchiveResult:
 
				     status: str
			
 
				     start_ts: datetime
			
 
				     end_ts: datetime
			
 
				+    schema: str = 'ArchiveResult'
			
 
				+
			
 
				+    def __post_init__(self):
			
 
				+        assert self.schema == self.__class__.__name__
			
 
				 
			
 
				     def _asdict(self):
			
 
				         return asdict(self)
			
@@ -40,9 +44,11 @@ class Link:
 
				     sources: List[str]
			
 
				     history: Dict[str, List[ArchiveResult]] = field(default_factory=lambda: {})
			
 
				     updated: Optional[datetime] = None
			
 
				+    schema: str = 'Link'
			
 
				 
			
 
				     def __post_init__(self):
			
 
				         """fix any history result items to be type-checked ArchiveResults"""
			
 
				+        assert self.schema == self.__class__.__name__
			
 
				         cast_history = {}
			
 
				         for method, method_history in self.history.items():
			
 
				             cast_history[method] = []
			
@@ -67,6 +73,7 @@ class Link:
 
				     
			
 
				     def _asdict(self, extended=False):
			
 
				         info = {
			
 
				+            'schema': 'Link',
			
 
				             'url': self.url,
			
 
				             'title': self.title or None,
			
 
				             'timestamp': self.timestamp,
			
@@ -234,12 +241,18 @@ class ArchiveIndex:
 
				     num_links: int
			
 
				     updated: str
			
 
				     links: List[Link]
			
 
				+    schema: str = 'ArchiveIndex'
			
 
				+
			
 
				+    def __post_init__(self):
			
 
				+        assert self.schema == self.__class__.__name__
			
 
				 
			
 
				     def _asdict(self):
			
 
				         return asdict(self)
			
 
				 
			
 
				 @dataclass
			
 
				 class RuntimeStats:
			
 
				+    """mutable stats counter for logging archiving timing info to CLI output"""
			
 
				+
			
 
				     skipped: int
			
 
				     succeeded: int
			
 
				     failed: int
			
--- a/archivebox/util.py
+++ b/archivebox/util.py
@@ -91,7 +91,7 @@ STATICFILE_EXTENSIONS = {
 
				     'gif', 'jpeg', 'jpg', 'png', 'tif', 'tiff', 'wbmp', 'ico', 'jng', 'bmp',
			
 
				     'svg', 'svgz', 'webp', 'ps', 'eps', 'ai',
			
 
				     'mp3', 'mp4', 'm4a', 'mpeg', 'mpg', 'mkv', 'mov', 'webm', 'm4v', 
			
 
				-    'flv', 'wmv', 'avi', 'ogg', 'ts', 'm3u8'
			
 
				+    'flv', 'wmv', 'avi', 'ogg', 'ts', 'm3u8',
			
 
				     'pdf', 'txt', 'rtf', 'rtfd', 'doc', 'docx', 'ppt', 'pptx', 'xls', 'xlsx',
			
 
				     'atom', 'rss', 'css', 'js', 'json',
			
 
				     'dmg', 'iso', 'img',
			
@@ -113,8 +113,9 @@ STATICFILE_EXTENSIONS = {
 
				 
			
 
				 def enforce_types(func):
			
 
				     """
			
 
				-    Checks parameters type signatures against arg and kwarg type hints.
			
 
				+    Enforce function arg and kwarg types at runtime using its python3 type hints
			
 
				     """
			
 
				+    # TODO: check return type as well
			
 
				 
			
 
				     @wraps(func)
			
 
				     def typechecked_function(*args, **kwargs):
			
@@ -183,6 +184,7 @@ def check_url_parsing_invariants() -> None:
 
				 
			
 
				 ### Random Helpers
			
 
				 
			
 
				+@enforce_types
			
 
				 def save_stdin_source(raw_text: str) -> str:
			
 
				     if not os.path.exists(SOURCES_DIR):
			
 
				         os.makedirs(SOURCES_DIR)
			
@@ -196,6 +198,8 @@ def save_stdin_source(raw_text: str) -> str:
 
				 
			
 
				     return source_path
			
 
				 
			
 
				+
			
 
				+@enforce_types
			
 
				 def save_remote_source(url: str, timeout: int=TIMEOUT) -> str:
			
 
				     """download a given url's content into output/sources/domain-<timestamp>.txt"""
			
 
				 
			
@@ -233,6 +237,8 @@ def save_remote_source(url: str, timeout: int=TIMEOUT) -> str:
 
				 
			
 
				     return source_path
			
 
				 
			
 
				+
			
 
				+@enforce_types
			
 
				 def fetch_page_title(url: str, timeout: int=10, progress: bool=SHOW_PROGRESS) -> Optional[str]:
			
 
				     """Attempt to guess a page's title by downloading the html"""
			
 
				     
			
@@ -255,6 +261,8 @@ def fetch_page_title(url: str, timeout: int=10, progress: bool=SHOW_PROGRESS) ->
 
				         # ))
			
 
				         return None
			
 
				 
			
 
				+
			
 
				+@enforce_types
			
 
				 def wget_output_path(link: Link) -> Optional[str]:
			
 
				     """calculate the path to the wgetted .html file, since wget may
			
 
				     adjust some paths to be different than the base_url path.
			
@@ -323,14 +331,17 @@ def wget_output_path(link: Link) -> Optional[str]:
 
				     return None
			
 
				 
			
 
				 
			
 
				+@enforce_types
			
 
				 def read_js_script(script_name: str) -> str:
			
 
				     script_path = os.path.join(PYTHON_PATH, 'scripts', script_name)
			
 
				 
			
 
				     with open(script_path, 'r') as f:
			
 
				         return f.read().split('// INFO BELOW HERE')[0].strip()
			
 
				 
			
 
				+
			
 
				 ### String Manipulation & Logging Helpers
			
 
				 
			
 
				+@enforce_types
			
 
				 def str_between(string: str, start: str, end: str=None) -> str:
			
 
				     """(<abc>12345</def>, <abc>, </def>)  ->  12345"""
			
 
				 
			
@@ -341,6 +352,7 @@ def str_between(string: str, start: str, end: str=None) -> str:
 
				     return content
			
 
				 
			
 
				 
			
 
				+@enforce_types
			
 
				 def parse_date(date: Any) -> Optional[datetime]:
			
 
				     """Parse unix timestamps, iso format, and human-readable strings"""
			
 
				     
			
@@ -435,6 +447,8 @@ def merge_links(a: Link, b: Link) -> Link:
 
				         history=history,
			
 
				     )
			
 
				 
			
 
				+
			
 
				+@enforce_types
			
 
				 def is_static_file(url: str) -> bool:
			
 
				     """Certain URLs just point to a single static file, and 
			
 
				        don't need to be re-archived in many formats
			
@@ -443,6 +457,8 @@ def is_static_file(url: str) -> bool:
 
				     # TODO: the proper way is with MIME type detection, not using extension
			
 
				     return extension(url) in STATICFILE_EXTENSIONS
			
 
				 
			
 
				+
			
 
				+@enforce_types
			
 
				 def derived_link_info(link: Link) -> dict:
			
 
				     """extend link info with the archive urls and other derived data"""
			
 
				 
			
@@ -518,6 +534,7 @@ class TimedProgress:
 
				             sys.stdout.flush()
			
 
				 
			
 
				 
			
 
				+@enforce_types
			
 
				 def progress_bar(seconds: int, prefix: str='') -> None:
			
 
				     """show timer in the form of progress bar, with percentage and seconds remaining"""
			
 
				     chunk = '█' if sys.stdout.encoding == 'UTF-8' else '#'
			
@@ -557,6 +574,7 @@ def progress_bar(seconds: int, prefix: str='') -> None:
 
				         pass
			
 
				 
			
 
				 
			
 
				+@enforce_types
			
 
				 def download_url(url: str, timeout: int=TIMEOUT) -> str:
			
 
				     """Download the contents of a remote url and return the text"""
			
 
				 
			
@@ -572,6 +590,8 @@ def download_url(url: str, timeout: int=TIMEOUT) -> str:
 
				     encoding = resp.headers.get_content_charset() or 'utf-8'
			
 
				     return resp.read().decode(encoding)
			
 
				 
			
 
				+
			
 
				+@enforce_types
			
 
				 def chmod_file(path: str, cwd: str='.', permissions: str=OUTPUT_PERMISSIONS, timeout: int=30) -> None:
			
 
				     """chmod -R <permissions> <cwd>/<path>"""
			
 
				 
			
@@ -584,6 +604,7 @@ def chmod_file(path: str, cwd: str='.', permissions: str=OUTPUT_PERMISSIONS, tim
 
				         raise Exception('Failed to chmod {}/{}'.format(cwd, path))
			
 
				 
			
 
				 
			
 
				+@enforce_types
			
 
				 def chrome_args(**options) -> List[str]:
			
 
				     """helper to build up a chrome shell command with arguments"""