@@ -0,0 +1,6 @@
+[flake8]
+ignore = D100,D101,D102,D103,D104,D105,D202,D203,D205,D400,E131,E241,E252,E266,E272,E701,E731,W293,W503,W291,W391
+select = F,E9,W
+max-line-length = 130
+max-complexity = 10
+exclude = migrations,tests,node_modules,vendor,venv,.venv,.venv2,.docker-venv
@@ -1,4 +1,6 @@
[flake8]
-ignore = D100,D101,D102,D103,D104,D105,D202,D203,D205,D400,E127,E131,E241,E252,E266,E272,E701,E731,W293,W503
-select = F,E9
-exclude = migrations,util_scripts,node_modules,venv
+exclude = migrations,tests,node_modules,vendor,static,venv,.venv,.venv2,.docker-venv
@@ -6,5 +6,6 @@ import sys
from .cli import main
+
if __name__ == '__main__':
main(args=sys.argv[1:], stdin=sys.stdin)
@@ -279,6 +279,8 @@ def load_config_val(key: str,
config: Optional[ConfigDict]=None,
env_vars: Optional[os._Environ]=None,
config_file_vars: Optional[Dict[str, str]]=None) -> ConfigValue:
+ """Parse bool, int, and str key=value pairs from env"""
config_keys_to_check = (key, *(aliases or ()))
for key in config_keys_to_check:
@@ -777,7 +779,7 @@ def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None:
stderr()
stderr(f'[!] Warning: TIMEOUT is set too low! (currently set to TIMEOUT={config["TIMEOUT"]} seconds)', color='red')
stderr(' You must allow *at least* 5 seconds for indexing and archive methods to run succesfully.')
- stderr(' (Setting it to somewhere between 30 and 300 seconds is recommended)')
+ stderr(' (Setting it to somewhere between 30 and 3000 seconds is recommended)')
stderr(' If you want to make ArchiveBox run faster, disable specific archive methods instead:')
stderr(' https://github.com/pirate/ArchiveBox/wiki/Configuration#archive-method-toggles')
@@ -24,7 +24,6 @@ class Snapshot(models.Model):
keys = ('url', 'timestamp', 'title', 'tags', 'updated')
-
def __repr__(self) -> str:
title = self.title or '-'
return f'[{self.timestamp}] {self.url[:64]} ({title[:64]})'
@@ -98,6 +98,7 @@ class Link:
updated: Optional[datetime] = None
schema: str = 'Link'
def __str__(self) -> str:
return f'[{self.timestamp}] {self.base_url} "{self.title}"'
@@ -641,8 +641,8 @@ def update(resume: Optional[float]=None,
out_dir: str=OUTPUT_DIR) -> List[Link]:
"""Import any new links from subscriptions and retry any previously failed/skipped links"""
- check_dependencies()
check_data_folder(out_dir=out_dir)
+ check_dependencies()
# Step 1: Load list of links from the existing index
# merge in and dedupe new links from import_path
@@ -990,7 +990,7 @@ def schedule(add: bool=False,
if total_runs > 60 and not quiet:
stderr('{lightyellow}[!] With the current cron config, ArchiveBox is estimated to run >{} times per year.{reset}'.format(total_runs, **ANSI))
- stderr(f' Congrats on being an enthusiastic internet archiver! 👌')
+ stderr(' Congrats on being an enthusiastic internet archiver! 👌')
stderr(' Make sure you have enough storage space available to hold all the data.')
stderr(' Using a compressed/deduped filesystem like ZFS is recommended if you plan on archiving a lot.')