Browse Source

Rename URL_(WHITE|BLACK)LIST to URL_(ALLOW|DENY)LIST

Retain aliases for old configuration files
Ross Williams 2 years ago
parent
commit
46e80dd509
4 changed files with 10 additions and 10 deletions
  1. 4 4
      archivebox/config.py
  2. 1 1
      archivebox/config_stubs.py
  3. 1 1
      archivebox/core/forms.py
  4. 4 4
      archivebox/index/__init__.py

+ 4 - 4
archivebox/config.py

@@ -82,8 +82,8 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
         'MEDIA_TIMEOUT':            {'type': int,   'default': 3600},
         'OUTPUT_PERMISSIONS':       {'type': str,   'default': '644'},
         'RESTRICT_FILE_NAMES':      {'type': str,   'default': 'windows'},
-        'URL_BLACKLIST':            {'type': str,   'default': r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$'},  # to avoid downloading code assets as their own pages
-        'URL_WHITELIST':            {'type': str,   'default': None},
+        'URL_DENYLIST':             {'type': str,   'default': r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$', 'aliases': ('URL_BLACKLIST',)},  # to avoid downloading code assets as their own pages
+        'URL_ALLOWLIST':            {'type': str,   'default': None, 'aliases': ('URL_WHITELIST',)},
         'ENFORCE_ATOMIC_WRITES':    {'type': bool,  'default': True},
         'TAG_SEPARATOR_PATTERN':    {'type': str,   'default': r'[,]'},
     },
@@ -371,8 +371,8 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
     'CONFIG_FILE':              {'default': lambda c: Path(c['CONFIG_FILE']).resolve() if c['CONFIG_FILE'] else c['OUTPUT_DIR'] / CONFIG_FILENAME},
     'COOKIES_FILE':             {'default': lambda c: c['COOKIES_FILE'] and Path(c['COOKIES_FILE']).resolve()},
     'CHROME_USER_DATA_DIR':     {'default': lambda c: find_chrome_data_dir() if c['CHROME_USER_DATA_DIR'] is None else (Path(c['CHROME_USER_DATA_DIR']).resolve() if c['CHROME_USER_DATA_DIR'] else None)},   # None means unset, so we autodetect it with find_chrome_data_dir(), but an empty string '' means the user manually set it to '', and we should store it as None
-    'URL_BLACKLIST_PTN':        {'default': lambda c: c['URL_BLACKLIST'] and re.compile(c['URL_BLACKLIST'] or '', re.IGNORECASE | re.UNICODE | re.MULTILINE)},
-    'URL_WHITELIST_PTN':        {'default': lambda c: c['URL_WHITELIST'] and re.compile(c['URL_WHITELIST'] or '', re.IGNORECASE | re.UNICODE | re.MULTILINE)},
+    'URL_DENYLIST_PTN':         {'default': lambda c: c['URL_DENYLIST'] and re.compile(c['URL_DENYLIST'] or '', re.IGNORECASE | re.UNICODE | re.MULTILINE)},
+    'URL_ALLOWLIST_PTN':        {'default': lambda c: c['URL_ALLOWLIST'] and re.compile(c['URL_ALLOWLIST'] or '', re.IGNORECASE | re.UNICODE | re.MULTILINE)},
     'DIR_OUTPUT_PERMISSIONS':   {'default': lambda c: c['OUTPUT_PERMISSIONS'].replace('6', '7').replace('4', '5')},
 
     'ARCHIVEBOX_BINARY':        {'default': lambda c: sys.argv[0] or bin_path('archivebox')},

+ 1 - 1
archivebox/config_stubs.py

@@ -41,7 +41,7 @@ class ConfigDict(BaseConfig, total=False):
     MEDIA_TIMEOUT: int
     OUTPUT_PERMISSIONS: str
     RESTRICT_FILE_NAMES: str
-    URL_BLACKLIST: str
+    URL_DENYLIST: str
 
     SECRET_KEY: Optional[str]
     BIND_ADDR: str

+ 1 - 1
archivebox/core/forms.py

@@ -41,7 +41,7 @@ class AddLinkForm(forms.Form):
     #     label="Exclude patterns",
     #     min_length='1',
     #     required=False,
-    #     initial=URL_BLACKLIST,
+    #     initial=URL_DENYLIST,
     # )
     # timeout = forms.IntegerField(
     #     initial=TIMEOUT,

+ 4 - 4
archivebox/index/__init__.py

@@ -22,8 +22,8 @@ from ..config import (
     JSON_INDEX_FILENAME,
     OUTPUT_DIR,
     TIMEOUT,
-    URL_BLACKLIST_PTN,
-    URL_WHITELIST_PTN,
+    URL_DENYLIST_PTN,
+    URL_ALLOWLIST_PTN,
     stderr,
     OUTPUT_PERMISSIONS
 )
@@ -142,9 +142,9 @@ def archivable_links(links: Iterable[Link]) -> Iterable[Link]:
             continue
         if scheme(link.url) not in ('http', 'https', 'ftp'):
             continue
-        if URL_BLACKLIST_PTN and URL_BLACKLIST_PTN.search(link.url):
+        if URL_DENYLIST_PTN and URL_DENYLIST_PTN.search(link.url):
             continue
-        if URL_WHITELIST_PTN and (not URL_WHITELIST_PTN.search(link.url)):
+        if URL_ALLOWLIST_PTN and (not URL_ALLOWLIST_PTN.search(link.url)):
             continue
 
         yield link